Hi!
I am building a ladder-like autoencoder network for video prediction.
I created two classes: one for the encoder and one for the decoder.
I need these networks to stay separate at inference time so that I can access some intermediate values,
but I still need to train the two networks with the same loss:
- I pass the parameters of both networks to the same optimizer (a standalone sketch of this setup is shown below, after the traceback), like this:
param_S1 = list(E1.parameters()) + list(D1.parameters())
net_optimizer_S1 = torch.optim.SGD(param_S1, lr=0.01, momentum=0.9)
- then I train the networks: the inference works fine and so does the loss calculation, but when the code reaches the .backward() call, it gives:
File "RobNET_V2.py", line 181, in <module>
loss_S1.backward()
File "/usr/local/lib/python3.5/dist-packages/torch/tensor.py", line 93, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph)
File "/usr/local/lib/python3.5/dist-packages/torch/autograd/__init__.py", line 90, in backward
allow_unreachable=True) # allow_unreachable flag
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
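I think (but I am not sure) that this error shows up when the loss is built from tensors that are not attached to any graph. A tiny example that gives me the exact same RuntimeError (just my own test, not my real code):

import torch
import torch.nn as nn

a = torch.zeros(10)          # plain tensor, requires_grad=False, no grad_fn
b = torch.zeros(10)
loss = nn.MSELoss()(a, b)    # the loss also has no grad_fn
loss.backward()              # RuntimeError: element 0 of tensors does not require grad ...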
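For what it is worth, sharing one optimizer across two modules seems to work fine on its own. Here is a minimal standalone sketch of that setup (the names enc/dec and the sizes are made up for illustration, they are not my real networks):

import torch
import torch.nn as nn

enc = nn.Linear(8, 4)   # stand-in for my encoder
dec = nn.Linear(4, 8)   # stand-in for my decoder

# one optimizer over the parameters of both modules
params = list(enc.parameters()) + list(dec.parameters())
opt = torch.optim.SGD(params, lr=0.01, momentum=0.9)

x = torch.randn(8)
y = dec(enc(x))              # output stays attached to both modules
loss = nn.MSELoss()(y, x)    # reconstruction loss
loss.backward()              # gradients reach both enc and dec
opt.step()

So the joint optimizer itself does not seem to be the problem.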
Can you help me, please? ^^'
My code:
import cv2
import numpy as np
import torch
import torch.nn.init as init
import torch.nn as nn
from torch import optim
from torchvision import transforms
#-------------------------------------------------------------------------------
# Predictive Network
#-------------------------------------------------------------------------------
# Local network size
INPUT = 32*32
HIDDEN = 8*8
OUTPUT = INPUT
# Readout size
READ = HIDDEN
OUT_READ = 2
class INPUT_ENCODER(nn.Module):
    def __init__(self, Encoder_size, Hidden_size):
        super(INPUT_ENCODER, self).__init__()
        # first encoder input Xt
        self.E1 = nn.Linear(Encoder_size, Hidden_size)
        # secondary feedback input
        self.ES1 = nn.Linear(Encoder_size, Hidden_size)
        # recurrent input H1-1
        self.ER1 = nn.Linear(Hidden_size, Hidden_size)
        # activation Sigmoid
        self.ACT = nn.Sigmoid()

    def forward(self, Xt, FB1, Last_H1):
        # Encoder pass
        out_E1 = self.E1(Xt)
        out_ES1 = self.ES1(FB1)
        out_ER1 = self.ER1(Last_H1)
        sum_H1 = out_E1 + out_ES1 + out_ER1
        H1 = self.ACT(sum_H1)
        # output stack
        return H1
class LAYER_ENCODER(nn.Module):
    def __init__(self, Encoder_size, Hidden_size):
        super(LAYER_ENCODER, self).__init__()
        # first encoder input Hl-1
        self.EN = nn.Linear(Hidden_size, Hidden_size)
        # secondary feedback input
        self.ESN = nn.Linear(Encoder_size, Hidden_size)
        # recurrent input Hn-1
        self.ERN = nn.Linear(Hidden_size, Hidden_size)
        # activation Sigmoid
        self.ACT = nn.Sigmoid()

    def forward(self, Ht, FBN, Last_Hn):
        # Encoder pass
        out_EN = self.EN(Ht)
        out_ESN = self.ESN(FBN)
        out_ERN = self.ERN(Last_Hn)
        sum_Hn = out_EN + out_ESN + out_ERN
        Hn = self.ACT(sum_Hn)
        # output stack
        return Hn
class LAYER_DECODER(nn.Module):
    def __init__(self, Decoder_size, Hidden_size):
        super(LAYER_DECODER, self).__init__()
        # first decoder input Hn
        self.DN = nn.Linear(Hidden_size, Decoder_size)
        # secondary superior input
        self.DSN = nn.Linear(Decoder_size, Decoder_size)
        # activation Sigmoid
        self.ACT = nn.Sigmoid()

    def forward(self, Ht, superior_in):
        # Decoder pass
        out_DN = self.DN(Ht)
        out_DSN = self.DSN(superior_in)
        sum_Yn = out_DN + out_DSN
        Yn = self.ACT(sum_Yn)
        # output stack
        return Yn
#-------------------------------------------------------------------------------
# Motor/action network
#-------------------------------------------------------------------------------
class Readout(nn.Module):
    def __init__(self, Hidden_size, external_out):
        # supervised readout layer for a task-specific function
        super(Readout, self).__init__()
        # simple linear readout with a sigmoid applied to its input
        self.readout = nn.Linear(Hidden_size, external_out)
        self.act_readout = nn.Sigmoid()

    def forward(self, input_read):
        return self.readout(self.act_readout(input_read))
#-------------------------------------------------------------------------------
# Initialisation part:
#-------------------------------------------------------------------------------
# init stacked autoencoder => 3 stacks (3 encoder / 3 decoder)
E1 = INPUT_ENCODER(INPUT, HIDDEN)
E2 = LAYER_ENCODER(INPUT, HIDDEN)
E3 = LAYER_ENCODER(INPUT, HIDDEN)
D1 = LAYER_DECODER(OUTPUT, HIDDEN)
D2 = LAYER_DECODER(OUTPUT, HIDDEN)
D3 = LAYER_DECODER(OUTPUT, HIDDEN)
print(E1, E2, E3, D1, D2, D3)
# init readout for high level command part
# here we want to control a simple robot by given an UR/Tau command to
# the low-level trajectory/speed/torque controller
READOUT = Readout(HIDDEN, OUT_READ)
print(READOUT)
# init the loss function
loss_function = nn.MSELoss(reduction='elementwise_mean')
# init stacked AE parameters
param_S1 = list(E1.parameters()) + list(D1.parameters())
param_S2 = list(E2.parameters()) + list(D2.parameters())
param_S3 = list(E3.parameters()) + list(D3.parameters())
# init the optimization function
net_optimizer_S1 = torch.optim.SGD(param_S1, lr=0.01, momentum=0.9)
net_optimizer_S2 = torch.optim.SGD(param_S2, lr=0.01, momentum=0.9)
net_optimizer_S3 = torch.optim.SGD(param_S3, lr=0.01, momentum=0.9)
#net_optimizer = optim.Adam(RobNET_ALPHA.parameters(), lr=1e-3)
readout_optimizer = torch.optim.SGD(READOUT.parameters(), lr=0.01, momentum=0.9)
# init the recurrent part of the network
last_H1 = torch.zeros(HIDDEN)
last_H2 = torch.zeros(HIDDEN)
last_H3 = torch.zeros(HIDDEN)
# init the feedback
last_Y1 = torch.zeros(OUTPUT)
last_Y2 = torch.zeros(OUTPUT)
last_Y3 = torch.zeros(OUTPUT)
# no superior input for decoder 3
SUP_L3 = torch.zeros(OUTPUT)
#-------------------------------------------------------------------------------
# Learning/inference algorithm :
#-------------------------------------------------------------------------------
cam = cv2.VideoCapture(0)
while(1):
    # zero the parameter gradients
    net_optimizer_S1.zero_grad()
    net_optimizer_S2.zero_grad()
    net_optimizer_S3.zero_grad()
    # capture state Xt
    ret, frame = cam.read()
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    frame = cv2.resize(frame, (32, 32))
    Xt = torch.from_numpy(frame)
    Xt = Xt.type(torch.FloatTensor)
    Xt = Xt.view(INPUT)
    # forward process
    # encoder pass
    H1 = E1(Xt, last_Y1, last_H1)
    H2 = E2(H1, last_Y2, last_H2)
    H3 = E3(H2, last_Y3, last_H3)
    # decoder pass
    Y3 = D3(H3, SUP_L3)
    Y2 = D2(H2, Y3)
    Y1 = D1(H1, Y2)
    # calculate error loss
    # train first AE
    # TODO: element 0 of tensors does not require grad and does not have a grad_fn
    # maybe => loss = Variable(loss, requires_grad = True) ?
    loss_S1 = loss_function(last_Y1, Xt)
    loss_S1.backward()
    net_optimizer_S1.step()
    # train second AE
    loss_S2 = loss_function(last_Y2, Y2)
    loss_S2.backward()
    net_optimizer_S2.step()
    # train third AE
    loss_S3 = loss_function(last_Y3, Y3)
    loss_S3.backward()
    net_optimizer_S3.step()
    # update recurrent element
    last_H1 = H1
    last_H2 = H2
    last_H3 = H3
    # update feedback
    last_Y1 = Y1
    last_Y2 = Y2
    last_Y3 = Y3