I’m trying to prepend a new layer to an existing network (as its first layer) and train it on the original input. When the new layer is convolutional everything works perfectly, but when I change it to a linear (fully connected) layer the training no longer works. Any ideas why?
Here is the whole network:
class ActorCritic(torch.nn.Module):  # original model
    """Actor-critic network: conv feature stack -> LSTM cell -> two linear heads."""

    def __init__(self, num_inputs, action_space):
        super(ActorCritic, self).__init__()
        # Four stride-2 convolutions halve the spatial size each time
        # (42 -> 21 -> 11 -> 6 -> 3 for a 42x42 input), giving 32*3*3 features.
        self.conv1 = nn.Conv2d(num_inputs, 32, 3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
        self.conv4 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
        self.lstm = nn.LSTMCell(32 * 3 * 3, 256)
        num_outputs = action_space.n
        # Heads share the LSTM hidden state: scalar value and action logits.
        self.critic_linear = nn.Linear(256, 1)
        self.actor_linear = nn.Linear(256, num_outputs)

    def forward(self, inputs):
        # inputs is a pair: (frame batch, (LSTM hidden state, LSTM cell state)).
        frame, (hx, cx) = inputs
        features = frame
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            features = F.elu(conv(features))
        features = features.view(-1, 32 * 3 * 3)
        hx, cx = self.lstm(features, (hx, cx))
        return self.critic_linear(hx), self.actor_linear(hx), (hx, cx)
class TLModel(torch.nn.Module):  # new model
    """Prepend a trainable fully connected layer to a pretrained network.

    Each frame is flattened, passed through the linear layer with an ELU
    activation, then reshaped back to image form and fed to the pretrained
    model unchanged.
    """

    def __init__(self, pretrained_model, num_inputs):
        super(TLModel, self).__init__()
        # 42*42 = 1764 pixels mapped to the same number of outputs.
        # NOTE(review): unlike the 1x1-conv variant, a randomly initialized
        # dense layer scrambles the spatial structure the pretrained conv
        # stack expects, which is the likely reason training stalls with this
        # layer. Consider initializing near identity (eye weight, zero bias)
        # so the new layer starts as a no-op — TODO confirm.
        self.new_layer = nn.Linear(1 * 1 * 42 * 42, 1 * 1 * 42 * 42)
        self.pretrained_model = pretrained_model

    def forward(self, inputs):
        # inputs is a pair: (frame batch, (LSTM hidden state, LSTM cell state)).
        inputs, (hx, cx) = inputs
        # -1 keeps the batch dimension; the flattened size per sample is 42*42.
        x = F.elu(self.new_layer(inputs.view(-1, 1 * 1 * 42 * 42)))
        # Bug fix: reshape with -1 on the batch axis. The original
        # view(1, 1, 42, 42) hard-coded batch size 1 and raised for any
        # other batch size.
        return self.pretrained_model.forward((x.view(-1, 1, 42, 42), (hx, cx)))
I tried different activation functions (not just ELU). It works when the new layer is convolutional:
class TLModel(torch.nn.Module):
    """Variant that prepends a 1x1 convolution to the pretrained network."""

    def __init__(self, pretrained_model, num_inputs):
        super(TLModel, self).__init__()
        # A 1x1 conv maps num_inputs channels to num_inputs channels while
        # preserving the spatial layout of the frame.
        self.new_layer = nn.Conv2d(num_inputs, num_inputs, 1)
        self.pretrained_model = pretrained_model

    def forward(self, inputs):
        # inputs is a pair: (frame batch, LSTM (hidden, cell) state tuple).
        frame, state = inputs
        activated = F.elu(self.new_layer(frame))
        return self.pretrained_model.forward((activated, state))
The number of input channels is 1, and each input tensor has size 1x1x42x42 (batch x channels x height x width).