I’m trying to add a function to my model that lets me add neurons after the model has already been built. The weights work just fine, but the biases are a problem: the changes I make to the bias (increasing its size) don’t persist. So it works the first time I add a neuron, but not the second time.
Here’s my code for the model:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from random import randrange

LR = 1e-3  # learning rate; defined elsewhere in my project

class QNet(nn.Module):
    def __init__(self, input_size, output_size):
        super().__init__()
        #
        # Build the model
        #
        self.layers = nn.ModuleList()
        self.layer_sizes = []
        # Add randomized hidden layers
        self.num_layers = 0
        for i in range(randrange(1, 3)):
            size = randrange(32, 256)
            self.layers.append(nn.Linear(input_size, size))
            input_size = size  # For the next layer
            self.layer_sizes.append(input_size)
            self.num_layers += 1
        # Output layer
        self.layers.append(nn.Linear(input_size, output_size))
        self.layer_sizes.append(input_size)
        self.num_layers += 1
        #
        # Set whether or not to use the GPU
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        #
        # Set optimizer and loss
        self.optimizer = optim.Adam(self.parameters(), lr=LR)
        self.criterion = nn.MSELoss()
        self.model_info()

    def forward(self, x):
        '''Get output from model.'''
        for i, layer in enumerate(self.layers): x = layer(x) if i==self.num_layers else layer(F.relu(x))
        return x
    def add_neurons(self, coords, num_neurons, duplicate=False):
        '''Add new neurons to a layer in the model.'''
        #
        # Update weights
        # Copy the current weights
        weights = [layer.weight.data for layer in self.layers]
        # Make the new weights you'll be adding for the I/O layers
        hl_input = torch.zeros([num_neurons, weights[coords].shape[1]])
        if duplicate: hl_input = weights[coords][1, (weights[coords].shape[1]-num_neurons):weights[coords].shape[1]]
        else: nn.init.xavier_uniform_(hl_input, gain=nn.init.calculate_gain('relu'))
        hl_output = torch.zeros([weights[coords+1].shape[0], num_neurons])
        if duplicate: hl_output = weights[coords][0, (weights[coords+1].shape[1]-num_neurons):weights[coords].shape[1]]
        else: nn.init.xavier_uniform_(hl_output, gain=nn.init.calculate_gain('relu'))
        # Concatenate the old I/O weights with the new I/O weights
        new_wi = torch.cat([weights[coords], hl_input], dim=0)
        new_wo = torch.cat([weights[coords+1], hl_output], dim=1)
        # Reset weight and grad variables to the new size
        self.layers[coords] = nn.Linear(weights[coords].shape[1], self.layer_sizes[coords])
        self.layers[coords+1] = nn.Linear(self.layer_sizes[coords], weights[coords+1].shape[0])
        # Set the weight data to the new values
        self.layers[coords].weight = torch.nn.Parameter(new_wi)
        self.layers[coords+1].weight = torch.nn.Parameter(new_wo)
        self.model_info()
        #
        # Update bias
        biases = [layer.bias.data for layer in self.layers]
        print(biases[coords].shape)
        # Make the new bias values you'll be adding
        hl_b = torch.zeros(num_neurons)
        if duplicate: hl_b = biases[coords][(biases[coords].shape[0]-num_neurons):biases[coords].shape[0]]
        else: nn.init.normal_(hl_b)
        # Concatenate the old bias with the new bias values
        new_b = torch.cat([biases[coords], hl_b], dim=0)
        # Set the bias data to the new values
        self.layers[coords].bias = torch.nn.Parameter(new_b)
        self.model_info()
        print("==============================================================================")
        print("==============================================================================")
        print("==============================================================================")
        print("==============================================================================")
    def model_info(self):
        '''Get your model's information (layers, sizes, parameters, etc.).'''
        print("\n==================================================")
        # Print the model's state_dict
        print("Model's state_dict:")
        for param_tensor in self.state_dict():
            print(param_tensor, "\t", self.state_dict()[param_tensor].size())
        print("==================================================\n")
And here’s my console output:
==================================================
Model's state_dict:
layers.0.weight torch.Size([133, 19])
layers.0.bias torch.Size([133])
layers.1.weight torch.Size([105, 133])
layers.1.bias torch.Size([105])
layers.2.weight torch.Size([3, 105])
layers.2.bias torch.Size([3])
==================================================
==================================================
Model's state_dict:
layers.0.weight torch.Size([134, 19])
layers.0.bias torch.Size([133])
layers.1.weight torch.Size([105, 134])
layers.1.bias torch.Size([105])
layers.2.weight torch.Size([3, 105])
layers.2.bias torch.Size([3])
==================================================
torch.Size([133])
==================================================
Model's state_dict:
layers.0.weight torch.Size([134, 19])
layers.0.bias torch.Size([134])
layers.1.weight torch.Size([105, 134])
layers.1.bias torch.Size([105])
layers.2.weight torch.Size([3, 105])
layers.2.bias torch.Size([3])
==================================================
==============================================================================
==============================================================================
==============================================================================
==============================================================================
==================================================
Model's state_dict:
layers.0.weight torch.Size([135, 19])
layers.0.bias torch.Size([133])
layers.1.weight torch.Size([105, 135])
layers.1.bias torch.Size([105])
layers.2.weight torch.Size([3, 105])
layers.2.bias torch.Size([3])
==================================================
torch.Size([133])
==================================================
Model's state_dict:
layers.0.weight torch.Size([135, 19])
layers.0.bias torch.Size([134])
layers.1.weight torch.Size([105, 135])
layers.1.bias torch.Size([105])
layers.2.weight torch.Size([3, 105])
layers.2.bias torch.Size([3])
==================================================
As you can see, it works fine the first time, but the bias doesn’t stay updated: on the second call, layers.0.weight has grown to torch.Size([135, 19]) while layers.0.bias has dropped back to torch.Size([133]) before my update brings it to torch.Size([134]). That mismatch between the weight and bias sizes gives this error on the next forward pass:
Traceback (most recent call last):
File "/home/grant/Documents/GitHub/DeepLearningSnake/Python/main.py", line 6, in <module>
start_menu.main_menu()
File "/home/grant/Documents/GitHub/DeepLearningSnake/Python/menus.py", line 141, in main_menu
self.game_type_selection_menu()
File "/home/grant/Documents/GitHub/DeepLearningSnake/Python/menus.py", line 221, in game_type_selection_menu
run_game.run_dqn()
File "/home/grant/Documents/GitHub/DeepLearningSnake/Python/run.py", line 138, in run_dqn
agent = self._run_episode(agent)
File "/home/grant/Documents/GitHub/DeepLearningSnake/Python/run.py", line 180, in _run_episode
final_move = agent.get_action(state_old)
File "/home/grant/Documents/GitHub/DeepLearningSnake/Python/agent.py", line 155, in get_action
prediction = self.model(state0)
File "/home/grant/anaconda3/envs/torch/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/home/grant/Documents/GitHub/DeepLearningSnake/Python/model.py", line 63, in forward
for i, layer in enumerate(self.layers): x = layer(x) if i==self.num_layers else layer(F.relu(x))
File "/home/grant/anaconda3/envs/torch/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/home/grant/anaconda3/envs/torch/lib/python3.9/site-packages/torch/nn/modules/linear.py", line 103, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: The size of tensor a (135) must match the size of tensor b (134) at non-singleton dimension 0
Could somebody explain what I need to do to make the biases stay updated, or at least set me on the right track to figuring it out?