Bias not staying updated after changing it

I’m trying to add a method to my model that lets me add neurons after the model has already been built. The weights are working just fine, but the biases are a problem: for some reason, the changes I make to a bias (growing it to the new size) don’t stick. It works the first time I add a neuron, but not the second time.

Here’s my code for the model:

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from random import randrange


class QNet(nn.Module):
    def __init__(self, input_size, output_size):
        super().__init__()

        # 
        # Build the model
        #
        self.layers = nn.ModuleList()
        self.layer_sizes = []

        # Add hidden layers
        self.num_layers = 0
        # Randomized hidden layers
        for i in range(randrange(1, 3)):
            size = randrange(32, 256)
            self.layers.append(nn.Linear(input_size, size))
            input_size = size  # For the next layer
            self.layer_sizes.append(input_size)
            self.num_layers += 1
        
        # Output layer
        self.layers.append(nn.Linear(input_size, output_size))
        self.layer_sizes.append(input_size)
        self.num_layers += 1

        # 
        # Set whether or not to use gpu
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        
        #
        # Set optimizer and loss
        self.optimizer = optim.Adam(self.parameters(), lr=LR)
        self.criterion = nn.MSELoss()

        self.model_info()


    def forward(self, x):
        '''Get output from model.'''
        #for layer in self.layers: x = layer(x)
        for i, layer in enumerate(self.layers): x = layer(x) if i==self.num_layers else layer(F.relu(x))
        return x


    def add_neurons(self, coords, num_neurons, duplicate=False):
        '''Add new neurons to a layer in the model'''
        #
        # Update weights

        # Copy the current weights
        weights = [layer.weight.data for layer in self.layers]

        # Make the new weights you'll be adding for the I/O layers
        hl_input = torch.zeros([num_neurons, weights[coords].shape[1]])
        if duplicate: hl_input = weights[coords][1, (weights[coords].shape[1]-num_neurons):weights[coords].shape[1]]
        else: nn.init.xavier_uniform_(hl_input, gain=nn.init.calculate_gain('relu'))

        hl_output = torch.zeros([weights[1].shape[0], num_neurons])
        if duplicate: hl_output = weights[coords][0, (weights[coords+1].shape[1]-num_neurons):weights[coords].shape[1]]
        else: nn.init.xavier_uniform_(hl_input, gain=nn.init.calculate_gain('relu'))

        # Concatenate the old I/O weights with the new I/O weights
        new_wi = torch.cat([weights[coords], hl_input], dim=0)
        new_wo = torch.cat([weights[coords+1], hl_output], dim=1)

        # Reset weight and grad variables to new size
        self.layers[coords] = nn.Linear(weights[coords].shape[1], self.layer_sizes[coords])
        self.layers[coords+1] = nn.Linear(self.layer_sizes[coords], weights[coords+1].shape[0])

        # Set the weight data to new values
        self.layers[coords].weight = torch.nn.Parameter(new_wi)
        self.layers[coords+1].weight = torch.nn.Parameter(new_wo)

        self.model_info()
        #
        # Update bias
        biases = [layer.bias.data for layer in self.layers]

        print(biases[coords].shape)

        # Make the new bias entries you'll be adding
        hl_b = torch.zeros(num_neurons)
        if duplicate: hl_b = biases[coords][(biases[coords].shape[0]-num_neurons):biases[coords].shape[0]]
        else: nn.init.normal_(hl_b)

        # Concatenate the old bias with the new entries
        new_b = torch.cat([biases[coords], hl_b], dim=0)

        # Set the bias data to new values
        self.layers[coords].bias = torch.nn.Parameter(new_b)

        self.model_info()

        print("==============================================================================")
        print("==============================================================================")
        print("==============================================================================")
        print("==============================================================================")


    def model_info(self):
        '''Get your model's information (layers, sizes, parameters, etc).'''
        # Break
        print("\n==================================================")

        # Print model's state_dict
        print("Model's state_dict:")
        for param_tensor in self.state_dict():
            print(param_tensor, "\t", self.state_dict()[param_tensor].size())
        print("==================================================\n")

And here’s my console output:

==================================================
Model's state_dict:
layers.0.weight 	 torch.Size([133, 19])
layers.0.bias 	 torch.Size([133])
layers.1.weight 	 torch.Size([105, 133])
layers.1.bias 	 torch.Size([105])
layers.2.weight 	 torch.Size([3, 105])
layers.2.bias 	 torch.Size([3])
==================================================


==================================================
Model's state_dict:
layers.0.weight 	 torch.Size([134, 19])
layers.0.bias 	 torch.Size([133])
layers.1.weight 	 torch.Size([105, 134])
layers.1.bias 	 torch.Size([105])
layers.2.weight 	 torch.Size([3, 105])
layers.2.bias 	 torch.Size([3])
==================================================

torch.Size([133])

==================================================
Model's state_dict:
layers.0.weight 	 torch.Size([134, 19])
layers.0.bias 	 torch.Size([134])
layers.1.weight 	 torch.Size([105, 134])
layers.1.bias 	 torch.Size([105])
layers.2.weight 	 torch.Size([3, 105])
layers.2.bias 	 torch.Size([3])
==================================================

==============================================================================
==============================================================================
==============================================================================
==============================================================================

==================================================
Model's state_dict:
layers.0.weight 	 torch.Size([135, 19])
layers.0.bias 	 torch.Size([133])
layers.1.weight 	 torch.Size([105, 135])
layers.1.bias 	 torch.Size([105])
layers.2.weight 	 torch.Size([3, 105])
layers.2.bias 	 torch.Size([3])
==================================================

torch.Size([133])

==================================================
Model's state_dict:
layers.0.weight 	 torch.Size([135, 19])
layers.0.bias 	 torch.Size([134])
layers.1.weight 	 torch.Size([105, 135])
layers.1.bias 	 torch.Size([105])
layers.2.weight 	 torch.Size([3, 105])
layers.2.bias 	 torch.Size([3])
==================================================

As you can see, it works fine the first time, but the bias doesn’t stay updated, so the second call produces a size mismatch between the weight and the bias, which raises this error:

Traceback (most recent call last):
  File "/home/grant/Documents/GitHub/DeepLearningSnake/Python/main.py", line 6, in <module>
    start_menu.main_menu()
  File "/home/grant/Documents/GitHub/DeepLearningSnake/Python/menus.py", line 141, in main_menu
    self.game_type_selection_menu()
  File "/home/grant/Documents/GitHub/DeepLearningSnake/Python/menus.py", line 221, in game_type_selection_menu
    run_game.run_dqn()
  File "/home/grant/Documents/GitHub/DeepLearningSnake/Python/run.py", line 138, in run_dqn
    agent = self._run_episode(agent)
  File "/home/grant/Documents/GitHub/DeepLearningSnake/Python/run.py", line 180, in _run_episode
    final_move = agent.get_action(state_old)
  File "/home/grant/Documents/GitHub/DeepLearningSnake/Python/agent.py", line 155, in get_action
    prediction = self.model(state0)
  File "/home/grant/anaconda3/envs/torch/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/grant/Documents/GitHub/DeepLearningSnake/Python/model.py", line 63, in forward
    for i, layer in enumerate(self.layers): x = layer(x) if i==self.num_layers else layer(F.relu(x))
  File "/home/grant/anaconda3/envs/torch/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/grant/anaconda3/envs/torch/lib/python3.9/site-packages/torch/nn/modules/linear.py", line 103, in forward
    return F.linear(input, self.weight, self.bias)
RuntimeError: The size of tensor a (135) must match the size of tensor b (134) at non-singleton dimension 0

Could somebody explain what I need to do to make the biases stay updated, or at least point me in the right direction?

I believe this is happening because the bias is actually being overwritten each time the layer is recreated via self.layers[coords] = nn.Linear(weights[coords].shape[1], self.layer_sizes[coords]), and layer_sizes doesn’t appear to be updated, so the fresh layer comes back with a bias of the old size. Something as simple as moving biases = [layer.bias.data for layer in self.layers] above that line should fix the issue, since you would then snapshot the grown bias before it gets thrown away.
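
Roughly, the reordering would look like this (an untested sketch, not a drop-in replacement): take both snapshots before any layer object is replaced, so the bias grown by the previous call is still there to copy from.

def add_neurons(self, coords, num_neurons, duplicate=False):
    '''Sketch: same logic as before, but both snapshots happen first.'''
    # Snapshot BOTH weights and biases before any nn.Linear is
    # recreated, so the previously grown bias isn't lost
    weights = [layer.weight.data for layer in self.layers]
    biases = [layer.bias.data for layer in self.layers]
    # ... build new_wi, new_wo, and new_b from these snapshots,
    # then assign them as nn.Parameter as before ...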

However, if training happens between these calls, I would be careful with the optimizer (e.g., check that it is still correctly tracking the parameters once they have been replaced).
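
The simplest safe option is probably to rebuild the optimizer right after the resize, reusing the Adam setup from the question (a sketch; note that recreating it this way discards Adam’s accumulated moment estimates for all parameters):

# The old optimizer still references (and keeps Adam state for) the
# replaced tensors, so recreate it over the model's current parameters
self.optimizer = optim.Adam(self.parameters(), lr=LR)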


@eqy was right. Here’s the modified code (tested and working) for anybody who wants to do something like this:

def add_neurons(self, coords, num_neurons, duplicate=True):
    '''Add new neurons to a layer in the model'''
    #
    # Copy the current weights and biases
    weights = [layer.weight.data for layer in self.layers]
    biases = [layer.bias.data for layer in self.layers]

    #
    # Get modified weights

    # Make the new weights you'll be adding for the I/O layers
    hl_input = torch.zeros([num_neurons, weights[coords].shape[1]])
    if duplicate: hl_input = weights[coords][(weights[coords].shape[0]-num_neurons):(weights[coords].shape[0]), :]
    else: nn.init.xavier_uniform_(hl_input, gain=nn.init.calculate_gain('relu'))

    hl_output = torch.zeros([weights[coords+1].shape[0], num_neurons])
    if duplicate: hl_output = weights[coords+1][:, (weights[coords+1].shape[1]-num_neurons):(weights[coords+1].shape[1])]
    else: nn.init.xavier_uniform_(hl_output, gain=nn.init.calculate_gain('relu'))

    # Concatenate the old I/O weights with the new I/O weights
    new_wi = torch.cat([weights[coords], hl_input], dim=0)
    new_wo = torch.cat([weights[coords+1], hl_output], dim=1)

    #
    # Get modified bias

    # Make the new bias entries you'll be adding
    hl_b = torch.zeros(num_neurons)
    if duplicate: hl_b = biases[coords][(biases[coords].shape[0]-num_neurons):biases[coords].shape[0]]
    else: nn.init.normal_(hl_b)

    # Concatenate the old bias with the new entries
    new_b = torch.cat([biases[coords], hl_b], dim=0)

    #
    # Update the weights and biases
    self.layers[coords].weight = torch.nn.Parameter(new_wi)
    self.layers[coords+1].weight = torch.nn.Parameter(new_wo)
    self.layers[coords].bias = torch.nn.Parameter(new_b)
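
For example, widening the first hidden layer by one duplicated neuron would look like this (a usage sketch; the input/output sizes match the state_dict above, and the optimizer rebuild follows @eqy’s note rather than being part of the tested code):

model = QNet(input_size=19, output_size=3)
model.add_neurons(coords=0, num_neurons=1, duplicate=True)

# Recreate the optimizer so it tracks the newly added Parameters
model.optimizer = optim.Adam(model.parameters(), lr=LR)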