RuntimeError: Input type (torch.cuda.FloatTensor) and bias type (torch.FloatTensor) should be the same

enterthevoidf22 · October 2, 2020, 11:09am

i got this error when doing the cs231n assignment2 on jupyter.
i checked and all my parameters and inputs are sent to device, which is cuda:0.

the error message :

RuntimeError                              Traceback (most recent call last)
<ipython-input-33-851ef5a1f987> in <module>
     29 
     30 params = [conv_w1, conv_b1, conv_w2, conv_b2, fc_w, fc_b]
---> 31 train_part2(three_layer_convnet, params, learning_rate)

<ipython-input-32-3e676edf85f7> in train_part2(model_fn, params, learning_rate)
     23         for i in params:
     24             print(i.device)
---> 25         scores = model_fn(x, params)
     26         loss = F.cross_entropy(scores, y)
     27 

<ipython-input-28-279d7a4f4cea> in three_layer_convnet(x, params)
     52     out = conv1(x)
     53     out = relu(out)
---> 54     out = conv2(out)
     55     out = relu(out)
     56     out = flatten(out)

~\.conda\envs\torch_env\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    530             result = self._slow_forward(*input, **kwargs)
    531         else:
--> 532             result = self.forward(*input, **kwargs)
    533         for hook in self._forward_hooks.values():
    534             hook_result = hook(self, input, result)

~\.conda\envs\torch_env\lib\site-packages\torch\nn\modules\conv.py in forward(self, input)
    343 
    344     def forward(self, input):
--> 345         return self.conv2d_forward(input, self.weight)
    346 
    347 class Conv3d(_ConvNd):

~\.conda\envs\torch_env\lib\site-packages\torch\nn\modules\conv.py in conv2d_forward(self, input, weight)
    340                             _pair(0), self.dilation, self.groups)
    341         return F.conv2d(input, weight, self.bias, self.stride,
--> 342                         self.padding, self.dilation, self.groups)
    343 
    344     def forward(self, input):

RuntimeError: Input type (torch.cuda.FloatTensor) and bias type (torch.FloatTensor) should be the same

code leading to error:

def train_part2(model_fn, params, learning_rate):
    """
    Train a model on CIFAR-10.
    
    Inputs:
    - model_fn: A Python function that performs the forward pass of the model.
      It should have the signature scores = model_fn(x, params) where x is a
      PyTorch Tensor of image data, params is a list of PyTorch Tensors giving
      model weights, and scores is a PyTorch Tensor of shape (N, C) giving
      scores for the elements in x.
    - params: List of PyTorch Tensors giving weights for the model
    - learning_rate: Python scalar giving the learning rate to use for SGD
    
    Returns: Nothing
    """
    for t, (x, y) in enumerate(loader_train):
        # Move the data to the proper device (GPU or CPU)
        x = x.to(device=device, dtype=dtype)
        y = y.to(device=device, dtype=torch.long)

        # Forward pass: compute scores and loss
        print(x.device)
        for i in params:
            print(i.device)
        scores = model_fn(x, params)
        loss = F.cross_entropy(scores, y)

        # Backward pass: PyTorch figures out which Tensors in the computational
        # graph has requires_grad=True and uses backpropagation to compute the
        # gradient of the loss with respect to these Tensors, and stores the
        # gradients in the .grad attribute of each Tensor.
        loss.backward()

        # Update parameters. We don't want to backpropagate through the
        # parameter updates, so we scope the updates under a torch.no_grad()
        # context manager to prevent a computational graph from being built.
        with torch.no_grad():
            for w in params:
                w -= learning_rate * w.grad

                # Manually zero the gradients after running the backward pass
                w.grad.zero_()

        if t % print_every == 0:
            print('Iteration %d, loss = %.4f' % (t, loss.item()))
            check_accuracy_part2(loader_val, model_fn, params)
            print()

def random_weight(shape):
    """
    Create random Tensors for weights; setting requires_grad=True means that we
    want to compute gradients for these Tensors during the backward pass.
    We use Kaiming normalization: sqrt(2 / fan_in)
    """
    if len(shape) == 2:  # FC weight
        fan_in = shape[0]
    else:
        fan_in = np.prod(shape[1:]) # conv weight [out_channel, in_channel, kH, kW]
    # randn is standard normal distribution generator. 
    w = torch.randn(shape, device=device, dtype=dtype) * np.sqrt(2. / fan_in)
    w.requires_grad = True
    return w

learning_rate = 3e-3

channel_1 = 32
channel_2 = 16

conv_w1 = None
conv_b1 = None
conv_w2 = None
conv_b2 = None
fc_w = None
fc_b = None

################################################################################
# TODO: Initialize the parameters of a three-layer ConvNet.                    #
################################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
conv_w1 = random_weight((32,3,5,5))
conv_b1 = random_weight((32,))
conv_w2 = random_weight((16,32,3,3))
conv_b2 = random_weight((16,))
fc_w = random_weight((16 * 32 * 32, 10))
fc_b = random_weight((10,))
pass

# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

params = [conv_w1, conv_b1, conv_w2, conv_b2, fc_w, fc_b]
train_part2(three_layer_convnet, params, learning_rate)

the three_layer_convnet referred above:

def three_layer_convnet(x, params):
    """
    Performs the forward pass of a three-layer convolutional network with the
    architecture defined above.

    Inputs:
    - x: A PyTorch Tensor of shape (N, 3, H, W) giving a minibatch of images
    - params: A list of PyTorch Tensors giving the weights and biases for the
      network; should contain the following:
      - conv_w1: PyTorch Tensor of shape (channel_1, 3, KH1, KW1) giving weights
        for the first convolutional layer
      - conv_b1: PyTorch Tensor of shape (channel_1,) giving biases for the first
        convolutional layer
      - conv_w2: PyTorch Tensor of shape (channel_2, channel_1, KH2, KW2) giving
        weights for the second convolutional layer
      - conv_b2: PyTorch Tensor of shape (channel_2,) giving biases for the second
        convolutional layer
      - fc_w: PyTorch Tensor giving weights for the fully-connected layer. Can you
        figure out what the shape should be?
      - fc_b: PyTorch Tensor giving biases for the fully-connected layer. Can you
        figure out what the shape should be?
    
    Returns:
    - scores: PyTorch Tensor of shape (N, C) giving classification scores for x
    """
    conv_w1, conv_b1, conv_w2, conv_b2, fc_w, fc_b = params
    scores = None
    import torch.nn as nn
    pad1 = 2
    pad2 = 1
    channel_input = x.shape[1]
    channel_1 = conv_w1.shape[0]
    channel_2 = conv_w2.shape[0]
    KW1, KH1 = conv_w1.shape[2:]
    KW2, KH2 = conv_w2.shape[2:]
    
    conv1 = nn.Conv2d(channel_input,channel_1,(KW1,KH1),stride = 1,padding = pad1,padding_mode = 'zeros',bias=True)
    conv1.weight = nn.Parameter(conv_w1)
    conv1.bias = nn.Parameter(conv_b1)
    
    relu = nn.ReLU()
    
    conv2 = nn.Conv2d(channel_1,channel_2,(KW2,KH2),stride = 1,padding = pad2,padding_mode = 'zeros',bias=True)
    conv2.weight = nn.Parameter(conv_w2)
    conv2_bias = nn.Parameter(conv_b2)
    
    
    out = conv1(x)
    out = relu(out)
    out = conv2(out)
    out = relu(out)
    out = flatten(out)
    scores = out.mm(fc_w)+fc_b

    pass

 
    return scores

i checked thoroughly that all the tensors are sent to device, and i have a feeling that might be something wrong with the way i installed cuda or some activation of gpu i didn;t do.
by examining the task manager, i know i have:
GPU 0 intel® HD graphics 630
GPU 1 NVIDIA GeForce GTX 1050

i followed some tutorials to install nvidia driver and activate in in cmd but maybe it was erroneous. that’s just my hunch though.
does anybody have a clue?

Caruso · October 2, 2020, 11:51am

Could you give us the the code of your three_layer_convnet?
Normally you have a model (which contains the parameters) and your data, which you transfer using .to(device) to your desired device (GPU) and the job is more or less done. In your case you have a model three_layer_convnet and params - second you generated yourself. Your params are on the given device, but your three_layer_convnet isn’t. So now it depends on what your three_layer_convnet does; does it use your given params or the parameters of the three_layer_convnet.

enterthevoidf22 · October 3, 2020, 7:50am

def three_layer_convnet(x, params):
“”"
Performs the forward pass of a three-layer convolutional network with the
architecture defined above.

Inputs:
- x: A PyTorch Tensor of shape (N, 3, H, W) giving a minibatch of images
- params: A list of PyTorch Tensors giving the weights and biases for the
  network; should contain the following:
  - conv_w1: PyTorch Tensor of shape (channel_1, 3, KH1, KW1) giving weights
    for the first convolutional layer
  - conv_b1: PyTorch Tensor of shape (channel_1,) giving biases for the first
    convolutional layer
  - conv_w2: PyTorch Tensor of shape (channel_2, channel_1, KH2, KW2) giving
    weights for the second convolutional layer
  - conv_b2: PyTorch Tensor of shape (channel_2,) giving biases for the second
    convolutional layer
  - fc_w: PyTorch Tensor giving weights for the fully-connected layer. Can you
    figure out what the shape should be?
  - fc_b: PyTorch Tensor giving biases for the fully-connected layer. Can you
    figure out what the shape should be?

Returns:
- scores: PyTorch Tensor of shape (N, C) giving classification scores for x
"""
conv_w1, conv_b1, conv_w2, conv_b2, fc_w, fc_b = params
scores = None
import torch.nn as nn
pad1 = 2
pad2 = 1
channel_input = x.shape[1]
channel_1 = conv_w1.shape[0]
channel_2 = conv_w2.shape[0]
KW1, KH1 = conv_w1.shape[2:]
KW2, KH2 = conv_w2.shape[2:]

conv1 = nn.Conv2d(channel_input,channel_1,(KW1,KH1),stride = 1,padding = pad1,padding_mode = 'zeros',bias=True)
conv1.weight = nn.Parameter(conv_w1)
conv1.bias = nn.Parameter(conv_b1)

relu = nn.ReLU()

conv2 = nn.Conv2d(channel_1,channel_2,(KW2,KH2),stride = 1,padding = pad2,padding_mode = 'zeros',bias=True)
conv2.weight = nn.Parameter(conv_w2)
conv2_bias = nn.Parameter(conv_b2)

################################################################################
# TODO: Implement the forward pass for the three-layer ConvNet.                #
################################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

out = conv1(x)
out = relu(out)
out = conv2(out)
out = relu(out)
out = flatten(out)
scores = out.mm(fc_w)+fc_b

pass


return scores

enterthevoidf22 · October 3, 2020, 7:53am

hey, i don’t quite understand. why would my post be deleted?

enterthevoidf22 · October 3, 2020, 8:02am

ok thanks to Caruso i managed to figure out the problem!
it turns out i was missing a dot when i defined the models second bias.
in the three_layer_convnet function there’s a line:
conv2_bias = nn.Parameter(conv_b2)
changed it to
conv2.bias = nn.Parameter(conv_b2)
now it works.

good luck to anyone encountering a similar error!