i got this error when doing the cs231n assignment2 on jupyter.
i checked and all my parameters and inputs are sent to device, which is cuda:0.
the error message :
RuntimeError Traceback (most recent call last)
<ipython-input-33-851ef5a1f987> in <module>
29
30 params = [conv_w1, conv_b1, conv_w2, conv_b2, fc_w, fc_b]
---> 31 train_part2(three_layer_convnet, params, learning_rate)
<ipython-input-32-3e676edf85f7> in train_part2(model_fn, params, learning_rate)
23 for i in params:
24 print(i.device)
---> 25 scores = model_fn(x, params)
26 loss = F.cross_entropy(scores, y)
27
<ipython-input-28-279d7a4f4cea> in three_layer_convnet(x, params)
52 out = conv1(x)
53 out = relu(out)
---> 54 out = conv2(out)
55 out = relu(out)
56 out = flatten(out)
~\.conda\envs\torch_env\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
--> 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)
~\.conda\envs\torch_env\lib\site-packages\torch\nn\modules\conv.py in forward(self, input)
343
344 def forward(self, input):
--> 345 return self.conv2d_forward(input, self.weight)
346
347 class Conv3d(_ConvNd):
~\.conda\envs\torch_env\lib\site-packages\torch\nn\modules\conv.py in conv2d_forward(self, input, weight)
340 _pair(0), self.dilation, self.groups)
341 return F.conv2d(input, weight, self.bias, self.stride,
--> 342 self.padding, self.dilation, self.groups)
343
344 def forward(self, input):
RuntimeError: Input type (torch.cuda.FloatTensor) and bias type (torch.FloatTensor) should be the same
code leading to error:
def train_part2(model_fn, params, learning_rate):
"""
Train a model on CIFAR-10.
Inputs:
- model_fn: A Python function that performs the forward pass of the model.
It should have the signature scores = model_fn(x, params) where x is a
PyTorch Tensor of image data, params is a list of PyTorch Tensors giving
model weights, and scores is a PyTorch Tensor of shape (N, C) giving
scores for the elements in x.
- params: List of PyTorch Tensors giving weights for the model
- learning_rate: Python scalar giving the learning rate to use for SGD
Returns: Nothing
"""
for t, (x, y) in enumerate(loader_train):
# Move the data to the proper device (GPU or CPU)
x = x.to(device=device, dtype=dtype)
y = y.to(device=device, dtype=torch.long)
# Forward pass: compute scores and loss
print(x.device)
for i in params:
print(i.device)
scores = model_fn(x, params)
loss = F.cross_entropy(scores, y)
# Backward pass: PyTorch figures out which Tensors in the computational
# graph has requires_grad=True and uses backpropagation to compute the
# gradient of the loss with respect to these Tensors, and stores the
# gradients in the .grad attribute of each Tensor.
loss.backward()
# Update parameters. We don't want to backpropagate through the
# parameter updates, so we scope the updates under a torch.no_grad()
# context manager to prevent a computational graph from being built.
with torch.no_grad():
for w in params:
w -= learning_rate * w.grad
# Manually zero the gradients after running the backward pass
w.grad.zero_()
if t % print_every == 0:
print('Iteration %d, loss = %.4f' % (t, loss.item()))
check_accuracy_part2(loader_val, model_fn, params)
print()
def random_weight(shape):
"""
Create random Tensors for weights; setting requires_grad=True means that we
want to compute gradients for these Tensors during the backward pass.
We use Kaiming normalization: sqrt(2 / fan_in)
"""
if len(shape) == 2: # FC weight
fan_in = shape[0]
else:
fan_in = np.prod(shape[1:]) # conv weight [out_channel, in_channel, kH, kW]
# randn is standard normal distribution generator.
w = torch.randn(shape, device=device, dtype=dtype) * np.sqrt(2. / fan_in)
w.requires_grad = True
return w
learning_rate = 3e-3
channel_1 = 32
channel_2 = 16
conv_w1 = None
conv_b1 = None
conv_w2 = None
conv_b2 = None
fc_w = None
fc_b = None
################################################################################
# TODO: Initialize the parameters of a three-layer ConvNet. #
################################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
conv_w1 = random_weight((32,3,5,5))
conv_b1 = random_weight((32,))
conv_w2 = random_weight((16,32,3,3))
conv_b2 = random_weight((16,))
fc_w = random_weight((16 * 32 * 32, 10))
fc_b = random_weight((10,))
pass
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
################################################################################
# END OF YOUR CODE #
################################################################################
params = [conv_w1, conv_b1, conv_w2, conv_b2, fc_w, fc_b]
train_part2(three_layer_convnet, params, learning_rate)
the three_layer_convnet referred above:
def three_layer_convnet(x, params):
"""
Performs the forward pass of a three-layer convolutional network with the
architecture defined above.
Inputs:
- x: A PyTorch Tensor of shape (N, 3, H, W) giving a minibatch of images
- params: A list of PyTorch Tensors giving the weights and biases for the
network; should contain the following:
- conv_w1: PyTorch Tensor of shape (channel_1, 3, KH1, KW1) giving weights
for the first convolutional layer
- conv_b1: PyTorch Tensor of shape (channel_1,) giving biases for the first
convolutional layer
- conv_w2: PyTorch Tensor of shape (channel_2, channel_1, KH2, KW2) giving
weights for the second convolutional layer
- conv_b2: PyTorch Tensor of shape (channel_2,) giving biases for the second
convolutional layer
- fc_w: PyTorch Tensor giving weights for the fully-connected layer. Can you
figure out what the shape should be?
- fc_b: PyTorch Tensor giving biases for the fully-connected layer. Can you
figure out what the shape should be?
Returns:
- scores: PyTorch Tensor of shape (N, C) giving classification scores for x
"""
conv_w1, conv_b1, conv_w2, conv_b2, fc_w, fc_b = params
scores = None
import torch.nn as nn
pad1 = 2
pad2 = 1
channel_input = x.shape[1]
channel_1 = conv_w1.shape[0]
channel_2 = conv_w2.shape[0]
KW1, KH1 = conv_w1.shape[2:]
KW2, KH2 = conv_w2.shape[2:]
conv1 = nn.Conv2d(channel_input,channel_1,(KW1,KH1),stride = 1,padding = pad1,padding_mode = 'zeros',bias=True)
conv1.weight = nn.Parameter(conv_w1)
conv1.bias = nn.Parameter(conv_b1)
relu = nn.ReLU()
conv2 = nn.Conv2d(channel_1,channel_2,(KW2,KH2),stride = 1,padding = pad2,padding_mode = 'zeros',bias=True)
conv2.weight = nn.Parameter(conv_w2)
conv2_bias = nn.Parameter(conv_b2)
out = conv1(x)
out = relu(out)
out = conv2(out)
out = relu(out)
out = flatten(out)
scores = out.mm(fc_w)+fc_b
pass
return scores
i checked thoroughly that all the tensors are sent to device, and i have a feeling that might be something wrong with the way i installed cuda or some activation of gpu i didn;t do.
by examining the task manager, i know i have:
GPU 0 intel® HD graphics 630
GPU 1 NVIDIA GeForce GTX 1050
i followed some tutorials to install nvidia driver and activate in in cmd but maybe it was erroneous. that’s just my hunch though.
does anybody have a clue?