Apologies, I know there are several topics on this but I can’t seem to resolve this issue! Thanks in advance for any help.
I’m attempting to train a CNN and am getting a RuntimeError: expected scalar type Long but found Float
error on the first forward step:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
/tmp/ipykernel_39/1736748445.py in <module>
6 for t in range(epochs):
7 print(f"Epoch {t+1}\n-------------------------------")
----> 8 train(train_dataloader, DigitCNN, loss_fn, optimizer)
9 evaluate(val_dataloader, DigitCNN)
10 print("Done!")
/tmp/ipykernel_39/154611817.py in train(dataloader, model, loss_fn, optimizer)
6
7 # Compute prediction error
----> 8 pred = model(X)
9 loss = loss_fn(pred, y)
10
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1050 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051 return forward_call(*input, **kwargs)
1052 # Do not call functions when jit is used
1053 full_backward_hooks, non_full_backward_hooks = [], []
/tmp/ipykernel_39/2556345630.py in forward(self, x)
8
9 def forward(self, x):
---> 10 x = self.pool(relu(self.conv1(x)))
11 x = self.pool(relu(self.conv2(x)))
12 x = x.view(-1, 320) # Equivalent of a flatten
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1050 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051 return forward_call(*input, **kwargs)
1052 # Do not call functions when jit is used
1053 full_backward_hooks, non_full_backward_hooks = [], []
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/conv.py in forward(self, input)
441
442 def forward(self, input: Tensor) -> Tensor:
--> 443 return self._conv_forward(input, self.weight, self.bias)
444
445 class Conv3d(_ConvNd):
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/conv.py in _conv_forward(self, input, weight, bias)
438 _pair(0), self.dilation, self.groups)
439 return F.conv2d(input, weight, bias, self.stride,
--> 440 self.padding, self.dilation, self.groups)
441
442 def forward(self, input: Tensor) -> Tensor:
RuntimeError: expected scalar type Long but found Float
I’ve looked up several other articles on this error and as a result tried casting both the input tensor and the target tensor as LongTensor before creating a dataset, and trying float instead - but it always seems to result in the same error!
My code is:
def pixels_to_tensors(pixel_csv):
'''
Takes 784 input pixels across n images as a csv file, and converts it into
a tensor of shape (n, 1, 28, 28). Any training labels are separated out.
'''
df_pixels = pd.read_csv('/kaggle/input/digit-recognizer/' + pixel_csv + '.csv')
df_pixels = shuffle(df_pixels)
# Separate labels from pixel data for training data
if df_pixels.columns[0] == 'label':
labels = df_pixels['label']
df_pixels = df_pixels.drop('label', axis=1)
else:
labels = None
# Create 28x28 tensors
assert df_pixels.shape[1] == 28*28, f'Incompatible number of pixels: {df_pixels.shape[1]}, was expecting 784 (28x28)'
arr_reshaped = df_pixels.values.reshape(-1, 1, 28, 28)
tensors = torch.from_numpy(arr_reshaped).type(torch.LongTensor)
return tensors, labels
train_val_tensors, train_val_labels = pixels_to_tensors('train')
# Split into train and eval sets
cutoff = 0.8
ind_cutoff = int(len(train_val_tensors)*cutoff)
train_tensors, train_labels = train_val_tensors[:ind_cutoff], train_val_labels.values[:ind_cutoff]
val_tensors, val_labels = train_val_tensors[ind_cutoff:], train_val_labels.values[ind_cutoff:]
train_dataset = TensorDataset(train_tensors, torch.from_numpy(train_labels).type(torch.LongTensor))
val_dataset = TensorDataset(val_tensors, torch.from_numpy(val_labels).type(torch.LongTensor))
# Create data loaders
batch_size = 64
train_dataloader = DataLoader(train_dataset, batch_size=batch_size)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size)
# Define CNN
class MultiLayerCNN(nn.Module):
def __init__(self):
super(MultiLayerCNN, self).__init__()
self.conv1 = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5)
self.pool = nn.MaxPool2d(2)
self.conv2 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=5)
self.fc = nn.Linear(in_features=320, out_features=10)
def forward(self, x):
x = self.pool(relu(self.conv1(x)))
x = self.pool(relu(self.conv2(x)))
x = x.view(-1, 320) # Equivalent of a flatten
x = self.fc(x)
x = nn.functional.log_softmax(x,dim=1)
return x
DigitCNN = MultiLayerCNN()
# Define training loop
def train(dataloader, model, loss_fn, optimizer):
size = len(dataloader.dataset)
for batch, (X, y) in enumerate(dataloader):
X, y = X.to(device), y.to(device)
# Compute prediction error
pred = model(X)
loss = loss_fn(pred, y)
# Backpropagation
optimizer.zero_grad()
loss.backward()
optimizer.step()
if batch % 100 == 0:
loss, current = loss.item(), batch * len(X)
print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
# Run training loop
epochs = 2
learning_rate = 1e-3
optimizer = torch.optim.Adam(DigitCNN.parameters(),lr=learning_rate)
loss_fn = nn.CrossEntropyLoss()
for t in range(epochs):
print(f"Epoch {t+1}\n-------------------------------")
train(train_dataloader, DigitCNN, loss_fn, optimizer)
evaluate(val_dataloader, DigitCNN)
print("Done!")