I was trying to run a simple model on a dataset where I loaded the data into a np.float32 array and the target labels into a np.int32 array. PyTorch automatically keeps these types when converting them into tensors via from_numpy (i.e., the data becomes a FloatTensor and the labels an IntTensor). However, the loss function expects Longs instead of Ints. (Or maybe I made a mistake somewhere.)
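To illustrate what I mean, here is a minimal sketch with dummy arrays (the names dummy_data and dummy_labels are just for illustration):

import numpy as np
import torch

dummy_data = np.zeros((2, 3), dtype=np.float32)
dummy_labels = np.zeros(2, dtype=np.int32)

# from_numpy preserves the NumPy dtypes:
print(torch.from_numpy(dummy_data).type())    # torch.FloatTensor
print(torch.from_numpy(dummy_labels).type())  # torch.IntTensor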
(On a side note, would you recommend using doubles and longs over floats and ints performance-wise?)
I posted a simplified example below, where I have to cast the target array to long (loss = F.nll_loss(output, target.long())); otherwise, I get a TypeError:
TypeError: FloatClassNLLCriterion_updateOutput
received an invalid combination of arguments -
got (int, torch.FloatTensor, torch.IntTensor, torch.FloatTensor, bool, NoneType, torch.FloatTensor),
but expected (int state, torch.FloatTensor input, torch.LongTensor target, torch.FloatTensor output, bool sizeAverage, [torch.FloatTensor weights or None], torch.FloatTensor total_weight)
I was wondering if this is desired behavior (i.e., that the loss function expects LongTensors)?
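A minimal sketch that triggers the same error, stripped of the model (random dummy values, just to hit the type check; assumes the Variable API):

import numpy as np
import torch
import torch.nn.functional as F
from torch.autograd import Variable

output = Variable(torch.randn(4, 10))  # stand-in log-probabilities: 4 samples, 10 classes
target = Variable(torch.from_numpy(np.zeros(4, dtype=np.int32)))

# F.nll_loss(output, target)              # raises the TypeError above (IntTensor target)
loss = F.nll_loss(output, target.long())  # works once the target is cast to Long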
(PS: Is there a tensor attribute to return the type, e.g., something like NumPy's my_array.dtype?)
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, kernel_size=5)
        self.conv2 = nn.Conv2d(20, 32, kernel_size=5)
        self.conv2_drop = nn.Dropout2d(p=0.5)
        self.fc1 = nn.Linear(800, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2, stride=2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 800)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = F.relu(self.fc2(x))
        return F.log_softmax(x)


model = Net()
if torch.cuda.is_available():
    model.cuda()

optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
batch_size = 64

model.train()
for step in range(1000):
    # train_data32 (np.float32) and train_labels (np.int32) are the NumPy arrays described above
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    data = train_data32[offset:(offset + batch_size), :, :, :]
    target = train_labels[offset:(offset + batch_size)]
    print('orig data type', data.dtype)
    print('orig target type', target.dtype)

    # convert to tensors first; .cuda() exists on tensors, not on NumPy arrays
    data, target = torch.from_numpy(data), torch.from_numpy(target)
    if torch.cuda.is_available():
        data, target = data.cuda(), target.cuda()
    data, target = Variable(data), Variable(target)

    optimizer.zero_grad()
    print('input batch dim:', data.size(), 'type:', data.data.type())
    output = model(data)
    print('output batch dim:', output.size())
    print('target batch dim:', target.size())

    loss = F.nll_loss(output, target.long())  # raises the TypeError above without the .long() cast
    loss.backward()
    optimizer.step()
    break  # one iteration is enough to demonstrate the issue
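For what it's worth, casting the labels to int64 on the NumPy side avoids the per-batch cast, since from_numpy then produces a LongTensor directly. A sketch, reusing the names from the loop above:

train_labels64 = train_labels.astype(np.int64)  # np.int64 -> torch.LongTensor
target = Variable(torch.from_numpy(train_labels64[:batch_size]))
loss = F.nll_loss(output, target)  # no .long() cast needed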