Hi,
I am desperately trying to figure out what’s wrong with my code. I am trying to get the index of the max log-probability as shown. The weird thing is that I get this RuntimeError after many Epochs (sometimes 7/12/25 …) but it usually runs smoothly for the first epochs.
I am quite new to PyTorch maybe it’s something obvious but I just can’t seem to figure out what could cause the problem after running for that many Epochs.
Python 3.5.2
Torch 0.3.1
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
~/SDT_CIFAR/main.py in <module>()
122
123 for epoch in range(1, args.epochs + 1):
--> 124 model.train_(trainloader, epoch)
125 accuracy = model.test_(testloader, epoch)
126 save_result(accuracy)
~/SDT_CIFAR/model.py in train_(self, train_loader, epoch)
224 loss.backward(retain_variables=True)
225 self.optimizer.step()
--> 226 pred = output.data.max(1)[1] # get the index of the max log-probability
227 correct += pred.eq(target.data).cpu().sum()
228 accuracy = 100. * correct / len(data)
~/.local/lib/python3.5/site-packages/torch/tensor.py in data(self)
405 @property
406 def data(self):
--> 407 raise RuntimeError('cannot call .data on a torch.Tensor: did you intend to use autograd.Variable?')
408
409 # Numpy array interface, to support `numpy.asarray(tensor) -> ndarray`
RuntimeError: cannot call .data on a torch.Tensor: did you intend to use autograd.Variable?
...
def train_(self, train_loader, epoch):
t = time.time()
self.train()
self.define_extras(self.args.batch_size)
for batch_idx, (data, target) in enumerate(train_loader):
correct = 0
if self.args.cuda:
data, target = data.cuda(), target.cuda()
#data = data.view(self.args.batch_size,-1)
target = Variable(target)
target_ = target.view(-1,1)
batch_size = target_.size()[0]
data = data.view(batch_size,-1)
##convert int target to one-hot vector
data = Variable(data)
if not batch_size == self.args.batch_size: #because we have to initialize parameters for batch_size, tensor not matches with batch size cannot be trained
self.define_extras(batch_size)
self.target_onehot.data.zero_()
self.target_onehot.scatter_(1, target_, 1.)
self.optimizer.zero_grad()
loss, output = self.cal_loss(data, self.target_onehot)
loss.backward(retain_variables=True)
self.optimizer.step()
pred = output.data.max(1)[1] # get the index of the max log-probability
correct += pred.eq(target.data).cpu().sum()
accuracy = 100. * correct / len(data)
if batch_idx % self.args.log_interval == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}, Accuracy: {}/{} ({:.4f}%)'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.data[0],
correct, len(data),
accuracy))
elapsed = time.time() - t
print("elapsed time in this epoch: " + str(elapsed) + " sec")
self.get_node_weights(epoch)
...
Full Code is very similar to this Github link.
I would appreciate any help.
Thanks in advance.