I tried the following code on AlexNet. I’m using it to classify it into 16 different categories. Labels are from 1 to 16 (numerical)
iterations = 30
trainLoss = []
testAcc = []
start = time.time()
for epoch in range(iterations):
epochStart = time.time()
runningLoss = 0
net.train(True) # For training
for data in trainLoader:
inputs,labels = data
# Wrap them in Variable
if use_gpu:
inputs, labels = Variable(inputs.float().cuda()), \
Variable(labels.long().cuda())
else:
inputs, labels = Variable(inputs), Variable(labels.long())
inputs = inputs/1600
# Initialize gradients to zero
optimizer.zero_grad()
# Feed-forward input data through the network
outputs = net(inputs)
# Compute loss/error
loss = criterion(outputs, labels)
# Backpropagate loss and compute gradients
loss.backward()
# Update the network parameters
optimizer.step()
# Accumulate loss per batch
runningLoss += loss.detach()
avgTrainLoss = runningLoss/1200
trainLoss.append(avgTrainLoss)
# Evaluating performance on test set for each epoch
net.train(False) # For testing
inputs = TestImages/1600
if use_gpu:
inputs = Variable(inputs.cuda())
outputs = net(inputs)
_, predicted = torch.max(outputs.data, 1)
predicted = predicted.cpu()
else:
inputs = Variable(inputs)
outputs = net(inputs)
_, predicted = torch.max(outputs.data, 1)
correct = 0
total = 0
total += TestLabels.size(0)
correct += (predicted == TestLabels).sum()
avgTestAcc = correct/400
testAcc.append(avgTestAcc)
# Plotting Loss vs Epochs
fig1 = plt.figure(1)
plt.plot(range(epoch+1),trainLoss,'r--',label='train')
if epoch==0:
plt.legend(loc='upper left')
plt.xlabel('Epochs')
plt.ylabel('Loss')
# Plotting testing accuracy vs Epochs
fig2 = plt.figure(2)
plt.plot(range(epoch+1),testAcc,'g-',label='test')
if epoch==0:
plt.legend(loc='upper left')
plt.xlabel('Epochs')
plt.ylabel('Testing accuracy')
epochEnd = time.time()-epochStart
print('At Iteration: {:.0f} /{:.0f} ; Training Loss: {:.6f} ; Testing Acc: {:.3f} ; Time consumed: {:.0f}m {:.0f}s '\
.format(epoch + 1,iterations,avgTrainLoss,avgTestAcc*100,epochEnd//60,epochEnd%60))
end = time.time()-start
print('Training completed in {:.0f}m {:.0f}s'.format(end//60,end%60))
However I receive the following error
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-13-bd2bf36f7806> in <module>
24 loss = criterion(outputs, labels)
25 # Backpropagate loss and compute gradients
---> 26 loss.backward()
27 # Update the network parameters
28 optimizer.step()
~\anaconda3\envs\pytorch_env\lib\site-packages\torch\tensor.py in backward(self, gradient, retain_graph, create_graph)
219 retain_graph=retain_graph,
220 create_graph=create_graph)
--> 221 torch.autograd.backward(self, gradient, retain_graph, create_graph)
222
223 def register_hook(self, hook):
~\anaconda3\envs\pytorch_env\lib\site-packages\torch\autograd\__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
128 retain_graph = create_graph
129
--> 130 Variable._execution_engine.run_backward(
131 tensors, grad_tensors_, retain_graph, create_graph,
132 allow_unreachable=True) # allow_unreachable flag
RuntimeError: CUDA error: CUBLAS_STATUS_ALLOC_FAILED when calling `cublasCreate(handle)`
Could anyone let me know what is wrong here please?
Thanks in advance