I have been running the following code with a batch size of 32, then 16, then 8, and it keeps raising the same runtime error. I have a GTX 1650, which has 4 GB of memory. Is this not enough?
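For context, the loader is created roughly like this (a simplified sketch: trainSet stands in for my actual Dataset object, and batch_size is the value I varied across runs):

from torch.utils.data import DataLoader

# batch_size was set to 32, then 16, then 8 across the runs described above
trainLoader = DataLoader(trainSet, batch_size=8, shuffle=True)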
iterations = 30
trainLoss = []
testAcc = []
start = time.time()
for epoch in range(iterations):
    epochStart = time.time()
    runningLoss = 0
    net.train(True)  # For training
    for data in trainLoader:
        inputs, labels = data
        # Wrap them in Variable
        if use_gpu:
            inputs, labels = Variable(inputs.float().cuda()), \
                Variable(labels.long().cuda())
        else:
            inputs, labels = Variable(inputs), Variable(labels.long())
        inputs = inputs / 1600
        # Initialize gradients to zero
        optimizer.zero_grad()
        # Feed-forward input data through the network
        outputs = net(inputs)
        # Compute loss/error
        loss = criterion(outputs, labels)
        # Backpropagate loss and compute gradients
        loss.backward()
        # Update the network parameters
        optimizer.step()
        # Accumulate loss per batch
        runningLoss += loss.detach()
    avgTrainLoss = runningLoss / 1200
    trainLoss.append(avgTrainLoss)
    # Evaluating performance on test set for each epoch
    net.train(False)  # For testing
    inputs = TestImages / 1600
    if use_gpu:
        inputs = Variable(inputs.cuda())
        outputs = net(inputs)
        _, predicted = torch.max(outputs.data, 1)
        predicted = predicted.cpu()
    else:
        inputs = Variable(inputs)
        outputs = net(inputs)
        _, predicted = torch.max(outputs.data, 1)
    correct = 0
    total = 0
    total += TestLabels.size(0)
    correct += (predicted == TestLabels).sum()
    avgTestAcc = correct / 400
    testAcc.append(avgTestAcc)
    # Plotting Loss vs Epochs
    fig1 = plt.figure(1)
    plt.plot(range(epoch + 1), trainLoss, 'r--', label='train')
    if epoch == 0:
        plt.legend(loc='upper left')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
    # Plotting testing accuracy vs Epochs
    fig2 = plt.figure(2)
    plt.plot(range(epoch + 1), testAcc, 'g-', label='test')
    if epoch == 0:
        plt.legend(loc='upper left')
        plt.xlabel('Epochs')
        plt.ylabel('Testing accuracy')
    epochEnd = time.time() - epochStart
    print('At Iteration: {:.0f} / {:.0f} ; Training Loss: {:.6f} ; Testing Acc: {:.3f} ; Time consumed: {:.0f}m {:.0f}s'
          .format(epoch + 1, iterations, avgTrainLoss, avgTestAcc * 100, epochEnd // 60, epochEnd % 60))
end = time.time() - start
print('Training completed in {:.0f}m {:.0f}s'.format(end // 60, end % 60))
The error that gets returned is:
RuntimeError: CUDA out of memory. Tried to allocate 88.00 MiB (GPU 0; 4.00 GiB total capacity; 2.27 GiB already allocated; 38.45 MiB free; 2.33 GiB reserved in total by PyTorch)
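If it helps with diagnosing, I can also print PyTorch's view of GPU memory once per epoch with something like this (a sketch using torch.cuda.memory_allocated and torch.cuda.memory_reserved, which report bytes on the current device):

# Per-epoch memory check I could add inside the epoch loop
if use_gpu:
    allocated = torch.cuda.memory_allocated() / 1024**2
    reserved = torch.cuda.memory_reserved() / 1024**2
    print('GPU memory: {:.0f} MiB allocated, {:.0f} MiB reserved'.format(allocated, reserved))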
Thanks a lot in advance for helping. I’m new here, so please let me know if there is anything I can do to improve the question.