Hello all
I tried to run a convolutional neural network on the GPU. The first time it ran and I got the results, but the second time I got the error below, even though I didn't change my code at all, which is weird.
$Torch: not enough memory: you tried to allocate 0GB. Buy new RAM!
I couldn’t figure out why this might be happening.
How can I solve this problem?
Any help will be appreciated.
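I am not sure yet whether the allocation fails on the GPU or in host RAM, so for what it is worth, here is a small diagnostic sketch (separate from my training script, and assuming a single CUDA device at index 0) that I can run between attempts to see how much GPU memory PyTorch is holding:

import torch

# Diagnostic only: report memory on the first CUDA device (index 0 is an assumption).
if torch.cuda.is_available():
    total = torch.cuda.get_device_properties(0).total_memory
    allocated = torch.cuda.memory_allocated(0)
    print('GPU total memory    : {:.2f} GB'.format(total / 1024**3))
    print('GPU memory allocated: {:.2f} GB'.format(allocated / 1024**3))
    # Ask the caching allocator to release unused cached blocks
    # (this does not free tensors that are still referenced).
    torch.cuda.empty_cache()
else:
    print('CUDA is not available; running on CPU')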
Here is my code:
# -*- coding: utf-8 -*-
"""
Created on Thu Nov 7 13:25:00 2019
@author: Vahid
"""
from torch.autograd import Variable
import torch
import torch.cuda as cuda
import torchvision
from torchvision import transforms
import torch.nn as nn
import matplotlib.pyplot as plt
# Parameters
batch_size = 64
n_class = 4
lr = 0.001
num_epochs = 25

transform = transforms.Compose([transforms.Resize((128, 128)),
                                transforms.ToTensor(),
                                transforms.Normalize((.5, .5, .5), (.5, .5, .5))])
# Load Custom Dataset
train_dataset = torchvision.datasets.ImageFolder(r'H:\vahid-spectrogram-class1\train',
                                                 transform=transform)
valid_dataset = torchvision.datasets.ImageFolder(r'H:\vahid-spectrogram-class1\validation',
                                                 transform=transform)
test_dataset = torchvision.datasets.ImageFolder(r'H:\vahid-spectrogram-class1\test',
                                                transform=transform)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
valid_loader = torch.utils.data.DataLoader(dataset=valid_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=True)
# Convolutional neural network
class convnet(nn.Module):
    def __init__(self):
        super(convnet, self).__init__()
        self.layer1 = nn.Sequential(nn.Conv2d(3, 16, 5, 1, 2),
                                    nn.BatchNorm2d(16),
                                    nn.ReLU(),
                                    nn.MaxPool2d(2, 2))
        self.layer2 = nn.Sequential(nn.Conv2d(16, 8, 5, 1, 2),
                                    nn.BatchNorm2d(8),
                                    nn.ReLU(),
                                    nn.MaxPool2d(2, 2))
        self.layer3 = nn.Sequential(nn.Conv2d(8, 4, 5, 1, 2),
                                    nn.BatchNorm2d(4),
                                    nn.ReLU(),
                                    nn.MaxPool2d(2, 2))
        # 128x128 input is halved by three MaxPool2d(2, 2) layers -> 16x16 map with 4 channels
        self.fc1 = nn.Linear(16*16*4, 1000)
        self.drop_out = nn.Dropout(p=.75)
        self.fc2 = nn.Linear(1000, n_class)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc1(out)
        out = self.drop_out(out)
        out = self.fc2(out)
        return out
# Model CNN
convmodel = convnet()
if cuda.is_available():
    convmodel = convmodel.cuda()

# Loss
loss_fn = nn.CrossEntropyLoss()

# Optimizer
optimizer = torch.optim.Adam(convmodel.parameters(), lr=lr)

# LR scheduler
lr_sch = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.5)
num_steps = len(train_loader)
valid_num_steps = len(valid_loader)
train_loss = []
valid_loss = []
train_accuracy = []
valid_accuracy = []
for epoch in range(num_epochs):
    iter_loss = 0.0
    correct = 0
    iterations = 0
    convmodel.train()
    #print(lr_sch.get_lr())
    for j, (imgs, lbls) in enumerate(train_loader):
        # Convert torch tensor to Variable
        imgs = Variable(imgs)
        lbls = Variable(lbls)
        # If we have GPU, shift the data to GPU
        if cuda.is_available():
            imgs = imgs.cuda()
            lbls = lbls.cuda()
        optimizer.zero_grad()
        out = convmodel(imgs)
        loss_tr = loss_fn(out, lbls)
        iter_loss += loss_tr.item()
        loss_tr.backward()
        optimizer.step()
        _, predicted = torch.max(out.data, 1)
        correct += (predicted == lbls.data).sum()
        #if (j+1) % 1 == 0:
        #    print('Train, Epoch [{}/{}] Step [{}/{}] Loss_Train: {:.2f} Accuracy_Train:{:.2f}'.format(epoch+1, num_epochs, j+1, num_steps, loss_tr.item(), 100*(correct.item()/(batch_size*(j+1)))))
        iterations += 1
    # Record the training loss
    train_loss.append(iter_loss/iterations)
    # Record the training accuracy
    train_accuracy.append(100*correct/39888)

    loss = 0.0
    correct = 0
    iterations = 0
    convmodel.eval()
    for j, (items, classes) in enumerate(valid_loader):
        items = Variable(items)
        classes = Variable(classes)
        # If we have GPU, shift the data to GPU
        if cuda.is_available():
            items = items.cuda()
            classes = classes.cuda()
        out = convmodel(items)  # Do the forward pass
        loss += loss_fn(out, classes.data)  # Calculate the loss
        _, predicted = torch.max(out.data, 1)
        correct += (predicted == classes.data).sum()
        #if (j+1) % 1 == 0:
        #    print('validation, Epoch [{}/{}] Step [{}/{}] Loss_Validation: {:.2f} Accuracy_Validation:{:.2f}'.format(epoch+1, num_epochs, j+1, valid_num_steps, loss_va.item(), 100*(correct.item()/(batch_size*(j+1)))))
        iterations += 1
    # Record the validation loss
    valid_loss.append(loss/iterations)
    # Record the validation accuracy
    valid_accuracy.append(correct/8568)

    print('Epoch %d/%d, Tr Loss: %.4f, Tr Acc: %.4f, Val Loss: %.4f, Val Acc: %.4f'
          % (epoch+1, num_epochs, train_loss[-1], train_accuracy[-1],
             valid_loss[-1], valid_accuracy[-1]))
convmodel.eval()
corrects = 0
num_steps = len(test_loader)
for j, (imgs, lbls) in enumerate(test_loader):
    # If we have GPU, shift the data to GPU
    if cuda.is_available():
        imgs = imgs.cuda()
        lbls = lbls.cuda()
    out = convmodel(imgs)
    predicted = torch.argmax(out, 1)
    corrects += torch.sum(predicted == lbls)
    print('Step [{}/{}] Acc {:.4f}: '.format(j+1, num_steps, 100.*corrects/((j+1)*batch_size)))
f = plt.figure(figsize=(10, 8))
plt.plot(train_loss, label='training loss')
plt.plot(valid_loss, label='validation loss')
plt.legend()
plt.show()
# In[13]:
# Accuracy
f = plt.figure(figsize=(10, 8))
plt.plot(train_accuracy, label='training accuracy')
plt.plot(valid_accuracy, label='validation accuracy')
plt.legend()
plt.show()