I’m using a modified version of the CIFAR10 tutorial to classify some of my own images. I added more convolutional layers because I’m working with higher-resolution images (512x384 pixels), and I’m trying to use my GPU to accelerate training, but it doesn’t seem to be using it. When I run nvidia-smi while the network is training, it shows the GPU barely being used at all. The results look fine; it just seems to be running almost entirely on the CPU. I believe I’m calling the .cuda() method on all the right objects, and there should be plenty of work to keep the GPU busy. The relevant portions of my code are below. Any help would be much appreciated. If it matters, I’m running Ubuntu 16.04 with a GTX 1070 and NVIDIA driver 375.39. Thanks!
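First, here’s a quick sanity check I ran to confirm things really are on the GPU (a throwaway snippet, not part of the training script; it assumes net and inputs from the code below already exist):

import torch

print(torch.cuda.is_available())       # should be True if CUDA itself is working
print(next(net.parameters()).is_cuda)  # should be True if the model weights are on the GPU
print(inputs.is_cuda)                  # should be True if the batch was moved with .cuda()

All three print True for me, which is part of why I’m confused.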
#Define a Convolutional Neural Network
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.verbose = False
        self.pool = nn.MaxPool2d(2, 2)
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 10, 5)
        self.conv3 = nn.Conv2d(10, 16, 5)
        self.conv4 = nn.Conv2d(16, 16, 5)
        self.conv5 = nn.Conv2d(16, 18, 5)
        self.conv6 = nn.Conv2d(18, 21, 5)
        self.fc1 = nn.Linear(168, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 7)
        self.cuda() #Convert this module to CUDA

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = self.pool(F.relu(self.conv4(x)))
        x = self.pool(F.relu(self.conv5(x)))
        x = self.pool(F.relu(self.conv6(x)))
        x = x.view(-1, 168)  # 21 channels * 2 * 4 spatial positions after the last pool
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

net = Net()
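In case it helps, the 168 in x.view(-1, 168) comes from tracing the shapes through the network: each 5x5 conv with no padding shrinks each spatial dimension by 4, and each 2x2 pool halves it (with flooring). Here’s the arithmetic I used to check it (just a verification snippet, not in the training code):

# Trace feature-map sizes for a 384x512 (HxW) input through 6 conv+pool stages
h, w = 384, 512
for _ in range(6):
    h, w = (h - 4) // 2, (w - 4) // 2  # conv 5x5 no padding, then 2x2 max pool
print(h, w)        # 2 4
print(21 * h * w)  # 168, the flattened size fed to fc1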
…
#Load data
trainset = ImageDataset.ImageDataset()
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batchSize, shuffle=True, num_workers=2)
testset = ImageDataset.ImageDataset(train=False)
testloader = torch.utils.data.DataLoader(testset, batch_size=3, shuffle=True, num_workers=2)
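To make sure the dataset is producing what I expect, I also pulled one batch and inspected it (another throwaway check; ImageDataset is my own class, so the expected shape is just what I intend it to return):

# Grab a single batch and confirm its shape and device before training
images, labels = next(iter(trainloader))
print(images.size())   # I expect (batchSize, 3, 384, 512)
print(images.is_cuda)  # False: the DataLoader yields CPU tensors, hence the .cuda() calls below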
########################################################################
# 3. Define a Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
########################################################################
# 4. Train the network
for i, data in enumerate(trainloader, 0):
    # get the inputs
    inputs, labels = data

    # wrap them in Variable and move them to the GPU
    inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())

    # zero the parameter gradients
    optimizer.zero_grad()

    # forward + backward + optimize
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
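Finally, to see how much time one step actually spends on the GPU, I timed a single forward/backward pass like this (a rough measurement sketch; torch.cuda.synchronize() is needed for the timing to be meaningful, since CUDA calls are asynchronous):

import time

torch.cuda.synchronize()  # wait for any pending GPU work before starting the clock
start = time.time()
outputs = net(inputs)
loss = criterion(outputs, labels)
loss.backward()
torch.cuda.synchronize()  # wait for the backward pass to finish on the GPU
print('forward+backward: %.4f s' % (time.time() - start))

If anyone can tell me what I’m missing, or what else to check, I’d really appreciate it.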