I am new to PyTorch and I am facing a performance issue.
Below is my code:
import sys

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import torch.optim as optim
import torch.utils.data as data_utils
import torchvision
import torchvision.transforms as transforms
from torch.autograd import Variable
# Report the software/hardware environment so the timings below can be
# compared against other setups.
# NOTE(review): get_gpu_name, get_cuda_version and get_cudnn_version are not
# defined in this file -- presumably they come from the same `common` helper
# module as cifar_for_library; confirm.
print("OS: ", sys.platform)
print("Python: ", sys.version)
print("PyTorch: ", torch.__version__)
print("Numpy: ", np.__version__)
print("GPU: ", get_gpu_name())
print(get_cuda_version())
print("CuDNN Version ", get_cudnn_version())
#I'm running in google colab
#OS: linux
#Python: 3.6.7 (default, Oct 22 2018, 11:32:17)
#[GCC 8.2.0]
#PyTorch: 1.1.0
#Numpy: 1.16.3
#GPU: ['Tesla T4']
#CUDA Version 10.0.130
#CuDNN Version 7.5.1
# Load the CIFAR train/test split through the `common` helper.
# channel_first=True is requested from the helper; presumably this yields
# NCHW-ordered arrays as expected by nn.Conv2d -- confirm against `common`.
x_train, x_test, y_train, y_test = cifar_for_library(channel_first=True)
class create_pytorch_model(nn.Module):
    """Small CNN: two (conv -> ReLU -> conv -> max-pool -> ReLU -> dropout)
    stages followed by a two-layer fully connected classifier.

    Args:
        n_classes: number of output logits produced by the last linear layer.
    """

    def __init__(self, n_classes=N_CLASSES):
        super().__init__()
        # Stage 1 convolutions: 3 -> 50 -> 50 channels, 3x3 kernels with
        # padding=1.
        self.conv1 = nn.Conv2d(3, 50, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(50, 50, kernel_size=3, padding=1)
        # Stage 2 convolutions: 50 -> 100 -> 100 channels.
        self.conv3 = nn.Conv2d(50, 100, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(100, 100, kernel_size=3, padding=1)
        # The classifier expects 100 feature maps of 8x8 after the two
        # poolings (assumes 32x32 inputs -- TODO confirm).
        self.fc1 = nn.Linear(100 * 8 * 8, 512)
        self.fc2 = nn.Linear(512, n_classes)

    def forward(self, x):
        """Return the class logits for the input batch ``x``."""
        # F.dropout is functional, so the module's train/eval state has to be
        # passed in explicitly on every call.
        # Stage 1.
        h = F.relu(self.conv1(x))
        h = self.conv2(h)
        h = F.max_pool2d(h, kernel_size=2, stride=2)
        h = F.relu(h)
        h = F.dropout(h, 0.25, training=self.training)

        # Stage 2.
        h = F.relu(self.conv3(h))
        h = self.conv4(h)
        h = F.max_pool2d(h, kernel_size=2, stride=2)
        h = F.relu(h)
        h = F.dropout(h, 0.25, training=self.training)

        # Flatten to (batch, 100*8*8) for the fully connected layers.
        flat = h.view(-1, 100 * 8 * 8)
        hidden = F.relu(self.fc1(flat))
        hidden = F.dropout(hidden, 0.5, training=self.training)
        return self.fc2(hidden)
# Let cuDNN benchmark its convolution algorithms and keep the fastest one for
# each input shape it sees. The mini-batch shapes here are (almost) constant,
# so the one-off tuning cost is paid back quickly.
torch.backends.cudnn.benchmark = True

pytorch_model = create_pytorch_model().cuda()
# NOTE(review): `lr` and `momentum` are not defined in the visible code; they
# must be bound before this line.
optimizer = optim.SGD(pytorch_model.parameters(), lr, momentum)
criterion = nn.CrossEntropyLoss()

# Training mode: makes the F.dropout calls in forward() active.
pytorch_model.train()
for j in range(10):
    for data, target in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):
        # Move the mini-batch to the GPU. Since PyTorch 0.4 tensors track
        # gradients themselves, so the deprecated Variable wrapper is dropped.
        data = torch.FloatTensor(data).cuda()
        target = torch.LongTensor(target).cuda()
        optimizer.zero_grad()
        # Forward pass through the model built above (the original called an
        # undefined name `sym` here).
        output = pytorch_model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
    print('Epoch %d' % (j))
Link to my full code (Jupyter notebook)
Link to my full code (.py file)
Training took 2 min 15 s with CUDA.
MXNet, by contrast, takes only 40 s to train using the same data and the same data loader.
This makes PyTorch about 3x slower than MXNet!
What am I doing wrong?
Or is this normal?
Any suggestions would be much appreciated.