Hi,
I am grateful for the convenience brought by PyTorch. But after switching to the 1.0 nightly version, I found that the same code runs much slower than it did on 0.4.1.
I actually submitted an issue on GitHub, but the problem may still be my own misuse of the new version, so I thought I would also ask my question here.
The main part of my code is like this:
The model:
import torchvision
import torch
import torch.nn as nn
import torch.nn.functional as F
class Model(nn.Module):
    """Dilated-VGG16 fully convolutional network for dense per-pixel scoring.

    The ``features`` stack reproduces torchvision VGG16's Conv2d/ReLU/pool
    module ordering exactly (13 convolutions, 5 pools), so the pretrained
    ``vgg.features.state_dict()`` loads index-for-index in ``init_weights``.
    It differs from stock VGG16 in that the last two pools are stride-1 and
    the final three 3x3 convolutions are dilated (rate 2), which keeps a
    higher-resolution output map.

    Args:
        in_dim:  number of input image channels (e.g. 3 for RGB).
        out_dim: number of output classes; ``forward`` returns a score map
                 with this many channels.
    """

    def __init__(self, in_dim, out_dim, *args, **kwargs):
        super(Model, self).__init__(*args, **kwargs)
        # NOTE: the original built a throwaway torchvision.models.vgg16()
        # here that was never used; it has been removed (it wasted both
        # construction time and memory on every instantiation).
        layers = []
        # Stage 1: in_dim -> 64, downsample x2.
        layers.append(nn.Conv2d(in_dim, 64, kernel_size=3, stride=1, padding=1))
        layers.append(nn.ReLU(inplace=True))
        layers.append(nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1))
        layers.append(nn.ReLU(inplace=True))
        layers.append(nn.MaxPool2d(3, stride=2, padding=1))
        # Stage 2: 64 -> 128, downsample x2.
        layers.append(nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1))
        layers.append(nn.ReLU(inplace=True))
        layers.append(nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1))
        layers.append(nn.ReLU(inplace=True))
        layers.append(nn.MaxPool2d(3, stride=2, padding=1))
        # Stage 3: 128 -> 256, downsample x2.
        layers.append(nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1))
        layers.append(nn.ReLU(inplace=True))
        layers.append(nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1))
        layers.append(nn.ReLU(inplace=True))
        layers.append(nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1))
        layers.append(nn.ReLU(inplace=True))
        layers.append(nn.MaxPool2d(3, stride=2, padding=1))
        # Stage 4: 256 -> 512; stride-1 pool keeps spatial resolution.
        layers.append(nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1))
        layers.append(nn.ReLU(inplace=True))
        layers.append(nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1))
        layers.append(nn.ReLU(inplace=True))
        layers.append(nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1))
        layers.append(nn.ReLU(inplace=True))
        layers.append(nn.MaxPool2d(3, stride=1, padding=1))
        # Stage 5: three dilated (rate-2) 512->512 convolutions; the loop
        # preserves the exact module order VGG16's state_dict expects.
        for _ in range(3):
            layers.append(nn.Conv2d(512, 512, kernel_size=3, stride=1,
                                    padding=2, dilation=2))
            layers.append(nn.ReLU(inplace=True))
        layers.append(nn.MaxPool2d(3, stride=1, padding=1))
        self.features = nn.Sequential(*layers)

        # Classification head: smoothing pool, a large-dilation (rate-12)
        # context convolution, then two 1x1 convolutions down to out_dim.
        classifier = []
        classifier.append(nn.AvgPool2d(3, stride=1, padding=1))
        classifier.append(nn.Conv2d(512, 1024, kernel_size=3, stride=1,
                                    padding=12, dilation=12))
        classifier.append(nn.ReLU(inplace=True))
        classifier.append(nn.Conv2d(1024, 1024, kernel_size=1, stride=1, padding=0))
        classifier.append(nn.ReLU(inplace=True))
        classifier.append(nn.Dropout(p=0.5))
        classifier.append(nn.Conv2d(1024, out_dim, kernel_size=1))
        self.classifier = nn.Sequential(*classifier)
        self.init_weights()

    def forward(self, x):
        """Return the out_dim-channel score map for input batch ``x``."""
        # (the original bound an unused alias ``im = x`` here; removed)
        x = self.features(x)
        x = self.classifier(x)
        return x

    def init_weights(self):
        """Load pretrained VGG16 weights into ``features`` and Kaiming-init
        the classifier convolutions.

        Downloads torchvision's pretrained VGG16; its ``features``
        state_dict matches ours key-for-key by construction (see __init__).
        """
        vgg = torchvision.models.vgg16(pretrained=True)
        state_vgg = vgg.features.state_dict()
        self.features.load_state_dict(state_vgg)
        for ly in self.classifier.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                nn.init.constant_(ly.bias, 0)
The training/benchmark script:
import torch
import torch.nn as nn
import torch.nn.functional as F
import time
from model import Model
if __name__ == "__main__":
    # Benchmark: multi-scale training on synthetic data across two GPUs.
    net = Model(3, 21)
    net.train()
    net.cuda()
    net = nn.DataParallel(net)

    # ignore_index=255: pixels labelled 255 are excluded from the loss.
    Loss = nn.CrossEntropyLoss(ignore_index=255)
    Loss.cuda()
    optim = torch.optim.SGD(net.parameters(), lr=1e-3,
                            momentum=0.9, weight_decay=5e-4)

    scale = [0.5, 0.75, 1]
    loss_avg = []
    st = time.time()
    for i in range(10000):
        # Synthetic batch: images plus per-pixel class labels in [0, 21).
        # Build the label with the final dtype/shape up front; the original
        # chained randint -> cuda -> torch.tensor(...) -> long -> cuda ->
        # per-scale squeeze, and torch.tensor(existing_tensor) both copies
        # and raises a UserWarning on PyTorch 1.0.
        in_ten = torch.randn(70, 3, 224, 224).cuda()
        label = torch.randint(0, 21, (70, 224, 224)).long().cuda()

        optim.zero_grad()
        H, W = in_ten.size()[2:]
        for s in scale:
            h, w = int(H * s), int(W * s)
            # align_corners=False is the 1.0 default for bilinear; passing
            # it explicitly keeps the same output while silencing the
            # per-call deprecation warning.
            in_ten_s = F.interpolate(in_ten, (h, w), mode='bilinear',
                                     align_corners=False)
            out = net(in_ten_s)
            out = F.interpolate(out, [H, W], mode='bilinear',
                                align_corners=False)
            loss = Loss(out, label)
            loss.backward()
            # .item() does one host sync; cheaper and clearer than the
            # original detach().cpu().numpy() round trip.
            loss_avg.append(loss.item())
        optim.step()

        # Report the mean loss and wall-clock time every 20 iterations.
        # (The original also printed the elapsed time once per scale, three
        # times per iteration, flooding stdout; removed.)
        if i % 20 == 0 and not i == 0:
            ed = time.time()
            interval = ed - st
            st = ed
            mean_loss = sum(loss_avg) / len(loss_avg)
            print('iter: {}, time: {}, loss: {}'.format(i, interval, mean_loss))
            loss_avg = []
This same code runs much slower with PyTorch 1.0 than it does with 0.4.1.
My environment is Python 3.5 on Ubuntu 16.04 with two 1080 Ti GPUs. I installed PyTorch 1.0 with `pip install torch_nightly -f https://download.pytorch.org/whl/nightly/cu90/torch_nightly.html`.
Is there any way to avoid this?