I’m running into an interesting issue with the backward function on a Variable from a very simple network in PyTorch.
When I run the following simple program, execution appears to continue past the Variable.backward() call, but the process never actually exits on its own; I have to kill it manually (in this case with Ctrl+C, which sends SIGINT). This might be intended behaviour, but if so I’m not sure what I should be doing to make the process shut down cleanly.
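My first guess is that something set up by the backward() call leaves a non-daemon thread running, so the interpreter waits on it at shutdown. That is only a guess; a small check I could drop in just before the script returns (standard library only, nothing PyTorch-specific) would be:

import threading

# List the live threads right before exiting; any non-daemon thread
# still alive at this point would keep the interpreter from shutting down.
for t in threading.enumerate():
    print(t.name, 'daemon' if t.daemon else 'non-daemon', t.is_alive())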
$ cat net_test.py
import torch
import sys
import torch.utils
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(100, 75)
        self.fc2 = nn.Linear(75, 25)
        self.fc3 = nn.Linear(25, 1)

    def forward(self, x):
        x = F.elu(self.fc1(x))
        x = F.elu(self.fc2(x))
        x = self.fc3(x)
        return x


if __name__ == '__main__':
    net = Net()
    inp = Variable(torch.randn(1, 100))
    out = net(inp)
    net.zero_grad()
    out.backward(torch.randn(1, 1))
    print('done')
    sys.exit()
$ python
Python 3.6.0 |Anaconda custom (64-bit)| (default, Dec 23 2016, 12:22:00)
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import torch
>>> torch.version.__version__
'0.1.11+8aa1cef'
>>>
$ time python net_test.py
done
^C
real 0m17,992s
user 0m0,223s
sys 0m0,037s
Strangely, similar code runs perfectly fine in an IPython notebook, and I can even get a script that trains the network to work with this Variable.backward() call in place, but that program shows the same behaviour: it never closes on its own. (The only workaround I can think of is forcing the exit; see the sketch at the end of this post.)
$ cat net.py
import torch
import numpy as np
import random
import sys
import torch.utils
import torch.optim as optim
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(100, 75)
        self.fc2 = nn.Linear(75, 25)
        self.fc3 = nn.Linear(25, 1)

    def forward(self, x):
        x = F.elu(self.fc1(x))
        x = F.elu(self.fc2(x))
        x = self.fc3(x)
        return x
if __name__ == '__main__':
    net = Net()
    inp = Variable(torch.randn(1, 100))
    out = net(inp)
    net.zero_grad()
    out.backward(torch.randn(1, 1))

    alpha = 0.01
    optimizer = optim.SGD(net.parameters(), lr=alpha)
    optimizer.zero_grad()

    # Toy dataset: random vectors labelled +1 or -1.
    good_vecs = [np.random.randn(100).astype('float32') for _ in range(0, 20)]
    bad_vecs = [np.random.randn(100).astype('float32') for _ in range(0, 20)]
    good_set = [(vec, [1.0]) for vec in good_vecs]
    bad_set = [(vec, [-1.0]) for vec in bad_vecs]
    shuffled_data = bad_set + good_set
    random.shuffle(shuffled_data)

    vectors = []
    values = []
    for vector, value in shuffled_data:
        vectors.append(torch.from_numpy(vector))
        values.append(torch.Tensor(value))
    vectors = torch.stack(vectors)
    values = torch.stack(values)

    loss = nn.MSELoss()
    for epoch in range(3):
        running_loss = 0.0
        for i in range(0, len(shuffled_data), 4):
            inp = vectors[i:i+4]
            label = values[i:i+4]
            inp, label = Variable(inp), Variable(label)
            optimizer.zero_grad()
            outputs = net(inp)
            this_loss = loss(outputs, label)
            this_loss.backward()
            optimizer.step()
            running_loss += this_loss.data
        print(running_loss)
    print('done')
    sys.exit()
$ time python net.py
9.9317
[torch.FloatTensor of size 1]
8.5297
[torch.FloatTensor of size 1]
7.2956
[torch.FloatTensor of size 1]
done
^C
real 19m41,483s
user 0m0,240s
sys 0m0,037s
Sorry if the numpy interchange looks odd; it mirrors how I’m using PyTorch in a project that is otherwise built around numpy.
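For completeness, the only workaround I can think of is to hard-exit with os._exit(), which terminates the process without running the normal interpreter shutdown, so it should presumably sidestep whatever is hanging. I haven't relied on it because it also skips cleanup (buffered output, atexit handlers), and I'd rather understand the cause. A minimal sketch of that idea, applied to the end of the scripts above:

import os
import sys

# ... forward/backward/training code as above ...
print('done')
sys.stdout.flush()  # flush manually, since os._exit() skips buffered-I/O cleanup
os._exit(0)         # hard exit: bypasses the interpreter shutdown that appears to hang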