I am trying to switch my code to run on CUDA-enabled machine. I received a warning and AssertionError below. The code works fine if setting cuda_on = False. Since the error message is very brief, I didn’t know where the problem is. Any suggestion how to solve it? Thanks!
Error message:
char_rnn_shakespeare.py:33: UserWarning: RNN module weights are not part of
single contiguous chunk of memory. This means they need to be compacted at
every call, possibly greately increasing memory usage. To compact weights again
call flatten_parameters().
output, self.hidden = self.lstm(input, self.hidden)
Traceback (most recent call last):
File "char_rnn_shakespeare.py", line 213, in <module>
all_losses = start_training()
File "char_rnn_shakespeare.py", line 193, in start_training
output, loss = train(input, target)
File "char_rnn_shakespeare.py", line 157, in train
output = rnn.forward(input)
File "char_rnn_shakespeare.py", line 33, in forward
output, self.hidden = self.lstm(input, self.hidden)
File "/home/chaiyong/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py", line 224, in __call__
result = self.forward(*input, **kwargs)
File "/home/chaiyong/anaconda3/lib/python3.6/site-packages/torch/nn/modules/rnn.py", line 162, in forward
output, hidden = func(input, self.all_weights, hx)
File "/home/chaiyong/anaconda3/lib/python3.6/site-packages/torch/nn/_functions/rnn.py", line 351, in forward
return func(input, *fargs, **fkwargs)
File "/home/chaiyong/anaconda3/lib/python3.6/site-packages/torch/autograd/function.py", line 284, in _do_forward
flat_output = super(NestedIOFunction, self)._do_forward(*flat_input)
File "/home/chaiyong/anaconda3/lib/python3.6/site-packages/torch/autograd/function.py", line 306, in forward
result = self.forward_extended(*nested_tensors)
File "/home/chaiyong/anaconda3/lib/python3.6/site-packages/torch/nn/_functions/rnn.py", line 293, in forward_extended
cudnn.rnn.forward(self, input, hx, weight, output, hy)
File "/home/chaiyong/anaconda3/lib/python3.6/site-packages/torch/backends/cudnn/rnn.py", line 259, in forward
_copyParams(weight, params)
File "/home/chaiyong/anaconda3/lib/python3.6/site-packages/torch/backends/cudnn/rnn.py", line 186, in _copyParams
assert param_from.type() == param_to.type()
AssertionError
The code:
from __future__ import unicode_literals, print_function, division
from io import open
import glob
import unicodedata
import string
import torch
import torch.nn as nn
from torch.autograd import Variable
import random
import time
import math
import torch.optim as optim
all_letters = string.ascii_letters + " .,;'-"
n_letters = len(all_letters) + 1 # Plus EOS marker
batch_size = 5
input_length = 10
cuda_on = True
class LSTM(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super(LSTM, self).__init__()
self.hidden_size = hidden_size
self.output_size = output_size
self.o2o = nn.Linear(hidden_size, output_size)
self.lstm = nn.LSTM(input_size, hidden_size, dropout=0.1, num_layers=1)
self.softmax = nn.LogSoftmax()
self.hidden = self.initHidden()
def forward(self, input):
output, self.hidden = self.lstm(input, self.hidden)
output = self.o2o(output)
for v in self.hidden:
v.detach_()
soutput = self.softmax(output[0])
self.lstm.flatten_parameters()
# print(soutput)
return soutput
def initHidden(self):
h0 = Variable(torch.zeros(2, batch_size, self.hidden_size), requires_grad=True)
c0 = Variable(torch.zeros(2, batch_size, self.hidden_size), requires_grad=True)
if torch.cuda.is_available() and cuda_on:
h0 = h0.cuda()
c0 = c0.cuda()
return h0, c0
def findFiles(path): return glob.glob(path)
# Turn a Unicode string to plain ASCII, thanks to http://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
return ''.join(
c for c in unicodedata.normalize('NFD', s)
if unicodedata.category(c) != 'Mn'
and c in all_letters
)
# Read a file and split into lines
def readLines(filename):
lines = open(filename, encoding='utf-8').read().strip().split('\n')
return [unicodeToAscii(line) for line in lines]
# Build a list of names
filename = 'data/shakespeare.txt'
lines = readLines(filename)
# Random item from a list
def randomChoice(l):
return l[random.randint(0, len(l) - 1)]
# Get a random category and random line from that category
def randomTraining():
line = randomChoice(lines)
return line
def inputTensor(lines):
tensors = []
for index, line in enumerate(lines):
tensor = torch.zeros(1, batch_size, n_letters)
for i in range(input_length):
if i < len(line):
tensor[0][index][all_letters.find(line[i])] = 1
else:
tensor[0][index][n_letters - 1] = 1
tensors.append(tensor)
return tensors
def targetTensor(lines):
targets = []
for i in range(1, input_length + 1):
target = []
for idx, line in enumerate(lines):
if i < len(line):
target.append(all_letters.find(line[i]))
else:
target.append(n_letters - 1)
targets.append(target)
return torch.LongTensor(targets)
def randomTrainingExample():
# create input of 5 lines
lines = []
while len(lines) != batch_size:
line = randomTraining()
# skip blank line
while line == "":
line = randomTraining()
lines.append(line)
input_tensors = inputTensor(lines)
target_tensor = targetTensor(lines)
return input_tensors, target_tensor
criterion = nn.NLLLoss()
learning_rate = 0.001
hidden_size = 30
rnn = LSTM(n_letters, hidden_size, n_letters)
optimizer = optim.SGD(rnn.parameters(), lr=learning_rate)
def train(input_line_tensor, target_line_tensor):
rnn.zero_grad()
# rnn.initHidden()
loss = 0
for i in range(len(input_line_tensor)):
input = Variable(input_line_tensor[i])
target = Variable(target_line_tensor[i])
if torch.cuda.is_available() and cuda_on:
input = input.cuda()
target = target.cuda()
output = rnn.forward(input)
loss += criterion(output, target)
loss.backward()
optimizer.step()
return output, loss.data[0] / len(input_line_tensor)
def timeSince(since):
now = time.time()
s = now - since
m = math.floor(s / 60)
s -= m * 60
return '%dm %ds' % (m, s)
n_iters = 1000000
print_every = 50000
plot_every = 500
all_losses = []
def start_training():
total_loss = 0
start = time.time()
# TRAINING
for iter in range(1, n_iters + 1):
input, target = randomTrainingExample()
output, loss = train(input, target)
total_loss += loss
if iter % print_every == 0:
print('%s (%d %d%%) %.4f' % (timeSince(start), iter, iter / n_iters * 100, loss))
if iter % plot_every == 0:
all_losses.append(total_loss / plot_every)
total_loss = 0
torch.save(rnn, "model.data")
return all_losses
all_losses = start_training()