AssertionError after switching to CUDA

I am trying to switch my code to run on a CUDA-enabled machine. I get the warning and AssertionError below. The code works fine when cuda_on = False. Since the error message is very brief, I don't know where the problem is. Any suggestions on how to solve it? Thanks!

Error message:

char_rnn_shakespeare.py:33: UserWarning: RNN module weights are not part of 
single contiguous chunk of memory. This means they need to be compacted at 
every call, possibly greatly increasing memory usage. To compact weights again 
call flatten_parameters().
  output, self.hidden = self.lstm(input, self.hidden)
Traceback (most recent call last):
  File "char_rnn_shakespeare.py", line 213, in <module>
    all_losses = start_training()
  File "char_rnn_shakespeare.py", line 193, in start_training
    output, loss = train(input, target)
  File "char_rnn_shakespeare.py", line 157, in train
    output = rnn.forward(input)
  File "char_rnn_shakespeare.py", line 33, in forward
    output, self.hidden = self.lstm(input, self.hidden)
  File "/home/chaiyong/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py", line 224, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/chaiyong/anaconda3/lib/python3.6/site-packages/torch/nn/modules/rnn.py", line 162, in forward
    output, hidden = func(input, self.all_weights, hx)
  File "/home/chaiyong/anaconda3/lib/python3.6/site-packages/torch/nn/_functions/rnn.py", line 351, in forward
    return func(input, *fargs, **fkwargs)
  File "/home/chaiyong/anaconda3/lib/python3.6/site-packages/torch/autograd/function.py", line 284, in _do_forward
    flat_output = super(NestedIOFunction, self)._do_forward(*flat_input)
  File "/home/chaiyong/anaconda3/lib/python3.6/site-packages/torch/autograd/function.py", line 306, in forward
    result = self.forward_extended(*nested_tensors)
  File "/home/chaiyong/anaconda3/lib/python3.6/site-packages/torch/nn/_functions/rnn.py", line 293, in forward_extended
    cudnn.rnn.forward(self, input, hx, weight, output, hy)
  File "/home/chaiyong/anaconda3/lib/python3.6/site-packages/torch/backends/cudnn/rnn.py", line 259, in forward
    _copyParams(weight, params)
  File "/home/chaiyong/anaconda3/lib/python3.6/site-packages/torch/backends/cudnn/rnn.py", line 186, in _copyParams
    assert param_from.type() == param_to.type()
AssertionError

The code:

from __future__ import unicode_literals, print_function, division
from io import open
import glob
import unicodedata
import string
import torch
import torch.nn as nn
from torch.autograd import Variable
import random
import time
import math
import torch.optim as optim

all_letters = string.ascii_letters + " .,;'-"
n_letters = len(all_letters) + 1 # Plus EOS marker
batch_size = 5
input_length = 10
cuda_on = True


class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.o2o = nn.Linear(hidden_size, output_size)

        self.lstm = nn.LSTM(input_size, hidden_size, dropout=0.1, num_layers=1)
        self.softmax = nn.LogSoftmax()
        self.hidden = self.initHidden()

    def forward(self, input):
        output, self.hidden = self.lstm(input, self.hidden)
        output = self.o2o(output)
        for v in self.hidden:
            v.detach_()
        soutput = self.softmax(output[0])
        self.lstm.flatten_parameters()
        # print(soutput)
        return soutput

    def initHidden(self):
        h0 = Variable(torch.zeros(2, batch_size, self.hidden_size), requires_grad=True)
        c0 = Variable(torch.zeros(2, batch_size, self.hidden_size), requires_grad=True)
        if torch.cuda.is_available() and cuda_on:
            h0 = h0.cuda()
            c0 = c0.cuda()
        return h0, c0

def findFiles(path): return glob.glob(path)


# Turn a Unicode string to plain ASCII, thanks to http://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
        and c in all_letters
    )


# Read a file and split into lines
def readLines(filename):
    lines = open(filename, encoding='utf-8').read().strip().split('\n')
    return [unicodeToAscii(line) for line in lines]


# Build a list of names
filename = 'data/shakespeare.txt'
lines = readLines(filename)


# Random item from a list
def randomChoice(l):
    return l[random.randint(0, len(l) - 1)]


# Get a random category and random line from that category
def randomTraining():
    line = randomChoice(lines)
    return line


def inputTensor(lines):
    tensors = []
    for index, line in enumerate(lines):
        tensor = torch.zeros(1, batch_size, n_letters)
        for i in range(input_length):
            if i < len(line):
                tensor[0][index][all_letters.find(line[i])] = 1
            else:
                tensor[0][index][n_letters - 1] = 1
        tensors.append(tensor)
    return tensors


def targetTensor(lines):
    targets = []
    for i in range(1, input_length + 1):
        target = []
        for idx, line in enumerate(lines):
            if i < len(line):
                target.append(all_letters.find(line[i]))
            else:
                target.append(n_letters - 1)

        targets.append(target)

    return torch.LongTensor(targets)


def randomTrainingExample():
    # create input of 5 lines
    lines = []
    while len(lines) != batch_size:
        line = randomTraining()
        # skip blank line
        while line == "":
            line = randomTraining()
        lines.append(line)

    input_tensors = inputTensor(lines)
    target_tensor = targetTensor(lines)

    return input_tensors, target_tensor


criterion = nn.NLLLoss()
learning_rate = 0.001
hidden_size = 30
rnn = LSTM(n_letters, hidden_size, n_letters)
optimizer = optim.SGD(rnn.parameters(), lr=learning_rate)


def train(input_line_tensor, target_line_tensor):
    rnn.zero_grad()
    # rnn.initHidden()
    loss = 0
    for i in range(len(input_line_tensor)):
        input = Variable(input_line_tensor[i])
        target = Variable(target_line_tensor[i])
        if torch.cuda.is_available() and cuda_on:
            input = input.cuda()
            target = target.cuda()
        output = rnn.forward(input)
        loss += criterion(output, target)

    loss.backward()
    optimizer.step()

    return output, loss.data[0] / len(input_line_tensor)


def timeSince(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


n_iters = 1000000
print_every = 50000
plot_every = 500
all_losses = []


def start_training():
    total_loss = 0

    start = time.time()

    # TRAINING
    for iter in range(1, n_iters + 1):
        input, target = randomTrainingExample()
        output, loss = train(input, target)

        total_loss += loss

        if iter % print_every == 0:
            print('%s (%d %d%%) %.4f' % (timeSince(start), iter, iter / n_iters * 100, loss))

        if iter % plot_every == 0:
            all_losses.append(total_loss / plot_every)
            total_loss = 0

    torch.save(rnn, "model.data")

    return all_losses


all_losses = start_training()

Hi,

You forgot to move your model (and criterion) to the GPU as well:

rnn.cuda()
criterion.cuda()

The failing assert compares param_from.type() with param_to.type(): your inputs and hidden state are torch.cuda.FloatTensor, but the LSTM weights are still torch.FloatTensor on the CPU, so cuDNN refuses to copy the parameters across.
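
In case it helps, here is a minimal sketch of where the calls could go in your script, assuming the rest stays exactly as posted (the comments are mine):

criterion = nn.NLLLoss()
learning_rate = 0.001
hidden_size = 30
rnn = LSTM(n_letters, hidden_size, n_letters)
if torch.cuda.is_available() and cuda_on:
    rnn.cuda()        # moves every parameter of self.lstm and self.o2o to the GPU
    criterion.cuda()  # NLLLoss holds no parameters here, but this keeps things consistent
optimizer = optim.SGD(rnn.parameters(), lr=learning_rate)

It is generally safest to construct the optimizer after moving the model, so it sees the GPU tensors. Your hidden state is already handled, since initHidden() calls h0.cuda() / c0.cuda(), so with the model moved as well the types will match.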

Hi @albanD. Thanks a lot! It works now 🙂