AssertionError in LSTM layer on GPU

I have the following network that includes an LSTM layer:

import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable

class TopicEmbedding(nn.Module):
    
    def __init__(self, input_shape = (sequence_length,), d = 200):
        super(TopicEmbedding, self).__init__()
        self.embedding_layer = nn.Embedding(num_embeddings = sequence_dictionary_size,
                                            embedding_dim = embedding_dim_encode)
        self.embedding_layer.weight = nn.Parameter(torch.from_numpy(embedding_matrix_sequence).type(torch.FloatTensor).cuda())
        self.embedding_layer.weight.requires_grad = False
        self.conv_layer = nn.Sequential(
            nn.Conv2d(in_channels = 128, out_channels = 32,
                      kernel_size = (1, 3), stride = 1),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(kernel_size = (1, 2)),
            nn.Conv2d(in_channels = 32, out_channels = 16,
                      kernel_size = (1, 3), stride = 1),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.MaxPool2d(kernel_size = (1, 2)))
        
        print self.embedding_layer.weight.data.type()  # checkpoint: should print 'torch.cuda.FloatTensor'
        self.n_lstm_layers = 1
        self.lstm_layer = nn.LSTM(embedding_dim_encode, 128, 1)
        
        convolution_output_size = self._get_conv_output(input_shape)
        
        self.fc_layer = nn.Sequential(nn.Linear(convolution_output_size, 32), nn.Tanh(), nn.Linear(32, d), nn.Tanh())

    def _get_conv_output(self, shape):
        bs = 1
        input = Variable(torch.zeros(bs, *shape).type(torch.LongTensor).cuda())
        convoluted_features = self._forward_features(input)
        n_size = convoluted_features.data.view(bs, -1).size(1)
        return n_size

    def _forward_features(self, x):
        x = self.embedding_layer(x)
        x = x.permute(1, 0, 2) # sequence length x batch x embedding dim
        hidden = self.init_hidden(x.size(1)) # 1 x batch x hidden dim
        x, _ = self.lstm_layer(x, hidden) # sequence length x batch_size x hidden_dimension
        x = torch.unsqueeze(x, 0)
        x = x.permute(2, 3, 0, 1) # reorder to batch x channels (LSTM hidden dim, 128) x height (1) x width (sequence length)
        x = self.conv_layer(x)
        return x
    
    def init_hidden(self, batch_size):
        return (Variable(torch.zeros(self.n_lstm_layers, batch_size, 128).type(torch.FloatTensor).cuda()),
                Variable(torch.zeros(self.n_lstm_layers, batch_size, 128).type(torch.FloatTensor).cuda()))

    def forward(self, x):
        out = self._forward_features(x)
        out = out.view(out.size(0), -1)
        out = self.fc_layer(out)
        out_norm = torch.norm(out, p=2, dim=1).detach()  # row-wise L2 norm, detached so it is treated as a constant
        out = torch.div(out, out_norm.expand_as(out))
        return out

And the network is fitted using the following code:

from itertools import ifilter

batch_size = 50
d = 50
num_epoch = 100

topic_embedding = TopicEmbedding(d = d)
topic_embedding.cuda()

P = np.eye(num_sample)
C = generate_target(num_sample, d)

optimizer = torch.optim.Adam(params = ifilter(lambda p: p.requires_grad, topic_embedding.parameters()), lr=0.001)

for i in range(num_epoch):

    idx = np.random.permutation(num_sample)
    j = 0
    
    while j <= num_sample - batch_size:
        batch_idx = idx[j:(j+batch_size)]
            
        input_batch = Variable(torch.from_numpy(dat_seq_train[batch_idx]).type(torch.LongTensor).cuda())
        topic_embedding.zero_grad()
        output_batch = topic_embedding(input_batch)

        if i % 3 == 0:            
            idx_in, idx_out = compute_assignment(output_batch.cpu().data.numpy(), C[batch_idx] )
            
            P[batch_idx[idx_in]] = P[batch_idx[idx_out]]
            C = P.dot(C)
            
        target_batch = Variable(torch.from_numpy(C[batch_idx]).type(torch.FloatTensor))
            
        loss = -torch.trace(torch.mm(target_batch, output_batch.transpose(1, 0)))
        loss.backward()
        optimizer.step()
        
        j += batch_size
        
    if i % 10 == 0:
        print i, loss

Notice that the input to the LSTM layer is the output of the embedding layer, which I have verified to be of type torch.cuda.FloatTensor by inserting checkpoints, as suggested in the thread “Error on forward pass in LSTM module”. The checkpoint is roughly the following (a standalone sketch; the layer sizes are arbitrary):
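import torch
import torch.nn as nn
from torch.autograd import Variable

emb = nn.Embedding(num_embeddings = 1000, embedding_dim = 128).cuda()
ids = Variable(torch.zeros(1, 5).type(torch.LongTensor).cuda())
print emb(ids).data.type()   # prints 'torch.cuda.FloatTensor'

However, I still encounter the AssertionError: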

AssertionError                        Traceback (most recent call last)
<ipython-input-32-9e5402cf6c12> in <module>()
      3 num_epoch = 100
      4 
----> 5 topic_embedding = TopicEmbedding(d = d)
      6 topic_embedding.cuda()
      7 

<ipython-input-31-4940794a75e2> in __init__(self, input_shape, d)
     33         self.lstm_layer = nn.LSTM(embedding_dim_encode, 128, 1)
     34 
---> 35         convolution_output_size = self._get_conv_output(input_shape)
     36 
     37         self.fc_layer = nn.Sequential(nn.Linear(convolution_output_size, 32), nn.Tanh(), nn.Linear(32, d),nn.Tanh())

<ipython-input-31-4940794a75e2> in _get_conv_output(self, shape)
     40         bs = 1
     41         input = Variable(torch.zeros(bs, *shape).type(torch.LongTensor).cuda())
---> 42         convoluted_features = self._forward_features(input)
     43         n_size = convoluted_features.data.view(bs, -1).size(1)
     44         return n_size

<ipython-input-31-4940794a75e2> in _forward_features(self, x)
     48         x = x.permute(1, 0, 2) # sequence length x batch x embedding dim
     49         hidden = self.init_hidden(x.size(1)) # 1 x batch x hidden dim
---> 50         x, _ = self.lstm_layer(x, hidden) # sequence length x batch_size x hidden_dimension
     51         x = torch.unsqueeze(x, 0)
     52         x = x.permute(2, 3, 0, 1) # order the dimension according to n_input, n_channels (embedding_dimension), height(1), width(sequence length)

/usr/local/lib64/python2.7/site-packages/torch/nn/modules/module.pyc in __call__(self, *input, **kwargs)
    222         for hook in self._forward_pre_hooks.values():
    223             hook(self, input)
--> 224         result = self.forward(*input, **kwargs)
    225         for hook in self._forward_hooks.values():
    226             hook_result = hook(self, input, result)

/usr/local/lib64/python2.7/site-packages/torch/nn/modules/rnn.pyc in forward(self, input, hx)
    160             flat_weight=flat_weight
    161         )
--> 162         output, hidden = func(input, self.all_weights, hx)
    163         if is_packed:
    164             output = PackedSequence(output, batch_sizes)

/usr/local/lib64/python2.7/site-packages/torch/nn/_functions/rnn.pyc in forward(input, *fargs, **fkwargs)
    349         else:
    350             func = AutogradRNN(*args, **kwargs)
--> 351         return func(input, *fargs, **fkwargs)
    352 
    353     return forward

/usr/local/lib64/python2.7/site-packages/torch/autograd/function.pyc in _do_forward(self, *input)
    282         self._nested_input = input
    283         flat_input = tuple(_iter_variables(input))
--> 284         flat_output = super(NestedIOFunction, self)._do_forward(*flat_input)
    285         nested_output = self._nested_output
    286         nested_variables = _unflatten(flat_output, self._nested_output)

/usr/local/lib64/python2.7/site-packages/torch/autograd/function.pyc in forward(self, *args)
    304     def forward(self, *args):
    305         nested_tensors = _map_variable_tensor(self._nested_input)
--> 306         result = self.forward_extended(*nested_tensors)
    307         del self._nested_input
    308         self._nested_output = result

/usr/local/lib64/python2.7/site-packages/torch/nn/_functions/rnn.pyc in forward_extended(self, input, weight, hx)
    291             hy = tuple(h.new() for h in hx)
    292 
--> 293         cudnn.rnn.forward(self, input, hx, weight, output, hy)
    294 
    295         self.save_for_backward(input, hx, weight, output)

/usr/local/lib64/python2.7/site-packages/torch/backends/cudnn/rnn.pyc in forward(fn, input, hx, weight, output, hy)
    257             w.zero_()
    258             params = get_parameters(fn, handle, w)
--> 259             _copyParams(weight, params)
    260         else:
    261             fn.w_desc = init_weight_descriptor(fn, fn.weight_buf)

/usr/local/lib64/python2.7/site-packages/torch/backends/cudnn/rnn.pyc in _copyParams(params_from, params_to)
    184         # use biases, zip will terminate once layer_params_from ends and ignore them.
    185         for param_from, param_to in zip(layer_params_from, layer_params_to):
--> 186             assert param_from.type() == param_to.type()
    187             param_to.copy_(param_from, broadcast=False)
    188 

AssertionError: 

If I remove all “.cuda()” calls from the code above, it runs on the CPU with no error. I wonder if someone can give me suggestions on this issue.

OK, I fixed the error myself. The problem is that

topic_embedding.cuda()

is executed after the network is initialized. As a result, during initialization the LSTM weights are still of type torch.FloatTensor, while the input and hidden state of that layer are hard-coded as torch.cuda.FloatTensor, so the check “assert param_from.type() == param_to.type()” in cudnn’s _copyParams (visible at the bottom of the traceback) fails.
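The mismatch can be reproduced in isolation (a sketch with arbitrary sizes, on the same PyTorch version as above):

import torch
import torch.nn as nn
from torch.autograd import Variable

lstm = nn.LSTM(100, 128, 1)                  # weights are still torch.FloatTensor
x = Variable(torch.zeros(5, 1, 100).cuda())  # input is torch.cuda.FloatTensor
h = (Variable(torch.zeros(1, 1, 128).cuda()),
     Variable(torch.zeros(1, 1, 128).cuda()))
out, _ = lstm(x, h)                          # trips the same assert in cudnn's _copyParams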

To fix this, I need to remove “.cuda()” from all variables within the network definition block. In addition, the “init_hidden” function also needs to be modified so that it adapts to both CPU and GPU:

def init_hidden(self, x):
    # x.data.new(...) allocates tensors of the same type (and on the same device) as x,
    # so the hidden state is automatically CPU or GPU depending on the input.
    return (Variable(x.data.new(self.n_lstm_layers, x.size(1), 128).zero_()),
            Variable(x.data.new(self.n_lstm_layers, x.size(1), 128).zero_()))
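The call site in _forward_features changes accordingly, passing the tensor itself rather than just a batch size:

hidden = self.init_hidden(x)   # x decides whether the hidden state lives on the CPU or GPU
x, _ = self.lstm_layer(x, hidden)

With the hard-coded “.cuda()” calls gone, the network is initialized entirely on the CPU, and topic_embedding.cuda() then moves all parameters to the GPU in one consistent step.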