I have the following network that includes an LSTM layer:
class TopicEmbedding(nn.Module):
    def __init__(self, input_shape = (sequence_length,), d = 200):
        super(TopicEmbedding, self).__init__()

        self.embedding_layer = nn.Embedding(num_embeddings = sequence_dictionary_size,
                                            embedding_dim = embedding_dim_encode)
        self.embedding_layer.weight = nn.Parameter(torch.from_numpy(embedding_matrix_sequence).type(torch.FloatTensor).cuda())
        self.embedding_layer.weight.requires_grad = False

        self.conv_layer = nn.Sequential(
            nn.Conv2d(in_channels = 128, out_channels = 32,
                      kernel_size = (1, 3), stride = 1),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(kernel_size = (1, 2)),
            nn.Conv2d(in_channels = 32, out_channels = 16,
                      kernel_size = (1, 3), stride = 1),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.MaxPool2d(kernel_size = (1, 2)))

        print self.embedding_layer.weight.data.type

        self.n_lstm_layers = 1
        self.lstm_layer = nn.LSTM(embedding_dim_encode, 128, 1)

        convolution_output_size = self._get_conv_output(input_shape)

        self.fc_layer = nn.Sequential(nn.Linear(convolution_output_size, 32), nn.Tanh(),
                                      nn.Linear(32, d), nn.Tanh())

    def _get_conv_output(self, shape):
        bs = 1
        input = Variable(torch.zeros(bs, *shape).type(torch.LongTensor).cuda())
        convoluted_features = self._forward_features(input)
        n_size = convoluted_features.data.view(bs, -1).size(1)
        return n_size

    def _forward_features(self, x):
        x = self.embedding_layer(x)
        x = x.permute(1, 0, 2)                # sequence length x batch x embedding dim
        hidden = self.init_hidden(x.size(1))  # 1 x batch x hidden dim
        x, _ = self.lstm_layer(x, hidden)     # sequence length x batch_size x hidden_dimension
        x = torch.unsqueeze(x, 0)
        x = x.permute(2, 3, 0, 1)             # order the dimensions: n_input, n_channels (embedding_dimension), height (1), width (sequence length)
        x = self.conv_layer(x)
        return x

    def init_hidden(self, batch_size):
        return (Variable(torch.zeros(self.n_lstm_layers, batch_size, 128).type(torch.FloatTensor).cuda()),
                Variable(torch.zeros(self.n_lstm_layers, batch_size, 128).type(torch.FloatTensor).cuda()))

    def forward(self, x):
        out = self._forward_features(x)
        out = out.view(out.size(0), -1)
        out = self.fc_layer(out)
        out_norm = torch.norm(out, p=2, dim=1).detach()
        out = torch.div(out, out_norm.expand_as(out))
        return out
The network is then fitted using the following code:
batch_size = 50
d = 50
num_epoch = 100

topic_embedding = TopicEmbedding(d = d)
topic_embedding.cuda()

P = np.eye(num_sample)
C = generate_target(num_sample, d)

optimizer = torch.optim.Adam(params = ifilter(lambda p: p.requires_grad, topic_embedding.parameters()), lr = 0.001)

for i in range(num_epoch):
    idx = np.random.permutation(num_sample)
    j = 0
    while j <= num_sample - batch_size:
        batch_idx = idx[j:(j + batch_size)]
        input_batch = Variable(torch.from_numpy(dat_seq_train[batch_idx]).type(torch.LongTensor).cuda())

        topic_embedding.zero_grad()
        output_batch = topic_embedding(input_batch)

        if i % 3 == 0:
            idx_in, idx_out = compute_assignment(output_batch.cpu().data.numpy(), C[batch_idx])
            P[batch_idx[idx_in]] = P[batch_idx[idx_out]]
            C = P.dot(C)

        target_batch = Variable(torch.from_numpy(C[batch_idx]).type(torch.FloatTensor))
        loss = -torch.trace(torch.mm(target_batch, output_batch.transpose(1, 0)))
        loss.backward()
        optimizer.step()

        j += batch_size

    if i % 10 == 0:
        print i, loss
Notice that the input to the LSTM layer is the output of the embedding layer, which I have verified to be of type torch.cuda.FloatTensor by inserting checkpoints (as shown below). This is the check suggested by “Error on forward pass in LSTM module”.
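For reference, the checkpoint looks roughly like this (the print line is only a debugging aid and is not part of the model shown above):

def _forward_features(self, x):
    x = self.embedding_layer(x)
    print x.data.type()   # checkpoint: prints 'torch.cuda.FloatTensor' when running on the GPU
    ...

However, I still encounter the following AssertionError: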
AssertionError Traceback (most recent call last)
<ipython-input-32-9e5402cf6c12> in <module>()
3 num_epoch = 100
4
----> 5 topic_embedding = TopicEmbedding(d = d)
6 topic_embedding.cuda()
7
<ipython-input-31-4940794a75e2> in __init__(self, input_shape, d)
33 self.lstm_layer = nn.LSTM(embedding_dim_encode, 128, 1)
34
---> 35 convolution_output_size = self._get_conv_output(input_shape)
36
37 self.fc_layer = nn.Sequential(nn.Linear(convolution_output_size, 32), nn.Tanh(), nn.Linear(32, d),nn.Tanh())
<ipython-input-31-4940794a75e2> in _get_conv_output(self, shape)
40 bs = 1
41 input = Variable(torch.zeros(bs, *shape).type(torch.LongTensor).cuda())
---> 42 convoluted_features = self._forward_features(input)
43 n_size = convoluted_features.data.view(bs, -1).size(1)
44 return n_size
<ipython-input-31-4940794a75e2> in _forward_features(self, x)
48 x = x.permute(1, 0, 2) # sequence length x batch x embedding dim
49 hidden = self.init_hidden(x.size(1)) # 1 x batch x hidden dim
---> 50 x, _ = self.lstm_layer(x, hidden) # sequence length x batch_size x hidden_dimension
51 x = torch.unsqueeze(x, 0)
52 x = x.permute(2, 3, 0, 1) # order the dimension according to n_input, n_channels (embedding_dimension), height(1), width(sequence length)
/usr/local/lib64/python2.7/site-packages/torch/nn/modules/module.pyc in __call__(self, *input, **kwargs)
222 for hook in self._forward_pre_hooks.values():
223 hook(self, input)
--> 224 result = self.forward(*input, **kwargs)
225 for hook in self._forward_hooks.values():
226 hook_result = hook(self, input, result)
/usr/local/lib64/python2.7/site-packages/torch/nn/modules/rnn.pyc in forward(self, input, hx)
160 flat_weight=flat_weight
161 )
--> 162 output, hidden = func(input, self.all_weights, hx)
163 if is_packed:
164 output = PackedSequence(output, batch_sizes)
/usr/local/lib64/python2.7/site-packages/torch/nn/_functions/rnn.pyc in forward(input, *fargs, **fkwargs)
349 else:
350 func = AutogradRNN(*args, **kwargs)
--> 351 return func(input, *fargs, **fkwargs)
352
353 return forward
/usr/local/lib64/python2.7/site-packages/torch/autograd/function.pyc in _do_forward(self, *input)
282 self._nested_input = input
283 flat_input = tuple(_iter_variables(input))
--> 284 flat_output = super(NestedIOFunction, self)._do_forward(*flat_input)
285 nested_output = self._nested_output
286 nested_variables = _unflatten(flat_output, self._nested_output)
/usr/local/lib64/python2.7/site-packages/torch/autograd/function.pyc in forward(self, *args)
304 def forward(self, *args):
305 nested_tensors = _map_variable_tensor(self._nested_input)
--> 306 result = self.forward_extended(*nested_tensors)
307 del self._nested_input
308 self._nested_output = result
/usr/local/lib64/python2.7/site-packages/torch/nn/_functions/rnn.pyc in forward_extended(self, input, weight, hx)
291 hy = tuple(h.new() for h in hx)
292
--> 293 cudnn.rnn.forward(self, input, hx, weight, output, hy)
294
295 self.save_for_backward(input, hx, weight, output)
/usr/local/lib64/python2.7/site-packages/torch/backends/cudnn/rnn.pyc in forward(fn, input, hx, weight, output, hy)
257 w.zero_()
258 params = get_parameters(fn, handle, w)
--> 259 _copyParams(weight, params)
260 else:
261 fn.w_desc = init_weight_descriptor(fn, fn.weight_buf)
/usr/local/lib64/python2.7/site-packages/torch/backends/cudnn/rnn.pyc in _copyParams(params_from, params_to)
184 # use biases, zip will terminate once layer_params_from ends and ignore them.
185 for param_from, param_to in zip(layer_params_from, layer_params_to):
--> 186 assert param_from.type() == param_to.type()
187 param_to.copy_(param_from, broadcast=False)
188
AssertionError:
If I remove all “.cuda()” calls from the above code, everything runs on the CPU with no error.
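To be concrete, by removing “.cuda()” I mean dropping the call everywhere it appears; for example, two of the affected lines become:

# pretrained embedding weights stay on the CPU
self.embedding_layer.weight = nn.Parameter(torch.from_numpy(embedding_matrix_sequence).type(torch.FloatTensor))

# the model is constructed but never moved to the GPU
topic_embedding = TopicEmbedding(d = d)
# topic_embedding.cuda()

I wonder if someone can give me suggestions on this issue.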