Hi,
I am trying to train the following model on GPU:
import torch.nn as nn
class ConvLSTM(nn.Module):
    """Convolutional LSTM model to classify raw waveform files.

    The architecture is:
        Input -> Conv -> ReLU -> Pooling -> LSTM -> Linear -> Softmax

    Properties:
        CONVOLUTIONAL LAYER:
            conv_kernel_size: The size of the filters for the conv. layer
            conv_stride: Stride for the conv. layer
            num_features: Number of output channels for conv. layer
        MAX POOLING LAYER:
            pooling_kernel: Size of the max-pooling window. Pooling is
                skipped entirely when this is not greater than 1.
        LSTM:
            hidden_size: The dimension of the hidden state
            num_layers: Number of hidden layers inside lstm
        num_of_classes: Number of classes to classify into
        bias: Have/not have bias terms in the layers
    """

    def __init__(self, conv_kernel_size, conv_stride, num_features,
                 pooling_kernel, hidden_size, num_layers=1,
                 num_of_classes=2, bias=True):
        super(ConvLSTM, self).__init__()
        self.conv_kernel_size = conv_kernel_size
        self.conv_stride = conv_stride
        self.num_features = num_features
        self.pooling_kernel = pooling_kernel
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_of_classes = num_of_classes
        self.bias = bias

        # The network consumes single-channel (mono) waveforms.
        self.conv = nn.Conv1d(
            in_channels=1,
            out_channels=num_features,
            kernel_size=conv_kernel_size,
            stride=conv_stride,
            bias=bias,
        )
        self.relu = nn.ReLU()

        # A pooling window of 1 would be a no-op, so the layer is only
        # created (and only applied in forward) for larger windows.
        if pooling_kernel > 1:
            self.pooling = nn.MaxPool1d(kernel_size=pooling_kernel)

        self.lstm = nn.LSTM(
            input_size=num_features,
            hidden_size=hidden_size,
            num_layers=num_layers,
            # nn.LSTM applies dropout only BETWEEN stacked layers, so it has
            # no effect on a single-layer LSTM; requesting it there merely
            # triggers a warning in current PyTorch releases.
            dropout=0.5 if num_layers > 1 else 0.0,
            bias=bias,
        )
        self.linear = nn.Linear(
            in_features=hidden_size,
            out_features=num_of_classes,
            bias=bias,
        )
        self.softmax = nn.Softmax(dim=1)

    def forward(self, wav_minibatch):
        """Forward pass of the Convolutional LSTM audio based
        ideology classifying network.

        INPUT:
            wav_minibatch: Minibatch of raw waveforms, laid out as
                (batch, 1, samples) as required by the 1-channel Conv1d.
        OUTPUT:
            prob_score: Probability scores over the classes, laid out as
                (batch, num_of_classes).
        """
        x = self.relu(self.conv(wav_minibatch))
        if self.pooling_kernel > 1:
            x = self.pooling(x)

        # Conv output is (batch, features, time); the LSTM (batch_first is
        # left at its default) expects (time, batch, features).
        x = x.permute(2, 0, 1)
        _, (h_n, _) = self.lstm(x)

        # h_n is (num_layers, batch, hidden_size). Classify from the final
        # hidden state of the LAST (top) layer; indexing layer 0 would
        # ignore every stacked layer when num_layers > 1.
        logits = self.linear(h_n[-1])
        return self.softmax(logits)
However, when I try to move the model to the GPU (CUDA):
# Build the classifier: 32 conv filters of width 5 (stride 1), max-pooling
# over windows of 2, then a single-layer LSTM with a 1024-dim hidden state
# feeding a 2-class output.
classifier = ConvLSTM(
    conv_kernel_size=5, conv_stride=1, num_features=32,
    pooling_kernel=2,
    hidden_size=1024, num_layers=1,
    num_of_classes=2, bias=True,
)
# Move the model to the GPU when one is available; otherwise keep it on CPU.
classifier = classifier.cuda() if torch.cuda.is_available() else classifier
I get the following AssertionError, which carries no message of its own:
AssertionError                            Traceback (most recent call last)
<ipython-input-23-9dc2b59bee55> in <module>()
3 if torch.cuda.is_available():
----> 4 classifier = classifier.cuda()
/home/zsolt/anaconda3/lib/python3.5/site-packages/torch/nn/modules/module.py in cuda(self, device)
214 Module: self
215 """
--> 216 return self._apply(lambda t: t.cuda(device))
217
218 def cpu(self):
/home/zsolt/anaconda3/lib/python3.5/site-packages/torch/nn/modules/module.py in _apply(self, fn)
144 def _apply(self, fn):
145 for module in self.children():
--> 146 module._apply(fn)
147
148 for param in self._parameters.values():
/home/zsolt/anaconda3/lib/python3.5/site-packages/torch/nn/modules/rnn.py in _apply(self, fn)
121 def _apply(self, fn):
122 ret = super(RNNBase, self)._apply(fn)
--> 123 self.flatten_parameters()
124 return ret
125
/home/zsolt/anaconda3/lib/python3.5/site-packages/torch/nn/modules/rnn.py in flatten_parameters(self)
109 # Slice off views into weight_buf
110 all_weights = [[p.data for p in l] for l in self.all_weights]
--> 111 params = rnn.get_parameters(fn, handle, fn.weight_buf)
112
113 # Copy weights and update their storage
/home/zsolt/anaconda3/lib/python3.5/site-packages/torch/backends/cudnn/rnn.py in get_parameters(fn, handle, weight_buf)
163 # might as well merge the CUDNN ones into a single tensor as well
164 if linear_id == 0 or linear_id == num_linear_layers / 2:
--> 165 assert filter_dim_a.prod() == filter_dim_a[0]
166 size = (filter_dim_a[0] * num_linear_layers // 2, filter_dim_a[2])
167 param = fn.weight_buf.new().set_(
AssertionError:
Any suggestions?