Please look at the network code below:
class CNN(nn.Module):
    """Convolutional sentence classifier with several parallel filter widths.

    Token ids are embedded, convolved with one ``Conv2d`` per entry of
    ``filter_sizes`` (each kernel spans ``fs`` tokens and the full embedding
    width), max-pooled over the sequence axis, concatenated, passed through
    dropout and finally projected to ``output_dim`` logits.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Size of each token embedding.
        n_filters: Number of output channels of every convolution.
        filter_sizes: Iterable of kernel heights (in tokens), one conv each.
        output_dim: Number of output logits (classes).
        dropout: Dropout probability applied to the pooled features.
        pad_idx: Index of the padding token, forwarded to ``nn.Embedding`` as
            ``padding_idx``. Pass ``None`` to disable padding handling.
    """

    def __init__(self, vocab_size, embedding_dim, n_filters, filter_sizes, output_dim,
                 dropout, pad_idx = 0):
        super().__init__()
        # pad_idx used to be accepted but silently ignored; wiring it through
        # keeps the padding vector at zero and excludes it from gradient updates.
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx = pad_idx)
        # One convolution per filter size; the kernel covers the whole
        # embedding dimension, so the output width collapses to 1.
        self.convs = nn.ModuleList([
            nn.Conv2d(in_channels = 1,
                      out_channels = n_filters,
                      kernel_size = (fs, embedding_dim))
            for fs in filter_sizes
        ])
        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, text):
        """Return class logits for a batch of token-id sequences.

        Args:
            text: Integer tensor of token ids, expected as
                ``[batch size, sent len]`` (batch first; no permute is
                applied here). ``sent len`` must be at least
                ``max(filter_sizes)`` because the convolutions use no padding.

        Returns:
            Tensor of shape ``[batch size, output_dim]``.
        """
        #text = [batch size, sent len]
        embedded = self.embedding(text)
        #embedded = [batch size, sent len, emb dim]
        embedded = embedded.unsqueeze(1)
        #embedded = [batch size, 1, sent len, emb dim]
        conved = [F.relu(conv(embedded)).squeeze(3) for conv in self.convs]
        #conved_n = [batch size, n_filters, sent len - filter_sizes[n] + 1]
        pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]
        #pooled_n = [batch size, n_filters]
        cat = self.dropout(torch.cat(pooled, dim = 1))
        #cat = [batch size, n_filters * len(filter_sizes)]
        return self.fc(cat)
It is a simple multi-dimensional implementation of a CNN for sentence classification.
I instantiate my network as follows:
# Build the sentence classifier: three parallel filter widths (2, 3 and 4
# tokens) with 100 feature maps each, and one output logit per class.
net = CNN(
    vocab_size=len(v2i),
    embedding_dim=emb_dim_len,
    n_filters=100,
    filter_sizes=[2, 3, 4],
    output_dim=len(Classes),
    dropout=0.5,
)
# Show the layer layout of the constructed module.
print(net)
Output:
CNN(
(embedding): Embedding(6925, 100)
(convs): ModuleList(
(0): Conv2d(1, 100, kernel_size=(2, 100), stride=(1, 1))
(1): Conv2d(1, 100, kernel_size=(3, 100), stride=(1, 1))
(2): Conv2d(1, 100, kernel_size=(4, 100), stride=(1, 1))
)
(fc): Linear(in_features=300, out_features=20, bias=True)
(dropout): Dropout(p=0.5, inplace=False)
)
Here, vocab_size = 6925, emb_dim_len = 100, and output_dim = 20.
I instantiate it on CPU and try the network on a custom input. It works fine.
But when I try to send it to GPU using the following:
# Move every parameter and buffer of the model to the first CUDA device.
net = net.to(torch.device('cuda:0'))
I get the following stack trace:
File "<ipython-input-104-9aae86a2a66d>", line 1, in <module>
net = net.to('cuda')
File "C:\Users\uchih\Anaconda3\lib\site-packages\torch\nn\modules\module.py", line 425, in to
return self._apply(convert)
File "C:\Users\uchih\Anaconda3\lib\site-packages\torch\nn\modules\module.py", line 201, in _apply
module._apply(fn)
File "C:\Users\uchih\Anaconda3\lib\site-packages\torch\nn\modules\module.py", line 223, in _apply
param_applied = fn(param)
File "C:\Users\uchih\Anaconda3\lib\site-packages\torch\nn\modules\module.py", line 423, in convert
return t.to(device, dtype if t.is_floating_point() else None, non_blocking)
RuntimeError: CUDA error: device-side assert triggered
Did I do something incorrectly? Is there any way I can run this on the GPU?