I know this error is common but I am unable to solve this specific problem. I’ve checked this topic and have both the concern checked-marked which were about label_size and vocab_size. It would be glad if anyone could point out the problem. Stuck for 2 days already. It started to show up when I tuned a pre-trained fastText. Please let me know if you need anymore information.
Below is the error code
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-68-10826427ec3f> in <module>
17 # print("Shape : " , _.shape)
18
---> 19 predictions, h = model(inp.permute(1,0).to(device), lens, device ) # TODO:don't need _ #Changed Permute
20
21 # print(targ)
/opt/conda/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
475 result = self._slow_forward(*input, **kwargs)
476 else:
--> 477 result = self.forward(*input, **kwargs)
478 for hook in self._forward_hooks.values():
479 hook_result = hook(self, input, result)
<ipython-input-64-bc1668a9efcd> in forward(self, x, lens, device)
32 def forward(self, x, lens, device ):
33 x = self.embedding(x)
---> 34 self.hidden = self.initialize_hidden_state(device)
35 h = self.initialize_hidden_state(device)
36
<ipython-input-64-bc1668a9efcd> in initialize_hidden_state(self, device)
22 def initialize_hidden_state(self, device):
23 # weight = next(self.parameters()).data
---> 24 return torch.zeros(((self.n_layers, self.batch_sz, self.hidden_units))).to(device)
25 # if (device == "cuda:0"):
26 # hidden = (weight.new(self.n_layers, batch_sz, self.hidden_units).zero_().cuda(),
RuntimeError: cuda runtime error (59) : device-side assert triggered at /pytorch/aten/src/THC/generic/THCTensorCopy.cpp:21
Here is the fine-tuning FastText word-embedding part
!pip install bnlp_toolkit
from bnlp.bengali_fasttext import Bengali_Fasttext
bft = Bengali_Fasttext()
model_name = "/content/drive/My Drive/Research_Shanto/pretrained/saved_model_39.bin"
data = "/content/drive/My Drive/Research_Shanto/Datasets/Ashik Bhai_Sentiment/corpus_39.txt"
epoch = 50
bft.train_fasttext(data, model_name, epoch)
fastText_wv = fText.load_fasttext_format("../input/pretrained/saved_model_39.bin")
weights = torch.FloatTensor(fastText_wv.wv.vectors)
print(weights.shape)
Here is the part where I merge it with the data
# This class creates a word -> index mapping (e.g,. "dad" -> 5) and vice-versa
# (e.g., 5 -> "dad") for the dataset
class ConstructVocab():
def __init__(self, sentences):
self.sentences = sentences
self.word2idx = {}
self.idx2word = {}
self.vocab = set()
self.create_index()
def create_index(self):
# update with individual tokens
self.vocab.update(self.sentences)
# sort the vocab
self.vocab = sorted(self.vocab)
print(self.vocab)
# add a padding token with index 0
self.word2idx['<pad>'] = 0
# word to index mapping
for index, word in enumerate(self.vocab):
self.word2idx[word] = index + 1 # +1 because of pad token
# index to word mapping
for word, index in self.word2idx.items():
self.idx2word[index] = word
inputs = ConstructVocab(fastText_wv.wv.vocab.keys())
With vocab_inp_size = len(inputs.word2idx)
, below is the architecture
class EmoLSTM(nn.Module):
def __init__(self, embedding_matrix, vocab_size, embedding_dim, hidden_units, batch_sz, n_layers, seqLength, device, output_size):
super(EmoLSTM, self).__init__()
self.batch_sz = batch_sz
self.hidden_units = hidden_units
self.embedding_dim = embedding_dim
self.vocab_size = vocab_size
self.output_size = output_size
self.n_layers = n_layers
self.seqLength = seqLength
# layers
self.embedding = nn.Embedding.from_pretrained(torch.FloatTensor(embedding_matrix))
self.dropout = nn.Dropout(p=0.5)
self.lstm = nn.LSTM(self.embedding_dim, self.hidden_units, self.n_layers, bidirectional = False)
self.fc = nn.Linear(self.hidden_units, self.output_size)
def initialize_hidden_state(self, device):
return torch.zeros(((self.n_layers, self.batch_sz, self.hidden_units))).to(device)
def forward(self, x, lens, device ):
x = self.embedding(x)
self.hidden = self.initialize_hidden_state(device)
h = self.initialize_hidden_state(device)
output, _ = self.lstm(x, (self.hidden,h) )
out = output[-1, : , :]
out = self.fc(out)
return out, _
And now the training and validation part
EPOCHS =10
for epoch in range(EPOCHS):
start = time.time()
### Initialize hidden state
# TODO: do initialization here.
total_loss = 0
train_accuracy, val_accuracy = 0, 0
### Training
for (batch, (inp, targ, lens)) in enumerate(train_dataset):
loss = 0
predictions, h = model(inp.permute(1,0).to(device), lens, device ) # TODO:don't need _ #Changed Permute
loss += loss_function(targ.to(device), predictions)
batch_loss = (loss / int(targ.shape[1]))
total_loss += batch_loss
optimizer.zero_grad()
loss.backward()
optimizer.step()
batch_accuracy = accuracy(targ.to(device), predictions)
train_accuracy += batch_accuracy
if batch % 100 == 0:
print('Epoch {} Batch {} Val. Loss {:.4f}'.format(epoch + 1,
batch,
batch_loss.cpu().detach().numpy()))
### Validating
for (batch, (inp, targ, lens)) in enumerate(val_dataset):
predictions,val_ = model(inp.permute(1,0).to(device), lens, device) #Changed Permute
batch_accuracy = accuracy(targ.to(device), predictions)
val_accuracy += batch_accuracy
print('Epoch {} Loss {:.4f} -- Train Acc. {:.4f} -- Val Acc. {:.4f}'.format(epoch + 1,
total_loss / TRAIN_N_BATCH,
train_accuracy / TRAIN_N_BATCH,
val_accuracy / VAL_N_BATCH))
print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))
Thank you in advance.