This is my model architecture.
import torch
import torch.nn as nn
import numpy as np  # used in the training loop below

# Define the model
class LSTMModel(nn.Module):
    def __init__(self, vocab_size, output_size, embedding_dim, hidden_size, n_layers, drop_prob=0.5):
        super().__init__()
        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_size
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_size, n_layers, dropout=drop_prob,
                            batch_first=True)
        self.dropout = nn.Dropout(0.3)
        self.fc1 = nn.Linear(hidden_size, 64)
        self.fc2 = nn.Linear(64, 16)
        self.fc3 = nn.Linear(16, output_size)
        self.softmax = nn.Softmax(dim=-1)
    def forward(self, x, hidden):
        # x: (batch_size, seq_len) token indices
        # embedding and LSTM output
        embedded = self.embedding(x)
        out, hidden = self.lstm(embedded, hidden)
        # dropout and fully connected layers
        out = self.dropout(out)
        out = self.fc1(out)
        out = self.dropout(out)
        out = self.fc2(out)
        out = self.dropout(out)
        out = self.fc3(out)
        return self.softmax(out)
    def init_hidden(self, batch_size):
        return (torch.zeros(self.n_layers, batch_size, self.hidden_dim),
                torch.zeros(self.n_layers, batch_size, self.hidden_dim))
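For context, here is a minimal sketch for checking what shape this class actually produces on a dummy batch (the sizes 32 and 165 are just the two numbers from the error message further down, nothing special; a throwaway CPU instance is used here instead of the real model):
check_model = LSTMModel(vocab_size=10000, output_size=5, embedding_dim=100,
                        hidden_size=256, n_layers=2)
dummy_x = torch.randint(0, 10000, (32, 165))   # (batch_size, seq_len) token indices
dummy_hidden = check_model.init_hidden(32)
out = check_model(dummy_x, dummy_hidden)
print(out.shape)  # torch.Size([32, 165, 5]): one prediction per timestep, not one per sequence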
This is my model instantiation.
vocab_size = 10000  # includes index 0, which is reserved for padding
output_size = 5
embedding_dim = 100
hidden_dim = 256
n_layers = 2
model = LSTMModel(vocab_size, output_size, embedding_dim, hidden_dim, n_layers).to(device)
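loss_fn and optimizer are used below but not shown; a minimal setup consistent with the traceback (which shows CrossEntropyLoss) looks like this, with the choice of Adam and the learning rate being assumptions:
loss_fn = nn.CrossEntropyLoss()                             # confirmed by the traceback below
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)   # assumption: optimizer and lr are not shown in the post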
This is my training loop.
train_loss = []
train_accuracies= []
val_loss = []
val_accuracies = []
for i in range(10):
    y_pred = []
    y_true = []
    print(f'***** Epoch {i} *****')
    train_epoch_losses, train_epoch_accuracies = [], []
    val_epoch_accuracies, val_epoch_losses = [], []
    # training loss
    for ix, batch in enumerate(trn_ldr):
        x, y = batch
        batch_size = x.size(0)
        h0, c0 = model.init_hidden(batch_size)
        h0 = h0.to(device)
        c0 = c0.to(device)
        k = (h0, c0)
        y = torch.tensor(y).to(device)
        train_epoch_losses.append(train_batch(x, k, y, model, loss_fn, optimizer))
    train_epoch_loss = np.array(train_epoch_losses).mean()
    print(f'***** Epoch {i} training loss: {train_epoch_loss} *****')
    # training accuracy
    for ix, batch in enumerate(trn_ldr):
        x, y = batch
        batch_size = x.size(0)
        h0, c0 = model.init_hidden(batch_size)
        h0 = h0.to(device)
        c0 = c0.to(device)
        k = (h0, c0)
        y = torch.tensor(y).to(device)
        train_epoch_accuracies.append(sum(accuracy(x, y, model)) / len(y))
    train_epoch_accuracy = np.array(train_epoch_accuracies).mean()
    print(f'***** Epoch {i} training accuracy: {train_epoch_accuracy} *****')
    # validation loss
    for ix, batch in enumerate(test_ldr):
        x, y = batch
        batch_size = x.size(0)
        h0, c0 = model.init_hidden(batch_size)
        h0 = h0.to(device)
        c0 = c0.to(device)
        k = (h0, c0)
        y = torch.tensor(y).to(device)
        val_epoch_losses.append(val_loss_trn(x, y, model, loss_fn))
    val_epoch_loss = np.array(val_epoch_losses).mean()
    print(f'***** Epoch {i} validation loss: {val_epoch_loss} *****')
    # validation accuracy
    for ix, batch in enumerate(test_ldr):
        x, y = batch
        batch_size = x.size(0)
        h0, c0 = model.init_hidden(batch_size)
        h0 = h0.to(device)
        c0 = c0.to(device)
        k = (h0, c0)
        y_true.append(y)
        y = torch.tensor(y).to(device)
        # call accuracy once and reuse its return value
        acc_out = accuracy(x, y, model, test=True)
        test_acc_return = acc_out[0].tolist()
        y_pred.append(acc_out[1])
        val_epoch_accuracies.append(sum(test_acc_return) / len(y))
    val_epoch_accuracy = np.array(val_epoch_accuracies).mean()
    print(f'***** Epoch {i} validation accuracy: {val_epoch_accuracy} *****')
    print('\n')
    train_loss.append(train_epoch_loss)
    train_accuracies.append(train_epoch_accuracy)
    val_accuracies.append(val_epoch_accuracy)
    val_loss.append(val_epoch_loss)
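Since the error is raised inside train_batch, here is its body as far as the traceback below shows it (anything before the model(x, k) line and after optimizer.step() is not visible in the traceback, so those parts are left out):
def train_batch(x, k, y, model, loss_fn, opt):
    prediction = model(x, k)
    y = torch.tensor(y)
    batch_loss = loss_fn(torch.tensor(prediction, requires_grad=True), y)
    batch_loss.backward()
    optimizer.step()   # note: this steps the global optimizer, not the opt parameter
    ...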
And this is my error.
ValueError Traceback (most recent call last)
Cell In [251], line 33
31 y = torch.tensor(y).to(device)
32 # x = x.permute(0, 3,1,2)
---> 33 train_epoch_losses.append(train_batch(x, k, y, model, loss_fn, optimizer))
34 train_epoch_loss = np.array(train_epoch_losses).mean()
35 print(f'Epoch: _________*****{i} Training Loss : {train_epoch_loss} *****_______')
Cell In [217], line 5, in train_batch(x, k, y, model, loss_fn, opt)
3 prediction = model(x, k)
4 y = torch.tensor(y)
----> 5 batch_loss = loss_fn(torch.tensor(prediction, requires_grad=True), y)
6 batch_loss.backward()
7 optimizer.step()
File ~/Library/Python/3.9/lib/python/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []
File ~/Library/Python/3.9/lib/python/site-packages/torch/nn/modules/loss.py:1174, in CrossEntropyLoss.forward(self, input, target)
1173 def forward(self, input: Tensor, target: Tensor) -> Tensor:
-> 1174 return F.cross_entropy(input, target, weight=self.weight,
1175 ignore_index=self.ignore_index, reduction=self.reduction,
1176 label_smoothing=self.label_smoothing)
File ~/Library/Python/3.9/lib/python/site-packages/torch/nn/functional.py:3029, in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)
3027 if size_average is not None or reduce is not None:
3028 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 3029 return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
ValueError: Expected input batch_size (165) to match target batch_size (32).
So the loss function is receiving 165 inputs but only 32 targets. Can anyone help me solve this issue? I am new to NLP, torchtext, and PyTorch.