I am working on a sentiment analysis project with the SST-1 dataset using the torchtext library. As a baseline, I want to build a vanilla softmax classifier: a single linear layer followed by log-softmax, trained with negative log-likelihood loss.
I read the docs for nn.NLLLoss() and (I think) I understand what it does: it expects an input tensor of shape [minibatch, classes] containing log-probabilities, and a target tensor of shape [minibatch] holding the class index of each batch item, so that each item gets categorized into one of the classes.
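To sanity-check that understanding, the following toy example with exactly those shapes runs without a shape error for me (the values are made up purely for illustration):

import torch
import torch.nn as nn
import torch.nn.functional as F

loss_fn = nn.NLLLoss()
log_probs = F.log_softmax(torch.randn(10, 6), dim=1)   # input: [minibatch, classes] log-probabilities
target = torch.randint(0, 6, (10,), dtype=torch.long)  # target: [minibatch] class indices
print(loss_fn(log_probs, target))                      # prints a scalar loss, no error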
Now this is where I hit a wall. I computed the log probabilities with log_softmax and got a tensor of shape (minibatch, classes) (here torch.Size([10, 6])), which I used as the input; I then used the tensor of labels (torch.Size()) as the target.
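If it helps, this is roughly how I check the shapes right before the loss call (using the names from the code below):

log_probs = model(x)
print(log_probs.size())   # torch.Size([10, 6]) in my run
print(y.size())           # whatever this prints is what NLLLoss sees as the target batch size
loss = loss_fn(log_probs, y)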
However, I keep getting the following error:
ValueError: Expected input batch_size (10) to match target batch_size (4).
Could you please point out where things went wrong and how I could fix it? Any help is greatly appreciated!
Below is the relevant code snippet:
(Python 3.5.6, PyTorch 1.0.0, torchtext 0.3.1)
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchtext
from torch.autograd import Variable

# Data: fine-grained SST with a non-sequential label field
TEXT = torchtext.data.Field()
LABEL = torchtext.data.Field(sequential=False, is_target=True)
train, val, test = torchtext.datasets.SST.splits(TEXT, LABEL, fine_grained=True)
TEXT.build_vocab(train)
LABEL.build_vocab(train)
train_iter, val_iter, test_iter = torchtext.data.BucketIterator.splits(
    (train, val, test), batch_size=10, device=-1, repeat=False)


class Softmax(nn.Module):
    """Single linear layer followed by log-softmax."""

    def __init__(self, vocab_size, n_classes, batch_size):
        super(Softmax, self).__init__()
        self.linear = nn.Linear(vocab_size, n_classes, bias=True)

    def forward(self, x):
        out = self.linear(x)
        log_probs = F.log_softmax(out, dim=1)
        return log_probs


def get_one_hot(batch, batch_size, vocab_size):
    # Build a (batch_size, vocab_size) bag-of-words vector for each example
    new_tensor = torch.zeros(batch_size, vocab_size)
    word_indices = torch.transpose(batch.text, 0, 1)
    for batch_item, word_ix in enumerate(word_indices):
        new_tensor[batch_item][word_ix] = 1
    return new_tensor


def train_softmax(train_iter):
    losses = []
    model = Softmax(VOCAB_SIZE, N_CLASSES, BATCH_SIZE)
    loss_fn = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(), LEARNING_RATE)
    for epoch in range(EPOCHS):
        epoch_loss = 0
        for batch in train_iter:
            x = Variable(torch.FloatTensor(get_one_hot(batch, BATCH_SIZE, VOCAB_SIZE)),
                         requires_grad=True)
            y = Variable(batch.label)
            model.zero_grad()
            log_probs = model.forward(x)
            loss = loss_fn(log_probs, y)  # here is the problem
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        losses.append(epoch_loss)
    return model, losses
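For reference, the constants are set along these lines (the learning rate and epoch count below are placeholder values, since the shape mismatch does not depend on them):

VOCAB_SIZE = len(TEXT.vocab)   # vocabulary built from the training split
N_CLASSES = len(LABEL.vocab)   # 6 here, which matches the [10, 6] log-prob shape above
BATCH_SIZE = 10
LEARNING_RATE = 0.1            # placeholder
EPOCHS = 10                    # placeholder

model, losses = train_softmax(train_iter)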