I am training my model on a multi-class task using CrossEntropyLoss, but I'm getting the following error:
ValueError: Classification metrics can't handle a mix of multiclass and continuous-multioutput targets
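For context, this error does not come from CrossEntropyLoss itself; I can reproduce it by feeding raw logits to a sklearn metric (accuracy_score here is my assumption about the trigger, my real metric call may differ):

import torch
from sklearn.metrics import accuracy_score

logits = torch.randn(4, 24)              # raw model outputs, shape (batch, C)
labels = torch.tensor([18, 4, 19, 11])   # integer class targets, shape (batch,)

# 1-D integer targets ("multiclass") vs. 2-D float predictions
# ("continuous-multioutput") raises exactly this ValueError:
accuracy_score(labels.numpy(), logits.numpy())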
Here is my training loop:
for i, batch in enumerate(iterator):
    text = batch['sequence']
    features = batch['features']
    label = batch['target']

    optimizer.zero_grad()
    output = model(text, features)
    loss = criterion(output, label)
    ...
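The ... elides the rest of the loop body; roughly, with placeholder names rather than my verbatim code:

    # placeholder sketch, not my verbatim code
    loss.backward()
    optimizer.step()

    # the batch accuracy is computed from `output` and `label` around here,
    # and that is the call that raises the ValueError
    epoch_acc += accuracy_score(label.cpu().numpy(),
                                output.detach().cpu().numpy())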
Info about output:
tensor([[ 0.0436, -0.0301, -0.3313,  0.0675, -0.0690, -0.0067,  0.1439,  0.1636,
          0.0143, -0.1089,  0.0506,  0.0580, -0.0693,  0.0765, -0.1481,  0.0110,
          0.0162, -0.0422,  0.1236, -0.0883, -0.0513, -0.0020, -0.0276, -0.1482],
        [ 0.0752, -0.0756, -0.1759, -0.0140, -0.0878,  0.1035,  0.1359, -0.0080,
         -0.0461, -0.1518, -0.0269, -0.0483, -0.0990, -0.0592, -0.1666, -0.0031,
          0.0484,  0.0079,  0.1189, -0.0485, -0.1961, -0.0044, -0.0004, -0.1201],
        [ 0.1314,  0.1292, -0.3979, -0.0088,  0.0360, -0.0198,  0.0983,  0.0571,
         -0.0215, -0.1564, -0.1584, -0.0038,  0.0310, -0.0390, -0.2015, -0.1018,
          0.1232, -0.0587,  0.1216, -0.0834, -0.1158, -0.0572, -0.2376, -0.1555],
        [ 0.0392, -0.0082, -0.0542,  0.0607,  0.0393,  0.0213,  0.0775,  0.0141,
         -0.0202, -0.0561,  0.0173,  0.0134, -0.0254,  0.0482, -0.0222,  0.0305,
          0.0254,  0.0485,  0.0633, -0.0564,  0.0158, -0.0278, -0.0495, -0.0834]],
       grad_fn=<AddmmBackward>)
shape: torch.Size([4, 24])
Info about label:
tensor([18, 4, 19, 11])
shape: torch.Size([4])
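These shapes match what nn.CrossEntropyLoss documents, i.e. (batch, C) raw logits against (batch,) class indices, and a standalone check confirms the loss itself accepts them:

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
output = torch.randn(4, 24, requires_grad=True)  # (batch, C) raw logits
label = torch.tensor([18, 4, 19, 11])            # (batch,) class indices

loss = criterion(output, label)  # no error from the loss itself
loss.backward()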
And here is my model:
import torch
import torch.nn as nn
import torch.nn.functional as F

class RCNNwithFeatures(nn.Module):
    def __init__(self, vocab_size, feature_size, embedding_length, hidden_size, output_size, n_layers):
        super().__init__()
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.embedding_length = embedding_length
        self.Embedding = nn.Embedding(vocab_size, embedding_length)  # initializing the look-up table
        self.dropout = 0.5
        self.lstm = nn.LSTM(embedding_length, hidden_size, n_layers, dropout=self.dropout, bidirectional=True)
        self.fc = nn.Linear(2 * hidden_size + embedding_length, hidden_size * 2)
        self.fc2 = nn.Linear(feature_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size * 2)
        self.label = nn.Linear(hidden_size * 4, output_size)

    def forward(self, input_sentence, features):
        inputs = self.Embedding(input_sentence)     # (batch, seq_len, emb)
        inputs = inputs.permute(1, 0, 2)            # (seq_len, batch, emb)
        output, _ = self.lstm(inputs)               # bi-LSTM: (seq_len, batch, 2*hidden)
        final_encoding = torch.cat((output, inputs), 2).permute(1, 0, 2)
        x = self.fc(final_encoding)                 # (batch, seq_len, 2*hidden)
        x = x.permute(0, 2, 1)                      # (batch, 2*hidden, seq_len)
        x = F.max_pool1d(x, x.size()[2])            # max over the sequence dimension
        x = x.squeeze(2)                            # (batch, 2*hidden)
        x2 = F.relu(self.fc2(features))
        x2 = F.relu(self.fc3(x2))                   # (batch, 2*hidden)
        y = torch.cat([x, x2], dim=1)               # (batch, 4*hidden)
        logits = self.label(y)                      # (batch, output_size) raw logits
        return logits
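For completeness, this is roughly how I build and call the model (the hyperparameter values here are placeholders, not my real config):

# placeholder sizes, for illustration only
model = RCNNwithFeatures(vocab_size=5000, feature_size=10, embedding_length=100,
                         hidden_size=64, output_size=24, n_layers=2)

text = torch.randint(0, 5000, (4, 30))  # (batch, seq_len) token ids
features = torch.randn(4, 10)           # (batch, feature_size)
print(model(text, features).shape)      # torch.Size([4, 24])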
Notice that output has shape (batch_size, C), which is exactly what the loss expects. I would be thankful for any advice.
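From what I understand, the loss wants raw logits while sklearn metrics want hard class predictions, so I suspect the metric call needs an argmax first (untested sketch):

# convert (batch, C) logits to (batch,) predicted class indices before the metric
preds = output.argmax(dim=1)
acc = accuracy_score(label.cpu().numpy(), preds.cpu().numpy())

Is that the right way to handle it?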