I’m using this kaggle dataset of news articles (News Category Dataset | Kaggle), and have 7 classes:
def news_data():
# load embeddings
with open('embeddings_v1.pkl', "rb") as fIn:
stored_data = pickle.load(fIn)
stored_sentences = stored_data['sentences']
stored_embeddings = stored_data['embeddings']
x = stored_embeddings
x = torch.tensor(x).float()
# load labels
with open('.../News_Category_Dataset_v3.json','r') as f:
jdata = f.read()
jdata2 = [json.loads(line) for line in jdata.split('\n') if line]
df = pd.DataFrame.from_records(jdata2)
label_dict = {'CRIME':0, 'BUSINESS':1, 'SPORTS':2 ,'WEDDINGS':3, 'DIVORCE':4, 'PARENTING':5}
df['label'] = df['category'].map(label_dict).fillna(6).astype(int)
y = df['label']
y = torch.tensor(y).float().unsqueeze(1)
return split_train_test(x, y)
############# Data summary #############
x_train has shape: torch.Size([167622, 384])
y_train has shape: torch.Size([167622, 1])
x_test has shape: torch.Size([41905, 384])
y_test has shape: torch.Size([41905, 1])
#######################################
I am trying to implement a logistic regression model for multiclass classification in pytorch:
class LR(torch.nn.Module):
def __init__(self, n_features, n_outputs):
super(LR, self).__init__()
self.lr = torch.nn.Linear(n_features, n_outputs)
def forward(self, x):
out = torch.sigmoid(self.lr(x))
return out
model = LR(n_features, n_outputs)
# use gradient descent with a learning_rate=0.01
optim = torch.optim.SGD(model.parameters(), lr=0.01)
# use Cross Entropy Loss
criterion = torch.nn.CrossEntropyLoss()
# instantiate the model
n_features = 384
n_outputs = 7
# train the model
EPOCHS = 6
def train(model, optim, criterion, x, y, epochs=EPOCHS):
for e in range(1, epochs + 1):
optim.zero_grad()
out = model(x)
loss = criterion(out, y)
loss.backward()
optim.step()
print(f"Loss at epoch {e}: {loss.data}")
return model
model = train(model, optim, criterion, x_train, y_train)
I run into this error,
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Cell In[15], line 15
12 print(f"Loss at epoch {e}: {loss.data}")
13 return model
---> 15 model = train(model, optim, criterion, x_train, y_train)
Cell In[15], line 9, in train(model, optim, criterion, x, y, epochs)
7 optim.zero_grad()
8 out = model(x)
----> 9 loss = criterion(out, y)
10 loss.backward()
11 optim.step()
File ~\anaconda3\lib\site-packages\torch\nn\modules\module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []
File ~\anaconda3\lib\site-packages\torch\nn\modules\loss.py:1174, in CrossEntropyLoss.forward(self, input, target)
1173 def forward(self, input: Tensor, target: Tensor) -> Tensor:
-> 1174 return F.cross_entropy(input, target, weight=self.weight,
1175 ignore_index=self.ignore_index, reduction=self.reduction,
1176 label_smoothing=self.label_smoothing)
File ~\anaconda3\lib\site-packages\torch\nn\functional.py:3029, in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)
3027 if size_average is not None or reduce is not None:
3028 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 3029 return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
RuntimeError: 0D or 1D target tensor expected, multi-target not supported
what do I need to correct in my code?