I’m working on text classification problem.
here is all the code
import torch.nn as nn
import torchtext.data as data
import pandas as pd
import torch
import torch.nn.functional as F
Text = data.Field(batch_first=True, include_lengths=True)
LABEL = data.LabelField(dtype=torch.float, batch_first=True)
feilds = [('text', Text), ('target', LABEL)]
train, val = data.TabularDataset(path='processed_data.csv', format='csv', fields=feilds).split()
Text.build_vocab(train, max_size=3000, min_freq=3, vectors='glove.twitter.27B.50d')
LABEL.build_vocab(train)
class text_classifier(nn.Module):
def __init__(self, vocab_size, embedding_dim=50):
super(text_classifier, self).__init__()
self.Embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=Text.vocab[Text.pad_token])
self.LSTM = nn.LSTM(embedding_dim, 30, 3,bidirectional=True, dropout=0.5, batch_first=True)
self.Linear = nn.Linear(30, 1)
def forward(self, x, length):
x = self.Embedding(x)
x = nn.utils.rnn.pack_padded_sequence(x, length, True)
x, (hidden, cell) = self.LSTM(x)
x = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
x = self.Linear(x)
return x
train, val = data.BucketIterator.splits((train, val), batch_sizes=(32, 32), sort_within_batch=True,
device='cpu',sort_key=lambda x: len(x.text))
loss = nn.BCEWithLogitsLoss()
clf = text_classifier(len(Text.vocab))
for (text, labels) in train:
text, length = text
output = clf(text, length)
loss = loss(output, labels)
print(loss)
it raise this error:
Traceback (most recent call last):
File "C:/Users/BHAAK/Desktop/ML_PATH/dirty-hands/dirty-hands file 3/Project.py", line 44, in <module>
output = clf(text, length)
File "C:\Users\BHAAK\AppData\Local\Programs\Python\Python36\lib\site-packages\torch\nn\modules\module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "C:/Users/BHAAK/Desktop/ML_PATH/dirty-hands/dirty-hands file 3/Project.py", line 31, in forward
x = self.Linear(x)
File "C:\Users\BHAAK\AppData\Local\Programs\Python\Python36\lib\site-packages\torch\nn\modules\module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "C:\Users\BHAAK\AppData\Local\Programs\Python\Python36\lib\site-packages\torch\nn\modules\linear.py", line 91, in forward
return F.linear(input, self.weight, self.bias)
File "C:\Users\BHAAK\AppData\Local\Programs\Python\Python36\lib\site-packages\torch\nn\functional.py", line 1674, in linear
ret = torch.addmm(bias, input, weight.t())
RuntimeError: size mismatch, m1: [32 x 60], m2: [30 x 1] at ..\aten\src\TH/generic/THTensorMath.cpp:41
here is info about my dataset
print(text.shape) # torch.Size([32, 19])
print(text) # part of the hole batch [ 348, 1528, 1117, 2, 3, 0, 235, 2516, 0, 42, 2481, 2412, 6, 1525, 2830, 6, 0, 0, 2]
print(labels.shape) # torch.Size([32])
print(labels) # tensor([0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 1., 0., 0., 0., 1., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0.])
print(length.shape) # torch.Size([32])
print(length) # tensor([15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,15, 15, 15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14])
any solutions for this problem ?