# Pretrained word vectors, loaded once at import time and looked up per word
# by TextDataset.get_embedding.
# NOTE(review): `api` is presumably gensim.downloader, and the "-100" suffix
# presumably means 100-dimensional vectors -- confirm against the imports.
glove_model = api.load("glove-wiki-gigaword-100")
class TextDataset(Dataset):
    """Map-style dataset that turns each text into a sequence of word vectors.

    Each item is a ``(embeddings, label)`` pair where ``embeddings`` is a
    float32 tensor of shape ``(num_tokens, embedding_dim)`` and ``label`` is
    returned exactly as stored.

    Args:
        data: Container exposing ``data["text"]`` and ``data["label"]``
            columns (for example a DataFrame or a dict of lists).
        embeddings: Word-vector lookup supporting ``word in embeddings`` and
            ``embeddings[word]``. Defaults to the module-level ``glove_model``.
        embedding_dim: Length of one word vector. Defaults to
            ``embeddings.vector_size`` when that attribute exists, else 100.
    """

    def __init__(self, data, embeddings=None, embedding_dim=None):
        self.data = data
        self.X = self.data["text"]
        self.y = self.data["label"]
        # The global is only touched when no lookup is supplied, so the class
        # can be used (and tested) with any mapping of word -> vector.
        self.embeddings = glove_model if embeddings is None else embeddings
        if embedding_dim is None:
            embedding_dim = getattr(self.embeddings, "vector_size", 100)
        self.embedding_dim = embedding_dim

    def __len__(self):
        # Count rows of the text column; len(self.data) would be the number
        # of keys when data is a dict of columns.
        return len(self.X)

    @staticmethod
    def _at(column, idx):
        """Return the element at position ``idx`` of ``column``."""
        # Samplers hand out positions 0..len-1. If the column exposes
        # ``.iloc`` (e.g. a pandas Series) use it, because plain ``[]`` on a
        # Series looks up by index label and fails on a split/shuffled frame.
        positional = getattr(column, "iloc", None)
        return column[idx] if positional is None else positional[idx]

    def __getitem__(self, idx):
        """Return ``(embeddings, label)`` for the sample at position ``idx``."""
        text = self._at(self.X, idx)
        label = self._at(self.y, idx)
        # Iterating a raw string would yield characters, not words, so
        # whitespace-tokenize it; pre-tokenized sequences are used unchanged.
        tokens = text.split() if isinstance(text, str) else text
        vectors = [self.get_embedding(word) for word in tokens]
        if not vectors:
            # Keep the result rank-2 even when the text has no tokens.
            empty = torch.zeros((0, self.embedding_dim), dtype=torch.float32)
            return empty, label
        # Stack into one ndarray first: building a tensor straight from a
        # list of ndarrays is very slow.
        text_embeddings = torch.as_tensor(np.stack(vectors), dtype=torch.float32)
        return text_embeddings, label

    def get_embedding(self, word):
        """Return the vector for ``word``, or a zero vector if it is unknown."""
        if word in self.embeddings:
            return self.embeddings[word]
        return np.zeros(self.embedding_dim, dtype=np.float32)
def _pad_collate(batch):
    """Collate ``(embeddings, label)`` pairs whose sequences differ in length.

    The default collate function stacks tensors and therefore fails when the
    texts in a batch have different numbers of tokens. Here shorter sequences
    are zero-padded at the end up to the longest one in the batch.

    Args:
        batch: List of ``(embeddings, label)`` items produced by the dataset.

    Returns:
        Tuple ``(padded, labels)``: ``padded`` has the batch dimension first,
        then the padded sequence length; ``labels`` is a 1-D tensor.
    """
    sequences, labels = zip(*batch)
    padded = torch.nn.utils.rnn.pad_sequence(list(sequences), batch_first=True)
    return padded, torch.as_tensor(labels)


train_dataset = TextDataset(train_data)
val_dataset = TextDataset(val_data)
test_dataset = TextDataset(test_data)

# create dataloaders
# Only the training data is shuffled; evaluation does not need a random
# order, and a fixed order keeps per-sample results reproducible.
train_loader = DataLoader(
    train_dataset, batch_size=32, shuffle=True, collate_fn=_pad_collate
)
val_loader = DataLoader(
    val_dataset, batch_size=32, shuffle=False, collate_fn=_pad_collate
)
test_loader = DataLoader(
    test_dataset, batch_size=32, shuffle=False, collate_fn=_pad_collate
)
After this, I feed the batches directly to the LSTM, as shown below:
epochs = 10

# Train the model
for epoch in range(epochs):
    # Per-epoch running totals. `train_accuracy` holds the number of correct
    # predictions so far (the name is kept from the original script).
    train_accuracy = 0
    samples_seen = 0
    loss_total = 0.0
    batches_seen = 0

    for texts, labels in train_loader:
        texts = texts.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = LSTM(texts)
        loss = criterion(outputs, labels)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accuracy: the predicted class is the index of the largest output
        # along dim 1.
        predicted = outputs.argmax(dim=1)
        train_accuracy += (predicted == labels).sum().item()
        samples_seen += labels.size(0)
        loss_total += loss.item()
        batches_seen += 1

    # Report the mean of the per-batch losses rather than only the last
    # batch's loss, and divide by the samples actually seen rather than by
    # len(train_data). max(..., 1) avoids a crash when the loader is empty.
    epoch_loss = loss_total / max(batches_seen, 1)
    epoch_accuracy = train_accuracy / max(samples_seen, 1)
    print(f'Epoch {epoch+1},Loss: {epoch_loss}, Accuracy: {epoch_accuracy}')