I am working on sentiment analysis using GNN, While running the below code i am getting the run time error.
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
Load the preprocessed tweets into a pandas dataframe
Split the data into training, validation, and testing sets
train_data, test_data = train_test_split(df_tweets, test_size=0.2, random_state=42)
train_data, val_data = train_test_split(train_data, test_size=0.2, random_state=42)
Convert the data to PyTorch tensors
train_tweets = torch.DoubleTensor(train_data[‘Numerical Representations’].tolist())
train_labels = torch.DoubleTensor(train_data[‘Label’].tolist())
val_tweets = torch.DoubleTensor(val_data[‘Numerical Representations’].tolist())
val_labels = torch.DoubleTensor(val_data[‘Label’].tolist())
test_tweets = torch.DoubleTensor(test_data[‘Numerical Representations’].tolist())
test_labels = torch.DoubleTensor(test_data[‘Label’].tolist())
import torch
import torch.nn as nn
import torch.nn.functional as F
class GNNGRUModel(nn.Module):
def init(self, embedding_matrix, hidden_size, num_classes):
super(GNNGRUModel, self).init()
# Set the embedding layer with pre-trained word embeddings
self.embedding_layer = nn.Embedding.from_pretrained(embedding_matrix, freeze=True)
# Set the GRU layer with hidden size
self.gru_layer = nn.GRU(embedding_matrix.shape[1], hidden_size, batch_first=True)
# Set the fully connected layer with output size equal to the number of classes
self.fc_layer = nn.Linear(hidden_size, num_classes)
def forward(self, inputs):
# Extract the padded sequence tensor from inputs
padded_sequences = inputs
# Pass the padded sequence tensor to the embedding layer
embedded_sequences = self.embedding_layer(padded_sequences.long())
# Convert the embedded sequence tensor to double type
#embedded_sequences = embedded_sequences.double()
# Pass the embedded sequence tensor to the GRU layer
gru_output, _ = self.gru_layer(embedded_sequences)
# Take the last hidden state from the GRU layer as the sentence representation
sentence_representation = gru_output[:, -1, :]
# Pass the sentence representation tensor to the fully connected layer
logits = self.fc_layer(sentence_representation)
# Apply softmax activation to get the predicted probabilities
output = F.softmax(logits, dim=1)
return output
import torch
Convert the embedding matrix to a PyTorch tensor
embedding_tensor = torch.from_numpy(embedding_matrix)
Create an instance of the GNNGRUModel class
model = GNNGRUModel(embedding_tensor, hidden_size=128, num_classes=3)
Move the model to the GPU if available
device = torch.device(“cuda” if torch.cuda.is_available() else “cpu”)
model.to(device)
Set the loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
from tqdm import tqdm
Set the number of epochs to train for
num_epochs = 10
Set the batch size for training
batch_size = 32
Create a DataLoader for the training set
train_dataset = torch.utils.data.TensorDataset(train_tweets, train_labels)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
Create a DataLoader for the validation set
val_dataset = torch.utils.data.TensorDataset(val_tweets, val_labels)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
Train the model
for epoch in range(num_epochs):
# Set the model to training mode
model.train()
# Initialize the total loss for this epoch
total_loss = 0
# Loop over the batches in the training set
for batch_tweets, batch_labels in tqdm(train_loader, desc=f'Training epoch {epoch + 1}/{num_epochs}'):
# Move the batch to the GPU if available
batch_tweets = batch_tweets.double()
batch_tweets = batch_tweets.to(device)
batch_labels = batch_labels.double()
batch_labels = batch_labels.to(device)
# Zero out the gradients
optimizer.zero_grad()
# Forward pass
batch_logits = model(batch_tweets)
# Compute the loss
batch_loss = loss_fn(batch_logits, batch_label)
# Backward pass
batch_loss.backward()
# Update the parameters
optimizer.step()
# Add the batch loss to the total loss for this epoch
total_loss += batch_loss.item()
# Compute the average loss for this epoch
avg_loss = total_loss / len(train_loader)
# Set the model to evaluation mode
model.eval()
# Initialize the number of correct predictions for this epoch
num_correct = 0
# Loop over the batches in the validation set
for batch_tweets, batch_labels in tqdm(val_loader, desc=f'Validation epoch {epoch + 1}/{num_epochs}'):
# Move the batch to the GPU if available
batch_tweets = batch_tweets.to(device)
batch_labels = batch_labels.to(device)
# Forward pass
batch_logits = model(batch_tweets)
# Get the predicted labels
_, batch_preds = batch_logits.max(dim=1)
# Compute the number of correct predictions
num_correct += (batch_preds == batch_labels).sum().item()
# Compute the accuracy for this epoch
accuracy = num_correct / len(val_dataset)
# Print the average loss and accuracy for this epoch
print(f'Epoch {epoch + 1}/{num_epochs}, '
f'Average Loss: {avg_loss:.4f}, '
f'Validation Accuracy: {accuracy:.4f}')