RuntimeError: expected scalar type Double but found Float

I am working on sentiment analysis using a GNN. While running the code below, I am getting the runtime error from the title.
import pandas as pd
import torch
from sklearn.model_selection import train_test_split

# Load the preprocessed tweets into a pandas dataframe

# Split the data into training, validation, and testing sets

train_data, test_data = train_test_split(df_tweets, test_size=0.2, random_state=42)
train_data, val_data = train_test_split(train_data, test_size=0.2, random_state=42)

# Convert the data to PyTorch tensors

train_tweets = torch.DoubleTensor(train_data['Numerical Representations'].tolist())
train_labels = torch.DoubleTensor(train_data['Label'].tolist())

val_tweets = torch.DoubleTensor(val_data['Numerical Representations'].tolist())
val_labels = torch.DoubleTensor(val_data['Label'].tolist())

test_tweets = torch.DoubleTensor(test_data['Numerical Representations'].tolist())
test_labels = torch.DoubleTensor(test_data['Label'].tolist())

import torch
import torch.nn as nn
import torch.nn.functional as F

class GNNGRUModel(nn.Module):
    def __init__(self, embedding_matrix, hidden_size, num_classes):
        super(GNNGRUModel, self).__init__()

        # Set the embedding layer with pre-trained word embeddings
        self.embedding_layer = nn.Embedding.from_pretrained(embedding_matrix, freeze=True)

        # Set the GRU layer with hidden size
        self.gru_layer = nn.GRU(embedding_matrix.shape[1], hidden_size, batch_first=True)

        # Set the fully connected layer with output size equal to the number of classes
        self.fc_layer = nn.Linear(hidden_size, num_classes)

    def forward(self, inputs):
        # Extract the padded sequence tensor from inputs
        padded_sequences = inputs

        # Pass the padded sequence tensor to the embedding layer
        embedded_sequences = self.embedding_layer(padded_sequences.long())

        # Convert the embedded sequence tensor to double type
        #embedded_sequences = embedded_sequences.double()

        # Pass the embedded sequence tensor to the GRU layer
        gru_output, _ = self.gru_layer(embedded_sequences)

        # Take the last hidden state from the GRU layer as the sentence representation
        sentence_representation = gru_output[:, -1, :]

        # Pass the sentence representation tensor to the fully connected layer
        logits = self.fc_layer(sentence_representation)

        # Apply softmax activation to get the predicted probabilities
        output = F.softmax(logits, dim=1)

        return output

import torch

# Convert the embedding matrix to a PyTorch tensor

embedding_tensor = torch.from_numpy(embedding_matrix)

# Create an instance of the GNNGRUModel class

model = GNNGRUModel(embedding_tensor, hidden_size=128, num_classes=3)

# Move the model to the GPU if available

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Set the loss function and optimizer

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

from tqdm import tqdm

# Set the number of epochs to train for

num_epochs = 10

# Set the batch size for training

batch_size = 32

# Create a DataLoader for the training set

train_dataset = torch.utils.data.TensorDataset(train_tweets, train_labels)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Create a DataLoader for the validation set

val_dataset = torch.utils.data.TensorDataset(val_tweets, val_labels)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Train the model

for epoch in range(num_epochs):
    # Set the model to training mode
    model.train()

    # Initialize the total loss for this epoch
    total_loss = 0

    # Loop over the batches in the training set
    for batch_tweets, batch_labels in tqdm(train_loader, desc=f'Training epoch {epoch + 1}/{num_epochs}'):
        # Move the batch to the GPU if available
        batch_tweets = batch_tweets.double()
        batch_tweets = batch_tweets.to(device)
        batch_labels = batch_labels.double()
        batch_labels = batch_labels.to(device)

        # Zero out the gradients
        optimizer.zero_grad()

        # Forward pass
        batch_logits = model(batch_tweets)

        # Compute the loss
        batch_loss = loss_fn(batch_logits, batch_labels)

        # Backward pass
        batch_loss.backward()

        # Update the parameters
        optimizer.step()

        # Add the batch loss to the total loss for this epoch
        total_loss += batch_loss.item()

    # Compute the average loss for this epoch
    avg_loss = total_loss / len(train_loader)

    # Set the model to evaluation mode
    model.eval()

    # Initialize the number of correct predictions for this epoch
    num_correct = 0

    # Loop over the batches in the validation set
    for batch_tweets, batch_labels in tqdm(val_loader, desc=f'Validation epoch {epoch + 1}/{num_epochs}'):
        # Move the batch to the GPU if available
        batch_tweets = batch_tweets.to(device)
        batch_labels = batch_labels.to(device)

        # Forward pass
        batch_logits = model(batch_tweets)

        # Get the predicted labels
        _, batch_preds = batch_logits.max(dim=1)

        # Compute the number of correct predictions
        num_correct += (batch_preds == batch_labels).sum().item()

    # Compute the accuracy for this epoch
    accuracy = num_correct / len(val_dataset)

    # Print the average loss and accuracy for this epoch
    print(f'Epoch {epoch + 1}/{num_epochs}, '
          f'Average Loss: {avg_loss:.4f}, '
          f'Validation Accuracy: {accuracy:.4f}')

In your code you are explicitly creating the data as DoubleTensors and are also casting to double again inside the DataLoader loop.
PyTorch uses float32 by default for all parameters of the model.
If you really want to use float64, you could cast the model's parameters and buffers via model.to(torch.float64), but note that this could have a large performance impact, especially on the GPU.
Assuming you don't need this high precision, you could cast the inputs to float32 instead.
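
Here is a minimal sketch of both options, reusing the tensor names from your training loop (note that nn.CrossEntropyLoss expects the targets as long class indices, so casting the labels to double is not needed either):

# Option 1 (usually the simpler fix): keep the model in the default float32
# and cast the inputs instead of calling .double() on them
batch_tweets = batch_tweets.float()   # float64 -> float32
batch_labels = batch_labels.long()    # class indices for nn.CrossEntropyLoss

# Option 2: run the whole model in float64 (slower, especially on the GPU)
model.to(torch.float64)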

Thank you, but the error is still there.

Could you post the stacktrace raising this error, please?


RuntimeError                              Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_17800\2285477610.py in <module>
    117
    118     # Forward pass
--> 119     batch_logits = model(batch_tweets)
    120
    121     # Compute the loss

~\anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
   1049         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1050                 or _global_forward_hooks or _global_forward_pre_hooks):
--> 1051             return forward_call(*input, **kwargs)
   1052         # Do not call functions when jit is used
   1053         full_backward_hooks, non_full_backward_hooks = [], []

~\AppData\Local\Temp\ipykernel_17800\2285477610.py in forward(self, inputs)
     49
     50         # Pass the embedded sequence tensor to the GRU layer
---> 51         gru_output, _ = self.gru_layer(embedded_sequences)
     52
     53         # Take the last hidden state from the GRU layer as the sentence representation

~\anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
   1049         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1050                 or _global_forward_hooks or _global_forward_pre_hooks):
--> 1051             return forward_call(*input, **kwargs)
   1052         # Do not call functions when jit is used
   1053         full_backward_hooks, non_full_backward_hooks = [], []

~\anaconda3\lib\site-packages\torch\nn\modules\rnn.py in forward(self, input, hx)
    835             self.check_forward_args(input, hx, batch_sizes)
    836             if batch_sizes is None:
--> 837                 result = _VF.gru(input, hx, self._flat_weights, self.bias, self.num_layers,
    838                                  self.dropout, self.training, self.bidirectional, self.batch_first)
    839             else:

RuntimeError: expected scalar type Double but found Float

Thanks for the stacktrace (you can post code snippets by wrapping them in three backticks ``` :wink:).
Are you casting the activations back to double before passing them to the gru_layer?
I cannot reproduce the issue using your code:

class GNNGRUModel(nn.Module):
    def __init__(self, embedding_matrix, hidden_size, num_classes):
        super(GNNGRUModel, self).__init__()
    
        # Set the embedding layer with pre-trained word embeddings
        self.embedding_layer = nn.Embedding.from_pretrained(embedding_matrix, freeze=True)
    
        # Set the GRU layer with hidden size
        self.gru_layer = nn.GRU(embedding_matrix.shape[1], hidden_size, batch_first=True)
    
        # Set the fully connected layer with output size equal to the number of classes
        self.fc_layer = nn.Linear(hidden_size, num_classes)
    
    def forward(self, inputs):
        # Extract the padded sequence tensor from inputs
        padded_sequences = inputs
    
        # Pass the padded sequence tensor to the embedding layer
        embedded_sequences = self.embedding_layer(padded_sequences.long())
    
        # Convert the embedded sequence tensor to double type
        #embedded_sequences = embedded_sequences.double()
    
        # Pass the embedded sequence tensor to the GRU layer
        gru_output, _ = self.gru_layer(embedded_sequences)
        return gru_output
        
model = GNNGRUModel(torch.randn(10, 10), 10, 10)
x = torch.randint(0, 10, (10, 10))
out = model(x)
print(out.shape)
# torch.Size([10, 10, 10])
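
However, the same error is raised if I assume the embedding matrix is float64, which would match your setup, since torch.from_numpy keeps NumPy's default float64 dtype while the GRU weights stay in float32:

# Assumed repro: a float64 embedding matrix, as torch.from_numpy would
# produce from a default NumPy array; the embedding output is then double
# while the GRU parameters are still float32
model = GNNGRUModel(torch.randn(10, 10, dtype=torch.float64), 10, 10)
x = torch.randint(0, 10, (10, 10))
out = model(x)
# RuntimeError: expected scalar type Double but found Float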

Sorry, I didn't understand. Could you please explain where I am going wrong?