How to train an LSTM on the GPU

Hello, I cannot execute this code on the GPU. I tried, but I do not understand how to make it run on the graphics card.

Here is the LSTM model code:

import torch
from torch import nn
class Model(nn.Module):
    """Word-level LSTM language model.

    Looks up an embedding for each token index, runs the embeddings
    through a stacked LSTM, and projects each hidden state back onto
    the vocabulary to produce next-word logits.
    """

    def __init__(self, dataset):
        super(Model, self).__init__()
        self.lstm_size = 128
        self.embedding_dim = 128
        self.num_layers = 3
        # Vocabulary size comes from the dataset wrapper
        # (assumes dataset.uniq_words is the collection of distinct tokens).
        n_vocab = len(dataset.uniq_words)
        self.embedding = nn.Embedding(
            num_embeddings=n_vocab,
            embedding_dim=self.embedding_dim,
        )
        self.lstm = nn.LSTM(
            # The LSTM consumes embedding vectors, so its input size must be
            # embedding_dim. The original passed lstm_size here, which only
            # worked because both constants happen to be 128.
            input_size=self.embedding_dim,
            hidden_size=self.lstm_size,
            num_layers=self.num_layers,
            dropout=0.2,
        )
        self.fc = nn.Linear(self.lstm_size, n_vocab)

    def forward(self, x, prev_state):
        """Return (logits, new_state) for a batch of token indices `x`."""
        embed = self.embedding(x)
        output, state = self.lstm(embed, prev_state)
        logits = self.fc(output)
        return logits, state

    def init_state(self, sequence_length):
        # NOTE(review): with the default batch_first=False LSTM, dim 1 of the
        # hidden state is the *batch* dimension; this code puts the sequence
        # length there, which only lines up because of how train() feeds
        # (batch, seq) tensors in. Confirm against the caller before changing.
        return (torch.zeros(self.num_layers, sequence_length, self.lstm_size),
                torch.zeros(self.num_layers, sequence_length, self.lstm_size))

And here is the code to train the model:

def save_checkpoint(dataset, model, args, path='filename.pth.tar'):
    """Serialize the model's weights to *path*.

    Only the state_dict is stored; `dataset` and `args` are accepted for
    interface symmetry with load_checkpoint but are not serialized.
    The new `path` parameter defaults to the original hard-coded filename,
    so existing callers are unaffected.
    """
    torch.save({
        'state_dict': model.state_dict(),
    }, path)


def load_checkpoint(dataset, model, args, path='filename.pth.tar'):
    """Rebuild the model and restore its weights from *path*.

    Bug fix: the original called torch.load() but discarded the result, so
    the freshly constructed model kept its random initialization. The saved
    state_dict is now actually loaded into the model before returning.
    """
    model = Model(dataset)
    checkpoint = torch.load(path)
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()
    return dataset, model, args

def train(dataset, model, args):
    """Train *model* on *dataset* for args.max_epochs epochs.

    Batches yielded by the DataLoader live on the CPU; they (and the
    initial LSTM state) are moved to whatever device the model's
    parameters are on, so the same code runs on CPU and GPU. This was
    the source of the "some parts are on the CPU" error.
    """
    model.train()
    # All model parameters share one device; use it for the inputs.
    device = next(model.parameters()).device
    dataloader = DataLoader(dataset, batch_size=args.batch_size)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    for epoch in range(args.max_epochs):
        state_h, state_c = model.init_state(args.sequence_length)
        state_h, state_c = state_h.to(device), state_c.to(device)
        for batch, (x, y) in enumerate(dataloader):
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            y_pred, (state_h, state_c) = model(x, (state_h, state_c))
            loss = criterion(y_pred.transpose(1, 2), y)
            # Detach so the next batch does not backpropagate through the
            # whole history of previous batches (truncated BPTT). Detaching
            # the local names does not affect the graph loss already holds.
            state_h = state_h.detach()
            state_c = state_c.detach()
            loss.backward()
            optimizer.step()
            print({'epoch': epoch, 'batch': batch, 'loss': loss.item()})


def predict(dataset, model, text, next_words=20):
    """Generate *next_words* tokens continuing *text*.

    Each next word is sampled from the softmax over the model's logits
    for the last position. The logits are moved to the CPU before the
    numpy conversion, so this also works when the model runs on the GPU
    (.numpy() raises on CUDA tensors).
    """
    model.eval()
    words = text.split(' ')
    state_h, state_c = model.init_state(len(words))
    for i in range(0, next_words):
        # NOTE(review): torch.tensor creates on the default device; with
        # torch.set_default_device('cuda:0') this lands on the GPU as needed.
        x = torch.tensor([[dataset.word_to_index[w] for w in words[i:]]])
        y_pred, (state_h, state_c) = model(x, (state_h, state_c))
        last_word_logits = y_pred[0][-1]
        # .cpu() is required before .numpy(); numpy cannot read CUDA memory.
        p = torch.nn.functional.softmax(last_word_logits, dim=0).detach().cpu().numpy()
        word_index = np.random.choice(len(last_word_logits), p=p)
        words.append(dataset.index_to_word[word_index])
    return words

# Parse the training hyper-parameters from the command line.
parser = argparse.ArgumentParser()
for flag, default in (
    ('--max-epochs', 10),
    ('--batch-size', 256),
    ('--sequence-length', 20),
):
    parser.add_argument(flag, type=int, default=default)
args = parser.parse_args()

# Build the dataset and the model, then train from scratch.
dataset = Dataset(args)
model = Model(dataset)
#load_checkpoint(dataset, model, args)  # uncomment to resume from a checkpoint
train(dataset, model, args)

Can you tell me how to do it? In my attempts, PyTorch reports an error saying that not everything is computed on the GPU — that some parts are computed on the CPU. Thank you for helping me, I am a beginner.

I found the solution myself, sorry. I am sharing the code for others:
=> torch.set_default_device('cuda:0')

import argparse
import torch
import numpy as np
from torch import nn, optim
from torch.utils.data import DataLoader
from model import Model
from dataset import Dataset
torch.set_default_device('cuda:0')


def save_checkpoint(dataset, model, args, path='filename.pth.tar'):
    """Serialize the model's weights to *path*.

    Only the state_dict is stored; `dataset` and `args` are accepted for
    interface symmetry with load_checkpoint but are not serialized.
    The new `path` parameter defaults to the original hard-coded filename,
    so existing callers are unaffected.
    """
    torch.save({
        'state_dict': model.state_dict(),
    }, path)


def load_checkpoint(dataset, model, args, path='filename.pth.tar'):
    """Rebuild the model and restore its weights from *path*.

    Bug fix: the original called torch.load() but discarded the result, so
    the freshly constructed model kept its random initialization. The saved
    state_dict is now actually loaded into the model before returning.
    """
    model = Model(dataset)
    checkpoint = torch.load(path)
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()
    return dataset, model, args

def train(dataset, model, args):
    """Train *model* on *dataset* for args.max_epochs epochs.

    Batches yielded by the DataLoader live on the CPU; they (and the
    initial LSTM state) are moved to whatever device the model's
    parameters are on, so the same code runs on CPU and GPU. This was
    the source of the "some parts are on the CPU" error.
    A checkpoint is written after every epoch.
    """
    model.train()
    # All model parameters share one device; use it for the inputs.
    device = next(model.parameters()).device
    dataloader = DataLoader(dataset, batch_size=args.batch_size)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    for epoch in range(args.max_epochs):
        state_h, state_c = model.init_state(args.sequence_length)
        state_h, state_c = state_h.to(device), state_c.to(device)
        for batch, (x, y) in enumerate(dataloader):
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            y_pred, (state_h, state_c) = model(x, (state_h, state_c))
            loss = criterion(y_pred.transpose(1, 2), y)
            # Detach so the next batch does not backpropagate through the
            # whole history of previous batches (truncated BPTT). Detaching
            # the local names does not affect the graph loss already holds.
            state_h = state_h.detach()
            state_c = state_c.detach()
            loss.backward()
            optimizer.step()
            print({'epoch': epoch, 'batch': batch, 'loss': loss.item()})
        save_checkpoint(dataset, model, args)


def predict(dataset, model, text, next_words=100):
    """Generate *next_words* tokens continuing *text*.

    Each next word is sampled from the softmax over the model's logits
    for the last position. The logits are moved to the CPU before the
    numpy conversion, so this also works when the model runs on the GPU
    (.numpy() raises on CUDA tensors).
    """
    model.eval()
    words = text.split(' ')
    state_h, state_c = model.init_state(len(words))
    for i in range(0, next_words):
        # NOTE(review): torch.tensor creates on the default device; with
        # torch.set_default_device('cuda:0') this lands on the GPU as needed.
        x = torch.tensor([[dataset.word_to_index[w] for w in words[i:]]])
        y_pred, (state_h, state_c) = model(x, (state_h, state_c))
        last_word_logits = y_pred[0][-1]
        # .cpu() is required before .numpy(); numpy cannot read CUDA memory.
        p = torch.nn.functional.softmax(last_word_logits, dim=0).detach().cpu().numpy()
        word_index = np.random.choice(len(last_word_logits), p=p)
        words.append(dataset.index_to_word[word_index])
    return words

# Parse the training hyper-parameters from the command line.
parser = argparse.ArgumentParser()
for flag, default in (
    ('--max-epochs', 50),
    ('--batch-size', 200),
    ('--sequence-length', 3),
):
    parser.add_argument(flag, type=int, default=default)
args = parser.parse_args()

# Build the dataset and the model, then train from scratch.
dataset = Dataset(args)
model = Model(dataset)
#load_checkpoint(dataset, model, args)  # uncomment to resume from a checkpoint
train(dataset, model, args)


# Persist the trained weights so they can be reloaded later.
checkpoint = {'state_dict': model.state_dict()}
torch.save(checkpoint, 'filename.pth.tar')


# Example of generating text with the trained model:
#print(predict(dataset, model, text='once upon a time'))

And here is how to load the model on the CPU:

# Load the checkpoint on the CPU (e.g. on a machine without a GPU):
# map_location remaps the CUDA-saved tensors onto CPU storage.
device = torch.device('cpu')
model = Model(dataset)
# Bug fix: the original discarded torch.load's return value, leaving the
# model with random weights; the saved state_dict must be applied.
checkpoint = torch.load('filename.pth.tar', map_location=device)
model.load_state_dict(checkpoint['state_dict'])