Simple autoencoder, CUDA illegal memory access

Hello !
Here is my simple autoencoder code :
It seems to work well on my laptop, without GPU acceleration. However when I run it on a computer (remotely using SSH) with a RTX 2080, I get an error

File “autoencoder.py”, line 87, in
batch = batch.to(device)
RuntimeError: CUDA error: an illegal memory access was encountered

from pathlib import Path
import os
import torch
from torchvision.utils import make_grid
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import normalize
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter
import numpy as np
import datetime


from datamaestro import prepare_dataset
ds = prepare_dataset("com.lecun.mnist");
train_images, train_labels = ds.train.images.data(), ds.train.labels.data()
test_images, test_labels =  ds.test.images.data(), ds.test.labels.data()


writer = SummaryWriter("runs/runs"+datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))




savepath = Path("model.pch")


#dataset


class MNISTDataset(Dataset):
    def __init__(self, data, label):
        super().__init__()
        self.data, self.label = data, label
        self.data.reshape(data.shape[0],-1)
        self.data = self.data/np.max(self.data)
        self.data = torch.tensor(data).reshape(data.shape[0], 784).float()
        self.label = torch.tensor(label)
        
    def __getitem__(self, index):
        return self.data[index], self.label[index]
    def __len__(self):
        return len(self.data)
    
    

class AutoEncoder(torch.nn.Module):
    def __init__(self, size_in = 784, size_out = 392):
        super().__init__()
        self.encoder = nn.Linear(size_in, size_out, bias = False)

    def forward(self, x):
        x = self.encoder(x)
        x = F.linear(x, self.encoder.weight.t()) #decoder
        return x
    
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if torch.cuda.is_available():
    print('CUDA is available')
autoencod = AutoEncoder()
eps = 10e-6
BS = 50
nb_epochs = 1000

dataset = MNISTDataset(train_images, train_labels)

data_train = DataLoader(dataset, shuffle = True, batch_size = BS)

optimizer = torch.optim.SGD(params = autoencod.parameters(), lr = eps)
loss = torch.nn.MSELoss()

autoencod = autoencod.to(device)

for epoch in range(nb_epochs):
    for batch, labels in data_train:
        
        batch = batch.to(device)
        encod_decod = autoencod(batch.to(device))

        l = loss(encod_decod, batch)
        l.backward()


        optimizer.step()
        optimizer.zero_grad()
        
    with torch.no_grad():
        t_l = loss(dataset.data, autoencod(dataset.data))
        print(t_l)
        writer.add_scalar(' AutoEncoder MCELoss train :', t_l, epoch)

Thanks for any help !

Hi, I am not sure, but probably here you don’t need to put batch on device twice.

Also at this line your model (autoencod) is on device, but data is not

1 Like

Thank you so much @Alexey_Demyanchuk !

1 Like