Getting a blue screen error "system thread exception not handled"

I am trying to run the following code on WSL2 (Ubuntu 20.04) with CUDA version 12:

class CustomDataSet(Dataset):
    """Labelled image dataset read from a directory of per-class sub-folders.

    ``data_dir`` must contain one sub-directory per class. Class indices are
    assigned from the sorted sub-directory names, and every entry inside a
    class sub-directory is treated as an image belonging to that class.

    Args:
        data_dir: Root directory holding one sub-directory per class.
        transform: Callable applied to each loaded RGB image, or ``None`` to
            return the image untransformed.
    """

    def __init__(self, data_dir, transform=transforms.ToTensor()):
        self.images = []
        self.data_dir = data_dir
        self.transform = transform

        # Only directories are class folders; a stray file in the root
        # (hidden files, archives, ...) would otherwise crash the inner listdir.
        labels = sorted(
            name
            for name in os.listdir(data_dir)
            if os.path.isdir(os.path.join(data_dir, name))
        )

        for i, label in enumerate(labels):
            label_dir = os.path.join(data_dir, label)
            # os.listdir returns entries in arbitrary order; sorting keeps the
            # index -> sample mapping stable so a seeded split is reproducible.
            for image_name in sorted(os.listdir(label_dir)):
                self.images.append((os.path.join(label_dir, image_name), i))

    def __len__(self):
        """Return the number of (image path, class index) samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, class_index)``."""
        img_path, label = self.images[idx]

        # convert() produces an independent image, so the file handle can be
        # released as soon as the conversion is done.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        return image, label

class CustomTestDataSet(Dataset):
    """Image dataset for inference that yields each image with its file path.

    ``data_dir`` must contain one sub-directory per group of images. A class
    index is recorded for every image from the sorted sub-directory names,
    but ``__getitem__`` returns the image path instead of that index.

    Args:
        data_dir: Root directory holding the image sub-directories.
        transform: Callable applied to each loaded RGB image, or ``None`` to
            return the image untransformed.
    """

    def __init__(self, data_dir, transform=transforms.ToTensor()):
        self.images = []
        self.data_dir = data_dir
        self.transform = transform

        # Only directories are scanned; a stray file in the root (hidden
        # files, archives, ...) would otherwise crash the inner listdir.
        labels = sorted(
            name
            for name in os.listdir(data_dir)
            if os.path.isdir(os.path.join(data_dir, name))
        )

        for i, label in enumerate(labels):
            label_dir = os.path.join(data_dir, label)
            # os.listdir returns entries in arbitrary order; sorting keeps the
            # sample order identical from run to run.
            for image_name in sorted(os.listdir(label_dir)):
                self.images.append((os.path.join(label_dir, image_name), i))

    def __len__(self):
        """Return the number of images found under ``data_dir``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, image_path)``."""
        img_path, _label = self.images[idx]

        # convert() produces an independent image, so the file handle can be
        # released as soon as the conversion is done.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        return image, img_path
# Build the datasets from the "train_set" and "test_set" folders under datapath.
train_set = CustomDataSet(data_dir=os.path.join(datapath, "train_set"))
test_set = CustomTestDataSet(data_dir=os.path.join(datapath, "test_set"))

# Hold out 20% of the training data for validation; the generator is seeded
# with a fixed value so the split uses the same random permutation each run.
split_generator = torch.Generator().manual_seed(0)
train_set, valid_set = torch.utils.data.random_split(
    train_set, [0.8, 0.2], generator=split_generator
)

cuda_device_id = 0

# Settings shared by all three loaders; only the dataset and the shuffle flag
# differ between them.
loader_options = dict(
    batch_size=64,
    num_workers=2,
    pin_memory=True,
    pin_memory_device=f"cuda:{cuda_device_id}",
)

train_loader = DataLoader(train_set, shuffle=True, **loader_options)

test_loader = DataLoader(test_set, shuffle=False, **loader_options)

valid_loader = DataLoader(valid_set, shuffle=True, **loader_options)

All of this works fine, but when I try to run the following code

class CNN(nn.Module):
    """Small convolutional classifier with 30 output logits.

    Three convolution / ReLU / max-pool stages feed a three-layer linear head.
    The head expects 36 * 4 * 4 flattened features, which is what the
    convolutional stack produces for 64x64 inputs.
    NOTE(review): the actual input resolution is not visible here — confirm.
    """

    def __init__(self):
        super().__init__()

        # Feature extractor: channels grow 3 -> 6 -> 12 -> 36.
        self.conv_layers = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=5, stride=2),
            nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=12, out_channels=36, kernel_size=5, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=5, stride=2),
        )

        # Classifier head.
        # NOTE(review): there is no activation between the first two linear
        # layers — confirm this is intended.
        self.fc_layers = nn.Sequential(
            nn.Linear(in_features=36 * 4 * 4, out_features=120),
            nn.Linear(in_features=120, out_features=84),
            nn.ReLU(),
            nn.Linear(in_features=84, out_features=30),
        )

    def forward(self, x):
        """Run the conv stack, flatten per sample, and return the head output."""
        features = self.conv_layers(x)
        # Collapse every non-batch dimension into one feature vector per sample.
        flattened = features.view(features.size(0), -1)
        return self.fc_layers(flattened)


# Instantiate the network and move its parameters to the GPU.
cnn = CNN()
cnn.to('cuda')

# Cross-entropy loss over the network's output logits.
loss_function_cnn = nn.CrossEntropyLoss()

# Stochastic gradient descent with momentum over all network parameters.
optimizer_cnn = optim.SGD(
    params=cnn.parameters(),
    lr=0.01,
    momentum=0.9,
)

I get a blue screen of death within a few seconds: nvlddmkm.sys failed with the stop code "System thread exception not handled". I am using images from TinyImageNet30 to train the model.

I have narrowed it down to the line:

cnn.to('cuda')

Found the solution: the “latest” NVIDIA driver I had installed had a bug, and installing a hotfix driver from the NVIDIA forums fixed the issue.