I am trying to run the following code on WSL2 (Ubuntu 20.04) with CUDA version 12:
class CustomDataSet(Dataset):
    """Image-folder dataset yielding ``(image, class_index)`` pairs.

    ``data_dir`` is expected to contain one sub-directory per class. Class
    indices are assigned by enumerating the sorted sub-directory names.
    """

    def __init__(self, data_dir, transform=transforms.ToTensor()):
        """Index every file found in the class sub-directories of ``data_dir``.

        Args:
            data_dir: Root directory holding one sub-directory per class.
            transform: Callable applied to each loaded PIL image. Pass
                ``None`` to receive the RGB PIL image unchanged.
        """
        self.images = []
        self.data_dir = data_dir
        self.transform = transform

        # Only directories count as classes: a stray regular file in
        # data_dir would make os.listdir(label_dir) raise below.
        labels = sorted(
            name
            for name in os.listdir(self.data_dir)
            if os.path.isdir(os.path.join(self.data_dir, name))
        )
        for i, label in enumerate(labels):
            label_dir = os.path.join(data_dir, label)
            # os.listdir returns entries in arbitrary order; sorting keeps the
            # sample order (and any seeded split built on it) reproducible.
            for image_name in sorted(os.listdir(label_dir)):
                image_path = os.path.join(label_dir, image_name)
                self.images.append((image_path, i))

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, class_index)``."""
        img_path, label = self.images[idx]
        # convert() returns a new, fully loaded image, so the underlying file
        # can be closed as soon as the conversion is done.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image, label
class CustomTestDataSet(Dataset):
    """Image-folder dataset yielding ``(image, image_path)`` pairs.

    ``data_dir`` is expected to contain one sub-directory per class. A class
    index is stored for every image but is not returned by ``__getitem__``;
    the file path is returned instead.
    """

    def __init__(self, data_dir, transform=transforms.ToTensor()):
        """Index every file found in the class sub-directories of ``data_dir``.

        Args:
            data_dir: Root directory holding one sub-directory per class.
            transform: Callable applied to each loaded PIL image. Pass
                ``None`` to receive the RGB PIL image unchanged.
        """
        self.images = []
        self.data_dir = data_dir
        self.transform = transform

        # Only directories count as classes: a stray regular file in
        # data_dir would make os.listdir(label_dir) raise below.
        labels = sorted(
            name
            for name in os.listdir(self.data_dir)
            if os.path.isdir(os.path.join(self.data_dir, name))
        )
        for i, label in enumerate(labels):
            label_dir = os.path.join(data_dir, label)
            # os.listdir returns entries in arbitrary order; sorting keeps the
            # sample order stable from run to run.
            for image_name in sorted(os.listdir(label_dir)):
                image_path = os.path.join(label_dir, image_name)
                self.images.append((image_path, i))

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, image_path)``."""
        # The stored class index is intentionally unused here.
        img_path, _label = self.images[idx]
        # convert() returns a new, fully loaded image, so the underlying file
        # can be closed as soon as the conversion is done.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image, img_path
# Datasets are read from the "train_set" and "test_set" folders under datapath.
train_set = CustomDataSet(data_dir=os.path.join(datapath, "train_set"))
test_set = CustomTestDataSet(data_dir=os.path.join(datapath, "test_set"))

# Hold out 20% of the training samples for validation. The generator is
# seeded so the split is repeatable for a given sample ordering.
train_set, valid_set = torch.utils.data.random_split(
    train_set,
    [0.8, 0.2],
    generator=torch.Generator().manual_seed(0),
)

cuda_device_id = 0

# Settings shared by all three loaders; only `shuffle` differs between them.
_loader_options = dict(
    batch_size=64,
    num_workers=2,
    pin_memory=True,
    pin_memory_device="cuda:%i" % cuda_device_id,
)

train_loader = DataLoader(train_set, shuffle=True, **_loader_options)
test_loader = DataLoader(test_set, shuffle=False, **_loader_options)
valid_loader = DataLoader(valid_set, shuffle=True, **_loader_options)
All of this works fine, but when I try to run the following code:
class CNN(nn.Module):
    """Small convolutional classifier.

    Three conv -> ReLU -> max-pool stages feed a three-layer fully connected
    head. The head's first layer expects 36*4*4 flattened features, which is
    what the convolutional stack produces for 3x64x64 inputs.
    """

    def __init__(self, num_classes=30):
        """Build the layers.

        Args:
            num_classes: Size of the output layer (number of logits per
                sample). Defaults to 30.
        """
        super().__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 6, kernel_size=5, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=5, stride=2),
            nn.Conv2d(6, 12, kernel_size=5, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(12, 36, kernel_size=5, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=5, stride=2),
        )
        self.fc_layers = nn.Sequential(
            nn.Linear(36 * 4 * 4, 120),
            # NOTE(review): there is no activation between this layer and the
            # previous one, so the two compose to a single affine map. Left
            # as-is because inserting a module would shift the state_dict
            # keys of the layers that follow - confirm whether a ReLU was
            # intended here.
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, num_classes),
        )

    def forward(self, x):
        """Return per-class logits of shape ``(batch, num_classes)``."""
        x = self.conv_layers(x)
        # Collapse channel and spatial dimensions; keep the batch dimension.
        x = x.flatten(start_dim=1)
        return self.fc_layers(x)
# Cross-entropy loss on the network's output logits.
loss_function_cnn = nn.CrossEntropyLoss()

# Instantiate the network and move its parameters to the GPU.
cnn = CNN()
cnn.to('cuda')

# SGD with momentum over all of the network's parameters.
optimizer_cnn = optim.SGD(params=cnn.parameters(), lr=0.01, momentum=0.9)
Within a few seconds I get a Blue Screen of Death: nvlddmkm.sys failed, with the stop code "System thread exception not handled". I am using images from TinyImageNet30 to train the model.
I have narrowed it down to the line:
cnn.to('cuda')