Hello everyone,
I have two problems with my code:
- The validation AUC is high (around 70%) from the first epoch of training. I have already checked the validation and training sets to make sure they don't overlap.
- I am using torch version 2.3.0 + CUDA 12.1, which I believe is an up-to-date version, and I have no problem running my other code. However, with my current code I get the error below. I still get the error even if I lower the batch size to 32. I can make it go away by setting `torch.backends.cudnn.benchmark = True`, but I don't know whether that affects my code in other ways.
C:\Users.…:456: UserWarning: Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at …\aten\src\ATen\native\cudnn\Conv_v8.cpp:919.)
return F.conv2d(input, weight, bias, self.stride,
I would highly appreciate it if anyone could help me.
Here is my model:
# Define CNN model
class ResidualNetwork(nn.Module):
    """Small CNN with one convolutional shortcut and a linear 2-way head.

    The main path (``identity``) is two conv/ReLU/max-pool blocks; the
    ``shortcut`` path is a single strided convolution applied to the same
    input. Their outputs are summed, globally average-pooled and fed to a
    linear layer producing 2 logits.

    The size comments below follow the original author's arithmetic for a
    single-channel 113x105 input, for which both paths yield a
    16-channel 12x11 feature map. Other input sizes only work if both paths
    happen to produce matching spatial dimensions.

    Args:
        seed: Seed applied right before the Kaiming re-initialization of the
            Conv2d/Linear weights. When ``None`` (the default), the
            module-level ``random_seed`` is used, which is expected to be
            defined elsewhere in the script.

    Note:
        Only the Conv2d/Linear *weights* are re-drawn after seeding. Biases
        keep the values assigned at layer construction, which happens before
        the seed is set here.
    """

    def __init__(self, seed=None):
        super().__init__()

        # CNN (main path)
        self.identity = nn.Sequential(
            ## BLOCK 1
            nn.Conv2d(1, 8, kernel_size=(8, 8)),
            nn.ReLU(),
            # output size: ((113-8+(2*0))/1)+1 = 106 and ((105-8+(2*0))/1)+1 = 98
            nn.MaxPool2d(kernel_size=2),
            # output size: ((106-2)/2)+1 = 53 and ((98-2)/2)+1 = 49
            nn.BatchNorm2d(8),
            ## BLOCK 2
            nn.Conv2d(8, 16, kernel_size=(4, 4), stride=(2, 2)),
            nn.ReLU(),
            # output size: ((53-4+(2*0))/2)+1 = 25 and ((49-4+(2*0))/2)+1 = 23
            nn.MaxPool2d(kernel_size=2),
            # output size: ((25-2)/2)+1 = 12 and ((23-2)/2)+1 = 11
        )

        # Residual layer: one strided conv that maps the raw input straight
        # to the same 16 x 12 x 11 shape as the main path so they can be added.
        self.shortcut = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=(10, 10), stride=(9, 9), padding=(1, 1)),
            # output size: ((113-10+(2*1))/9)+1 = 12 and ((105-10+(2*1))/9)+1 = 11
        )

        # MLP head
        self.classification = nn.Sequential(
            # Global Average Pooling layer
            nn.AdaptiveAvgPool2d((1, 1)),
            # output size: (1,1)
            nn.Flatten(),
            ## BLOCK 4
            nn.Linear(1 * 1 * 16, 2),
        )

        # Initialization (use random seed for weights)
        if seed is None:
            # Backward-compatible fallback to the script-level global.
            seed = random_seed
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        self.initialize_weight()

    def initialize_weight(self):
        """Apply He (Kaiming) uniform init to every Conv2d/Linear weight.

        All three sub-networks are visited explicitly. Chaining them with
        ``and`` would evaluate to the last Sequential only and silently skip
        the convolutions in ``identity`` and ``shortcut``.
        """
        for block in (self.identity, self.shortcut, self.classification):
            for layer in block:
                if isinstance(layer, (nn.Linear, nn.Conv2d)):
                    nn.init.kaiming_uniform_(
                        layer.weight, mode="fan_out", nonlinearity="relu"
                    )

    def forward(self, x):
        """Return the 2-class logits for a batch ``x``.

        ``x`` must have one channel (both first convolutions take 1 input
        channel) and a spatial size for which the two paths agree.
        """
        x_identity = self.identity(x)
        x_shortcut = self.shortcut(x)
        # Element-wise residual sum; requires identical shapes on both paths.
        x_out = x_identity + x_shortcut
        out = self.classification(x_out)
        return out