Hi, I have only just moved to GPU usage and encountered many issues. Luckily most of them had solutions on this forum but I can’t seem to get past this one.
Traceback (most recent call last):
  File "S:\Desktop Folders\Bots\main (2).py", line 158, in <module>
    loss.backward()
  File "C:\Users\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\_tensor.py", line 307, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
  File "C:\Users\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\autograd\__init__.py", line 154, in backward
    Variable._execution_engine.run_backward(
RuntimeError: cuDNN error: CUDNN_STATUS_INTERNAL_ERROR
You can try to repro this exception using the following code snippet. If that doesn’t trigger the error, please include your original repro script when reporting this issue.
# Stand-alone repro emitted by PyTorch for the failing cuDNN convolution.
# It runs a forward and backward pass through a single 16 -> 32 channel,
# 5x5 convolution on a [2, 16, 30, 30] input. A CUDA device is required.
import torch

# These flags correspond to the ConvolutionParams reported with the error
# (deterministic = false, allow_tf32 = true).
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.allow_tf32 = True

# The device string must use plain ASCII quotes; the typographic quotes in
# the pasted snippet are a SyntaxError in Python.
data = torch.randn([2, 16, 30, 30], dtype=torch.float, device='cuda', requires_grad=True)
net = torch.nn.Conv2d(16, 32, kernel_size=[5, 5], padding=[0, 0], stride=[1, 1], dilation=[1, 1], groups=1)
net = net.cuda().float()
out = net(data)
out.backward(torch.randn_like(out))
# CUDA work is queued asynchronously; synchronizing makes any pending error
# surface here instead of at a later, unrelated call.
torch.cuda.synchronize()
ConvolutionParams
data_type = CUDNN_DATA_FLOAT
padding = [0, 0, 0]
stride = [1, 1, 0]
dilation = [1, 1, 0]
groups = 1
deterministic = false
allow_tf32 = true
input: TensorDescriptor 000001F1F3026AD0
type = CUDNN_DATA_FLOAT
nbDims = 4
dimA = 2, 16, 30, 30,
strideA = 14400, 900, 30, 1,
output: TensorDescriptor 000001F1F3026B40
type = CUDNN_DATA_FLOAT
nbDims = 4
dimA = 2, 32, 26, 26,
strideA = 21632, 676, 26, 1,
weight: FilterDescriptor 000001F1E025EA20
type = CUDNN_DATA_FLOAT
tensor_format = CUDNN_TENSOR_NCHW
nbDims = 4
dimA = 32, 16, 5, 5,
Pointer addresses:
input: 000000080F63EE00
output: 000000080F71FE00
weight: 000000080F693400
The snippet above does not reproduce the error on my machine, so here is my own code in case it is useful:
class CNN(nn.Module):
    """Two-convolution image classifier that outputs two logits per sample.

    ``fc1`` expects 288 flattened features, i.e. 32 channels x 3 x 3. The
    layer stack below produces exactly that for 3-channel 64x64 inputs
    (64 -> 60 -> 30 -> 26 -> 13 -> 6 -> 3 along each spatial axis).
    """

    def __init__(self):
        # The method must be spelled ``__init__`` (with the dunders): a method
        # named plain ``init`` is never run by ``CNN()``, so none of the
        # layers below would be registered and ``forward`` would fail.
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=5, stride=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=1)
        self.fc1 = nn.Linear(288, 2)
        # One stateless pooling / activation module is reused at every stage.
        self.mp1 = nn.MaxPool2d(kernel_size=2)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return class logits of shape ``(batch, 2)`` for an image batch ``x``."""
        x = self.mp1(self.relu(self.conv1(x)))
        x = self.mp1(self.relu(self.conv2(x)))
        # Two further poolings shrink the feature map to the size fc1 expects.
        x = self.mp1(x)
        x = self.mp1(x)
        # Flatten everything except the batch dimension.
        x = x.flatten(1)
        return self.fc1(x)
# Batches of the training set: shuffled, one worker process, and the final
# (possibly smaller) batch is kept rather than dropped.
train_loader = DataLoader(train, batch_size=batsize, shuffle=True, num_workers=1, drop_last=False)

# Batches of the held-out set, built with exactly the same settings
# (including shuffling) as the training loader.
val_loader = DataLoader(test, batch_size=batsize, shuffle=True, num_workers=1, drop_last=False)
# Let cuDNN benchmark the available convolution algorithms and reuse the
# fastest one for each input configuration.
torch.backends.cudnn.benchmark = True

model = CNN().to(dev)
# NOTE(review): CNN.forward never calls self.dropout, so attaching this layer
# currently has no effect on the network's output - confirm whether it should
# be applied inside forward().
model.dropout = nn.Dropout(p=drop)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = torch.nn.CrossEntropyLoss().to(dev)

# Per-batch metrics, appended to across all epochs.
train_losses, test_losses = [], []
train_acc, test_acc = [], []
epoch_data = {}
epochs = 100

for e in range(epochs):
    # ---- training pass ----
    model.train()
    for batch in train_loader:
        X, y = batch[0], batch[1].to(dev)
        # NOTE(review): (0, 3, 2, 1) moves the last axis to position 1 and also
        # swaps the two middle axes. Assuming batches arrive as (N, H, W, C),
        # this yields (N, C, W, H); if a plain NHWC -> NCHW conversion is
        # intended, (0, 3, 1, 2) keeps height/width order - TODO confirm.
        X = torch.permute(X, (0, 3, 2, 1))
        X = X.to(torch.float).to(dev)

        # Clear gradients left over from the previous batch; without this,
        # backward() keeps accumulating into .grad and every optimizer step
        # uses the sum of all gradients seen so far.
        optimizer.zero_grad()

        pred = model(X)
        loss = criterion(pred, y)
        train_acc.append(calc_accuracy(pred, y))
        train_losses.append(loss.item())
        loss.backward()
        optimizer.step()

    # ---- validation pass ----
    model.eval()
    # No gradients are needed for evaluation; disabling autograd avoids
    # building a graph for every validation batch and saves GPU memory.
    with torch.no_grad():
        for batch in val_loader:
            X, y = batch[0], batch[1].to(dev)
            X = torch.permute(X, (0, 3, 2, 1))
            X = X.to(torch.float).to(dev)
            pred = model(X)
            loss = criterion(pred, y)
            test_acc.append(calc_accuracy(pred, y))
            test_losses.append(loss.item())
It may also be worth mentioning that the network trains fine on the CPU.
Thank you in advance.