Hi, I am learning PyTorch by building a DNN, and I ran into an error that I don't know how to fix. The error and all of the code are below. I am using Python 3.7 with CUDA 9 on Ubuntu 18.10. Thanks a lot in advance!
Traceback (most recent call last):
File "dnn.py", line 54, in <module>
train(N, D_in, D_out, H, H2, x, y, x_test, y_test, epoch, learning_rate)
File "dnn.py", line 26, in train
y_pred = model(x)
File "/home/research/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 489, in __call__
result = self.forward(*input, **kwargs)
File "/home/research/anaconda3/lib/python3.7/site-packages/torch/nn/modules/container.py", line 92, in forward
input = module(input)
File "/home/research/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 489, in __call__
result = self.forward(*input, **kwargs)
File "/home/research/anaconda3/lib/python3.7/site-packages/torch/nn/modules/batchnorm.py", line 76, in forward
exponential_average_factor, self.eps)
File "/home/research/anaconda3/lib/python3.7/site-packages/torch/nn/functional.py", line 1623, in batch_norm
training, momentum, eps, torch.backends.cudnn.enabled
RuntimeError: cuDNN error: CUDNN_STATUS_EXECUTION_FAILED
import torch
def train(N, D_in, D_out, H, H2, x, y, x_test, y_test, epoch, learning_rate):
    """Train a small fully connected network and return it.

    The network is Linear -> Dropout -> BatchNorm1d -> ReLU, twice, followed
    by Linear -> BatchNorm1d -> Sigmoid. Every epoch runs one full-batch
    optimisation step on ``(x, y)`` and prints the epoch index, the training
    loss and the loss on ``(x_test, y_test)``.

    Args:
        N: Nominal batch size. Not used by this function (the batch size is
            whatever the first dimension of ``x`` is); kept so existing
            callers keep working.
        D_in: Number of input features.
        D_out: Number of output features.
        H: Width of the first hidden layer.
        H2: Width of the second hidden layer.
        x: Training inputs; the last dimension must be ``D_in``.
        y: Training targets, same shape as the model output.
        x_test: Held-out inputs, laid out like ``x``.
        y_test: Held-out targets, laid out like ``y``.
        epoch: Number of optimisation steps to run.
        learning_rate: Learning rate passed to Adam.

    Returns:
        The trained ``torch.nn.Sequential`` model, left in training mode and
        located on the device that was used for training.

    Note:
        BatchNorm1d in training mode needs more than one sample per batch,
        so ``x`` must contain at least two rows.
    """
    # Use the GPU when one is usable, otherwise fall back to the CPU instead
    # of crashing on an unconditional .cuda() call.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = torch.nn.Sequential(
        torch.nn.Linear(D_in, H),
        torch.nn.Dropout(0.5),
        torch.nn.BatchNorm1d(H),
        torch.nn.ReLU(),
        torch.nn.Linear(H, H2),
        torch.nn.Dropout(0.5),
        torch.nn.BatchNorm1d(H2),
        torch.nn.ReLU(),
        torch.nn.Linear(H2, D_out),
        torch.nn.BatchNorm1d(D_out),
        torch.nn.Sigmoid(),
    ).to(device)

    x = x.to(device)
    y = y.to(device)
    x_test = x_test.to(device)
    y_test = y_test.to(device)

    loss_fn = torch.nn.MSELoss(reduction='sum')
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    for t in range(epoch):
        # Score the held-out batch in eval mode: dropout is disabled and
        # BatchNorm uses (and does not update) its running statistics, so
        # the test data cannot leak into the model. no_grad() skips building
        # an autograd graph that would never be used.
        model.eval()
        with torch.no_grad():
            test_loss = loss_fn(model(x_test), y_test)

        # Back to training mode for the optimisation step.
        model.train()
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        print(t, loss.item(), test_loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return model
if __name__ == "__main__":
    # Problem size: batch size, input width, output width.
    N, D_in, D_out = 64, 100, 1
    # Widths of the two hidden layers.
    H, H2 = 128, 128
    # Optimisation settings: number of epochs and Adam learning rate.
    epoch, learning_rate = 500, 1e-4

    # Synthetic standard-normal data. The tensors are drawn in the order
    # x, y, x_test, y_test.
    x, y = torch.randn(N, D_in), torch.randn(N, D_out)
    x_test, y_test = torch.randn(N, D_in), torch.randn(N, D_out)

    train(
        N=N,
        D_in=D_in,
        D_out=D_out,
        H=H,
        H2=H2,
        x=x,
        y=y,
        x_test=x_test,
        y_test=y_test,
        epoch=epoch,
        learning_rate=learning_rate,
    )