RuntimeError: cuDNN error: CUDNN_STATUS_EXECUTION_FAILED (batch_norm)

RuntimeError Traceback (most recent call last)
in ()
15 pretrained = pretrained,
16 dataset=dataset,
---> 17 noise_rate=0.45
18 )

in run_vae(train_loader, test_loader, batch_size, epochs, z_dim, est_loader, cls_model, out_dir, select_ratio, pretrained, dataset, noise_rate)
57
58 adjust_learning_rate(optimizers['vae2'], epoch)
---> 59 train(epoch, model, train_loader, optimizers, device)
60
61

in train(epoch, model, train_loader, optimizers, device)
19
20 #forward
---> 21 x_hat1, n_logits1, mu1, log_var1, c_logits1, y_hat1 = vae_model1(data)
22 x_hat2, n_logits2, mu2, log_var2, c_logits2, y_hat2 = vae_model2(data)
23 #calculate acc

/home/ubuntu/anaconda3/envs/idnl/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
545 result = self._slow_forward(*input, **kwargs)
546 else:
--> 547 result = self.forward(*input, **kwargs)
548 for hook in self._forward_hooks.values():
549 hook_result = hook(self, input, result)

/media/ubuntu/Storage/Noisy_Labels/IDLN/mylib/models/vae.py in forward(self, x)
32 def forward(self, x):
33 ### trick 1, add a softmax function to logits
---> 34 c_logits = self.y_encoder(x)
35 y_hat = self._y_hat_reparameterize(c_logits)
36 mu, logvar = self.z_encoder(x, y_hat)

/home/ubuntu/anaconda3/envs/idnl/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
545 result = self._slow_forward(*input, **kwargs)
546 else:
--> 547 result = self.forward(*input, **kwargs)
548 for hook in self._forward_hooks.values():
549 hook_result = hook(self, input, result)

/media/ubuntu/Storage/Noisy_Labels/IDLN/mylib/models/resnet.py in forward(self, x, revision, output_f)
220
221 def forward(self, x, revision=False, output_f=False):
--> 222 return self._forward_impl(x,revision,output_f)
223
224

/media/ubuntu/Storage/Noisy_Labels/IDLN/mylib/models/resnet.py in _forward_impl(self, x, revision, output_f)
199 # See note [TorchScript super()]
200 x = self.conv1(x)
--> 201 x = self.bn1(x)
202 x = self.relu(x)
203 x = self.maxpool(x)

/home/ubuntu/anaconda3/envs/idnl/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
545 result = self._slow_forward(*input, **kwargs)
546 else:
--> 547 result = self.forward(*input, **kwargs)
548 for hook in self._forward_hooks.values():
549 hook_result = hook(self, input, result)

/home/ubuntu/anaconda3/envs/idnl/lib/python3.6/site-packages/torch/nn/modules/batchnorm.py in forward(self, input)
79 input, self.running_mean, self.running_var, self.weight, self.bias,
80 self.training or not self.track_running_stats,
---> 81 exponential_average_factor, self.eps)
82
83 def extra_repr(self):

/home/ubuntu/anaconda3/envs/idnl/lib/python3.6/site-packages/torch/nn/functional.py in batch_norm(input, running_mean, running_var, weight, bias, training, momentum, eps)
1654 return torch.batch_norm(
1655 input, weight, bias, running_mean, running_var,
-> 1656 training, momentum, eps, torch.backends.cudnn.enabled
1657 )
1658

RuntimeError: cuDNN error: CUDNN_STATUS_EXECUTION_FAILED

I am running this on PyTorch 1.2 and have tried both CUDA 9.0 and CUDA 10.0, but I can't figure out what is causing the error.
@ptrblck