I am running into a strange cuDNN error when trying to profile a network with `nvprof`.
The model runs fine without the profiler, but when I run it under the profiler with `nvprof --metrics flop_count_sp,flop_count_hp --log-file mylog.log python main.py`,
I get the following error:
Traceback (most recent call last):
File "main.py", line 285, in <module>
main()
File "main.py", line 272, in main
loss = train(epoch, train_loader, optimizer, criterion, train_size, args)
File "main.py", line 100, in train
out_subsite, out_laterality, out_behavior, out_histology, out_grade = model(sentence)
File "/home/ygx/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in __call__
result = self.forward(*input, **kwargs)
File "/home/ygx/support/mtcnn/hongjunsynth/model.py", line 138, in forward
conv_results.append(self.convblock1(x).view(-1, self.num_filters1))
File "/home/ygx/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in __call__
result = self.forward(*input, **kwargs)
File "/home/ygx/anaconda3/lib/python3.6/site-packages/torch/nn/modules/container.py", line 91, in forward
input = module(input)
File "/home/ygx/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in __call__
result = self.forward(*input, **kwargs)
File "/home/ygx/anaconda3/lib/python3.6/site-packages/torch/nn/modules/conv.py", line 176, in forward
self.padding, self.dilation, self.groups)
RuntimeError: CUDNN_STATUS_NOT_SUPPORTED. This error may appear if you passed in a non-contiguous input.
Here is a quick sanity check for the model:
import torch
import torch.nn as nn
class MTCNN(nn.Module):
    """Multi-task CNN.

    Token indices are embedded, the embeddings of each sentence are flattened
    into a single 1-D signal of length ``word_dim * max_sent_len``, and four
    parallel ``Conv1d -> ReLU -> AdaptiveMaxPool1d(1) -> Dropout`` blocks
    (one per kernel size) are applied to it. The pooled features of the four
    blocks are concatenated and fed to five independent linear heads
    (subsite, laterality, behavior, histology, grade).

    Args:
        kernel1, kernel2, kernel3, kernel4: kernel size of each conv block.
        num_filters1..num_filters4: number of output filters of each block.
        dropout1..dropout4: dropout probability applied after each block.
        max_sent_len: number of tokens per sentence.
        word_dim: embedding dimension.
        vocab_size: vocabulary size; the main embedding table has
            ``vocab_size + 2`` rows with row 0 used as padding.
        subsite_size, laterality_size, behavior_size, grade_size,
            histology_size: number of classes of the corresponding head.
        alt_model_type: ``None`` (trainable embedding), ``'static'`` (frozen
            embedding) or ``'multichannel'`` (trainable embedding plus a
            second, frozen embedding initialised from ``wv_matrix``).
        wv_matrix: pre-trained embedding weights for the frozen channel, as a
            numpy array or tensor whose shape can be copied into a
            ``(vocab_size + 100, word_dim)`` table. Required when
            ``alt_model_type == 'multichannel'``, ignored otherwise.

    Raises:
        ValueError: if ``alt_model_type == 'multichannel'`` and ``wv_matrix``
            is not given.
    """

    def __init__(self, kernel1=3, kernel2=4, kernel3=5, kernel4=6, num_filters1=50,
                 num_filters2=50, num_filters3=50, num_filters4=50, dropout1=0.5, dropout2=0.5, dropout3=0.5,
                 dropout4=0.0, max_sent_len=3000, word_dim=256, vocab_size=35095, subsite_size=34,  # 35093
                 laterality_size=4, behavior_size=3, grade_size=5, histology_size=44, alt_model_type=None,
                 wv_matrix=None):
        super(MTCNN, self).__init__()
        self.kernel1 = kernel1
        self.kernel2 = kernel2
        self.kernel3 = kernel3
        self.kernel4 = kernel4
        self.num_filters1 = num_filters1
        self.num_filters2 = num_filters2
        self.num_filters3 = num_filters3
        self.num_filters4 = num_filters4
        self.max_sent_len = max_sent_len
        self.dropout1 = dropout1
        self.dropout2 = dropout2
        self.dropout3 = dropout3
        self.dropout4 = dropout4
        self.word_dim = word_dim
        self.vocab_size = vocab_size
        self.subsite_size = subsite_size
        self.laterality_size = laterality_size
        self.histology_size = histology_size
        self.behavior_size = behavior_size
        self.grade_size = grade_size
        self.alt_model_type = alt_model_type
        self._filter_sum = None
        self._sum_filters()

        # Number of input channels seen by the conv blocks; becomes 2 when the
        # second (frozen) embedding channel is enabled below.
        self.IN_CHANNEL = 1

        self.embedding = nn.Embedding(self.vocab_size + 2, self.word_dim, padding_idx=0)
        if self.alt_model_type == 'static':
            self.embedding.weight.requires_grad = False
        elif self.alt_model_type == 'multichannel':
            if wv_matrix is None:
                raise ValueError(
                    "alt_model_type='multichannel' requires wv_matrix "
                    "(pre-trained weights for the frozen embedding channel)"
                )
            if not torch.is_tensor(wv_matrix):
                wv_matrix = torch.from_numpy(wv_matrix)
            # NOTE(review): table size (vocab_size + 100) and padding index
            # (vocab_size + 1) differ from the main embedding (vocab_size + 2,
            # padding 0); kept as in the original -- confirm this is intended.
            self.embedding2 = nn.Embedding(self.vocab_size + 100, self.word_dim, padding_idx=self.vocab_size + 1)
            self.embedding2.weight.data.copy_(wv_matrix)
            self.embedding2.weight.requires_grad = False
            self.IN_CHANNEL = 2

        self.convblock1 = nn.Sequential(
            nn.Conv1d(self.IN_CHANNEL, self.num_filters1, self.kernel1),
            nn.ReLU(),
            nn.AdaptiveMaxPool1d(1),
            nn.Dropout(p=self.dropout1)
        )
        self.convblock2 = nn.Sequential(
            nn.Conv1d(self.IN_CHANNEL, self.num_filters2, self.kernel2),
            nn.ReLU(),
            nn.AdaptiveMaxPool1d(1),
            nn.Dropout(p=self.dropout2)
        )
        self.convblock3 = nn.Sequential(
            nn.Conv1d(self.IN_CHANNEL, self.num_filters3, self.kernel3),
            nn.ReLU(),
            nn.AdaptiveMaxPool1d(1),
            nn.Dropout(p=self.dropout3)
        )
        self.convblock4 = nn.Sequential(
            nn.Conv1d(self.IN_CHANNEL, self.num_filters4, self.kernel4),
            nn.ReLU(),
            nn.AdaptiveMaxPool1d(1),
            nn.Dropout(p=self.dropout4)
        )

        self.fc1 = nn.Linear(self._filter_sum, self.subsite_size)
        self.fc2 = nn.Linear(self._filter_sum, self.laterality_size)
        self.fc3 = nn.Linear(self._filter_sum, self.behavior_size)
        self.fc4 = nn.Linear(self._filter_sum, self.histology_size)
        self.fc5 = nn.Linear(self._filter_sum, self.grade_size)

    def _sum_filters(self):
        """Get the total number of convolutional filters."""
        self._filter_sum = self.num_filters1 + self.num_filters2 + self.num_filters3 + self.num_filters4

    def forward(self, x):
        """Run the network on a tensor of token indices.

        Args:
            x: integer tensor of token indices whose element count is a
                multiple of ``max_sent_len``; it is reshaped so that each
                group of ``max_sent_len`` tokens forms one sample.

        Returns:
            Tuple ``(out_subsite, out_laterality, out_behavior,
            out_histology, out_grade)`` with the raw outputs of the five
            linear heads, each of shape ``(batch, <task>_size)``.
        """
        flat_len = self.word_dim * self.max_sent_len
        # Keep the indices: the second embedding must look up the same tokens,
        # not the float output of the first embedding.
        tokens = x
        x = self.embedding(tokens).view(-1, 1, flat_len)
        if self.alt_model_type == "multichannel":
            x2 = self.embedding2(tokens).view(-1, 1, flat_len)
            x = torch.cat((x, x2), 1)

        conv_results = []
        conv_results.append(self.convblock1(x).view(-1, self.num_filters1))
        conv_results.append(self.convblock2(x).view(-1, self.num_filters2))
        conv_results.append(self.convblock3(x).view(-1, self.num_filters3))
        conv_results.append(self.convblock4(x).view(-1, self.num_filters4))
        x = torch.cat(conv_results, 1)

        out_subsite = self.fc1(x)
        out_laterality = self.fc2(x)
        out_behavior = self.fc3(x)
        out_histology = self.fc4(x)
        out_grade = self.fc5(x)
        return out_subsite, out_laterality, out_behavior, out_histology, out_grade
# Sanity check: build the model with its default hyper-parameters, move it to
# the GPU in half precision, and push one sentence of 3000 token ids
# (0..2999) through it.
model = MTCNN().cuda().half()
x = torch.arange(3000, dtype=torch.long).cuda()
model(x)
Has anyone run into cuDNN errors when running NVIDIA's profiler?
Here is my setup:
pytorch version: 0.4.0
cudnn version: 7102
cuda 9.0