I’m trying to train my model, and although it’s working, I’m getting the warning:
TracerWarning: Trace had nondeterministic nodes. Did you forget call .eval() on your model? Nodes:
%input.22 : Float(100:210, 210:1, requires_grad=1, device=cuda:0) = aten::dropout(%input.21, %261, %262) # /home/wilson/anaconda3/envs/cv/lib/python3.6/site-packages/torch/nn/functional.py:983:0
%input.27 : Float(100:52, 52:1, requires_grad=1, device=cuda:0) = aten::dropout(%input.26, %274, %275) # /home/wilson/anaconda3/envs/cv/lib/python3.6/site-packages/torch/nn/functional.py:983:0
This may cause errors in trace checking. To disable trace checking, pass check_trace=False to torch.jit.trace()
_module_class,
/home/wilson/anaconda3/envs/cv/lib/python3.6/site-packages/torch/jit/_trace.py:966: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error:
With rtol=1e-05 and atol=1e-05, found 698 element(s) (out of 700) whose difference(s) exceeded the margin of error (including 0 nan comparisons). The greatest difference was 0.040440917015075684 (-1.8279550075531006 vs. -1.787514090538025), which occurred at index (93, 1).
_module_class,
To fix it, I’ve already tried model.eval()
(recommended by other similar posts), although I’m not sure why I’d set the model to evaluation mode during training.
My model:
class Net(nn.Module):
    '''
    Convolutional classifier that outputs log-probabilities over 7 classes.

    Seven 5x5 convolutions (stride 2, padding 5) are followed by five fully
    connected layers. Max-pooling is applied after conv4..conv7, and dropout
    after fc1 and fc3. Layer sizes are derived for 720x720 RGB inputs.

    Params:
        batch_size: Batch size; only used in the layer shapes that are
            computed and printed while the network is constructed
    '''

    def __init__(self, batch_size):
        super(Net, self).__init__()

        # Convolutional Layers
        stride = 2
        padding = 5
        image_size = 720
        conv_kernel_size = 5
        pool_kernel_size = 2
        # Channel counts: index 0 is the input (RGB), indices 1..7 are the
        # outputs of conv1..conv7. The remaining entries are currently unused.
        num_kernels = [3,
                       15, 30, 45,
                       60, 75, 90,
                       105, 120, 135, 150]
        output_size = 7

        # Trace the spatial size through the network so that the input size
        # of the first fully connected layer can be derived.
        conv1_shape = self.conv_layer_shape("conv1_shape", batch_size, image_size, num_kernels[1], conv_kernel_size, padding, stride)
        conv2_shape = self.conv_layer_shape("conv2_shape", batch_size, conv1_shape[2], num_kernels[2], conv_kernel_size, padding, stride)
        conv3_shape = self.conv_layer_shape("conv3_shape", batch_size, conv2_shape[2], num_kernels[3], conv_kernel_size, padding, stride)
        conv4_shape = self.conv_layer_shape("conv4_shape", batch_size, conv3_shape[2], num_kernels[4], conv_kernel_size, padding, stride)
        pool4_shape = self.pool_layer_shape('pool4_shape', batch_size, conv4_shape[2], num_kernels[4], pool_kernel_size, stride)
        conv5_shape = self.conv_layer_shape("conv5_shape", batch_size, pool4_shape[2], num_kernels[5], conv_kernel_size, padding, stride)
        pool5_shape = self.pool_layer_shape('pool5_shape', batch_size, conv5_shape[2], num_kernels[5], pool_kernel_size, stride)
        conv6_shape = self.conv_layer_shape("conv6_shape", batch_size, pool5_shape[2], num_kernels[6], conv_kernel_size, padding, stride)
        pool6_shape = self.pool_layer_shape('pool6_shape', batch_size, conv6_shape[2], num_kernels[6], pool_kernel_size, stride)
        conv7_shape = self.conv_layer_shape("conv7_shape", batch_size, pool6_shape[2], num_kernels[7], conv_kernel_size, padding, stride)
        pool7_shape = self.pool_layer_shape('pool7_shape', batch_size, conv7_shape[2], num_kernels[7], pool_kernel_size, stride)

        self.conv1 = nn.Conv2d(num_kernels[0], num_kernels[1], conv_kernel_size, stride, padding)
        self.conv2 = nn.Conv2d(num_kernels[1], num_kernels[2], conv_kernel_size, stride, padding)
        self.conv3 = nn.Conv2d(num_kernels[2], num_kernels[3], conv_kernel_size, stride, padding)
        self.conv4 = nn.Conv2d(num_kernels[3], num_kernels[4], conv_kernel_size, stride, padding)
        self.conv5 = nn.Conv2d(num_kernels[4], num_kernels[5], conv_kernel_size, stride, padding)
        self.conv6 = nn.Conv2d(num_kernels[5], num_kernels[6], conv_kernel_size, stride, padding)
        self.conv7 = nn.Conv2d(num_kernels[6], num_kernels[7], conv_kernel_size, stride, padding)
        # A single pooling module is shared by conv4..conv7 (it has no weights).
        self.max_pool = nn.MaxPool2d(pool_kernel_size, stride, padding=0)

        # Fully Connected Layers
        # fc1 consumes the flattened pool7 output; each later layer halves
        # the width until the final projection onto output_size.
        fc1_size = pool7_shape[1] * pool7_shape[2] * pool7_shape[3]
        print('fc1_size:', fc1_size)
        fc2_size = fc1_size // 2
        print('fc2_size:', fc2_size)
        fc3_size = fc2_size // 2
        print('fc3_size:', fc3_size)
        fc4_size = fc3_size // 2
        print('fc4_size:', fc4_size)
        fc5_size = fc4_size // 2
        print('fc5_size:', fc5_size)
        print('output_size:', output_size)
        self.fc1 = nn.Linear(fc1_size, fc2_size)
        self.fc2 = nn.Linear(fc2_size, fc3_size)
        self.fc3 = nn.Linear(fc3_size, fc4_size)
        self.fc4 = nn.Linear(fc4_size, fc5_size)
        self.fc5 = nn.Linear(fc5_size, output_size)

    def conv_layer_shape(self, layer_name, batch_size, w_in, num_filters, kernel_size, padding, stride):
        '''
        Returns shape of a convolutional layer (square input, dilation 1)
        and prints it.
        Parameters:
            layer_name: Name of layer (used as the printed label)
            batch_size: Batch size
            w_in: Width/Height of Previous Layer
            num_filters: Number of Filters
            kernel_size: Filter/Kernel Size
            padding: Padding
            stride: Stride
        Returns:
            shape: Shape of convolutional layer as
                (batch_size, num_filters, w_out, w_out)
        '''
        # Integer floor division avoids the float round-trip of int(x / stride).
        w_out = (w_in - kernel_size + 2 * padding) // stride + 1
        shape = (batch_size, num_filters, w_out, w_out)
        print('{}: {}'.format(layer_name, shape))
        return shape

    def pool_layer_shape(self, layer_name, batch_size, w_in, num_filters, kernel_size, stride):
        '''
        Returns shape of a pooling layer (square input, no padding,
        dilation 1, floor rounding) and prints it.
        Params:
            layer_name: Name of layer (used as the printed label)
            batch_size: Batch size
            w_in: Width/Height of previous layer
            num_filters: Number of filters
            kernel_size: Filter/Kernel size
            stride: Stride
        Returns:
            shape: Shape of pooling layer as
                (batch_size, num_filters, w_out, w_out)
        '''
        # Output size of an unpadded pooling window; this matches the
        # nn.MaxPool2d(kernel_size, stride, padding=0) built in __init__.
        w_out = (w_in - kernel_size) // stride + 1
        shape = (batch_size, num_filters, w_out, w_out)
        print('{}: {}'.format(layer_name, shape))
        return shape

    def forward(self, x):
        '''
        Feed-forward
        Params:
            x: Batch of images
        Returns:
            Log-softmax scores over dim 1, one row per image
        '''
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = self.max_pool(F.relu(self.conv4(x)))
        x = self.max_pool(F.relu(self.conv5(x)))
        x = self.max_pool(F.relu(self.conv6(x)))
        x = self.max_pool(F.relu(self.conv7(x)))
        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)
        # F.dropout defaults to training=True regardless of the module's mode,
        # so pass self.training explicitly: dropout then stays on in train()
        # and is switched off by eval(), making the forward pass deterministic
        # (e.g. when the model is traced).
        x = F.dropout(F.relu(self.fc1(x)), training=self.training)
        x = F.relu(self.fc2(x))
        x = F.dropout(F.relu(self.fc3(x)), training=self.training)
        x = F.relu(self.fc4(x))
        x = self.fc5(x)
        x = F.log_softmax(x, dim=1)
        return x
Edit:
The warning disappeared after I removed the writer.add_graph(net, images) call
(SummaryWriter, TensorBoard).
How can I add the graph to TensorBoard without triggering these warnings?