I am trying to train a very simple network on the CIFAR10 dataset.

```
class Net(nn.Module):
    """Minimal CIFAR10 classifier: one 5x5 conv layer, 2x2 max-pool, one linear head."""

    def __init__(self, filters):
        super(Net, self).__init__()
        # 'same' padding (5x5 kernel, padding=2) keeps the 32x32 spatial size;
        # the 2x2 pool in forward() then halves it to 16x16.
        self.conv1 = nn.Conv2d(3, filters, 5, padding=2)
        self.fc1 = nn.Linear(filters * 16 * 16, 10)

    def forward(self, x):
        """Return raw class logits of shape (batch, 10) for input x (batch, 3, 32, 32)."""
        pooled = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        flat = pooled.view(-1, self.num_flat_features(pooled))
        return self.fc1(flat)

    def num_flat_features(self, x):
        """Number of elements per sample in x, ignoring the batch dimension."""
        per_sample = 1
        for dim in x.size()[1:]:
            per_sample *= dim
        return per_sample
def train_random_model(filter, learning_rate, epochs, modelname, dataloader):
    """Train a Net on the batches in `dataloader`, save it, then re-measure train error.

    Args:
        filter: number of conv filters passed to Net.
        learning_rate: SGD learning rate (momentum is 0).
        epochs: maximum number of passes over the data; stops early at <=1% error.
        modelname: path passed to torch.save for the trained model.
        dataloader: dict-like mapping whose values are (inputs, labels) batch tuples.

    NOTE(review): the per-epoch error printed during training is computed from
    predictions made *before* later weight updates in the same epoch (stale
    predictions). The final evaluation re-runs the finished model over all
    batches, so its error is legitimately lower/different — this, not a layer
    like BatchNorm, explains the train-vs-eval discrepancy.
    """
    net = Net(filter).cuda()
    net.train()
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0)
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        for key, value in dataloader.items():
            inputs1, labels1 = value
            # Variable() is deprecated (no-op since PyTorch 0.4); tensors suffice.
            inputs, labels = inputs1.cuda(), labels1.cuda()
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # .item() replaces the removed loss.data[0] idiom and yields a Python float.
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            # .item() keeps `correct` a Python int, so the error below is true
            # float division rather than tensor integer division.
            correct += (predicted == labels).sum().item()
        trainerror = 100.0 * (total - correct) / total
        print('Finished Training', epoch, "Train Error", trainerror)
        if trainerror <= 1.0:
            break
    print("Saving model at", modelname)
    torch.save(net, modelname)

    print("Sanity Check")
    correct = 0
    total = 0
    net.eval()
    # no_grad: inference only — no autograd bookkeeping needed here.
    with torch.no_grad():
        for key, value in dataloader.items():
            inputs1, labels1 = value
            inputs, labels = inputs1.cuda(), labels1.cuda()
            outputs = net(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    trainerror = 100.0 * (total - correct) / total
    # BUG FIX: original printed an undefined name `count` here (NameError).
    print("Train Error", trainerror, total, correct)
```

Unfortunately, the final train error printed while training is very different from the train error computed while evaluating.

Do you have any idea what might be causing this ?

I understand that Batch Normalization typically causes this kind of discrepancy, but I am not using that layer.