I am trying to evaluate my model and find that I get different results while using bs=1 and bs=2(the length of test set is odd so there shouldn’t be any problem about truncature).

I looked up other topics and still failed to solve my problem.

The dataset is MNIST in csv format, I also uploaded it to OneDrive(15MB)

Here is my code:

```
import pandas as pd
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torch.nn.functional as F
from sklearn.utils import shuffle
import cv2
import numpy as np
import random
import matplotlib.pyplot as plt
device=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
#===================Building Model===================
class Flatten(nn.Module):
def __init__(self):
super(Flatten, self).__init__()
def forward(self, x):
return x.view(x.size(0), -1)
class Softmax(nn.Module):
def __init__(self):
super(Softmax, self).__init__()
def forward(self, x):
return F.log_softmax(x)
class CNN(nn.Module):
def __init__(self, sz=28, nf=64):
super(CNN, self).__init__()
self.model=nn.Sequential(*[
nn.Conv2d(1, nf, 4, 2, 1), nn.BatchNorm2d(nf), nn.LeakyReLU(0.2, True),
nn.Conv2d(nf, nf*2, 4, 2, 1), nn.BatchNorm2d(nf*2), nn.LeakyReLU(0.2, True),
nn.Conv2d(nf*2, nf*4, 3, 1, 0), nn.BatchNorm2d(nf*4), nn.LeakyReLU(0.2, True),
Flatten(), nn.Linear(5*5*nf*4, 10), Softmax(),
])
def forward(self, x):
return self.model(x)
model=CNN().to(device)
===================Loading data===================
train_data=pd.read_csv("./data/train.csv")
test_data=pd.read_csv("./data/test.csv")
X_train=train_data.values[:, 1:]
Y_train=train_data.values[:, 0:1]
X_test_all, X_train_all = np.split(X_train, [5000], axis=0)
Y_test_all, Y_train_all = np.split(Y_train, [5000], axis=0)
===================Measuring===================
def measure(model, X_test, Y_test, bs=1):
model.eval()
batch_size=bs
accuracy=0.0
for st in range(0, len(Y_test)//batch_size):
X_batch, Y_batch=X_test[st:st+batch_size], Y_test[st:st+batch_size]
model.zero_grad()
model.eval()
X_batch=X_batch.reshape([X_batch.shape[0],1]+[28,28])
X_batch=torch.tensor(X_batch).type(torch.FloatTensor)/255.0
Y_batch=torch.tensor(Y_batch).type(torch.LongTensor).squeeze(1)
X_batch, Y_batch=X_batch.to(device), Y_batch.to(device)
predict=model(X_batch)
predict=predict.max(1)[1]
accuracy+=predict.eq(Y_batch).double().sum().item()
return accuracy/len(Y_test)
print(len(Y_test_all))
print(measure(model, X_test_all, Y_test_all, 2))
print("===================================")
print(measure(model, X_test_all, Y_test_all))
```

And it turns out the accuracy is 0.0636 when batch size is set to 2, and 0.0674 when batch size is set to 1.