I am new to PyTorch and am trying to implement a simple image classification network (binary classification, 1/0) in PyTorch. I have already obtained good results (~90 % test accuracy) with a TensorFlow/Tflearn implementation and am trying to reproduce it in PyTorch. However, I get high training accuracy but very low test accuracy in PyTorch. Below are the two implementations.
What could be the reason for the difference? Is it caused by a difference between the two network implementations or between the training procedures? If so, what is it?
TensorFlow/Tflearn implementation that works well:
# ConvNet on stacks of grayscale images
def apply_3d_cnn_single_view(train_data, train_labels, test_data, w=100, h=100, NS=6):
train_labels = to_categorical(train_labels, nb_classes=2)
da = tflearn.data_augmentation.ImageAugmentation()
da.add_random_blur (sigma_max=1.5)
net = tflearn.input_data(shape=[None, h, w, NS], data_augmentation=da)
net = tflearn.conv_2d(net, 32, (3,3), activation='relu', regularizer="L2")
net = tflearn.conv_2d(net, 32, (3,3), activation='relu', regularizer="L2")
net = tflearn.max_pool_2d(net, 3, 2) # 50x50
net = tflearn.conv_2d(net, 64, (3,3), activation='relu', regularizer="L2")
net = tflearn.conv_2d(net, 64, (3,3), activation='relu', regularizer="L2")
net = tflearn.max_pool_2d(net, 3, 3) # 16x16
net = tflearn.conv_2d(net, 32, (3,3), activation='relu', regularizer="L2")
net = tflearn.conv_2d(net, 32, (3,3), activation='relu', regularizer="L2")
net = tflearn.max_pool_2d(net, 3, 3) # 5x5
net = tflearn.fully_connected(net, 32, activation='relu', regularizer="L2")
net = tflearn.fully_connected(net, 2, activation='softmax', regularizer="L2")
net = tflearn.regression(net, optimizer='adam', learning_rate=0.0001, loss='categorical_crossentropy')
model = tflearn.DNN(net)
model.fit(train_data, train_labels, n_epoch=100, show_metric=True, batch_size=10)
pre_labels = model.predict_label(test_data)
print 'pre_labels:', pre_labels
tf.reset_default_graph()
return category_to_class_labels(pre_labels)
PyTorch implementation, which gives low/zero test accuracy:
def random_blur_img(image, sigma_max=1.5):
    """Return ``image`` Gaussian-blurred with probability 1/2, else unchanged.

    When the blur is applied, sigma is drawn uniformly from
    ``[0, sigma_max]`` and a new array is returned; otherwise the very same
    ``image`` object is returned.

    NOTE: a scalar sigma is applied by ``gaussian_filter`` along *every* axis
    of ``image``, so for a multi-channel array the channel axis is smoothed
    as well.

    Args:
        image: N-dimensional array-like accepted by
            ``scipy.ndimage.gaussian_filter``.
        sigma_max: upper bound of the randomly drawn standard deviation.

    Returns:
        The blurred array, or ``image`` itself when no blur was drawn.
    """
    # Coin flip: leave roughly half of the samples untouched.
    if not random.getrandbits(1):
        return image

    # Random sigma
    sigma = random.uniform(0., sigma_max)
    # Use the public ``scipy.ndimage`` entry point; the ``scipy.ndimage.filters``
    # namespace is deprecated.
    return scipy.ndimage.gaussian_filter(image, sigma)
class DataSet(torchdata.Dataset):
    """Index-aligned (sample, label) pairs with an optional per-sample transform.

    Args:
        data: indexable collection of samples.
        labels: indexable collection of labels; its length defines the
            length of the dataset.
        transform: optional callable applied to a sample each time it is
            fetched. Labels are never transformed.
    """

    def __init__(self, data, labels, transform=None):
        self.data, self.labels, self.transform = data, labels, transform

    def __len__(self):
        # The dataset size follows the labels, not the data container.
        return len(self.labels)

    def __getitem__(self, index):
        sample, target = self.data[index], self.labels[index]
        if not self.transform:
            # torch cross entropy loss requires label id (0...C-1)
            return sample, target
        return self.transform(sample), target
# CNN Model
class CNN(nn.Module):
    """Small ConvNet for 2-class classification of 100x100 image stacks.

    PyTorch counterpart of the tflearn reference network in this file: three
    stages of two 3x3 ReLU convolutions plus a max pool, followed by a
    32-unit ReLU fully connected layer and a 2-unit output layer that emits
    raw logits (``nn.CrossEntropyLoss`` applies the softmax itself).

    ``fc1`` is sized for a 5x5x32 feature map. With the unpadded pools used
    here a 100x100 input shrinks 100 -> 49 -> 16 -> 5, so inputs are expected
    to be ``(batch, num_channels, 100, 100)``.

    Requires PyTorch >= 0.4 (``Tensor.item()``, ``torch.no_grad()``).

    Args:
        num_channels: number of input channels.
        net: accepted for interface compatibility; currently unused.
    """

    def __init__(self, num_channels=3, net='small'):
        super(CNN, self).__init__()
        self.layer1 = self._conv_stage(num_channels, 32, pool_stride=2)
        self.layer2 = self._conv_stage(32, 64, pool_stride=3)
        self.layer3 = self._conv_stage(64, 32, pool_stride=3)
        self.fc1 = nn.Linear(5 * 5 * 32, 32)
        self.fc2 = nn.Linear(32, 2)

    @staticmethod
    def _conv_stage(in_channels, out_channels, pool_stride):
        """Build conv-ReLU-conv-ReLU-maxpool; module order fixes state_dict keys."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, dilation=1),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, dilation=1),
            nn.ReLU(),
            nn.MaxPool2d(3, pool_stride))

    def forward(self, x):
        """Return class logits of shape ``(batch, 2)`` for input ``x``."""
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.view(out.size(0), -1)
        # The tflearn reference uses activation='relu' on its 32-unit dense
        # layer. Without a non-linearity here fc1 and fc2 would collapse into
        # a single linear map.
        out = nn.functional.relu(self.fc1(out))
        return self.fc2(out)

    def train_net(self, train_data, train_labels, lrate=0.001, epochs=100, batch_size=10, shuffle=True,
                  weight_decay=0.0):
        """Train in place with Adam and cross-entropy, printing per-epoch stats.

        Samples are wrapped in ``DataSet`` with ``random_blur_img`` as the
        augmentation. The model is moved to the GPU when CUDA is available
        and trained on the CPU otherwise.

        Args:
            train_data: indexable collection of training samples.
            train_labels: class ids (0..C-1) aligned with ``train_data``.
            lrate: Adam learning rate.
            epochs: number of passes over the training set.
            batch_size: mini-batch size.
            shuffle: whether the loader reshuffles every epoch.
            weight_decay: L2 penalty forwarded to ``torch.optim.Adam``. The
                default 0.0 keeps the previous unregularised behaviour; the
                tflearn reference uses an L2 regularizer on every layer, so
                a non-zero value may be needed to match it.
        """
        use_cuda = torch.cuda.is_available()
        if use_cuda:
            self.cuda()
        self.train()

        # Loss and Optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(self.parameters(), lr=lrate, weight_decay=weight_decay)

        dataset = DataSet(train_data, train_labels, transform=random_blur_img)
        data_loader = torchdata.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=1)

        timer = Timer()
        for epoch in range(epochs):
            timer.tic()
            correct = 0  # number of correct predictions
            epoch_loss = 0.0  # total loss for the current epoch
            for batch, labels in data_loader:
                inputs = batch.float()
                # CrossEntropyLoss expects int64 class ids.
                targets = labels.long()
                if use_cuda:
                    inputs, targets = inputs.cuda(), targets.cuda()

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward + backward + optimize
                outputs = self(inputs)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()

                # Index of the largest logit is the predicted class.
                predicted = outputs.detach().max(1)[1]
                # .item() yields Python numbers, so the running totals neither
                # hold on to the graph nor accumulate in a narrow tensor dtype.
                correct += (predicted == targets).sum().item()
                epoch_loss += loss.item()

            cls_accuracy = 100 * (float(correct) / len(dataset))
            runtime = timer.toc()
            print('Epoch %d accuracy : %.2f loss : %f \t time : %f'%(epoch, cls_accuracy, epoch_loss, runtime))

    def test_net(self, test_data, test_labels, batch_size=10):
        """Predict a class id for every sample in ``test_data``.

        Args:
            test_data: indexable collection of samples.
            test_labels: only used (via ``DataSet``) to define the number of
                samples; the label values are not compared here.
            batch_size: mini-batch size for inference.

        Returns:
            list of predicted class ids (NumPy integer scalars), in dataset
            order.
        """
        use_cuda = torch.cuda.is_available()
        if use_cuda:
            self.cuda()
        self.eval()

        dataset = DataSet(test_data, test_labels)
        data_loader = torchdata.DataLoader(dataset, batch_size=batch_size, num_workers=1)

        pre_labels = []
        # Inference only: skip building autograd graphs.
        with torch.no_grad():
            for batch, _ in data_loader:
                inputs = batch.float()
                if use_cuda:
                    inputs = inputs.cuda()
                outputs = self(inputs)
                # Index of the largest logit is the predicted class.
                predicted = outputs.max(1)[1]
                pre_labels.extend(predicted.cpu().numpy().flatten())
        return pre_labels
def apply_3d_cnn(train_data, train_labels, test_data, test_labels, w=100, h=100, NS=6):
    """Train the PyTorch CNN, predict on the test set and report performance.

    Prints the wall-clock time of the training and test phases and hands the
    true and predicted test labels to ``report_class_performance``.

    Args:
        train_data: training samples passed to ``CNN.train_net``.
        train_labels: training class ids.
        test_data: test samples passed to ``CNN.test_net``.
        test_labels: true test class ids.
        w, h: accepted for symmetry with the tflearn variant but unused; the
            network's first fully connected layer is sized for a fixed
            feature map.
        NS: number of input channels of the network.
    """
    # Honour NS instead of a hard-coded 6 (identical for the default value).
    network = CNN(num_channels=NS)

    timer = Timer()
    timer.tic()
    network.train_net(train_data, train_labels, lrate=0.0001, epochs=100, batch_size=10, shuffle=True)
    runtime = timer.toc()
    print('Training time: %f'%(runtime))

    timer.tic()
    pre_labels = network.test_net(test_data, test_labels, batch_size=10)
    runtime = timer.toc()
    print('Test time: %f'%(runtime))

    report_class_performance(test_labels, pre_labels)
Notes:
- It was hard to indent the code here; you can assume the indentation is correct.
- The PyTorch network converges very quickly, reaching 100 % accuracy on the training set within 30 epochs, whereas the TF network reaches ~97 % in 30 epochs and never reaches exactly 100 %.
- I ran the trained PyTorch network on the training set and got 100 % accuracy (to check whether there is a problem in the evaluation code).
- I am aware of the different input formats (HxWxC in TF, CxHxW in PyTorch) and prepare the input data accordingly.
Is there a difference between the two network implementations or training procedures that may cause the problem?