Hi, I recently tried some code using ResNet50 on MNIST, but I only got about 93% accuracy, while other results on the web report around 99%. Can anyone check my code to see if there is anything wrong? Thank you very much.
Here is my code:
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory.
# For example, running this (by clicking Run or pressing Shift+Enter) will
# list all files under the input directory.
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that
# gets preserved as output when you create a version using "Save & Run All".
# You can also write temporary files to /kaggle/temp/, but they won't be
# saved outside of the current session.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.autograd import Variable

import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader

from sklearn.metrics import confusion_matrix
from pdb import set_trace
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device
train_augment_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(0.5),
    transforms.RandomAffine(degrees=10, translate=(0.1, 0.1)),
    transforms.ToTensor(),
    transforms.RandomErasing(),
])

# train_set = torchvision.datasets.FashionMNIST("./data", download=True, transform=train_augment_transforms)
# test_set = torchvision.datasets.FashionMNIST("./data", download=True, train=False, transform=transforms.Compose([transforms.ToTensor()]))

train_set = torchvision.datasets.MNIST("./data", download=True, train=True, transform=train_augment_transforms)
test_set = torchvision.datasets.MNIST("./data", download=True, train=False, transform=transforms.Compose([transforms.ToTensor()]))

train_loader = torch.utils.data.DataLoader(train_set, batch_size=8192)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=8192)
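As a quick sanity check on the data pipeline (this snippet is just a sketch, not part of the training run), pulling one batch confirms the loaders produce 1x28x28 MNIST tensors with labels 0-9:

# sketch: inspect one training batch from the loaders defined above
images, labels = next(iter(train_loader))
print(images.shape)                              # expected: torch.Size([8192, 1, 28, 28])
print(labels.min().item(), labels.max().item())  # expected: 0 9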
def output_label(label):
    output_mapping = {
        0: "T-shirt/Top",
        1: "Trouser",
        2: "Pullover",
        3: "Dress",
        4: "Coat",
        5: "Sandal",
        6: "Shirt",
        7: "Sneaker",
        8: "Bag",
        9: "Ankle Boot"
    }
    input = (label.item() if type(label) == torch.Tensor else label)
    return output_mapping[input]

# demo_loader = torch.utils.data.DataLoader(train_set, batch_size=10)
# batch = next(iter(demo_loader))
# images, labels = batch
# grid = torchvision.utils.make_grid(images, nrow=10)
# plt.figure(figsize=(15, 20))
# plt.imshow(np.transpose(grid, (1, 2, 0)))
# print("labels: ", end=" ")
# for i, label in enumerate(labels):
#     print(output_label(label), end=", ")
def plot_grad_flow(named_parameters):
    ave_grads = []
    layers = []
    for n, p in named_parameters:
        if (p.requires_grad) and ("bias" not in n):
            layers.append(n)
            ave_grads.append(p.grad.abs().mean().detach().cpu().numpy())
    plt.plot(ave_grads, alpha=0.3, color="b")
    plt.hlines(0, 0, len(ave_grads)+1, linewidth=1, color="k")
    plt.xticks(range(0, len(ave_grads), 1), layers, rotation="vertical")
    plt.xlim(xmin=0, xmax=len(ave_grads))
    plt.xlabel("Layers")
    plt.ylabel("average gradient")
    plt.title("Gradient flow")
    plt.grid(True)
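For context, plot_grad_flow only has gradients to read after a backward() call; the training step further down calls it exactly that way. A minimal standalone usage sketch (assuming a model, the loss function error, and one batch X_batch/y_batch already on device) would be:

# sketch only: gradients exist after backward(), so call plot_grad_flow afterwards
out = model(X_batch)
loss = error(out, y_batch)
loss.backward()
plot_grad_flow(model.named_parameters())
plt.show()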
class BasicConv2d(nn.Module):

    def __init__(self, input_channels, output_channels, **kwargs):
        super().__init__()
        self.conv = nn.Conv2d(input_channels, output_channels, bias=False, **kwargs)
        self.bn = nn.BatchNorm2d(output_channels)
        self.relu = nn.ReLU(inplace=True)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # set_trace()

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.pool(x)
        return x
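BasicConv2d is not actually used further down; for reference, a quick shape check (just a sketch) shows what one such block does to a 28x28 input:

# sketch: a 3x3 conv with padding keeps 28x28, then MaxPool2d(2) halves it to 14x14
blk = BasicConv2d(1, 64, kernel_size=3, padding=1)
print(blk(torch.zeros(2, 1, 28, 28)).shape)  # torch.Size([2, 64, 14, 14])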
class Flatten(nn.Module):
    def forward(self, input):
        return input.view(input.size(0), -1)


def multi_acc(y_pred, y_test):
    try:
        y_pred_softmax = torch.log_softmax(y_pred, dim=0)
    except:
        set_trace()
    _, y_pred_tags = torch.max(y_pred_softmax, dim=1)
    try:
        correct_pred = (y_pred_tags == y_test).float()
    except:
        set_trace()
    acc = correct_pred.sum() / len(correct_pred)
    acc = torch.round(acc * 100)
    return acc
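multi_acc returns the accuracy as a rounded percentage; a tiny usage sketch with made-up logits (not part of the training code):

# sketch: accuracy of random logits for a batch of 4 samples, printed as a percentage
fake_logits = torch.randn(4, 10)
fake_labels = torch.tensor([0, 1, 2, 3])
print(multi_acc(fake_logits, fake_labels))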
def make_train_step(model, model_name, loss_fn, optimizer):
    """ Function to make one training step
    """
    def perform_train_step(X_train_batch, y_train_batch):
        model.train()
        # see "Finetuning Torchvision Models" - PyTorch Tutorials 1.2.0 documentation
        if model_name == "inception_v3":
            y_train_pred, aux_outputs = model(X_train_batch)
            y_train_pred = y_train_pred.squeeze()
            aux_outputs = aux_outputs.squeeze()
            train_loss_1 = loss_fn(y_train_pred, y_train_batch)
            train_loss_2 = loss_fn(aux_outputs, y_train_batch)
            train_loss = train_loss_1 + 0.4*train_loss_2
        else:
            y_train_pred = model(X_train_batch).squeeze()
            train_loss = loss_fn(y_train_pred, y_train_batch)

        train_acc = multi_acc(y_train_pred, y_train_batch)
        train_loss.backward()
        plot_grad_flow(model.named_parameters())
        optimizer.step()
        optimizer.zero_grad()

        return (train_acc.item(), train_loss.item())

    return perform_train_step
def make_val_step(model, model_name, loss_fn, optimizer):
    """ Function to make one validation step
    """
    def perform_val_step(X_val_batch, y_val_batch):
        model.eval()
        y_val_pred = model(X_val_batch).squeeze()
        val_acc = multi_acc(y_val_pred, y_val_batch)
        val_loss = loss_fn(y_val_pred, y_val_batch)
        return (val_acc.item(), val_loss.item())

    return perform_val_step
def mini_batch(device, data_loader, step_fn):
    """ Function to run through a mini-batch (train or validation)
    """
    mini_batch_acc_list = []
    mini_batch_loss_list = []
    for (X_batch, y_batch) in data_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        (mini_batch_acc, mini_batch_loss) = step_fn(X_batch, y_batch)
        mini_batch_acc_list.append(mini_batch_acc)
        mini_batch_loss_list.append(mini_batch_loss)
    loss = np.mean(mini_batch_loss_list)
    acc = np.mean(mini_batch_acc_list)
    # set_trace()  # debug breakpoint, commented out so training can run uninterrupted
    return (acc, loss)
import torch
import torch.nn as nn
import torch.nn.functional as F


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_channels, out_channels, i_downsample=None, stride=1):
        super(Bottleneck, self).__init__()

        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
        self.batch_norm1 = nn.BatchNorm2d(out_channels)

        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.batch_norm2 = nn.BatchNorm2d(out_channels)

        self.conv3 = nn.Conv2d(out_channels, out_channels*self.expansion, kernel_size=1, stride=1, padding=0)
        self.batch_norm3 = nn.BatchNorm2d(out_channels*self.expansion)

        self.i_downsample = i_downsample
        self.stride = stride
        self.relu = nn.ReLU()

    def forward(self, x):
        identity = x.clone()

        x = self.relu(self.batch_norm1(self.conv1(x)))
        x = self.relu(self.batch_norm2(self.conv2(x)))
        x = self.conv3(x)
        x = self.batch_norm3(x)

        # downsample if needed
        if self.i_downsample is not None:
            identity = self.i_downsample(identity)

        # add identity
        x += identity
        x = self.relu(x)

        return x
class Block(nn.Module):
    expansion = 1

    def __init__(self, in_channels, out_channels, i_downsample=None, stride=1):
        super(Block, self).__init__()

        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=stride, bias=False)
        self.batch_norm1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, stride=stride, bias=False)
        self.batch_norm2 = nn.BatchNorm2d(out_channels)

        self.i_downsample = i_downsample
        self.stride = stride
        self.relu = nn.ReLU()

    def forward(self, x):
        identity = x.clone()

        x = self.relu(self.batch_norm2(self.conv1(x)))
        x = self.batch_norm2(self.conv2(x))

        if self.i_downsample is not None:
            identity = self.i_downsample(identity)
        print(x.shape)
        print(identity.shape)
        x += identity
        x = self.relu(x)
        return x
class ResNet(nn.Module):
    def __init__(self, ResBlock, layer_list, num_classes, num_channels=3):
        super(ResNet, self).__init__()
        self.in_channels = 64

        self.conv1 = nn.Conv2d(num_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.batch_norm1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(ResBlock, layer_list[0], planes=64)
        self.layer2 = self._make_layer(ResBlock, layer_list[1], planes=128, stride=2)
        self.layer3 = self._make_layer(ResBlock, layer_list[2], planes=256, stride=2)
        self.layer4 = self._make_layer(ResBlock, layer_list[3], planes=512, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512*ResBlock.expansion, num_classes)

    def forward(self, x):
        x = self.relu(self.batch_norm1(self.conv1(x)))
        x = self.max_pool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fc(x)

        return x

    def _make_layer(self, ResBlock, blocks, planes, stride=1):
        ii_downsample = None
        layers = []

        if stride != 1 or self.in_channels != planes*ResBlock.expansion:
            ii_downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, planes*ResBlock.expansion, kernel_size=1, stride=stride),
                nn.BatchNorm2d(planes*ResBlock.expansion)
            )

        layers.append(ResBlock(self.in_channels, planes, i_downsample=ii_downsample, stride=stride))
        self.in_channels = planes*ResBlock.expansion

        for i in range(blocks-1):
            layers.append(ResBlock(self.in_channels, planes))

        return nn.Sequential(*layers)
def ResNet50(num_classes, channels=3):
    return ResNet(Bottleneck, [3, 4, 6, 3], num_classes, channels)


model = ResNet50(10, channels=1)
model.to(device)
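Before training, a dummy MNIST-shaped batch can be pushed through the network to confirm the output shape (just a sketch, not part of the original run):

# sketch: dummy forward pass with a 1-channel 28x28 input, expecting 10 logits per sample
with torch.no_grad():
    dummy = torch.zeros(2, 1, 28, 28, device=device)
    print(model(dummy).shape)  # torch.Size([2, 10])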
error = nn.CrossEntropyLoss()
learning_rate = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
print(model)
num_epochs = 300
train_step_fn = make_train_step(model, "FashionCNN", error, optimizer)
val_step_fn = make_val_step(model, "FashionCNN", error, optimizer)


def perform_training(model, train_loader, val_loader, num_epochs=20):
    count = 0
    accuracy_stats = {
        'train': [],
        'val': []
    }
    loss_stats = {
        'train': [],
        'val': []
    }

    for epoch in range(num_epochs):
        (train_epoch_acc, train_epoch_loss) = mini_batch(device, train_loader, train_step_fn)

        with torch.no_grad():
            (val_epoch_acc, val_epoch_loss) = mini_batch(device, val_loader, val_step_fn)

        loss_stats['train'].append(train_epoch_loss)
        accuracy_stats['train'].append(train_epoch_acc)
        loss_stats['val'].append(val_epoch_loss)
        accuracy_stats['val'].append(val_epoch_acc)

        print("Epoch: %d, train loss: %f, val loss: %f, train acc: %f, val acc: %f"
              % (epoch, train_epoch_loss, val_epoch_loss, train_epoch_acc, val_epoch_acc))
perform_training(model, train_loader, test_loader, num_epochs=num_epochs)
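Since confusion_matrix is already imported at the top but never used, one thing that could help with debugging after training (a sketch, not something reflected in the 93% number above) is a per-class breakdown on the test set:

# sketch: per-class confusion matrix on the test set after training
model.eval()
all_preds, all_targets = [], []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        preds = model(X_batch.to(device)).argmax(dim=1).cpu()
        all_preds.append(preds)
        all_targets.append(y_batch)
print(confusion_matrix(torch.cat(all_targets).numpy(), torch.cat(all_preds).numpy()))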