Hey, I’ve been trying to train a model for the GTSRB street signs dataset, I tried to improve the accuracy but so far only getting 5.6% even with epoch = 100
Here’s my code after importing everything I need:
train_transform_list = [
transforms.RandomHorizontalFlip(),
transforms.Resize([30,30]),
transforms.ToTensor()
]
test_transform_list = [
transforms.Resize([30,30]),
transforms.ToTensor()
]
train_transform = transforms.Compose(train_transform_list)
test_transform = transforms.Compose(test_transform_list)
# Create training set and define training dataloader
## YOUR CODE HERE ##
train_set = torchvision.datasets.GTSRB(root="./data/train_images_NEW", download=True, transform=train_transform)
trainloader = torch.utils.data.DataLoader(train_set, batch_size = 250)
# Create test set and define test dataloader
## YOUR CODE HERE ##
test_set = torchvision.datasets.GTSRB(root='./data/test_images_NEW', download=True, transform = test_transform)
testloader = torch.utils.data.DataLoader(test_set, batch_size = 250)
classes = ('0','1','2','3','4','5','6','7','8','9','10','11','12','13','14','15',
'16','17','18','19','20','21','22','23','24','25','26','27','28','29',
'30','31','32','33','34','35','36','37','38','39','40','41','42')
nclasses = 43
class Network(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(3, 100, kernel_size=5)
self.bn1 = nn.BatchNorm2d(100)
self.conv2 = nn.Conv2d(100, 150, kernel_size=3)
self.bn2 = nn.BatchNorm2d(150)
self.conv3 = nn.Conv2d(150, 250, kernel_size=3)
self.bn3 = nn.BatchNorm2d(250)
self.conv_drop = nn.Dropout2d()
self.fc1 = nn.Linear(250, 180)
self.fc2 = nn.Linear(180, 48)
self.fc3 = nn.Linear(48, nclasses)
self.localization = nn.Sequential(
nn.Conv2d(3, 8, kernel_size=7),
nn.MaxPool2d(2, stride=2),
nn.ReLU(True),
nn.Conv2d(8, 10, kernel_size=5),
nn.MaxPool2d(2, stride=2),
nn.ReLU(True)
)
self.fc_loc = nn.Sequential(
nn.Linear(10 * 4 * 4, 32),
nn.ReLU(True),
nn.Linear(32, 3 * 2)
)
def stn(self, x):
xs = self.localization(x)
xs = xs.view(-1, 10 * 4 * 4)
theta = self.fc_loc(xs)
theta = theta.view(-1, 2, 3)
grid = F.affine_grid(theta, x.size())
x = F.grid_sample(x, grid)
return x
def forward(self, x):
x = self.stn(x)
# Perform forward pass
x = self.bn1(F.max_pool2d(F.leaky_relu(self.conv1(x)),2))
x = self.conv_drop(x)
x = self.bn2(F.max_pool2d(F.leaky_relu(self.conv2(x)),2))
x = self.conv_drop(x)
x = self.bn3(F.max_pool2d(F.leaky_relu(self.conv3(x)),2))
x = self.conv_drop(x)
x = x.view(x.size(0), -1)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = F.dropout(x, training=self.training)
x = self.fc3(x)
return F.log_softmax(x, dim=1)
model = Network()
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
epochs = 1000
steps = 0
running_loss = 0
print_every = 5
train_losses, test_losses, accuracy_score = [], [], []
for e in range(epochs):
running_loss = 0
for images, labels in trainloader:
images, labels = images.to(device), labels.to(device)
optimizer.zero_grad()
#log_ps == output
log_ps = model(images)
loss = criterion(log_ps, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
# print(running_loss)
else:
test_loss = 0
accuracy = 0
with torch.no_grad():
#I used model.eval() to go in evaluation mode for testing
model.eval()
for images, labels in testloader:
images, labels = images.to(device), labels.to(device)
log_ps = model(images)
test_loss += criterion(log_ps, labels)
ps = torch.exp(log_ps)
top_p, top_class = ps.topk(1, dim=1)
equals = top_class == labels.view(*top_class.shape)
accuracy += torch.mean(equals.type(torch.FloatTensor))
#I used model.train() to go in training mode for training
model.train()
train_losses.append(running_loss/len(trainloader))
test_losses.append(test_loss/len(testloader))
accuracy_score.append(accuracy/len(testloader))
print("Epoch: {}/{}.. ".format(e+1, epochs),
"Training Loss: {:.3f}.. ".format(running_loss/len(trainloader)),
"Test Loss: {:.3f}.. ".format(test_loss/len(testloader)),
"Test Accuracy: {:.3f}".format(accuracy/len(testloader)))