Hi, I just started working with PyTorch (and neural nets). I was practicing image classification with ResNet, VGG and SqueezeNet on the STL10 dataset. To save time, I used pretrained models, modifying my code for 10 labels instead of 1000. While this worked fine for VGG and ResNet, it produced the error below for SqueezeNet. Can anyone explain why this happened and how to fix it?
Error:
RuntimeError Traceback (most recent call last)
<ipython-input-72-037ed8f15d61> in <module>()
----> 1 SqueezeNet(trainset,testset)
6 frames
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/conv.py in forward(self, input)
336 _pair(0), self.dilation, self.groups)
337 return F.conv2d(input, self.weight, self.bias, self.stride,
--> 338 self.padding, self.dilation, self.groups)
339
340
RuntimeError: Given groups=1, weight of size 10 512 1 1, expected input[16, 3, 224, 224] to have 512 channels, but got 3 channels instead
Code:
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torchvision import models
import copy
# Use the GPU when available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Training-time pipeline: random crops are deliberate data augmentation.
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Evaluation must be deterministic: the original pipeline reused
# RandomResizedCrop here, which randomizes every test image and makes
# reported test accuracy noisy. Resize + CenterCrop is the standard
# deterministic 224x224 evaluation crop for ImageNet-pretrained models.
transform_test = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# STL10 is downloaded on first use into ./data.
trainset = torchvision.datasets.STL10(root='./data', split='train',
                                      download=True,
                                      transform=transform_train)
testset = torchvision.datasets.STL10(root='./data', split='test',
                                     download=True,
                                     transform=transform_test)

num_classes = 10  # STL10 has 10 classes (vs. 1000 for ImageNet)
batch_size = 4
# NOTE(review): this loader is shadowed by the one built inside
# SqueezeNet() and appears unused afterwards.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)
def SqueezeNet(trainset, testset, batch_size=16, max_epochs=1):
    """Fine-tune a pretrained SqueezeNet 1.0 for 10-class classification.

    The pretrained backbone is frozen; only a freshly initialized final
    1x1 conv classifier is trained. The lowest-loss weights seen during
    training are restored before evaluation.

    Args:
        trainset, testset: datasets yielding (image, label) pairs,
            images already transformed to 3x224x224 tensors.
        batch_size: mini-batch size for both loaders (default 16,
            matching the original behavior).
        max_epochs: number of passes over the training set.
    """
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)

    squiz = models.squeezenet1_0(pretrained=True)

    # Freeze every pretrained parameter; only the new head will learn.
    for param in squiz.parameters():
        param.requires_grad = False

    # Replace the 1000-class head with a 10-class 1x1 conv. This layer
    # expects the backbone's 512-channel feature maps; assigning it to
    # the wrong attribute (so raw 3-channel images reach it) is exactly
    # what produces "expected input ... to have 512 channels" errors.
    squiz.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1, 1), stride=(1, 1))
    squiz.num_classes = num_classes  # forward() reshapes its output with this
    squiz = squiz.to(device)

    loss_fn = nn.CrossEntropyLoss()
    # Hand the optimizer only the trainable parameters; the frozen
    # backbone weights need not (and should not) be tracked.
    opt = optim.SGD((p for p in squiz.parameters() if p.requires_grad), lr=0.05)

    loss_epoch_arr = []
    min_loss = float('inf')
    # Derive the iteration count from the dataset instead of the old
    # hard-coded 50000 (STL10 'train' actually has 5,000 images).
    n_iters = int(np.ceil(len(trainset) / batch_size))
    # Seed best_model so load_state_dict below cannot hit a NameError
    # if the training loop never executes.
    best_model = copy.deepcopy(squiz.state_dict())

    for epoch in range(max_epochs):
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            opt.zero_grad()
            outputs = squiz(inputs)
            loss = loss_fn(outputs, labels)
            loss.backward()
            opt.step()

            # Track the best (lowest-loss) weights seen so far.
            if min_loss > loss.item():
                min_loss = loss.item()
                best_model = copy.deepcopy(squiz.state_dict())
                print('Min loss %0.2f' % min_loss)

            if i % 100 == 0:
                print('Iteration: %d/%d, Loss: %0.2f' % (i, n_iters, loss.item()))

            # Free per-batch GPU memory eagerly.
            del inputs, labels, outputs
            torch.cuda.empty_cache()

        loss_epoch_arr.append(loss.item())

    # Restore the best weights, then report train/test accuracy via the
    # external `evaluation` helper (defined elsewhere in the notebook).
    squiz.load_state_dict(best_model)
    print(evaluation(trainloader, squiz), evaluation(testloader, squiz))
# Entry point: fine-tune and evaluate; requires `evaluation` to be
# defined elsewhere in the notebook before this runs.
SqueezeNet(trainset,testset)