I’m trying to implement my personal version of SqueezeNet using Pytorch. I have some base code that I can not modify but I can modify all the code inside the class APNet and also all the other methods in the same script. When I try to start training I obtain always the error in the description.
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
from torchvision.models.resnet import BasicBlock
import math
import torch
def _weights_init(m):
if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight)
class fire(nn.Module):
def __init__(self, inplanes, squeeze_planes, expand_planes):
super(fire, self).__init__()
self.conv1 = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1, stride=1)
self.bn1 = nn.BatchNorm2d(squeeze_planes)
self.relu1 = nn.ReLU(inplace=True)
self.conv2 = nn.Conv2d(squeeze_planes, expand_planes, kernel_size=1, stride=1)
self.bn2 = nn.BatchNorm2d(expand_planes)
self.conv3 = nn.Conv2d(squeeze_planes, expand_planes, kernel_size=3, stride=1, padding=1)
self.bn3 = nn.BatchNorm2d(expand_planes)
self.relu2 = nn.ReLU(inplace=True)
# using MSR initilization
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.in_channels
m.weight.data.normal_(0, math.sqrt(2./n))
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu1(x)
out1 = self.conv2(x)
out1 = self.bn2(out1)
out2 = self.conv3(x)
out2 = self.bn3(out2)
out = torch.cat([out1, out2], 1)
out = self.relu2(out)
return out
class APNet(nn.Module):
"""
Parameters
----------
block : callable
Factory/constructor creating the block to be used.
layers : list of int
Number of blocks in each layer.
num_classes : int
Number of output neurons.
input_channels : int
Number of input channels.
shortcut_downsampling : {'pad', 'conv'}
Downsampling mode for the shortcut.
'pad' will subsample the input using strided slicing and pad the channels with zeros.
'conv' will use a strided convolution instead.
"""
def __init__(self, block, layers, num_classes=10, input_channels=3, shortcut_downsampling='pad', groups=1):
super(APNet, self).__init__()
self.conv1 = nn.Conv2d(3, 96, kernel_size=3, stride=1, padding=1) # 32
self.bn1 = nn.BatchNorm2d(96)
self.relu = nn.ReLU(inplace=True)
self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2) # 16
self.fire2 = fire(96, 16, 64)
self.fire3 = fire(128, 16, 64)
self.fire4 = fire(128, 32, 128)
self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2) # 8
self.fire5 = fire(256, 32, 128)
self.fire6 = fire(256, 48, 192)
self.fire7 = fire(384, 48, 192)
self.fire8 = fire(384, 64, 256)
self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2) # 4
self.fire9 = fire(512, 64, 256)
self.conv2 = nn.Conv2d(512, 10, kernel_size=1, stride=1)
self.avg_pool = nn.AvgPool2d(kernel_size=4, stride=4)
#self.softmax = nn.LogSoftmax(dim=1)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.in_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
@staticmethod
def get_classifiers():
return ['apnet0']
@classmethod
def build_classifier(cls, arch: str, num_classes: int, input_channels: int):
depth = 3
cls_instance = cls(int(depth), num_classes, input_channels=input_channels)
return cls_instance
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool1(x)
x = self.fire2(x)
x = self.fire3(x)
x = self.fire4(x)
x = self.maxpool2(x)
x = self.fire5(x)
x = self.fire6(x)
x = self.fire7(x)
x = self.fire8(x)
x = self.maxpool3(x)
x = self.fire9(x)
x = self.conv2(x)
x = self.avg_pool(x)
#x = self.fc(x)
#x = self.softmax(x)
return x
def fire_layer(inp, s, e):
f = fire(inp, s, e)
return f
I tried also to add a linear layer at the end but the result does not change. All the above code is modifiable and all the code below is unmodifiable.
"""
Parameters
----------
model : torch.nn.Module
The model to be trained.
Might yield multiple outputs, but the first ones will be considered to
be class scores for accuracy computation.
loader : iterable
The data loader, yielding batches of samples and labels.
optimizer : torch.optim.Optimizer
The optimizer to be used for the backward pass and model update.
criterion : callable
The loss function.
All outputs of the model will be passed as argument, followed by
the class labels.
scheduler : torch.optim.lr_scheduler._LRScheduler, optional
A learning rate scheduler to be called after every iteration.
regularizer : callable(torch.nn.Module), optional
A function taking the model as argument and returning a regularization
loss as scalar tensor that will be added to the total loss function.
show_progress : bool, default: True
Whether to show a tqdm progress bar updated after every iteration.
Returns
-------
loss : float
accuracy : float
"""
model.train()
total_loss = total_acc = num_samples = 0
for X, y in tqdm(loader, leave=False, disable=not show_progress):
X, y = X.cuda(), y.cuda()
optimizer.zero_grad(set_to_none=True)
output = model(X)
if not isinstance(output, tuple):
output = (output,)
loss = criterion(*output, y)
total_loss += loss.item() * len(X)
total_acc += (output[0].argmax(dim=-1) == y).sum().item()
num_samples += len(X)
if regularizer is not None:
loss = loss + regularizer(model)
loss.backward()
optimizer.step()
if scheduler is not None:
scheduler.step()