I am trying to adapt the tutorial on a DenseNet from https://goosemi.wordpress.com/2018/05/30/first-blog-post/ to the Fashion MNIST dataset which can be downloaded from https://github.com/zalandoresearch/fashion-mnist
The formats of MNIST images of hand-written digits and fashion items seem to be the same, i.e. these are 28x28 greyscale images. However, something does not match up, as the code from the tutorial throws an error. Does anyone has a clue where the problem(s) in the code is(are)?
Below is the code. First, I import the necessary modules/classes.
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn.functional as F
Then I import the dataset (and apply normalization).
mean = 0.2860347330570221
std = 0.3530242443084717
# Normalised train set
train_set_normal = torchvision.datasets.FashionMNIST(
root='./data'
,train=True
,download=True
,transform=transforms.Compose([
transforms.ToTensor()
, transforms.Normalize(mean, std)
])
)
Then there I define the DenseNet architecture as it is defined in the tutorial.
class Dense_Block(nn.Module):
def __init__(self, in_channels):
super(Dense_Block, self).__init__()
self.relu = nn.ReLU(inplace = True)
self.bn = nn.BatchNorm2d(num_features = in_channels)
self.conv1 = nn.Conv2d(in_channels = in_channels, out_channels = 32, kernel_size = 3, stride = 1, padding = 1)
self.conv2 = nn.Conv2d(in_channels = 32, out_channels = 32, kernel_size = 3, stride = 1, padding = 1)
self.conv3 = nn.Conv2d(in_channels = 64, out_channels = 32, kernel_size = 3, stride = 1, padding = 1)
self.conv4 = nn.Conv2d(in_channels = 96, out_channels = 32, kernel_size = 3, stride = 1, padding = 1)
self.conv5 = nn.Conv2d(in_channels = 128, out_channels = 32, kernel_size = 3, stride = 1, padding = 1)
def forward(self, x):
bn = self.bn(x)
conv1 = self.relu(self.conv1(bn))
conv2 = self.relu(self.conv2(conv1))
c2_dense = self.relu(torch.cat([conv1, conv2], 1))
conv3 = self.relu(self.conv3(c2_dense))
c3_dense = self.relu(torch.cat([conv1, conv2, conv3], 1))
conv4 = self.relu(self.conv4(c3_dense))
c4_dense = self.relu(torch.cat([conv1, conv2, conv3, conv4], 1))
conv5 = self.relu(self.conv5(c4_dense))
c5_dense = self.relu(torch.cat([conv1, conv2, conv3, conv4, conv5], 1))
return c5_dense
class Transition_Layer(nn.Module):
def __init__(self, in_channels, out_channels):
super(Transition_Layer, self).__init__()
self.relu = nn.ReLU(inplace = True)
self.bn = nn.BatchNorm2d(num_features = out_channels)
self.conv = nn.Conv2d(in_channels = in_channels, out_channels = out_channels, kernel_size = 1, bias = False)
self.avg_pool = nn.AvgPool2d(kernel_size = 2, stride = 2, padding = 0)
def forward(self, x):
bn = self.bn(self.relu(self.conv(x)))
out = self.avg_pool(bn)
return out
class DenseNet(nn.Module):
def __init__(self, nr_classes):
super(DenseNet, self).__init__()
self.lowconv = nn.Conv2d(in_channels = 3, out_channels = 64, kernel_size = 7, padding = 3, bias = False)
self.relu = nn.ReLU()
# Make Dense Blocks
self.denseblock1 = self._make_dense_block(Dense_Block, 64)
self.denseblock2 = self._make_dense_block(Dense_Block, 128)
self.denseblock3 = self._make_dense_block(Dense_Block, 128)
# Make transition Layers
self.transitionLayer1 = self._make_transition_layer(Transition_Layer, in_channels = 160, out_channels = 128)
self.transitionLayer2 = self._make_transition_layer(Transition_Layer, in_channels = 160, out_channels = 128)
self.transitionLayer3 = self._make_transition_layer(Transition_Layer, in_channels = 160, out_channels = 64)
# Classifier
self.bn = nn.BatchNorm2d(num_features = 64)
self.pre_classifier = nn.Linear(64*4*4, 512)
self.classifier = nn.Linear(512, nr_classes)
def _make_dense_block(self, block, in_channels):
layers = []
layers.append(block(in_channels))
return nn.Sequential(*layers)
def _make_transition_layer(self, layer, in_channels, out_channels):
modules = []
modules.append(layer(in_channels, out_channels))
return nn.Sequential(*modules)
def forward(self, x):
out = self.relu(self.lowconv(x))
out = self.denseblock1(out)
out = self.transitionLayer1(out)
out = self.denseblock2(out)
out = self.transitionLayer2(out)
out = self.denseblock3(out)
out = self.transitionLayer3(out)
out = self.bn(out)
out = out.view(-1, 64*4*4)
out = self.pre_classifier(out)
out = self.classifier(out)
return out
Finally, I am trying to train the network.
dense_network = DenseNet(nr_classes=10)
loader = DataLoader(train_set_normal,batch_size=10,shuffle=True,num_workers=0)
optimizer = optim.Adam(dense_network.parameters(), lr=0.01)
N_epochs = 10
for epoch in range(N_epochs):
total_loss = 0
total_correct = 0
for batch in loader: # Get Batch
images, labels = batch
preds = dense_network(images) # Pass Batch
loss = F.cross_entropy(preds, labels)
optimizer.zero_grad()
loss.backward() # Calculate Gradients
optimizer.step() # Update Weights
total_loss += loss.item()
total_correct += get_num_correct(preds, labels)
print(
"epoch", epoch,
"total_correct:", total_correct,
"loss:", total_loss
)
As a result, I see the following error-message.
RuntimeError Traceback (most recent call last)
<ipython-input-4-84df8dc8fb2f> in <module>
14 images, labels = batch
15
---> 16 preds = dense_network(images) # Pass Batch
17
18 loss = F.cross_entropy(preds, labels)
~\anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
<ipython-input-3-417f3f93841b> in forward(self, x)
80
81 def forward(self, x):
---> 82 out = self.relu(self.lowconv(x))
83
84 out = self.denseblock1(out)
~\anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
~\anaconda3\lib\site-packages\torch\nn\modules\conv.py in forward(self, input)
351
352 def forward(self, input):
--> 353 return self._conv_forward(input, self.weight)
354
355 class Conv3d(_ConvNd):
~\anaconda3\lib\site-packages\torch\nn\modules\conv.py in _conv_forward(self, input, weight)
348 _pair(0), self.dilation, self.groups)
349 return F.conv2d(input, weight, self.bias, self.stride,
--> 350 self.padding, self.dilation, self.groups)
351
352 def forward(self, input):
RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[10, 1, 28, 28] to have 3 channels, but got 1 channels instead