The network should tell if an image is rotated, and how much.
The training data is simple and contains faces, and it should be able to tell which of the 90* positions is the image on.
The data is all stored are standard upward facing faces, and they are rotated on the fly by a transformation.
Before showing the network, this is what the results look like (it predicts always the same labels):
Click to see the output (snippet)
loss: 1.392787 [ 16/ 406]
Test Error:
Accuracy: 26.7%, Avg loss: 1.467627
Last y: tensor([2, 2, 0, 1, 1, 0, 1, 2, 2, 2, 1, 0], device='mps:0')
Last pred: tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], device='mps:0')
Epoch 2
-------------------------------
loss: 1.556168 [ 16/ 406]
Test Error:
Accuracy: 38.3%, Avg loss: 1.379085
Last y: tensor([1, 0, 0, 2, 0, 3, 0, 0, 1, 1, 0, 3], device='mps:0')
Last pred: tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], device='mps:0')
Click to see The ConvNet
class SpotRotBackbone(nn.Module):
"""
Backbone (first part) of the spot rotated neural network
It is just a set of convolution.
Important: The next layer expects a flattened tensor.
Args: None
"""
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
self.relu1 = nn.ReLU()
self.dropout1 = nn.Dropout(0.3)
self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
self.relu2 = nn.ReLU()
self.maxpool = nn.MaxPool2d(kernel_size=(2, 2))
self.flatten = nn.Flatten()
def forward(self, x: torch.Tensor) -> torch.Tensor:
x = self.conv1(x)
x = self.relu1(x)
x = self.dropout1(x)
x = self.conv2(x)
x = self.relu2(x)
x = self.maxpool(x)
x = self.flatten(x)
return x
# %% [markdown]
# Classification Head
#
# %%
class SpotRotHead(nn.Module):
"""
Classification of the image rotation angle.
Args:
input_size: the size of the input image
"""
def __init__(self, input_size: int):
super().__init__()
self.fc1 = nn.Linear(input_size, 32)
self.relu1 = nn.ReLU()
self.dropout1 = nn.Dropout(0.5)
self.fc2 = nn.Linear(32, 4)
self.softmax = nn.Softmax(dim=1)
def forward(self, x: torch.Tensor) -> torch.Tensor:
x = self.fc1(x)
x = self.relu1(x)
x = self.dropout1(x)
x = self.fc2(x)
x = self.softmax(x)
return x
# %%
class NeuralNetwork(nn.Module):
"""
A PyTorch neural network for image classification.
Args:
input_size: Size of the 2D dimension i.e width or height (we consider square images only).
"""
def __init__(self, input_size: int):
super().__init__()
self.backbone = SpotRotBackbone()
# I tried removing and passing values manually, in case this was interfering (it is not though.)
output_dimensions = self.backbone.forward(
torch.zeros(1, 3, input_size, input_size)
).shape
self.head = SpotRotHead(output_dimensions[-1])
def forward(self, x):
x = self.backbone(x)
x = self.head(x)
return x
Is there any obvious error I am committing here ?