I am trying to reproduce the multi-input neural network of this tutorial: tutorial. This article uses PyTorch Lightning, while I want to use PyTorch, so I am adapting to my case. Basically I created my dataloaders and my network:
# Define loaders
from torch.utils.data import DataLoader
train_loader = DataLoader(train_set, batch_size=64, num_workers=2, drop_last=True, shuffle=True)
val_loader = DataLoader(val_set, batch_size=64, num_workers=2, drop_last=False, shuffle=False)
test_loader = DataLoader(test_set, batch_size=64, num_workers=2, drop_last=False, shuffle=False)
def conv_block(input_size, output_size):
block = nn.Sequential(
nn.Conv2d(input_size, output_size, (3, 3)), nn.BatchNorm2d(output_size), nn.ReLU(), nn.MaxPool2d((2, 2)),
)
return block
class SimpleCNN(nn.Module):
#Constructor
def __init__(self):
# Call parent contructor
super().__init__()
self.conv1 = conv_block(3, 16)
self.conv2 = conv_block(16, 32)
self.conv3 = conv_block(32, 64)
self.ln1 = nn.Linear(64 * 26 * 26, 16)
self.relu = nn.ReLU()
self.batchnorm = nn.BatchNorm1d(16)
self.dropout = nn.Dropout2d(0.5)
self.ln2 = nn.Linear(16, 5)
self.ln4 = nn.Linear(5, 10)
self.ln5 = nn.Linear(10, 10)
self.ln6 = nn.Linear(10, 5)
self.ln7 = nn.Linear(10, 1)
# Forward
def forward(self, img, tab):
img = self.conv1(img)
img = self.conv2(img)
img = self.conv3(img)
img = img.reshape(img.shape[0], -1)
img = self.ln1(img)
img = self.relu(img)
img = self.batchnorm(img)
img = self.dropout(img)
img = self.ln2(img)
img = self.relu(img)
tab = self.ln4(tab)
tab = self.relu(tab)
tab = self.ln5(tab)
tab = self.relu(tab)
tab = self.ln6(tab)
tab = self.relu(tab)
x = torch.cat((img, tab), dim=1)
x = self.relu(x)
return self.ln7(x)
and then I defined my optimizer and criterion (the same of the tutorial):
optimizer = optim.SGD(model.parameters(), lr = 0.01)
criterion = nn.L1Loss()
Now, this below is my training function:
def train(net, loaders, optimizer, criterion, epochs=100, dev=torch.device('cpu')):
try:
net = net.to(dev)
#print(net)
# Initialize history
history_loss = {"train": [], "val": [], "test": []}
history_accuracy = {"train": [], "val": [], "test": []}
# Process each epoch
for epoch in range(epochs):
# Initialize epoch variables
sum_loss = {"train": 0, "val": 0, "test": 0}
sum_accuracy = {"train": 0, "val": 0, "test": 0}
# Process each split
for split in ["train", "val", "test"]:
# Process each batch
for (image, tabular, labels) in loaders[split]:
# Move to CUDA
image = image.to(dev)
tabular = tabular.to(dev)
labels = labels.to(dev)
# Reset gradients
optimizer.zero_grad()
# Compute output
#pred = torch.flatten((image, tabular))
pred = net(image, tabular)
#y_pred = y_pred.double()
loss = criterion(pred, labels)
# Update loss
sum_loss[split] += loss.item()
# Check parameter update
if split == "train":
# Compute gradients
loss.backward()
# Optimize
optimizer.step()
# Compute accuracy
_,pred_labels = pred.max(1)
batch_accuracy = (pred_labels == labels).sum().item()/image.size(0)
# Update accuracy
sum_accuracy[split] += batch_accuracy
# Compute epoch loss/accuracy
epoch_loss = {split: sum_loss[split]/len(loaders[split]) for split in ["train", "val", "test"]}
epoch_accuracy = {split: sum_accuracy[split]/len(loaders[split]) for split in ["train", "val", "test"]}
# Update history
for split in ["train", "val", "test"]:
history_loss[split].append(epoch_loss[split])
history_accuracy[split].append(epoch_accuracy[split])
# Print info
print(f"Epoch {epoch+1}:",
f"TrL={epoch_loss['train']:.4f},",
f"TrA={epoch_accuracy['train']:.4f},",
f"VL={epoch_loss['val']:.4f},",
f"VA={epoch_accuracy['val']:.4f},",
f"TeL={epoch_loss['test']:.4f},",
f"TeA={epoch_accuracy['test']:.4f},")
except KeyboardInterrupt:
print("Interrupted")
finally:
# Plot loss
plt.title("Loss")
for split in ["train", "val", "test"]:
plt.plot(history_loss[split], label=split)
plt.legend()
plt.show()
# Plot accuracy
plt.title("Accuracy")
for split in ["train", "val", "test"]:
plt.plot(history_accuracy[split], label=split)
plt.legend()
plt.show()
that I call in this way:
# Define dictionary of loaders
loaders = {"train": train_loader,
"val": val_loader,
"test": test_loader}
# Train model
train(model, loaders, optimizer, criterion, epochs=10, dev=dev)
Training starts and it completes all 10 epochs. However, results are really bad, because I am doing something wrong. These are the results:
Epoch 1: TrL=756382.4643, TrA=0.0000, VL=724350.7875, VA=0.0000, TeL=810417.3250, TeA=0.0000,
Epoch 2: TrL=767425.5143, TrA=0.0000, VL=724348.9250, VA=0.0000, TeL=810415.4375, TeA=0.0000,
Epoch 3: TrL=769819.8732, TrA=0.0000, VL=724341.4625, VA=0.0000, TeL=810408.1375, TeA=0.0000,
Epoch 4: TrL=769039.4804, TrA=0.0000, VL=724228.2875, VA=0.0000, TeL=810297.6250, TeA=0.0000,
Epoch 5: TrL=687138.2839, TrA=0.0000, VL=720732.6250, VA=0.0000, TeL=807107.3750, TeA=0.0000,
Epoch 6: TrL=637015.2786, TrA=0.0000, VL=723909.0375, VA=0.0000, TeL=809951.0625, TeA=0.0000,
Epoch 7: TrL=601827.3125, TrA=0.0000, VL=575946.9625, VA=0.0000, TeL=565301.9250, TeA=0.0000,
Epoch 8: TrL=600566.4304, TrA=0.0000, VL=646973.8250, VA=0.0000, TeL=729645.0250, TeA=0.0000,
Epoch 9: TrL=574847.1312, TrA=0.0000, VL=326207.9562, VA=0.0000, TeL=369593.9562, TeA=0.0000,
Epoch 10: TrL=630909.6888, TrA=0.0000, VL=723533.1000, VA=0.0000, TeL=809632.1750, TeA=0.0000,
Basically, each accuracy is always 0. I think that the problem is in the labels that I give to my training function. Indeed, the tutorial of the multi input network, in its training, validation and test functions has these lines of code:
def training_step(self, batch, batch_idx):
image, tabular, y = batch
criterion = torch.nn.L1Loss()
y_pred = torch.flatten(self(image, tabular))
y_pred = y_pred.double()
loss = criterion(y_pred, y)
However, I am not flattening anything, neither in my training function, neither in my neural network.