@Torch-sharp your question is marked with the “ignite” category, but its content seems to be more generic and unrelated to the pytorch/ignite project (GitHub - pytorch/ignite: High-level library to help with training and evaluating neural networks in PyTorch flexibly and transparently). Please update or confirm the category.
Concerning the question,
Here is how I would make the dataset (assuming you know how to read your images and transform them into tensors):
import torch
from torch.utils.data import Dataset, DataLoader
class My3PicsDataset(Dataset):
    """Synthetic dataset yielding three random images and a label vector.

    Each sample is a dict with keys ``"img1"``, ``"img2"``, ``"img3"``
    (float tensors from ``torch.rand``) and ``"labels"`` (integer tensor
    from ``torch.randint``). It is a placeholder: swap the random tensors
    for real image loading and transforms.

    Args:
        length: Number of samples reported by ``len()``.
        image_shape: Shape of every generated image tensor.
        num_labels: Number of labels generated per sample.
        num_classes: Exclusive upper bound of the label values, i.e.
            labels are drawn from ``0 .. num_classes - 1``.
    """

    def __init__(self, length=100, image_shape=(3, 32, 32), num_labels=5,
                 num_classes=10):
        super().__init__()
        self.length = length
        self.image_shape = tuple(image_shape)
        self.num_labels = num_labels
        self.num_classes = num_classes

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.length

    def __getitem__(self, index):
        """Return one random sample.

        Raises:
            IndexError: If ``index`` is outside ``[-len(self), len(self))``.
        """
        # Without this check, plain iteration (``for s in dataset``) would
        # never terminate, because Python stops on IndexError.
        if not -self.length <= index < self.length:
            raise IndexError(f"index {index} is out of range for dataset "
                             f"of length {self.length}")
        img1 = torch.rand(*self.image_shape)  # e.g. RGB float32 32x32 image
        img2 = torch.rand(*self.image_shape)
        img3 = torch.rand(*self.image_shape)
        # Upper bound of randint is exclusive: values are in [0, num_classes).
        labels = torch.randint(0, self.num_classes, size=(self.num_labels,))
        return {
            "img1": img1,
            "img2": img2,
            "img3": img3,
            "labels": labels,
        }
# Instantiate the dataset and wrap it in a shuffling loader that builds
# batches of 4 samples using 4 worker processes.
train_set = My3PicsDataset()
train_loader = DataLoader(
    train_set,
    batch_size=4,
    shuffle=True,
    num_workers=4,
)

# Pull a single batch as a sanity check and show the collated shapes.
batch = next(iter(train_loader))
print(batch["img1"].shape, batch["labels"].shape)
> torch.Size([4, 3, 32, 32]) torch.Size([4, 5])
Here is how I would implement a basic model that takes 3 input images and outputs 5 labels, each taking one of 10 values (0 to 9), for example (I assume you can improve and adapt it to your needs).
import torch.nn as nn
from torchvision.models import resnet18
class Dummy3PicsModel(nn.Module):
    """Toy model mapping three images to five 10-way classification outputs.

    One shared resnet18 feature extractor is applied to each of the three
    images; the three feature vectors are concatenated and fed to five
    independent linear heads, one per label.
    """

    def __init__(self):
        super().__init__()
        # Setup a resnet18 feature extractor. Calling resnet18() without
        # arguments builds an untrained network and avoids the deprecated
        # ``pretrained=`` keyword.
        backbone = resnet18()
        backbone.fc = nn.Identity()  # replaced final FC layer with identity
        self.resnet18_fe = backbone
        # resnet18_fe provides (512, ) features x 3 images concatenated together
        self.fc1 = nn.Linear(3 * 512, 10)  # 10 is the number of classes per label
        self.fc2 = nn.Linear(3 * 512, 10)
        self.fc3 = nn.Linear(3 * 512, 10)
        self.fc4 = nn.Linear(3 * 512, 10)
        self.fc5 = nn.Linear(3 * 512, 10)

    def forward(self, x):
        """Compute the five head outputs for a batch.

        Args:
            x: Mapping with keys ``"img1"``, ``"img2"`` and ``"img3"``
                holding the image batches.

        Returns:
            List of five tensors, the outputs of ``fc1`` .. ``fc5`` in order.
        """
        features = [self.resnet18_fe(x[key]) for key in ("img1", "img2", "img3")]
        # Concatenate along the feature dimension, in img1/img2/img3 order.
        y = torch.cat(features, dim=-1)
        return [
            self.fc1(y),
            self.fc2(y),
            self.fc3(y),
            self.fc4(y),
            self.fc5(y),
        ]
# Build the model and run one forward pass on the sample batch as a smoke test.
model = Dummy3PicsModel()
out = model(batch)
# Print the shape explicitly: a bare expression only displays in a notebook/REPL.
print(out[0].shape)
How to train that using ignite:
# pip install pytorch-ignite
from ignite.engine import Engine
from ignite.utils import convert_tensor
from ignite.contrib.handlers import ProgressBar
from torch.optim import SGD
# Use the GPU when one is available, otherwise fall back to CPU so the
# example also runs on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
optim = SGD(model.parameters(), lr=0.01)
# One shared cross-entropy criterion, applied separately to each output head.
criterion = nn.CrossEntropyLoss()
def train_step(_, batch):
    """Run one optimisation step on ``batch`` and report the losses.

    Args:
        _: First positional argument supplied by the caller (unused here).
        batch: Mapping with the image batches expected by ``model`` and a
            ``"labels"`` tensor whose column ``i`` holds the targets for
            output head ``i``.

    Returns:
        Dict of Python floats: ``"total_loss"`` plus ``"loss1"`` ..
        ``"loss5"``, one per output head.
    """
    model.train()
    batch = convert_tensor(batch, device=device, non_blocking=True)
    out = model(batch)
    labels = batch["labels"]
    loss1 = criterion(out[0], labels[:, 0])
    loss2 = criterion(out[1], labels[:, 1])
    loss3 = criterion(out[2], labels[:, 2])
    loss4 = criterion(out[3], labels[:, 3])
    loss5 = criterion(out[4], labels[:, 4])
    # Every head must contribute exactly once; the previous version added
    # loss4 twice and dropped loss5, so the fifth head was never trained.
    total_loss = loss1 + loss2 + loss3 + loss4 + loss5
    total_loss.backward()
    optim.step()
    # Clear gradients after the update so the next step starts from zero.
    optim.zero_grad()
    return {
        "total_loss": total_loss.item(),
        "loss1": loss1.item(),
        "loss2": loss2.item(),
        "loss3": loss3.item(),
        "loss4": loss4.item(),
        "loss5": loss5.item(),
    }
# Drive train_step through an Engine for 5 epochs, showing every entry of
# the step's output dict in a persistent progress bar.
trainer = Engine(train_step)
progress_bar = ProgressBar(persist=True)
progress_bar.attach(trainer, output_transform=lambda step_output: step_output)
trainer.run(train_loader, max_epochs=5)
Epoch [1/5]: [25/25] 100%, total_loss=19.1, loss1=2.47, loss2=2.67, loss3=3.14, loss4=5.4, loss5=2.34 [00:00<00:00]
Epoch [2/5]: [25/25] 100%, total_loss=18.5, loss1=2.92, loss2=3.55, loss3=3.4, loss4=4.34, loss5=2.57 [00:00<00:00]
Epoch [3/5]: [25/25] 100%, total_loss=18.6, loss1=2.83, loss2=3.42, loss3=3.57, loss4=4.37, loss5=2.32 [00:00<00:00]
Epoch [4/5]: [25/25] 100%, total_loss=17.3, loss1=3.25, loss2=2.59, loss3=2.39, loss4=4.54, loss5=2.34 [00:00<00:00]
Epoch [5/5]: [25/25] 100%, total_loss=15.6, loss1=3.52, loss2=2.55, loss3=2.66, loss4=3.43, loss5=2.21 [00:00<00:00]
Hope this helps