Can someone help me create a custom dataloader where each image and its corresponding segmentation mask is stored as a numpy array?
I have no clue how to create loaders for this segmentation task
Sure!
The important part of creating your own Dataset is to get the shapes for the data and target right.
In a simple segmentation use case where each pixel can only belong to a single class, we would want to use a criterion like nn.CrossEntropyLoss.
Therefore your data should have the usual input shape of images, i.e. [batch_size, c, h, w].
Our target should contain the class indices for each pixel, so the channel dimension is missing: [batch_size, h, w], and it should be of type torch.long.
I’ve created a dummy example using a Dataset, a DataLoader, and a very simple model:
class MyDataset(Dataset):
    """Map-style dataset pairing each input sample with its target.

    Args:
        data: Indexable, sized collection of input samples (e.g. a numpy
            array of images).
        target: Indexable, sized collection of targets, aligned with
            ``data`` by index.
        transform: Optional callable applied to the input sample only; the
            target is returned unchanged.

    Raises:
        ValueError: If ``data`` and ``target`` differ in length.
    """

    def __init__(self, data, target, transform=None):
        # Fail early: a mismatch would otherwise surface as an IndexError in
        # the middle of an epoch, or silently drop the surplus targets.
        if len(data) != len(target):
            raise ValueError(
                'data and target must have the same length, got {} and {}'
                .format(len(data), len(target)))
        self.data = data
        self.target = target
        self.transform = transform

    def __getitem__(self, index):
        """Return the ``(sample, target)`` pair stored at ``index``."""
        x = self.data[index]
        y = self.target[index]

        # Compare against None instead of relying on truthiness, so that a
        # callable which happens to be falsy (e.g. one defining __len__) is
        # still applied.
        if self.transform is not None:
            x = self.transform(x)

        return x, y

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)
class MyModel(nn.Module):
    """Minimal two-layer convolutional model for per-pixel classification.

    Both convolutions use a 3x3 kernel with stride 1 and padding 1, so the
    spatial size of the input is preserved and the output has one channel
    per class.

    Args:
        in_channels: Number of channels of the input images.
        nb_classes: Number of output channels (one per class).
        hidden_channels: Number of channels between the two convolutions.
            Defaults to 16.
    """

    def __init__(self, in_channels, nb_classes, hidden_channels=16):
        super().__init__()
        self.conv1 = nn.Conv2d(
            in_channels, hidden_channels, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(
            hidden_channels, nb_classes, kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        """Return the raw (un-normalized) per-pixel class scores for ``x``."""
        x = F.relu(self.conv1(x))
        # No activation on the last layer: the raw scores are returned as-is.
        x = self.conv2(x)
        return x
def main():
    """Train the dummy segmentation model on random data for ten epochs.

    Builds random uint8 images of shape [nb_samples, h, w, c] and random
    class-index targets of shape [nb_samples, h, w], wraps them in
    ``MyDataset`` and a ``DataLoader``, and runs a plain training loop that
    prints the loss of every batch.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    nb_samples = 10
    c, h, w = 3, 24, 24
    nb_classes = 5

    # np.random.randint excludes the upper bound, so 256 is required to cover
    # the full uint8 range.
    data_arr = np.random.randint(
        0, 256, (nb_samples, h, w, c), dtype=np.uint8)
    # Request int64 explicitly so the target tensor is torch.long, which is
    # what nn.CrossEntropyLoss needs for class-index targets. NumPy's default
    # integer type is platform dependent and may be 32-bit.
    target_arr = torch.from_numpy(
        np.random.randint(0, nb_classes, (nb_samples, h, w), dtype=np.int64))

    # The transform is applied to the image only (see MyDataset.__getitem__).
    transform = transforms.ToTensor()
    dataset = MyDataset(data_arr, target_arr, transform)
    loader = DataLoader(
        dataset,
        batch_size=2,
        num_workers=1,
        shuffle=True,
        pin_memory=torch.cuda.is_available(),
    )

    model = MyModel(c, nb_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    # Training routine
    for epoch in range(10):
        for batch_idx, (data, target) in enumerate(loader):
            data = data.to(device)
            target = target.to(device)

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            # .item() already returns a Python number, no .cpu() needed.
            print('Epoch {}, batch {}, loss {}'.format(
                epoch, batch_idx, loss.item()))


# With num_workers > 0 the DataLoader starts worker processes. On platforms
# that spawn them (e.g. Windows) the main module is re-imported in each
# worker, so the training code must not run at import time.
if __name__ == '__main__':
    main()
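You can skip some of the transformations if your numpy image array is already of type np.float32, and just call torch.from_numpy directly. Note that ToTensor also permutes the image from [h, w, c] to [c, h, w] and scales uint8 values to the range [0, 1], so you would have to take care of that yourself in this case.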
Let me know, if you can adapt this code to your use case.