I am trying to do semantic segmentation with two classes.
I have 224x224x3 images and 224x224 binary segmentation masks. I am reshaping the masks to be 224x224x1 (I read somewhere that this is the format that I should pass to the model).
When I try to loop through the train data loader it either runs without errors or I get the following error:
Traceback (most recent call last):
File "/Users/nikolaykolibarov/Desktop/GATE/rooftop-recognition/rooftop-edge-segmentation-ml/main.py", line 43, in <module>
for i, sample in enumerate(train_loader):
File "/Users/nikolaykolibarov/opt/anaconda3/envs/dtcc/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 346, in __next__
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
File "/Users/nikolaykolibarov/opt/anaconda3/envs/dtcc/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 47, in fetch
return self.collate_fn(data)
File "/Users/nikolaykolibarov/opt/anaconda3/envs/dtcc/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 79, in default_collate
return [default_collate(samples) for samples in transposed]
File "/Users/nikolaykolibarov/opt/anaconda3/envs/dtcc/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 79, in <listcomp>
return [default_collate(samples) for samples in transposed]
File "/Users/nikolaykolibarov/opt/anaconda3/envs/dtcc/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 55, in default_collate
return torch.stack(batch, 0, out=out)
RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 0. Got 224 and 244 in dimension 2 at /tmp/pip-req-build-9oilk29k/aten/src/TH/generic/THTensor.cpp:689
Most of the issues with the same exception I found online don’t have matching numbers like I do (Got 224 and 224). I don’t understand, this exception should occur when they aren’t equal (if I am not wrong).
When I run it, occasionally I get this error a few times, and then it doesn’t occur a few times.
I am not sure if I messed something up with the data types and shapes or it is something else.
Here is the code:
roof_edges_dataset.py:
import os
import cv2 as cv
from torch.utils.data import Dataset
from torchvision.transforms import transforms
from utils import create_binary_mask, get_labelme_shapes, plot_segmentation_dataset
class RoofEdgesDataset(Dataset):
def __init__(self, im_path, ann_path, transform=None):
self.im_path = im_path
self.ann_path = ann_path
self.transform = transform
self.im_fn_list = sorted(os.listdir(im_path), key=lambda x: int(x.split('.')[0]))
self.ann_fn_list = sorted(os.listdir(ann_path), key=lambda x: int(x.split('.')[0]))
def __len__(self):
return len(self.im_fn_list)
def __getitem__(self, index):
im_path = os.path.join(self.im_path, self.im_fn_list[index])
im = cv.imread(im_path)
ann_path = os.path.join(self.ann_path, self.ann_fn_list[index])
ann = create_binary_mask(im, get_labelme_shapes(ann_path))
ann = ann.reshape(ann.shape[0], ann.shape[1], 1)
ann = transforms.ToTensor()(ann)
# ann = torch.tensor(ann)
# ann = ann.permute(2, 0, 1)
if self.transform:
im = self.transform(im)
return im, ann
main.py
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from roof_edges_dataset import RoofEdgesDataset
# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Hyperparameters
in_im_shape = (3, 224, 224)
num_classes = 2 # Edge / Non-edge
learning_rate = 0.001
batch_size = 4
n_epochs = 10
# Data - 60% Train - 20% Val - 20% Test
transformations = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
dataset = RoofEdgesDataset(im_path='data/images', ann_path='data/annotations', transform=transformations)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(train_dataset, [train_size, val_size])
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)
print(len(train_loader))
print(len(val_loader))
print(len(test_loader))
# Model
# Loss and Optimizer
# Train
for epoch in range(n_epochs):
for batch_idx, (image, annotation) in enumerate(train_loader):
image = image.to(device=device)
annotation = annotation.to(device=device)
print(image.shape)
print(annotation.shape)
break
# Evaluate
I would be also very thankful if you let me know if something is done in a wrong or stupid way and how to do it better.
Thanks in advance!