I'm trying to train a U-Net model on the LandCoverNet dataset, a satellite imagery dataset that contains input images and corresponding land cover type masks.
I have created a custom dataset class to load my images and masks:
# Create a custom dataset that accepts 4-channel images
from torch.utils.data import Dataset, DataLoader, sampler
from pathlib import Path
from PIL import Image
import matplotlib.pyplot as plt
import os
import cv2            # needed by open_mask below
import torch          # needed by __getitem__ below
import numpy as np
import rasterio as rio
from torchvision import transforms, datasets, models

# We have two directories: inputs (one sub-folder per image) and targets
class LandCoverNetDataset(Dataset):

    CLASSES = ['otherland', 'cropland', 'pastureland', 'bare soil', 'openwater', 'forestland']

    def __init__(self, inputs_dir, targets_dir,
                 classes=None,
                 augmentation=None,
                 preprocessing=False,
                 pytorch=True):
        super().__init__()
        self.samples = []
        self.pytorch = pytorch
        self.augmentation = augmentation
        self.preprocessing = preprocessing

        # Convert class names to the class values used in the masks
        # (fall back to all classes if none are given)
        classes = classes or self.CLASSES
        self.class_value = [self.CLASSES.index(cls.lower()) for cls in classes]

        # Pair each image folder with its target mask
        for sub_dir in os.listdir(inputs_dir):
            files = {
                'img_bands': os.path.join(inputs_dir, sub_dir),
                'target': os.path.join(targets_dir, sub_dir[:13] + "_LC_10m.png")
            }
            self.samples.append(files)

    def __len__(self):
        return len(self.samples)

    def normalize(self, band):
        '''Normalize a numpy array to have values between 0 and 1'''
        band_min, band_max = band.min(), band.max()
        np.seterr(divide='ignore', invalid='ignore')
        normalized_band = (band - band_min) / (band_max - band_min)
        # Remove any NaN values (from constant bands) and substitute zero
        normalized_band[np.isnan(normalized_band)] = 0
        return normalized_band

    def open_as_array(self, idx, include_ndvi=False):
        '''Merge the 4 bands into one image and normalize the bands'''
        # List the individual bands in the image folder and stack them together
        list_bands = []
        for img_file in os.listdir(self.samples[idx]['img_bands']):
            if 'NDVI' in img_file:
                # Remember the NDVI band's path for later
                ndvi_band = os.path.join(self.samples[idx]['img_bands'], img_file)
            else:
                # Read one of the RGB bands
                band = rio.open(os.path.join(self.samples[idx]['img_bands'], img_file)).read(1)
                if self.preprocessing:
                    # Preprocess the bands before stacking them (RGB only)
                    band = self.normalize(band)
                list_bands.append(band)

        # Stack the bands along the channel axis
        raw_rgb = np.stack(list_bands, axis=2).astype('float32')

        if include_ndvi:
            # Include the NDVI band in the input image
            ndvi = np.expand_dims(rio.open(ndvi_band).read(1).astype('float32'), 2)
            raw_rgb = np.concatenate([raw_rgb, ndvi], axis=2)

        if self.augmentation:
            transformed = self.augmentation(image=raw_rgb)
            raw_rgb = transformed["image"]

        if self.preprocessing:
            # Transpose from HWC to the CHW layout tensors expect
            raw_rgb = raw_rgb.transpose((2, 0, 1)).astype('float32')

        return raw_rgb

    def open_mask(self, idx):
        # Extract the requested classes from the mask as binary maps
        mask = cv2.imread(self.samples[idx]['target'], 0)
        masks = [(mask == v) for v in self.class_value]
        mask = np.stack(masks, axis=-1).astype('int64')

        if self.augmentation:
            transformed = self.augmentation(image=mask)
            mask = transformed["image"]

        if self.preprocessing:
            # Preprocess the mask
            mask = self.normalize(mask)
            # Transpose from HWC to CHW
            mask = mask.transpose((2, 0, 1)).astype('int64')

        # Drop the class dimension (see below for why I added this)
        mask = mask[0, :, :]
        return mask

    def __getitem__(self, idx):
        x = torch.tensor(self.open_as_array(idx, include_ndvi=True), dtype=torch.float)
        y = torch.tensor(self.open_mask(idx), dtype=torch.long)
        return x, y

    def open_as_pil(self, idx):
        arr = 255 * self.open_as_array(idx)
        return Image.fromarray(arr.astype(np.uint8), 'RGB')

    def __repr__(self):
        return 'Dataset class with {} files'.format(self.__len__())
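For context, this is roughly how I build the dataset and loader; the directory paths here are placeholders for my real ones:
# Sketch of how I create the dataset/loader (paths below are placeholders)
train_dataset = LandCoverNetDataset(
    inputs_dir='data/train/inputs',    # placeholder path
    targets_dir='data/train/targets',  # placeholder path
    classes=LandCoverNetDataset.CLASSES,
    preprocessing=True,
)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
x, y = next(iter(train_loader))
print(x.shape, y.shape)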
The input here has 4 bands (RGB plus NDVI).
This is the shape of the first batch, for the input and the target respectively:
torch.Size([16, 4, 224, 224])
torch.Size([16, 224, 224])
I'm using a model from the segmentation-models-pytorch library, and here is how I customized it for my case:
import torch
import torch.nn as nn
import segmentation_models_pytorch as smp

ENCODER = 'se_resnext50_32x4d'
ENCODER_WEIGHTS = 'imagenet'
ACTIVATION = 'softmax2d'
DEVICE = 'cuda'

model = smp.FPN(ENCODER, encoder_weights=ENCODER_WEIGHTS,
                classes=len(LandCoverNetDataset.CLASSES), activation=ACTIVATION)

# Replace model.encoder.layer0.conv1 so it accepts 4 input channels.
# First, copy the pretrained layer's weights
weight = model.encoder.layer0.conv1.weight.clone()
model.encoder.layer0.conv1 = nn.Conv2d(4, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
with torch.no_grad():
    model.encoder.layer0.conv1.weight[:, :3] = weight
    model.encoder.layer0.conv1.weight[:, 3] = model.encoder.layer0.conv1.weight[:, 0]
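As a quick sanity check that the surgery worked, a random 4-channel tensor goes through the patched model and comes out with one channel per class:
# Sanity check (shapes only): the patched model accepts 4-channel input
with torch.no_grad():
    out = model(torch.randn(1, 4, 224, 224))
print(out.shape)  # torch.Size([1, 6, 224, 224])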
loss = smp.utils.losses.NLLLoss()
metrics = [
    smp.utils.metrics.IoU(threshold=0.5),
]
optimizer = torch.optim.SGD([
    dict(params=model.parameters(), lr=0.001, weight_decay=1e-8, momentum=0.9),
])
# Create epoch runners:
# a simple loop that iterates over the dataloader's samples
train_epoch = smp.utils.train.TrainEpoch(
    model,
    loss=loss,
    metrics=metrics,
    optimizer=optimizer,
    device=DEVICE,
    verbose=True,
)

valid_epoch = smp.utils.train.ValidEpoch(
    model,
    loss=loss,
    metrics=metrics,
    device=DEVICE,
    verbose=True,
)
And here is my training loop:
# train model for 40 epochs
max_score = 0

for i in range(0, 40):
    print('\nEpoch: {}'.format(i))
    train_logs = train_epoch.run(train_loader)
    valid_logs = valid_epoch.run(valid_loader)

    # do something (save model, change lr, etc.)
    if max_score < valid_logs['iou_score']:
        max_score = valid_logs['iou_score']
        torch.save(model, './best_model.pth')
        print('Model saved!')

    if i == 25:
        optimizer.param_groups[0]['lr'] = 1e-5
        print('Decrease decoder learning rate to 1e-5!')
At first, the target shape was [16, 6, 224, 224], but I got an error and found this thread saying it should be [batch_size, height, width].
That's why I added the line mask = mask[0, :, :] in the Dataset class, to get rid of the class dimension. And this is where things get confusing for me, because the output of my model is torch.Size([10, 6, 224, 224]).
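As far as I can tell, nn.NLLLoss itself is fine with that pairing; here is a minimal check of the shape contract as I understand it, with random tensors:
import torch
import torch.nn as nn

# NLLLoss shape contract, as I understand it:
#   input:  [batch, classes, H, W] log-probabilities
#   target: [batch, H, W] integer class indices
log_probs = torch.log_softmax(torch.randn(16, 6, 224, 224), dim=1)
target = torch.randint(0, 6, (16, 224, 224))
print(nn.NLLLoss()(log_probs, target))  # scalar loss, no error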
This is the entire error message:
Epoch: 0
train: 0%| | 0/157 [00:00<?, ?it/s]
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-215-2ae39e205dee> in <module>()
7
8 print('\nEpoch: {}'.format(i))
----> 9 train_logs = train_epoch.run(train_loader)
10 valid_logs = valid_epoch.run(valid_loader)
11
3 frames
/usr/local/lib/python3.6/dist-packages/segmentation_models_pytorch/utils/functional.py in iou(pr, gt, eps, threshold, ignore_channels)
32 pr, gt = _take_channels(pr, gt, ignore_channels=ignore_channels)
33
---> 34 intersection = torch.sum(gt * pr)
35 union = torch.sum(gt) + torch.sum(pr) - intersection + eps
36 return (intersection + eps) / union
RuntimeError: The size of tensor a (16) must match the size of tensor b (6) at non-singleton dimension 1
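The same mismatch reproduces in isolation with those shapes, so it looks like it comes from the IoU metric multiplying the prediction and the target directly:
import torch

# Minimal reproduction of the broadcasting failure inside the IoU metric
pr = torch.rand(16, 6, 224, 224)  # model output: [batch, classes, H, W]
gt = torch.rand(16, 224, 224)     # my target:    [batch, H, W]
gt * pr  # RuntimeError: The size of tensor a (16) must match the size
         # of tensor b (6) at non-singleton dimension 1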
I have been working on this for days now and would appreciate any help!
Thanks a lot!