I am using a U-Net model for segmentation on a custom dataset:
# import the necessary packages
from . import config
from torch.nn import ConvTranspose2d
from torch.nn import Conv2d
from torch.nn import MaxPool2d
from torch.nn import Module
from torch.nn import ModuleList
from torch.nn import ReLU
from torchvision.transforms import CenterCrop
from torch.nn import functional as F
import torch
class Block(Module):
    """Two stacked convolutions with a ReLU in between (CONV => RELU => CONV).

    Args:
        inChannels: number of channels of the input feature map.
        outChannels: number of channels produced by both convolutions.
        kernelSize: kernel size shared by both convolutions. Defaults to 3,
            the value that used to be hard-coded.
        padding: padding shared by both convolutions. The default of 0
            keeps the unpadded behaviour, in which every convolution
            shrinks the feature map; with an odd kernel size, passing
            ``kernelSize // 2`` preserves the spatial size instead.
    """

    def __init__(self, inChannels, outChannels, kernelSize=3, padding=0):
        super().__init__()
        # store the convolution and RELU layers
        self.conv1 = Conv2d(inChannels, outChannels, kernelSize,
                            padding=padding)
        self.relu = ReLU()
        self.conv2 = Conv2d(outChannels, outChannels, kernelSize,
                            padding=padding)

    def forward(self, x):
        """Apply CONV => RELU => CONV to ``x`` and return the result.

        No activation is applied after the second convolution.
        """
        return self.conv2(self.relu(self.conv1(x)))
class Encoder(Module):
    """Contracting path: a chain of ``Block`` modules joined by max-pooling.

    Args:
        channels: channel sizes of the path. Consecutive pairs
            ``(channels[i], channels[i + 1])`` define the input and
            output channels of each encoder block.
    """

    def __init__(self, channels=(3, 16, 32, 64)):
        super().__init__()
        # store the encoder blocks and maxpooling layer
        self.encBlocks = ModuleList(
            [Block(channels[i], channels[i + 1])
             for i in range(len(channels) - 1)])
        self.pool = MaxPool2d(2)

    def forward(self, x):
        """Run ``x`` through every encoder block.

        Returns:
            A list with the output of each block, shallowest first. The
            outputs are taken before pooling.
        """
        # initialize an empty list to store the intermediate outputs
        blockOutputs = []
        for i, block in enumerate(self.encBlocks):
            # downsample only *between* blocks: pooling the output of the
            # last block would be thrown away, and it would fail outright
            # when that output is already smaller than the pooling window
            if i > 0:
                x = self.pool(x)
            x = block(x)
            blockOutputs.append(x)
        # return the list containing the intermediate outputs
        return blockOutputs
class Decoder(Module):
    """Expanding path: upsample, merge with encoder features, refine.

    Args:
        channels: channel sizes of the path. Each consecutive pair
            defines one stage made of a transposed convolution
            (kernel 2, stride 2) followed by a ``Block``.
    """

    def __init__(self, channels=(64, 32, 16)):
        super().__init__()
        # remember the channel layout of the path
        self.channels = channels
        # (input, output) channel pair of every decoder stage
        stagePairs = list(zip(channels[:-1], channels[1:]))
        # upsampler of every stage
        self.upconvs = ModuleList(
            [ConvTranspose2d(cIn, cOut, 2, 2) for cIn, cOut in stagePairs])
        # refinement block of every stage; it takes cIn channels because
        # it is fed the upsampled map concatenated with encoder features
        self.dec_blocks = ModuleList(
            [Block(cIn, cOut) for cIn, cOut in stagePairs])

    def forward(self, x, encFeatures):
        """Decode ``x`` using ``encFeatures`` as skip connections.

        ``encFeatures[i]`` is the encoder feature map merged in at
        stage ``i``. Returns the output of the last decoder block.
        """
        stages = zip(self.upconvs, self.dec_blocks)
        for stageIdx, (upsample, refine) in enumerate(stages):
            # upsample the running feature map
            x = upsample(x)
            # center-crop the skip features to the upsampled size and
            # stack them along the channel axis
            skip = self.crop(encFeatures[stageIdx], x)
            merged = torch.cat([x, skip], dim=1)
            # refine the merged features with this stage's block
            x = refine(merged)
        return x

    def crop(self, encFeatures, x):
        """Center-crop ``encFeatures`` to the height and width of ``x``."""
        # x is unpacked as a 4-dimensional shape; only the last two
        # entries (height, width) drive the crop
        _, _, height, width = x.shape
        cropper = CenterCrop([height, width])
        return cropper(encFeatures)
class UNet(Module):
    """U-Net assembled from an ``Encoder``, a ``Decoder`` and a 1x1 conv head.

    Args:
        encChannels: channel sizes handed to the ``Encoder``.
        decChannels: channel sizes handed to the ``Decoder``.
        nbClasses: number of output channels of the head.
        retainDim: when true, the output of the head is resized to
            ``outSize`` before being returned.
        outSize: ``(height, width)`` used for that resize. The default is
            read from ``config`` once, when the class is defined.
    """

    def __init__(self, encChannels=(3, 16, 32, 64),
                 decChannels=(64, 32, 16),
                 nbClasses=1, retainDim=True,
                 outSize=(config.INPUT_IMAGE_HEIGHT,
                          config.INPUT_IMAGE_WIDTH)):
        super().__init__()
        # initialize the encoder and decoder
        self.encoder = Encoder(encChannels)
        self.decoder = Decoder(decChannels)
        # initialize the 1x1 convolution head and store the class
        # variables
        self.head = Conv2d(decChannels[-1], nbClasses, 1)
        self.retainDim = retainDim
        self.outSize = outSize

    def forward(self, x):
        """Return the segmentation map predicted for ``x``."""
        # grab the features from the encoder and order them deepest
        # first (reversed once instead of once per argument)
        encFeatures = self.encoder(x)
        reversedFeatures = encFeatures[::-1]
        # the deepest features are the decoder input; the remaining ones
        # are the skip connections it concatenates along the way
        decFeatures = self.decoder(reversedFeatures[0],
                                   reversedFeatures[1:])
        # pass the decoder features through the head to obtain the
        # segmentation map (named segMap so that the builtin ``map`` is
        # not shadowed)
        segMap = self.head(decFeatures)
        # check to see if we are retaining the original output
        # dimensions and if so, then resize the output to match them
        if self.retainDim:
            segMap = F.interpolate(segMap, self.outSize)
        # return the segmentation map
        return segMap
The test loss is almost constant and changes only slightly from epoch to epoch:
[INFO] saving testing image paths...
[INFO] found 4353 examples in the training set...
[INFO] found 769 examples in the test set...
[INFO] training the network...
0% 0/40 [00:00<?, ?it/s][INFO] EPOCH: 1/40
Train loss: 0.111058, Test loss: 0.0145
2% 1/40 [14:40<9:32:05, 880.13s/it][INFO] EPOCH: 2/40
Train loss: 0.013129, Test loss: 0.0140
5% 2/40 [15:20<4:04:26, 385.96s/it][INFO] EPOCH: 3/40
Train loss: 0.013229, Test loss: 0.0140
8% 3/40 [16:00<2:20:37, 228.05s/it][INFO] EPOCH: 4/40
Train loss: 0.012924, Test loss: 0.0140
10% 4/40 [16:40<1:32:15, 153.76s/it][INFO] EPOCH: 5/40
Train loss: 0.012981, Test loss: 0.0140
12% 5/40 [17:20<1:05:44, 112.69s/it][INFO] EPOCH: 6/40
Train loss: 0.013169, Test loss: 0.0140
15% 6/40 [17:59<49:49, 87.92s/it] [INFO] EPOCH: 7/40
Train loss: 0.013158, Test loss: 0.0140
18% 7/40 [18:39<39:41, 72.15s/it][INFO] EPOCH: 8/40
Train loss: 0.013200, Test loss: 0.0139
20% 8/40 [19:19<32:59, 61.85s/it][INFO] EPOCH: 9/40
Train loss: 0.013065, Test loss: 0.0139
22% 9/40 [19:59<28:24, 54.98s/it][INFO] EPOCH: 10/40
Train loss: 0.013046, Test loss: 0.0139
25% 10/40 [20:39<25:10, 50.36s/it][INFO] EPOCH: 11/40
Train loss: 0.012856, Test loss: 0.0139
28% 11/40 [21:18<22:45, 47.09s/it][INFO] EPOCH: 12/40
Train loss: 0.013120, Test loss: 0.0138
30% 12/40 [21:58<20:56, 44.89s/it][INFO] EPOCH: 13/40
Train loss: 0.012986, Test loss: 0.0138
32% 13/40 [22:38<19:30, 43.36s/it][INFO] EPOCH: 14/40
Train loss: 0.012794, Test loss: 0.0138
35% 14/40 [23:17<18:14, 42.11s/it][INFO] EPOCH: 15/40
Train loss: 0.013119, Test loss: 0.0138
38% 15/40 [23:57<17:13, 41.35s/it][INFO] EPOCH: 16/40
Train loss: 0.013101, Test loss: 0.0138
40% 16/40 [24:37<16:19, 40.82s/it][INFO] EPOCH: 17/40
Train loss: 0.012831, Test loss: 0.0138
42% 17/40 [25:17<15:37, 40.76s/it][INFO] EPOCH: 18/40
Train loss: 0.012973, Test loss: 0.0138
45% 18/40 [25:57<14:51, 40.54s/it][INFO] EPOCH: 19/40
Train loss: 0.012753, Test loss: 0.0138
48% 19/40 [26:37<14:06, 40.31s/it][INFO] EPOCH: 20/40
Train loss: 0.012880, Test loss: 0.0138
50% 20/40 [27:16<13:18, 39.90s/it][INFO] EPOCH: 21/40
Train loss: 0.013107, Test loss: 0.0138