I am using a ResNet-101-based segmentation network to perform binary semantic segmentation. (I intended DeepLab V3+, but note that the code below actually constructs torchvision's FCN-ResNet101.)
import torch
import torchvision
import loader
from loader import DataLoaderSegmentation
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

# ---- Hyperparameters ----
batch_size = 1
validation_split = .2
shuffle_dataset = True
random_seed = 66
n_class = 2          # binary segmentation: background (0) vs. foreground (1)
num_epochs = 1
lr = 1e-4
momentum = 0.9
w_decay = 1e-5
step_size = 50
gamma = 0.5

# Project-local dataset (see loader.DataLoaderSegmentation).
traindata = DataLoaderSegmentation('/home/ubuntu/Downloads/imgs/lensonly/')
# BUG FIX: the loop below iterated over `trainloader`, which was never defined.
# Wrap the dataset in a DataLoader so batching/shuffling actually happen.
trainloader = DataLoader(traindata, batch_size=batch_size, shuffle=shuffle_dataset)

# NOTE(review): the surrounding text says DeepLab V3+, but this builds
# FCN-ResNet101 — confirm which architecture is intended.
model = torchvision.models.segmentation.fcn_resnet101(
    pretrained=False, progress=True, num_classes=n_class).cuda()
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(model.parameters(), lr=lr,
                      momentum=momentum, weight_decay=w_decay)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model.train()
for epoch in range(num_epochs):  # renamed from `iter`, which shadows the builtin
    for img, lbl in trainloader:
        optimizer.zero_grad()
        img = img.to(device)
        lbl = lbl.to(device)
        lbl[lbl != 0] = 1       # collapse every non-zero label to class 1 (binary mask)
        lbl = lbl.long()        # CrossEntropyLoss requires integer class targets
        out = model(img)
        # torchvision segmentation models return a dict; 'out' holds logits of
        # shape (N, n_class, H, W). CrossEntropyLoss wants targets of shape
        # (N, H, W), so drop the singleton channel dim (dim 1, not dim 0, so
        # this keeps working for batch_size > 1).
        loss = criterion(out['out'], lbl.squeeze(1))
        print(loss)
        loss.backward()         # BUG FIX: original line was `l loss.backward()` (stray token)
        optimizer.step()
    print(epoch)

# Persist the whole trained model (not just the state_dict).
torch.save(model, '/home/ubuntu/Downloads/newnet.pth')
Once the model is trained, I evaluate it on one of the images:
import torch
import torchvision
import numpy as np
from torch.utils.data.sampler import SubsetRandomSampler
from PIL import Image
import matplotlib.pyplot as plt
import torchvision.transforms as T

# BUG FIX: the original called cv2.imread without importing cv2. PIL is already
# imported, so load with it instead; force 3-channel RGB so the tensor has the
# (3, H, W) layout the ResNet backbone expects.
img = Image.open('/home/ubuntu/Downloads/Brain/test0716/train/slice_src_BN01002_032.png').convert('RGB')

# NOTE(review): this normalization must match whatever preprocessing the
# training DataLoaderSegmentation applied — a train/eval preprocessing mismatch
# can push all logits toward a single class. Confirm against the loader.
trf = T.Compose([T.ToTensor(), T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
inp = trf(img).unsqueeze(0).cuda()   # add batch dim -> (1, 3, H, W)

# NOTE(review): training saved to '/home/ubuntu/Downloads/newnet.pth' but this
# loads a relative 'newnet.pth' — confirm both refer to the same file.
fcn = torch.load('newnet.pth')
fcn.eval()

# Inference only — no gradient tracking needed.
with torch.no_grad():
    sam_out = fcn(inp)['out']        # logits, shape (1, n_class, H, W)

# Per-pixel predicted class: drop the batch dim, argmax over the class dim.
om = torch.argmax(sam_out.squeeze(0), dim=0).cpu().numpy()
The output I get for `sam_out` has a shape of [1, 2, 417, 417]; it is given below:
>>> sam_out
tensor([[[[ 2.3502, 2.3502, 2.3502, ..., 1.9911, 1.9911, 1.9911],
[ 2.3502, 2.3502, 2.3502, ..., 1.9911, 1.9911, 1.9911],
[ 2.3502, 2.3502, 2.3502, ..., 1.9911, 1.9911, 1.9911],
...,
[ 1.8227, 1.8227, 1.8227, ..., 2.0846, 2.0846, 2.0846],
[ 1.8227, 1.8227, 1.8227, ..., 2.0846, 2.0846, 2.0846],
[ 1.8227, 1.8227, 1.8227, ..., 2.0846, 2.0846, 2.0846]],
[[-1.7641, -1.7641, -1.7641, ..., -1.8655, -1.8655, -1.8655],
[-1.7641, -1.7641, -1.7641, ..., -1.8655, -1.8655, -1.8655],
[-1.7641, -1.7641, -1.7641, ..., -1.8655, -1.8655, -1.8655],
...,
[-1.6157, -1.6157, -1.6157, ..., -1.8989, -1.8989, -1.8989],
[-1.6157, -1.6157, -1.6157, ..., -1.8989, -1.8989, -1.8989],
[-1.6157, -1.6157, -1.6157, ..., -1.8989, -1.8989, -1.8989]]]],
device='cuda:0', grad_fn=<UpsampleBilinear2DBackward>)
Now notice that, for the two [417, 417] output channels, applying argmax labels every pixel as class 0 and no pixel as class 1. This happens because in one channel all values are positive while in the other channel all values are negative. Am I missing something? Any and all help is appreciated.
Thanks
Nishanth