/home/v2m/anaconda3/envs/my_env3/lib/python3.7/site-packages/torch/nn/_reduction.py:46: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.
warnings.warn(warning.format(ret))
/home/v2m/anaconda3/envs/my_env3/lib/python3.7/site-packages/torch/nn/functional.py:2622: UserWarning: nn.functional.upsample_bilinear is deprecated. Use nn.functional.interpolate instead.
warnings.warn("nn.functional.upsample_bilinear is deprecated. Use nn.functional.interpolate instead.")
0.4137219845991996
0.4524866500379986
0.5172407126352913
0.5623468630359362
Traceback (most recent call last):
File "pytorch_segmentation_detection/recipes/pascal_voc/segmentation/psp_resnet_50_8s_synch_bn_train.py", line 405, in <module>
optimizer.step()
File "/home/v2m/anaconda3/envs/my_env3/lib/python3.7/site-packages/torch/optim/adam.py", line 107, in step
p.data.addcdiv_(-step_size, exp_avg, denom)
RuntimeError: value cannot be converted to type float without overflow: (3.52033e-08,-1.14383e-08)
My script is below:
#!/usr/bin/env python
# coding: utf-8
# In[2]:
#get_ipython().run_line_magic('matplotlib', 'notebook')
import sys, os
sys.path.append("/home/v2m/projects/pytorch-segmentation-detection/")
sys.path.append("/home/v2m/projects/pytorch-segmentation-detection/synchronized_batchnorm/")
sys.path.insert(0, '/home/v2m/projects/pytorch-segmentation-detection/vision/')
import torch.nn as nn
import torchvision.models as models
import torch
from pytorch_segmentation_detection.datasets.pascal_voc import PascalVOCSegmentation
from pytorch_segmentation_detection.transforms import (ComposeJoint,
RandomHorizontalFlipJoint,
RandomScaleJoint,
CropOrPad,
ResizeAspectRatioPreserve)
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torchvision.transforms as transforms
import numbers
import random
from matplotlib import pyplot as plt
import numpy as np
from PIL import Image
from sklearn.metrics import confusion_matrix
def flatten_logits(logits, number_of_classes):
"""Flattens the logits batch except for the logits dimension"""
logits_permuted = logits.permute(0, 2, 3, 1)
logits_permuted_cont = logits_permuted.contiguous()
logits_flatten = logits_permuted_cont.view(-1, number_of_classes)
return logits_flatten
def flatten_annotations(annotations):
return annotations.view(-1)
def get_valid_annotations_index(flatten_annotations, mask_out_value=255):
return torch.squeeze( torch.nonzero((flatten_annotations != mask_out_value )), 1)
def adjust_learning_rate(optimizer, iteration):
"""Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
max_iteration = 13000.0
multiplier = (1.0 - (iteration / max_iteration)) ** (0.9)
lr = 0.0001 * multiplier
for param_group in optimizer.param_groups:
param_group['lr'] = lr
from pytorch_segmentation_detection.transforms import RandomCropJoint
number_of_classes = 21
labels = range(number_of_classes)
train_transform = ComposeJoint(
[
RandomHorizontalFlipJoint(),
RandomCropJoint(crop_size=(513, 513)),
#[ResizeAspectRatioPreserve(greater_side_size=384),
# ResizeAspectRatioPreserve(greater_side_size=384, interpolation=Image.NEAREST)],
#RandomCropJoint(size=(274, 274))
# RandomScaleJoint(low=0.9, high=1.1),
#[CropOrPad(output_size=(288, 288)), CropOrPad(output_size=(288, 288), fill=255)],
[transforms.ToTensor(), None],
[transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), None],
[None, transforms.Lambda(lambda x: torch.from_numpy(np.asarray(x)).long()) ]
])
trainset = PascalVOCSegmentation(download=False,
joint_transform=train_transform,
split_mode=1)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
shuffle=True, num_workers=4, drop_last=True)
valid_transform = ComposeJoint(
[
[transforms.ToTensor(), None],
[transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), None],
[None, transforms.Lambda(lambda x: torch.from_numpy(np.asarray(x)).long()) ]
])
valset = PascalVOCSegmentation(train=False,
download=False,
joint_transform=valid_transform,
split_mode=1)
valset_loader = torch.utils.data.DataLoader(valset, batch_size=1,
shuffle=False, num_workers=2)
train_subset_sampler = torch.utils.data.sampler.SubsetRandomSampler(range(904))
train_subset_loader = torch.utils.data.DataLoader(dataset=trainset, batch_size=1,
sampler=train_subset_sampler,
num_workers=2)
# Define the validation function to track MIoU during the training
def validate():
fcn.eval()
overall_confusion_matrix = None
for image, annotation in valset_loader:
image = Variable(image.cuda())
logits = fcn(image)
# First we do argmax on gpu and then transfer it to cpu
logits = logits.data
_, prediction = logits.max(1)
prediction = prediction.squeeze(1)
prediction_np = prediction.cpu().numpy().flatten()
annotation_np = annotation.numpy().flatten()
# Mask-out value is ignored by default in the sklearn
# read sources to see how that was handled
current_confusion_matrix = confusion_matrix(y_true=annotation_np,
y_pred=prediction_np,
labels=labels)
if overall_confusion_matrix is None:
overall_confusion_matrix = current_confusion_matrix
else:
overall_confusion_matrix += current_confusion_matrix
intersection = np.diag(overall_confusion_matrix)
ground_truth_set = overall_confusion_matrix.sum(axis=1)
predicted_set = overall_confusion_matrix.sum(axis=0)
union = ground_truth_set + predicted_set - intersection
intersection_over_union = intersection / union.astype(np.float32)
mean_intersection_over_union = np.mean(intersection_over_union)
fcn.train()
return mean_intersection_over_union
def validate_train():
fcn.eval()
overall_confusion_matrix = None
for image, annotation in train_subset_loader:
image = Variable(image.cuda())
logits = fcn(image)
# First we do argmax on gpu and then transfer it to cpu
logits = logits.data
_, prediction = logits.max(1)
prediction = prediction.squeeze(1)
prediction_np = prediction.cpu().numpy().flatten()
annotation_np = annotation.numpy().flatten()
# Mask-out value is ignored by default in the sklearn
# read sources to see how that was handled
current_confusion_matrix = confusion_matrix(y_true=annotation_np,
y_pred=prediction_np,
labels=labels)
if overall_confusion_matrix is None:
overall_confusion_matrix = current_confusion_matrix
else:
overall_confusion_matrix += current_confusion_matrix
intersection = np.diag(overall_confusion_matrix)
ground_truth_set = overall_confusion_matrix.sum(axis=1)
predicted_set = overall_confusion_matrix.sum(axis=0)
union = ground_truth_set + predicted_set - intersection
intersection_over_union = intersection / union.astype(np.float32)
mean_intersection_over_union = np.mean(intersection_over_union)
fcn.train()
return mean_intersection_over_union
class Resnet18_16s(nn.Module):
def __init__(self, num_classes=1000):
super(Resnet18_16s, self).__init__()
# Load the pretrained weights, remove avg pool
# layer and get the output stride of 16
resnet18_16s = models.resnet50(fully_conv=True,
pretrained=True,
output_stride=8,
remove_avg_pool_layer=True,
additional_blocks=0)
# Randomly initialize the 1x1 Conv scoring layer
resnet18_16s.fc = nn.Conv2d(resnet18_16s.inplanes * 2, num_classes, 1)
self.resnet18_16s = resnet18_16s
self.reduction_pooled_1 = nn.Conv2d(resnet18_16s.inplanes, 512, 1)
self.reduction_pooled_2 = nn.Conv2d(resnet18_16s.inplanes, 512, 1)
self.reduction_pooled_3 = nn.Conv2d(resnet18_16s.inplanes, 512, 1)
self.reduction_pooled_4 = nn.Conv2d(resnet18_16s.inplanes, 512, 1)
self._normal_initialization(self.resnet18_16s.fc)
def _normal_initialization(self, layer):
layer.weight.data.normal_(0, 0.01)
layer.bias.data.zero_()
def forward(self, x):
input_spatial_dim = x.size()[2:]
x = self.resnet18_16s.conv1(x)
x = self.resnet18_16s.bn1(x)
x = self.resnet18_16s.relu(x)
x = self.resnet18_16s.maxpool(x)
x = self.resnet18_16s.layer1(x)
x = self.resnet18_16s.layer2(x)
x = self.resnet18_16s.layer3(x)
x = self.resnet18_16s.layer4(x)
fcn_features_spatial_dim = x.size()[2:]
pooled_1 = nn.functional.adaptive_avg_pool2d(x, 1)
pooled_1 = self.reduction_pooled_1(pooled_1)
pooled_1 = nn.functional.upsample_bilinear(pooled_1, size=fcn_features_spatial_dim)
pooled_2 = nn.functional.adaptive_avg_pool2d(x, 2)
pooled_2 = self.reduction_pooled_1(pooled_2)
pooled_2 = nn.functional.upsample_bilinear(pooled_1, size=fcn_features_spatial_dim)
pooled_3 = nn.functional.adaptive_avg_pool2d(x, 3)
pooled_3 = self.reduction_pooled_1(pooled_3)
pooled_3 = nn.functional.upsample_bilinear(pooled_1, size=fcn_features_spatial_dim)
pooled_4 = nn.functional.adaptive_avg_pool2d(x, 6)
pooled_4 = self.reduction_pooled_1(pooled_4)
pooled_4 = nn.functional.upsample_bilinear(pooled_1, size=fcn_features_spatial_dim)
x = torch.cat([x, pooled_1, pooled_2, pooled_3, pooled_4],
dim=1)
x = self.resnet18_16s.fc(x)
x = nn.functional.upsample_bilinear(input=x, size=input_spatial_dim)
return x
# In[2]:
#get_ipython().run_line_magic('matplotlib', 'notebook')
from matplotlib import pyplot as plt
# Create the training plot
loss_current_iteration = 0
loss_history = []
loss_iteration_number_history = []
validation_current_iteration = 0
validation_history = []
validation_iteration_number_history = []
train_validation_current_iteration = 0
train_validation_history = []
train_validation_iteration_number_history = []
f, (loss_axis, validation_axis) = plt.subplots(2, 1)
loss_axis.plot(loss_iteration_number_history, loss_history)
validation_axis.plot(validation_iteration_number_history, validation_history, 'b',
train_validation_iteration_number_history, train_validation_history, 'r')
loss_axis.set_title('Training loss')
validation_axis.set_title('MIoU on validation dataset')
plt.tight_layout()
# In[3]:
from sync_batchnorm import SynchronizedBatchNorm2d, DataParallelWithCallback
def make_batchnorm_syncronized(module):
for child_module_name, child_module in module.named_children():
if isinstance(child_module, nn.BatchNorm2d):
sync_bn = SynchronizedBatchNorm2d(child_module.num_features)
sync_bn.weight = child_module.weight
sync_bn.bias = child_module.bias
sync_bn.running_var = child_module.running_var
sync_bn.running_mean = child_module.running_mean
module.__setattr__(child_module_name, sync_bn)
fcn = Resnet18_16s(num_classes=21)
fcn.apply(make_batchnorm_syncronized)
fcn = DataParallelWithCallback(fcn, device_ids=[0, 1])
fcn.cuda()
fcn.train()
criterion = nn.CrossEntropyLoss(size_average=False).cuda()
optimizer = optim.Adam(fcn.parameters(), lr=0.0001)
# In[ ]:
best_validation_score = 0
loss_current_iteration = 0
iter_size = 20
for epoch in range(1000): # loop over the dataset multiple times
running_loss = 0.0
for i, data in enumerate(trainloader, 0):
# get the inputs
img, anno = data
# We need to flatten annotations and logits to apply index of valid
# annotations. All of this is because pytorch doesn't have tf.gather_nd()
anno_flatten = flatten_annotations(anno)
index = get_valid_annotations_index(anno_flatten, mask_out_value=255)
anno_flatten_valid = torch.index_select(anno_flatten, 0, index)
# wrap them in Variable
# the index can be acquired on the gpu
img, anno_flatten_valid, index = Variable(img.cuda()), Variable(anno_flatten_valid.cuda()), Variable(index.cuda())
# zero the parameter gradients
optimizer.zero_grad()
adjust_learning_rate(optimizer, loss_current_iteration)
# forward + backward + optimize
logits = fcn(img)
logits_flatten = flatten_logits(logits, number_of_classes=21)
logits_flatten_valid = torch.index_select(logits_flatten, 0, index)
loss = criterion(logits_flatten_valid, anno_flatten_valid)
loss.backward()
optimizer.step()
# print statistics
running_loss += (loss.data.item() / logits_flatten_valid.size(0))
if i % 2 == 1:
loss_history.append(running_loss / 2)
loss_iteration_number_history.append(loss_current_iteration)
loss_current_iteration += 1
loss_axis.lines[0].set_xdata(loss_iteration_number_history)
loss_axis.lines[0].set_ydata(loss_history)
loss_axis.relim()
loss_axis.autoscale_view()
loss_axis.figure.canvas.draw()
loss_current_iteration += 1
running_loss = 0.0
current_validation_score = validate()
validation_history.append(current_validation_score)
validation_iteration_number_history.append(validation_current_iteration)
validation_current_iteration += 1
validation_axis.lines[0].set_xdata(validation_iteration_number_history)
validation_axis.lines[0].set_ydata(validation_history)
current_train_validation_score = validate_train()
train_validation_history.append(current_train_validation_score)
train_validation_iteration_number_history.append(train_validation_current_iteration)
train_validation_current_iteration += 1
validation_axis.lines[1].set_xdata(train_validation_iteration_number_history)
validation_axis.lines[1].set_ydata(train_validation_history)
validation_axis.relim()
validation_axis.autoscale_view()
validation_axis.figure.canvas.draw()
# Save the model if it has a better MIoU score.
if current_validation_score > best_validation_score:
torch.save(fcn.state_dict(), 'resnet_50_psp_check.pth')
best_validation_score = current_validation_score
print(best_validation_score)
print('Finished Training')
How to solve this issue?