Hi everyone,
I have a convolutional network that takes images as input, but each image also carries a colored border that serves as additional input information for the network. Now I want to calculate the loss, but the usual loss function will also take the predicted border into account. The border is completely random and is just an input to the system; I don’t want the model to be penalized for predicting the wrong border color.
How can I extract the inner (border-free) images from the stacked tensor?
In particular, I have 6 images, converted to a tensor ( img_tensor = ToTensor()(img).float() ) and put in a list: imgs.append(img_tensor). So all 6 images as tensor in a list. Then this is going to be a tensor too: imgs = torch.stack(imgs, dim=0)
This happens in the Dataset's __getitem__ (which the DataLoader calls):
def __getitem__(self, index):
    """Load one sample: all frames in one folder, each wrapped in a colored
    border that encodes the (shifted) light condition, stacked into a
    single tensor.

    Parameters
    ----------
    index : int
        Index into ``self.input_data``; selects the folder of frames.

    Returns
    -------
    torch.Tensor
        Stacked frame tensors of shape (num_frames, C, 448, 256).
    """
    path = self.input_data[index]
    imgs_path = sorted(glob.glob(path + '/*.png'))

    # Read per-frame light conditions (assumed: a JSON list of booleans,
    # one entry per frame -- TODO confirm it is at least as long as imgs_path,
    # otherwise the indexing below raises IndexError).
    with open(path + "/lightConditions.json", 'r') as file:
        lightConditions = json.load(file)
    # Shift the conditions by one frame: drop the first entry and pad the
    # end with False, so each frame is paired with the NEXT frame's condition.
    lightConditions.pop(0)
    lightConditions.append(False)

    imgs = []
    for frameNumber, img_path in enumerate(imgs_path):
        img = cv2.imread(img_path)
        # OpenCV loads BGR; convert to RGB before handing to PIL.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        im_pil = Image.fromarray(img)
        # Border color encodes the light condition: orange = True, black = False.
        fill = 'orange' if lightConditions[frameNumber] else 'black'
        imgBorder = ImageOps.expand(im_pil, border=6, fill=fill)
        img = np.asarray(imgBorder)
        # NOTE: resizing happens AFTER the border is added, so the border in
        # the final image is scaled with the frame and is no longer exactly
        # 6 px thick.
        img = cv2.resize(img, (256, 448))
        imgs.append(ToTensor()(img).float())
    # Stack into one (num_frames, C, H, W) tensor.
    return torch.stack(imgs, dim=0)
In the training loop, the stacked 6-image tensor is read and split into input frames and target (ground-truth) frames.
# NOTE(review): this snippet is truncated -- the loop body continues below
# (loss computation etc.), so only comments are added here.
for idx_epoch in range(startEpoch, nEpochs):
# Set the epoch in the sampler so the distributed shuffle is re-seeded per epoch.
# NOTE(review): this sets the epoch on val_loader's sampler but the loop below
# iterates train_loader -- likely should be train_loader.sampler.set_epoch(idx_epoch).
val_loader.sampler.set_epoch(idx_epoch)
# Remember the start time for displaying the per-epoch duration.
startTimeEpoch = datetime.now()
i = 0
# Only rank 0 tracks the running loss / validation flag.
if processGPU==0:
running_loss = 0
beenValuated = False
for index, data_sr in enumerate(train_loader):
# Transfer data to this process's GPU; non_blocking so other processes aren't stalled.
data_sr = data_sr.cuda(processGPU, non_blocking=True)
startTimeIteration = time.time()
# Remove all dimensions of size 1 (drops the DataLoader batch dim).
# NOTE(review): bare .squeeze() also drops any OTHER size-1 dims
# (e.g. single-channel images) -- .squeeze(0) would be safer.
data_sr = data_sr.squeeze()
# Calculate the indices of the input images and GT images.
# After the squeeze, len(data_sr) is the number of frames (6 here).
num_f = len(data_sr)
# If model_type is 0 -> only one (randomly shifted) center frame is the GT.
if cfg.model_type == 0:
idx_start = random.randint(-2, 2)
# Shift the frame window by idx_start, clamped to valid frame indices.
idx_all = list(np.arange(idx_start, idx_start + num_f).clip(0, num_f - 1))
# Pop the middle frame as ground truth; the rest are inputs.
idx_gt = [idx_all.pop(int(num_f / 2))]
idx_input = idx_all
# Else (model_type 1): frames 0-3 are inputs, frames 4..4+dec_frames are GT.
else:
idx_all = np.arange(0, num_f)
idx_input = list(idx_all[0:4])
idx_gt = list(idx_all[4:4+cfg.dec_frames])
imgs_input = data_sr[idx_input]
imgs_gt = data_sr[idx_gt]
# Get the predicted result (same bordered layout as the inputs).
imgs_pred = model(imgs_input)
So we use cfg.model_type = 1. The model then predicts new images that also contain a colored border. Normally the loss calculation would follow here:
loss = criterion_mse(imgs_pred, imgs_gt)
But I can no longer use this as-is. How can I compute the loss only on the inside of the images, excluding the border?