Hello,
I am trying to adapt the finetuning tutorial to my own data. I have created a dataset class, but when I train the network the program runs out of memory: the memory usage increases after every batch.
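This is roughly how the growth shows up when I print memory after each batch (only a rough check; the `psutil` call and the loop names here are just for illustration, my real loop is the one from the tutorial):

import os
import psutil
import torch

process = psutil.Process(os.getpid())
for i, (images, targets) in enumerate(data_loader):
    # ... forward / backward / optimizer step exactly as in the tutorial ...
    rss_mb = process.memory_info().rss / 1024 ** 2        # host RAM used by this process
    gpu_mb = torch.cuda.memory_allocated() / 1024 ** 2     # tensors currently allocated on the GPU
    print(f"batch {i}: rss={rss_mb:.0f} MB, gpu={gpu_mb:.0f} MB")

Both numbers keep climbing from batch to batch. My dataset class is below.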
import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image


class PennFudanDataset(torch.utils.data.Dataset):
    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        # load all image files, sorting them to
        # ensure that they are aligned
        self.imgs = list(sorted(os.listdir(os.path.join(root, "input"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "gt"))))

    def __getitem__(self, idx):
        # load images and masks
        img_path = os.path.join(self.root, "input", self.imgs[idx])
        mask_path = os.path.join(self.root, "gt", self.masks[idx])
        img = Image.open(img_path).convert("RGB")
        # note that we haven't converted the mask to RGB,
        # because each color corresponds to a different instance
        # with 0 being background
        mask = Image.open(mask_path)

        obj_ids = []
        class_ids = []
        kontrol = np.zeros(4)  # background color (all channels zero)
        mymask = np.zeros((img.size[1], img.size[0]))
        mask = np.array(mask)

        # indices of every channel value that differs from the background color
        denemece = np.where(mask != kontrol)
        # baskadenemece = np.where(mask == np.array(mask[denemece[0][0]][denemece[1][0]]))
        boxes = []
        while len(denemece[0]) != 0:
            # color at the first remaining non-background location = color of the next instance
            kontrol1 = np.array(mask[denemece[0][0]][denemece[1][0]], dtype=float)
            # all pixels that belong to this instance (same color)
            ilkeslesme = np.where(np.all(mask == kontrol1, axis=-1))
            # instance id is built from mask channels 1 and 2, class id is read from channel 0
            n = (mask[ilkeslesme[0][0]][ilkeslesme[1][0]][1] * 256) + mask[ilkeslesme[0][0]][ilkeslesme[1][0]][2]
            obj_ids.append(n)
            class_ids.append(mask[ilkeslesme[0][0]][ilkeslesme[1][0]][0])
            # write the instance id into mymask and erase the instance from mask
            for j in range(len(ilkeslesme[0])):
                mymask[ilkeslesme[0][j]][ilkeslesme[1][j]] = n
                mask[ilkeslesme[0][j]][ilkeslesme[1][j]] = kontrol
            denemece = np.where((mask != kontrol1) & (mask != kontrol))
            # print(mask[baskadeneme[-1][0]+1][baskadeneme[-1][0]+1] == kontrol1)
            xmin = np.min(ilkeslesme[1])
            xmax = np.max(ilkeslesme[1])
            ymin = np.min(ilkeslesme[0])
            ymax = np.max(ilkeslesme[0])
            boxes.append([xmin, ymin, xmax, ymax])

        obj_ids = np.array(obj_ids)
        # split the color-encoded mask into a set of binary masks
        masks = mymask == obj_ids[:, None, None]
        num_objs = len(obj_ids)

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # there is only one class
        # labels = torch.ones((num_objs,), dtype=torch.int64)
        labels = torch.as_tensor(np.array(class_ids), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        # attempt to free intermediates (the tensors stored in target are of course still referenced)
        del img_path, mask_path, mask, obj_ids, class_ids, kontrol, mymask, denemece, boxes, kontrol1, ilkeslesme, n, xmin, xmax, ymin, ymax, masks, num_objs, labels, image_id, area, iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        return len(self.imgs)
The rest is the same as https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html.
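For completeness, the part I copied from the tutorial looks roughly like this (paraphrased from the tutorial; get_model_instance_segmentation, get_transform, utils.collate_fn and engine.train_one_epoch are the tutorial's helpers, not mine, and "my_data_root", num_classes, num_epochs are placeholders):

import torch
import utils
from engine import train_one_epoch

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# dataset and loader, as in the tutorial
dataset = PennFudanDataset("my_data_root", get_transform(train=True))
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=2, shuffle=True, num_workers=4,
    collate_fn=utils.collate_fn)

# Mask R-CNN model from the tutorial
model = get_model_instance_segmentation(num_classes)
model.to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

for epoch in range(num_epochs):
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)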
Thanks in advance,
Muhammet