Hello,
I’m running into trouble while training a U-Net-like model. I defined my own dataset class as follows:
import os
from os.path import isfile, join

from PIL import Image
from torch.utils.data import Dataset
from tqdm import tqdm


class CARVANA(Dataset):
    """
    CARVANA dataset that contains car images as .jpg files. Each car has 16 images
    taken from different angles and a unique id: id_01.jpg, id_02.jpg, ..., id_16.jpg.
    The labels are provided as .gif images that contain the manually cut-out mask
    for each training image.
    """

    def __init__(self, root, train=True, transform=None):
        """
        :param root: path to the folder that contains the dataset folders
        :param train: True for the train set, False for the test set
        :param transform: transform applied to the images and labels
        """
        # initialize variables
        self.root = os.path.expanduser(root)
        self.transform = transform
        self.train = train
        self.data, self.labels = [], []

        def load_images(path, data):
            """
            Loads all the images in path and stores them in data.
            :param path: folder to read the images from
            :param data: list the loaded images are appended to
            :return: list with all the images from path loaded
            """
            # read path content
            images_dir = [f for f in os.listdir(path) if isfile(join(path, f))]
            images_dir.sort()

            # load images
            for image in tqdm(images_dir, desc="loading data"):
                data.append(Image.open(join(path, image)))
            return data

        if self.train:
            self.data = load_images(self.root + "/train", self.data)
            self.labels = load_images(self.root + "/train_masks", self.labels)
        else:
            self.data = load_images(self.root + "/test", self.data)
            self.labels = None

    def __getitem__(self, index):
        """
        :param index: index of the sample to return
        :return: tuple (img, target) with the input data and its label
        """
        # load image and label
        img = self.data[index]
        target = self.labels[index] if self.train else None

        # apply transforms to both
        if self.transform is not None:
            img = self.transform(img)
            target = self.transform(target)

        return img, target

    def __len__(self):
        return len(self.data)
Then I load it using:
train_dataset = dsets.CARVANA(root="./data/",
                              train=True,
                              transform=transforms.Compose([
                                  transforms.Scale(256),
                                  transforms.RandomCrop(256),
                                  transforms.ToTensor(),
                              ]))

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           pin_memory=False,
                                           num_workers=1)
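In case it helps, the folder layout under ./data/ follows the paths the class builds: a train folder with the .jpg images, a train_masks folder with the corresponding .gif masks, and a test folder. A quick sanity check of a single sample (just a sketch; I run it before the training loop) looks like:

print(len(train_dataset))        # number of training images
img, target = train_dataset[0]   # image and mask, 256x256 tensors after the transforms
print(img.size(), target.size())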
And finally, I run a simple training loop:

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Convert torch tensors to Variables
        images = Variable(images.cuda())
        labels = Variable(labels.cuda())

        # Forward + Backward + Optimize
        optimizer.zero_grad()  # zero the gradient buffers
        outputs = unet(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        gc.collect()
I have tracked which lines increase the RAM usage. In particular, only at the line for i, (images, labels) in enumerate(train_loader): does the usage grow, by about 200 MB per iteration, which makes everything really slow after a few iterations (I only have 32 GB). I tried setting images, labels = None, None right before gc.collect(), but it did not help.
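For completeness, this is roughly how I track the memory usage per iteration and where I put the None assignments, a minimal sketch assuming psutil is available (unet, criterion, optimizer, etc. are defined as above):

import gc
import os

import psutil
from torch.autograd import Variable

process = psutil.Process(os.getpid())

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # resident memory right after the loader yields the current batch
        print("iter %d: %.1f MB" % (i, process.memory_info().rss / 1024 ** 2))

        images = Variable(images.cuda())
        labels = Variable(labels.cuda())

        optimizer.zero_grad()
        outputs = unet(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # what I tried: drop the references before collecting
        images, labels = None, None
        gc.collect()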
Any ideas on what the problem could be?
It is probably a silly mistake I am making while loading the batch in the loop, since I am pretty new to PyTorch, but I could not find a similar error in the forum (it looks similar to Data loaders, memory issues and circular references).
Thanks in advance!
Manu.