Thanks. Your comment jibes with another error I get when I run the DataLoader on my custom dataset in shuffle mode (it is not an issue with shuffle=False). Do I need to set a non-null default value for annotations, so that RandomSampler doesn't run with null (empty) annotations? I'm a bit out of my depth here, so I'd appreciate any other pointers …
class SkinEczemaDataset(torch.utils.data.Dataset):
    """Instance-segmentation dataset backed by a LabelBox raw-JSON export.

    ``annotations`` is the already-parsed JSON structure exported through
    LabelBox's "export labels": a sequence of records, each of which holds
    the image URI under ``'Labeled Data'`` and the mask URI under
    ``['Label']['objects'][0]['instanceURI']``.

    Following the torchvision detection tutorial
    (https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html),
    ``__getitem__`` returns a PIL image and a target dict with the keys
    ``boxes``, ``labels``, ``masks``, ``image_id``, ``area`` and ``iscrowd``.

    Args:
        annotations: Parsed LabelBox export (sequence of record dicts).
        transforms: Optional callable ``(img, target) -> (img, target)``.
    """

    def __init__(self, annotations, transforms=None):
        # Keep the raw-JSON structure that contains both image and mask
        # metadata; images are downloaded lazily in __getitem__.
        self.annotations = annotations
        self.transforms = transforms

    @staticmethod
    def _decode_from_uri(uri, flags):
        """Download ``uri`` and decode it with ``cv2.imdecode(..., flags)``.

        Raises:
            ValueError: If the downloaded payload cannot be decoded.
        """
        # The context manager closes the HTTP response even if read() fails.
        with urllib.request.urlopen(uri) as resp:
            raw = np.asarray(bytearray(resp.read()), dtype="uint8")
        decoded = cv2.imdecode(raw, flags)
        if decoded is None:
            # cv2.imdecode signals failure by returning None, not by raising.
            raise ValueError("could not decode image data from {}".format(uri))
        return decoded

    @staticmethod
    def _build_target(mask, idx):
        """Turn a 2-D id-encoded ``mask`` into the detection target dict.

        The smallest value present in ``mask`` is treated as background and
        dropped; every remaining distinct value becomes one instance.
        """
        obj_ids = np.unique(mask)
        # first id is the background, so remove it
        obj_ids = obj_ids[1:]
        num_objs = len(obj_ids)

        # split the id-encoded mask into a stack of binary masks,
        # shape (num_objs, H, W)
        masks = mask == obj_ids[:, None, None]

        # bounding box [xmin, ymin, xmax, ymax] for each binary mask
        boxes = []
        for obj_mask in masks:
            rows, cols = np.where(obj_mask)
            boxes.append(
                [int(cols.min()), int(rows.min()), int(cols.max()), int(rows.max())]
            )
        # The explicit reshape keeps a (0, 4) shape when there are no
        # objects, so the column indexing below still works.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(num_objs, 4)

        target = {}
        target["boxes"] = boxes
        # there is only one class
        target["labels"] = torch.ones((num_objs,), dtype=torch.int64)
        target["masks"] = torch.as_tensor(masks, dtype=torch.uint8)
        target["image_id"] = torch.tensor([idx])
        target["area"] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        target["iscrowd"] = torch.zeros((num_objs,), dtype=torch.int64)
        return target

    def __getitem__(self, idx):
        """Return ``(img, target)`` for record ``idx``.

        Raises:
            ValueError: If the image or the mask cannot be decoded.
        """
        # Read from self.annotations, not a module-level `annotations`:
        # the global may be missing or stale, e.g. in DataLoader workers.
        record = self.annotations[idx]

        # Image: decode with OpenCV (BGR), switch to RGB, then wrap as a PIL
        # image, which is what the tutorial's transforms expect.
        bgr = self._decode_from_uri(record['Labeled Data'], cv2.IMREAD_COLOR)
        img = Image.fromarray(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))

        # Mask: only the first labelled object of the record is used.
        # NOTE(review): decoding as single-channel grayscale assumes the
        # LabelBox instance mask has a darker background than the object --
        # confirm against a real export.
        mask_uri = record['Label']['objects'][0]['instanceURI']
        mask = self._decode_from_uri(mask_uri, cv2.IMREAD_GRAYSCALE)

        target = self._build_target(mask, idx)

        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        """Return the number of annotation records.

        If this is 0, a DataLoader created with ``shuffle=True`` fails with
        "num_samples should be a positive integer value", so make sure
        ``annotations`` is populated before building the loader.
        """
        return len(self.annotations)
The error message I get when using this dataset with a DataLoader in shuffle=True mode is shown below …
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-21-91f1554f7cdb> in <module>()
27 data_loader = torch.utils.data.DataLoader(
28 dataset, batch_size=2, shuffle=True, num_workers=2,
---> 29 collate_fn=utils.collate_fn)
30
31 data_loader_test = torch.utils.data.DataLoader(
1 frames
/usr/local/lib/python3.7/dist-packages/torch/utils/data/sampler.py in __init__(self, data_source, replacement, num_samples, generator)
101 if not isinstance(self.num_samples, int) or self.num_samples <= 0:
102 raise ValueError("num_samples should be a positive integer "
--> 103 "value, but got num_samples={}".format(self.num_samples))
104
105 @property
ValueError: num_samples should be a positive integer value, but got num_samples=0