Hello, I am implementing an algorithm for object Detection. I have written a custom data loader as below:
def getTransform():
transformList = []
transformList += [transforms.ToTensor(),
transforms.Normalize((0.485, 0.456, 0.406),
(0.229, 0.224, 0.225))] #channel-wise, width-wise, and height-wise mean and standard deviation
return transforms.Compose(transformList)
class CreateDataLoader(Dataset):
#constructor
def __init__(self, root_dir, transforms=None):
self.root = root_dir
self.csvs = list(sorted(os.listdir(os.path.join(root_dir, "csv")))) #Get list of all csv files
self.images = list(sorted(os.listdir(os.path.join(root_dir, "images"))))#Get list of all images
self.transforms = transforms
def __getitem__(self, index):
# acquire image, label, its bounding box coordinates and the distance to object
imagePath = os.path.join(self.root, "images", self.images[index])
filename, ext = os.path.splitext(os.path.basename(imagePath))
csvFilename = filename.replace('camera', 'CSV')
#print(filename)
#print("For index ", index)
csvFile = os.path.join(self.root, "csv", (csvFilename + ".csv"))
#print(imagePath)
print(csvFile)
image = Image.open(imagePath).convert("RGB")
bboxes = []
objectLabels = []
distances = []
with open(csvFile, 'r') as read_obj:
csv_reader = csv.reader(read_obj)
for row in csv_reader:
objectLabel = row[0]
Xmin = np.array(row[1])
Ymin = np.array(row[2])
Xmax = np.array(row[3])
Ymax = np.array(row[4])
distance = np.array(row[5])
bbox = np.array([Xmin, Ymin, Xmax, Ymax], dtype=int)
bboxes.append(bbox)
objectLabels.append(int(objectLabel))
distances.append(distance)
distances = np.array(distances, dtype=float)
objectLabels = np.array(objectLabels, dtype=float)
#make everything to torch tensor, important question is it required for bounding boxes??
bboxes = torch.as_tensor(bboxes, dtype=torch.float32)
objectLabels = torch.as_tensor(objectLabels, dtype=torch.float32)
distances = torch.as_tensor(distances, dtype=torch.float32)
if self.transforms is not None:
image = self.transforms(image)
#permute to get channel as first dim CxHxW
#print(image.shape)
image = image.permute(2, 0, 1)
#print(image.shape)
return image, objectLabels, bboxes, distances
Further I get the data loader as below:
trainDataLoader = torch.utils.data.DataLoader(trainDataset, batch_size=config.BATCH_SIZE, shuffle=True,
num_workers=config.NUM_WORKER, collate_fn=utils.collate_fn)
In the training loop, I access the data loader as below:
for epoch in tqdm(range(config.NUM_EPOCHS)):
for i, data in enumerate(trainDataLoader, 0):
image, objectLabel, bboxe, distance = data
#place input to device which is GPU
image = image.to(config.DEVICE)
bboxe = bboxe.to(config.DEVICE)
distance = distance.to(config.DEVICE)
objectLabel = objectLabel.to(config.DEVICE)
However, when I execute the script, I get the error as below.
Traceback (most recent call last):
File ".\train.py", line 197, in <module>
run(opt)
File ".\train.py", line 94, in run
image = image.to(config.DEVICE)
AttributeError: 'tuple' object has no attribute 'to'
I do understand the Traceback, but I cannot understand why this behavior is happening. I have closely followed the example from Training a Classifier — PyTorch Tutorials 1.10.0+cu102 documentation.
Just for further information, every image has its corresponding csv file (containing class id, and bboxes).
Any help would be appreciated. Thanks.