Hello,
I’ve got a custom dataset:
class FlowersDataset(Dataset):
    """Image dataset that serves labelled training rows or unlabelled test files.

    In training mode (``test=False``) the samples are described by a CSV file
    read with pandas: column 0 is used as the image path (appended to the
    module-level ``ROOT`` prefix) and column 1 as the label.  In test mode the
    image paths come from the module-level ``test_paths`` name, and the path
    itself is returned in the label slot because no label is available.
    """

    def __init__(self, transform, test=False, csv_path=""):
        """Record the mode and transform, and load the sample listing.

        Args:
            transform: Callable applied to every opened image; a falsy value
                disables the transformation step.
            test: When true, samples are taken from ``test_paths`` instead of
                the CSV file.
            csv_path: Location of the comma-separated training file; only read
                when ``test`` is false.
        """
        self.test = test
        if not test:
            self.train_df = pd.read_csv(csv_path, sep=",")
        else:
            # NOTE(review): `test_paths` is not a parameter; it must already
            # exist at module level when a test dataset is constructed.
            self.test_list = test_paths
        self.transform = transform

    def __len__(self):
        """Return the number of samples available in the active mode."""
        if self.test:
            return len(self.test_list)
        return len(self.train_df)

    def label_count(self):
        """Return the number of rows in the training dataframe.

        NOTE(review): despite the name, this counts rows, not distinct labels.
        It reads ``self.train_df``, which is only set in training mode.
        """
        return len(self.train_df)

    def __getitem__(self, idx):
        """Return ``(image, label, img_name, idx)`` for the sample at ``idx``.

        Args:
            idx: Position of the sample; a tensor index is converted to plain
                Python values first.

        Returns:
            A 4-tuple of the (optionally transformed) image, the label (the
            image path itself in test mode), the image path as stored in the
            listing, and the index that was used for the lookup.
        """
        # Convert tensor indices to plain Python values.  The original code
        # assigned the *unbound method* `idx.tolist` to an unused name, so the
        # conversion never actually happened.
        if torch.is_tensor(idx):
            idx = idx.tolist()

        if not self.test:
            # We're returning the pair (image, class) from the dataframe.
            img_name = self.train_df.iloc[idx, 0]
            image = Image.open(ROOT + img_name)
            label = self.train_df.iloc[idx, 1]
        else:
            # Test entries are opened as-is, without the ROOT prefix.
            img_name = self.test_list[idx]
            image = Image.open(img_name)
            # No ground truth for test images: the path doubles as the label.
            label = self.test_list[idx]

        if self.transform:
            image = self.transform(image)
        return image, label, img_name, idx
I’ve trained my model with high Val accuracy and Train accuracy:
[INFO] EPOCH: 100/100
Train loss: 0.084340, Train accuracy: 0.9770
Val loss: 0.261234, Val accuracy: 0.9490
So now I’m trying to write the predictions to a .csv file for a Kaggle competition submission. However, my submission score is very low, around 20%.
Here’s how I’m writing to a .csv.
# Training is finished: report the elapsed wall-clock time.
endTime = time.time()
print("[INFO] total time taken to train the model: {:.2f}s".format(
    endTime - startTime))

# Evaluate the network on the test set and write one CSV row per image.
print("[INFO] evaluating network...")

# Put the model in evaluation mode before predicting.
model.eval()

# Gradients are not needed for inference, so autograd is disabled for the
# whole loop; the submission file stays open for the same duration.
with torch.no_grad(), open('submission.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile, delimiter=',')
    writer.writerow(['file_path', 'label'])

    # Each batch is (images, labels, image paths, indices); only the images
    # and their paths are needed to produce the submission rows.
    for x, _labels, img_paths, _indices in test_loader:
        # send the input to the device
        x = x.to(device)

        # Predicted class = position of the largest output per sample.
        # NOTE(review): this writes the raw argmax index; confirm it uses the
        # same label encoding the submission format expects.
        preds = model(x).argmax(dim=1).tolist()

        # Paths and predictions come from the same batch in the same order,
        # so they can be paired directly.
        for img_path, pred in zip(img_paths, preds):
            writer.writerow([img_path.replace("/kaggle/input/com3025-2023-challenge1/", ""), str(pred)])
My batch size is 20, so I understand that when I iterate over the dataloader, image, label, img_name and idx
are each returned in batches of 20. Therefore, I loop through the batch of idx to line each entry of preds[] up with its image path.
Here are the datasets. I then load these into DataLoaders.
# Training samples are listed in the competition's train.csv; the test
# dataset is built in test mode and therefore takes no CSV path.
train_data = FlowersDataset(
    transform=train_transform,
    csv_path='/kaggle/input/com3025-2023-challenge1/train.csv',
)
test_data = FlowersDataset(test=True, transform=test_transform)

# Only the training loader shuffles its samples.
train_loader = torch.utils.data.DataLoader(
    train_data,
    shuffle=True,
    num_workers=WORKERS,
    batch_size=BATCH_SIZE,
)
test_loader = torch.utils.data.DataLoader(
    test_data,
    num_workers=WORKERS,
    batch_size=BATCH_SIZE,
)

# The validation loader wraps `valData`, which is created outside this snippet.
valDataLoader = torch.utils.data.DataLoader(valData, batch_size=BATCH_SIZE)