flummery
(Zheng Yao)
#1
I’m getting the following error:
KeyError: tensor(0)
This happens after running the following for loop:
train_dataset = CovidDataset(train, transform=torchvision.transforms.ToTensor())
print(f"Original Train dataset length: {train_dataset.__len__()}")
indices = torch.arange(3000)
train_dataset = torch.utils.data.Subset(train_dataset, indices)
print(f"New Train dataset length: {train_dataset.__len__()}")
for idx, (data, img) in enumerate(train_dataset):
print(idx)
However when I try it on CIFAR dataset, it works fine:
# import the required modules
import torch
import torchvision
from torchvision.datasets import CIFAR10
from collections import Counter
trainset = CIFAR10(root='./data', train=True, download=True, transform=torchvision.transforms.ToTensor())
indices = torch.arange(3000)
trainset = torch.utils.data.Subset(trainset, indices)
for idx, (data, img) in enumerate(trainset):
print(idx)
Why does it work on CIFAR dataset but not on my dataset?
flummery
(Zheng Yao)
#2
Full Code:
Link to dataset: COVIDx CT | Kaggle
import torch
import torchvision
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import PIL
import os
# Convert .txt to .csv
TXT_FILE_PATH = r"C:\Users\ASUS\Machine Learning\data"
train = pd.read_csv(TXT_FILE_PATH + r"\train_COVIDx-CT.txt", sep=" ")
train.columns=['filename', 'label', 'xmin','ymin','xmax','ymax']
train=train.drop(['xmin', 'ymin','xmax', 'ymax'], axis=1 )
# Put file path to start of image so that it can be loaded
IMG_FILE_PATH = "C:\\Users\\ASUS\\Machine Learning\\data\\images\\"
train['filename'] = IMG_FILE_PATH + train['filename']
# Hyperparameters
BATCH_SIZE = 32
IMG_SIZE = (224,224)
learning_rate = 0.001
num_epochs = 10
in_channels = 1
# Custom Dataset
class CovidDataset(torch.utils.data.Dataset):
def __init__(self, file_type, transform=None):
self.file_type = file_type
self.transform = transform
def __getitem__(self, idx):
img_name = self.file_type['filename'][idx]
img = PIL.Image.open(img_name)
label = self.file_type['label'][idx]
if self.transform:
img = self.transform(img)
return img, label
def __len__(self):
return len(self.file_type)
train_dataset = CovidDataset(train, transform=torchvision.transforms.ToTensor())
print(f"Original Train dataset length: {train_dataset.__len__()}")
indices = torch.arange(3000)
train_dataset = torch.utils.data.Subset(train_dataset, indices)
print(f"New Train dataset length: {train_dataset.__len__()}")
for idx, (data, img) in enumerate(train_dataset):
print(idx)