Hi, I am new to PyTorch. I am trying to build a multi-label image classification model for some dance images. I have two inputs: a folder of image files and a train CSV file containing the image IDs and their target labels. Please help me understand how to prepare this data for a CNN model.
Here is my data:
train.head()
| |Image|target|
|---|---|---|
|0|96.jpg|manipuri|
|1|163.jpg|bharatanatyam|
|2|450.jpg|odissi|
|3|219.jpg|kathakali|
|4|455.jpg|odissi|
Here is some code I tried, but it is not working.
# Fit a label encoder on the string class names in the 'target' column and
# store the resulting integer codes next to them in a new column.
lb = LabelEncoder()
train["encoded_labels"] = lb.fit_transform(train["target"])
class MyDataset(Dataset):
    """Image classification dataset backed by a CSV file and an image folder.

    The CSV is expected to have an ``Image`` column (file name relative to
    ``img_dir``) and a ``target`` column (class name as a string).

    Args:
        csv_file: Path to the CSV file, read with ``pd.read_csv``.
        img_dir: Directory holding the images. It is joined to the file name
            with ``/``, so it must be a ``pathlib.Path``.
        transforms: Optional callable applied to the loaded RGB image.

    Attributes:
        df: The loaded frame, including an integer ``encoded_labels`` column.
        classes: Sorted class names; a name's position is its integer label.
    """

    def __init__(self, csv_file, img_dir, transforms=None):
        self.df = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transforms = transforms

        # Sorted order gives a deterministic name -> index mapping.
        self.classes = sorted(self.df["target"].unique().tolist())

        # An 'encoded_labels' column added to some other DataFrame is not
        # present in the frame read here, so derive it from 'target' unless
        # the CSV already provides one.
        if "encoded_labels" not in self.df.columns:
            class_to_idx = {name: i for i, name in enumerate(self.classes)}
            self.df["encoded_labels"] = self.df["target"].map(class_to_idx)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``.

        ``image`` is the RGB image after ``transforms`` (if any) and
        ``label`` is a scalar ``torch.long`` tensor.
        """
        # Samplers and random_split hand over plain ints, which have no
        # .item(); only unwrap when a tensor index is actually passed.
        if torch.is_tensor(idx):
            idx = idx.item()

        row = self.df.iloc[idx]
        image = Image.open(self.img_dir / row["Image"]).convert("RGB")
        label = torch.tensor(int(row["encoded_labels"]), dtype=torch.long)

        if self.transforms is not None:
            image = self.transforms(image)
        return image, label

    def __len__(self):
        """Return the number of rows (samples) in the CSV."""
        return len(self.df)
# Preprocessing applied to every image: resize to 224x224, convert to a
# tensor, then normalize each of the three channels with the given
# per-channel mean and standard deviation.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)
# Build the full dataset from the CSV and the image folder.
dataset = MyDataset(
    "dance//dataset//train.csv",
    Path("dance//dataset//train"),
    transform,
)
batch_size = 32

# Hold out 12% of the samples (rounded down) for validation; the rest train.
valid_no = int(0.12 * len(dataset))
trainset, valset = random_split(dataset, [len(dataset) - valid_no, valid_no])
print(f"trainset len {len(trainset)} valset len {len(valset)}")

# One shuffled loader per split, keyed by split name.
dataloader = {
    split: DataLoader(subset, batch_size=batch_size, shuffle=True)
    for split, subset in (("train", trainset), ("val", valset))
}