I am trying to create a custom dataset.
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 21 22:00:45 2019
@author: melik
"""
import pandas as pd
# Load data.csv into a DataFrame; `data` is not referenced again in the visible script.
data = pd.read_csv('data.csv')
import requests
import re
import numpy as np
from torch.utils.data.dataset import Dataset
from torchvision import transforms
import torch
from torchvision import transforms, datasets
# getem.py
# python2 script to download all images in a given url
# use: python getem.py http://url.where.images.are
import os
from PIL import Image
import requests
from io import BytesIO
# Module-level placeholder; `a` is rebound to the dataset instance further down the script.
a = []
def web(x):
    """Download each URL in ``x`` and save it as ``image<i>.png``.

    Args:
        x: iterable of image URLs. The position ``i`` of a URL in ``x`` is
            used in its output file name, which is written relative to the
            current working directory.

    Downloading is best-effort: a URL that cannot be fetched, decoded or
    saved is logged and skipped, so the file numbering may contain gaps.
    The loop stops once the URL at index 100 has been processed.
    """
    import logging

    log = logging.getLogger(__name__)
    for i, each in enumerate(x):
        try:
            response = requests.get(each)
            img = Image.open(BytesIO(response.content))
            img.save('image' + str(i) + '.png')
        except Exception as exc:
            # Catch Exception instead of a bare ``except`` so that Ctrl-C and
            # SystemExit can still interrupt a long download run, and report
            # the failure instead of discarding it silently.
            log.warning('skipping %s: %s', each, exc)
        if i == 100:
            break
# Build out.csv: one row per image file name with a running numeric label.
folder = os.path.join('images')
# os.listdir returns names in arbitrary order; sort them so the file -> label
# assignment is reproducible, then keep at most the first 100 entries.
images = sorted(os.listdir(folder))[:100]
# One float label per listed file (0.0, 1.0, ...). Sizing the labels from the
# listing keeps both columns the same length when the folder holds fewer than
# 100 files (a fixed 100-element label array made DataFrame() raise).
label = np.arange(len(images), dtype=float)
df = pd.DataFrame({'images': images, 'labels': label})
df.to_csv('out.csv', encoding='utf-8', index=False)
class CustomDatasetFromImages(Dataset):
    """Dataset that serves (image tensor, label) pairs listed in a CSV file.

    The first CSV column holds image file names and the second column holds
    labels. Each item is the second (index 1) channel of the image as a
    tensor with a leading channel dimension of size 1, plus its label.
    """

    def __init__(self, csv_path, img_path='images'):
        """
        Args:
            csv_path (string): path to csv file
            img_path (string): path to the folder where images are
                (defaults to 'images', the folder the original code
                changed into before opening a file)
        """
        self.img_path = img_path
        # Transforms
        # RandomResizedCrop is the current name of the deprecated
        # RandomSizedCrop. NOTE: self.crop is built here but never applied
        # in __getitem__.
        self.crop = transforms.RandomResizedCrop(48)
        self.to_tensor = transforms.ToTensor()
        # Read the csv file
        self.data_info = pd.read_csv(csv_path, sep=',')
        # First column contains the image paths
        self.image_arr = np.asarray(self.data_info.iloc[:, 0])
        # Second column is the labels
        self.label_arr = np.asarray(self.data_info.iloc[:, 1])
        # Number of rows in the csv file
        self.data_len = len(self.data_info.index)

    def __getitem__(self, index):
        """Return ``(img_as_tensor, single_image_label)`` for row ``index``."""
        # Get image name from the pandas df
        single_image_name = self.image_arr[index]
        # Join against the image folder instead of calling os.chdir(): a
        # chdir on every access changes process-wide state and fails on the
        # second call, because 'images' no longer exists relative to the
        # new working directory.
        image_file = os.path.join(self.img_path, single_image_name)
        # Open image; the context manager closes the file once the pixel
        # data has been loaded by convert().
        with Image.open(image_file) as img_as_img:
            # Convert to RGB so channel index 1 exists even for grayscale or
            # palette images (RGB/RGBA inputs keep their green channel).
            img_as_tensor = self.to_tensor(img_as_img.convert('RGB'))
        # Keep only channel 1, preserving a leading channel dimension.
        img_as_tensor = img_as_tensor[1:2, :, :]
        # NOTE(review): images are returned at their native size; batching
        # presumably needs equally sized tensors - confirm the files are
        # uniform or apply self.crop here.
        # Get label(class) of the image based on the cropped pandas column
        single_image_label = self.label_arr[index]
        return (img_as_tensor, single_image_label)

    def __len__(self):
        """Return the number of rows read from the csv file."""
        return self.data_len
# Build the dataset from the generated CSV file.
a = CustomDatasetFromImages(csv_path='out.csv')
# Wrap it in a loader that yields shuffled batches of four samples.
dataset_loader = torch.utils.data.DataLoader(
    dataset=a,
    num_workers=0,
    shuffle=True,
    batch_size=4,
)
**I can successfully call `iter`, but when I call `dataset_loader.next()` the kernel
never stops.
Thanks for your help.**