import torch
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from torch.utils.data import DataLoader
from torch import nn
import os
import pandas as pd
class CustomDataset(Dataset):
    """Map-style dataset over a pandas feature table and a target column.

    Args:
        featureDataFrame: Table with one sample per row; rows are read by
            position with ``.iloc``.
        targetDataFrame: Targets aligned row-for-row with the features; its
            length defines the dataset length.
        target_dtype: dtype of the returned target tensor. Defaults to
            ``torch.long`` (the original behaviour), which truncates
            continuous targets; pass ``torch.float32`` for regression.
    """

    def __init__(self, featureDataFrame, targetDataFrame, target_dtype=torch.long):
        self.features = featureDataFrame
        self.targets = targetDataFrame
        self.target_dtype = target_dtype

    def __len__(self):
        return len(self.targets)

    @staticmethod
    def _to_tensor(value, dtype):
        """Build a tensor from a pandas row / scalar without positional Series lookups."""
        # Handing a label-indexed Series straight to torch.tensor makes torch
        # probe it with ``series[0]``, which pandas treats as a deprecated
        # positional lookup. Plain Python values avoid that entirely.
        if hasattr(value, "tolist"):
            value = value.tolist()
        return torch.tensor(value, dtype=dtype)

    def __getitem__(self, idx):
        """Return ``(feature, target)`` tensors for the sample at position ``idx``."""
        feature = self._to_tensor(self.features.iloc[idx], torch.float32)
        target = self._to_tensor(self.targets.iloc[idx], self.target_dtype)
        return feature, target  # more items may be returned, but features and targets are the usual pair
# Load the California housing data as pandas objects (features + target).
data = fetch_california_housing(as_frame=True)
X = data['data']
y = data['target']

test_dataset = CustomDataset(X, y)
# NOTE: num_workers=10 loads batches in worker processes. On platforms that
# start workers with "spawn", module-level code like this must sit under an
# ``if __name__ == "__main__":`` guard.
test_dataloader = DataLoader(test_dataset, batch_size=10, num_workers=10, shuffle=False)

# Inspect the first batch only. The loop variables are deliberately NOT named
# ``x``/``y`` so the target Series bound to ``y`` above is not overwritten.
for batch_x, batch_y in test_dataloader:
    print(batch_x.shape, batch_x.dtype)
    print(batch_y.shape, batch_y.dtype)
    break
class SimpleNetwork(nn.Module):
    """Small fully connected network: Linear -> ReLU -> Linear.

    Args:
        in_features: Number of input features per sample. Defaults to 8.
        hidden_features: Width of the hidden layer. Defaults to 10.
        out_features: Number of outputs per sample. Defaults to 1.
    """

    def __init__(self, in_features=8, hidden_features=10, out_features=1):
        super().__init__()
        # The attribute name is kept so state_dict keys stay
        # "linear_relu_stack.<index>.<param>".
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, out_features),
        )

    def forward(self, x):
        """Return the raw (un-activated) outputs of the stack for ``x``."""
        return self.linear_relu_stack(x)
model = SimpleNetwork()
loss = nn.MSELoss()

# Push one batch through the model and compare the two ways of calling the
# loss: with the targets as loaded, and with an extra trailing dimension.
for inputs, targets in test_dataloader:
    predictions = model(inputs)

    # Print both dtypes first, then both shapes (predictions before targets).
    for attribute in ("dtype", "shape"):
        print(getattr(predictions, attribute))
        print(getattr(targets, attribute))

    print("This gives a warning")
    # Per the author's note, this call emits a warning.
    raw_target_loss = loss(predictions, targets)
    print(raw_target_loss)

    print("This does not!")
    # Adding a trailing dimension to the targets avoids the warning.
    column_targets = targets.unsqueeze(dim=1)
    print(loss(predictions, column_targets))
    break
Classification example that fails (here the same shape mismatch raises a ValueError instead of only warning):
class CustomDataset(Dataset):
    """Map-style dataset over a pandas feature table and a target column.

    Args:
        featureDataFrame: Table with one sample per row; rows are read by
            position with ``.iloc``.
        targetDataFrame: Targets aligned row-for-row with the features; its
            length defines the dataset length.
        target_dtype: dtype of the returned target tensor. Defaults to
            ``torch.float`` (the original behaviour).
    """

    def __init__(self, featureDataFrame, targetDataFrame, target_dtype=torch.float):
        self.features = featureDataFrame
        self.targets = targetDataFrame
        self.target_dtype = target_dtype

    def __len__(self):
        return len(self.targets)

    @staticmethod
    def _to_tensor(value, dtype):
        """Build a tensor from a pandas row / scalar without positional Series lookups."""
        # Handing a label-indexed Series straight to torch.tensor makes torch
        # probe it with ``series[0]``, which pandas treats as a deprecated
        # positional lookup. Plain Python values avoid that entirely.
        if hasattr(value, "tolist"):
            value = value.tolist()
        return torch.tensor(value, dtype=dtype)

    def __getitem__(self, idx):
        """Return ``(feature, target)`` tensors for the sample at position ``idx``."""
        feature = self._to_tensor(self.features.iloc[idx], torch.float32)
        target = self._to_tensor(self.targets.iloc[idx], self.target_dtype)
        return feature, target  # more items may be returned, but features and targets are the usual pair
# ``load_breast_cancer`` is not among the file's top-level imports, so it is
# imported here; without it this block raises NameError.
from sklearn.datasets import load_breast_cancer

# Load the breast cancer data as pandas objects (features + target).
data = load_breast_cancer(as_frame=True)
X = data['data']
y = data['target']

test_dataset = CustomDataset(X, y)
# NOTE: num_workers=10 loads batches in worker processes. On platforms that
# start workers with "spawn", module-level code like this must sit under an
# ``if __name__ == "__main__":`` guard.
test_dataloader = DataLoader(test_dataset, batch_size=10, num_workers=10, shuffle=False)

# Inspect the first batch only. The loop variables are deliberately NOT named
# ``x``/``y`` so the target Series bound to ``y`` above is not overwritten.
for batch_x, batch_y in test_dataloader:
    print(batch_x.shape, batch_x.dtype)
    print(batch_y.shape, batch_y.dtype)
    break
class SimpleNetwork(nn.Module):
    """Small fully connected network: Linear -> ReLU -> Linear.

    The final layer has no activation, so the outputs are raw logits.

    Args:
        in_features: Number of input features per sample. Defaults to 30.
        hidden_features: Width of the hidden layer. Defaults to 10.
        out_features: Number of outputs per sample. Defaults to 1.
    """

    def __init__(self, in_features=30, hidden_features=10, out_features=1):
        super().__init__()
        # The attribute name is kept so state_dict keys stay
        # "linear_relu_stack.<index>.<param>".
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, out_features),
        )

    def forward(self, x):
        """Return the logits produced by the stack for ``x``."""
        return self.linear_relu_stack(x)
model = SimpleNetwork()
loss = nn.BCEWithLogitsLoss()

# Push one batch through the model and compare the two ways of calling the
# loss: with the targets as loaded, and with an extra trailing dimension.
for batch_x, batch_y in test_dataloader:
    p = model(batch_x)
    print(p.dtype)
    print(batch_y.dtype)
    print(p.shape)
    print(batch_y.shape)

    try:
        # Unlike the MSELoss example, a target whose size differs from the
        # input is rejected here with a ValueError rather than a warning.
        print(loss(p, batch_y))
    except ValueError:
        print("This doesn't work, wrong dims!")

    try:
        print("Unsqueeze works fine")
        print(loss(p, batch_y.unsqueeze(dim=1)))  # sizes match, so no error
    except (ValueError, RuntimeError) as err:
        # The original caught an undefined name ``Error`` (itself a NameError
        # whenever an exception occurred) and then discarded the failure.
        # Report it instead of hiding it.
        print(f"Unexpected failure after unsqueeze: {err}")
    break
I realize pandas throws all those warnings about indices, but our grading sheets regularly come as CSV files, which lend themselves to using pandas to transform the data into tensors.