import numpy as np
import pandas as pd
import os
import pickle
from glob import glob
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch.nn as nn
import torch
import torchvision
import seaborn as sns
from tqdm import tqdm
from PIL import Image
from itertools import chain
import torch.nn.functional as F
import torch.optim as optim
from sklearn.utils import resample
import torch.optim.lr_scheduler as lr_scheduler
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, roc_auc_score, \
multilabel_confusion_matrix, roc_curve, auc, classification_report
from torchsampler import ImbalancedDatasetSampler
# Device configuration GPU support for MAC
if torch.backends.mps.is_available():
mps_device = torch.device("mps")
else:
print("MPS device not found.")
# Paths to Images and DataEntry file
all_xray_df = pd.read_csv('NihXrayData/Data_Entry_2017_v2020.csv')
allImagesGlob = glob('NihXrayData/images*/images/*.png')
# eof
all_image_paths = {os.path.basename(x): x for x in
allImagesGlob}
# print('Scans found:', len(all_image_paths), ', Total Headers', all_xray_df.shape[0])
all_xray_df['path'] = all_xray_df['Image Index'].map(all_image_paths.get)
all_xray_df.sample(3)
# # Data Pre Processing ####
# # Simplifying to 15 primary classes (adding No Finding as the 15th class)
condition_labels = ['Atelectasis', 'Consolidation', 'Infiltration', 'Pneumothorax', 'Edema', 'Emphysema', 'Fibrosis',
'Effusion', 'Pneumonia', 'Pleural_Thickening',
'Cardiomegaly', 'Nodule', 'Mass', 'Hernia', 'No Finding']
for label in condition_labels:
all_xray_df[label] = all_xray_df['Finding Labels'].map(lambda result: 1.0 if label in result else 0)
all_xray_df.head(20)
all_xray_df['disease_vec'] = all_xray_df.apply(lambda target: [target[condition_labels].values], 1).map(
lambda target: target[0])
all_xray_df.head()
print(all_xray_df[condition_labels].sum())
train_df, test_df = train_test_split(all_xray_df, test_size=0.30, random_state=2020)
class XrayDataset(torch.utils.data.Dataset):
def __init__(self, data_frame, transform=None):
self.data_frame = data_frame
self.transform = transform
def __getitem__(self, idx):
row = self.data_frame.iloc[idx]
address = row['path']
data = Image.open(address).convert('RGB')
label = np.array(row['disease_vec'], dtype=np.float32)
if self.transform:
data = self.transform(data)
return data, torch.FloatTensor(label)
def __len__(self):
return len(self.data_frame)
# Define data augmentation for training
train_transform = transforms.Compose([
# transforms.RandomResizedCrop(224),
transforms.Resize(224),
transforms.RandomHorizontalFlip(),
transforms.RandomVerticalFlip(),
transforms.RandomRotation(10),
transforms.RandomGrayscale(p=0.4),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
# Data Sets
train_dataset = XrayDataset(train_df, transform=train_transform)
test_dataset = XrayDataset(test_df, transform=transforms.Compose([
transforms.Resize(224),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
]))
trainSampler = ImbalancedDatasetSampler(train_dataset)
# Data Loaders
train_loader = DataLoader(
train_dataset,
batch_size=64,
num_workers=0,
sampler=trainSampler,
shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
test_dataset,
batch_size=64,
num_workers=0,
shuffle=False,
)
Traceback (most recent call last):
File "/Users/dougietownsell/PycharmProjects/Independent-Study/Xray.py", line 101, in <module>
trainSampler = ImbalancedDatasetSampler(train_dataset)
File "/Users/dougietownsell/PycharmProjects/Independent-Study/venv/lib/python3.9/site-packages/torchsampler/imbalanced.py", line 37, in __init__
df["label"] = self._get_labels(dataset) if labels is None else labels
File "/Users/dougietownsell/PycharmProjects/Independent-Study/venv/lib/python3.9/site-packages/torchsampler/imbalanced.py", line 61, in _get_labels
return dataset.get_labels()
AttributeError: 'XrayDataset' object has no attribute 'get_labels'
Hello I’m trying to use the ImbalancedDataSampler but I keep getting this error not sure how to fix it