Hey everyone. I’m dealing with a data type mismatch error, and I’m not sure how to solve it. The error says my inputs are of type float, even though I have made sure to cast them to double. Here is my dataset class:
class OsicDataset(Dataset):
    """Sliding-window dataset pairing tabular sequences with one DICOM slice.

    Each item is built from a window of ``sequence_len`` consecutive rows of a
    single data frame. The item contains the window's feature matrix ``X``,
    the next-step target ``y`` (the ``FVC(t+1)`` value of the window's last
    row) and a randomly chosen, resized image from that patient's folder.

    Relies on the module-level names ``fvc_mean``, ``fvc_std`` and
    ``sequence_len`` being defined elsewhere in the file.

    Args:
        df_list: Iterable of data frames (presumably one per patient --
            confirm against the caller). Each must contain ``Patient``,
            ``Weeks`` and ``FVC`` columns; the first column is skipped when
            selecting features.
        dtype: Torch dtype of every returned tensor. Defaults to
            ``torch.double`` for backward compatibility. NOTE: PyTorch
            modules create ``float32`` parameters by default, so feeding
            ``float64`` tensors to an unconverted model raises a dtype
            mismatch error. Either pass ``dtype=torch.float32`` here or call
            ``model.double()`` on the model.
        img_dir: Directory containing one sub-folder of ``*.dcm`` files per
            patient id.

    Raises:
        ValueError: If no frame is long enough to yield a single window.
    """

    DEFAULT_IMG_DIR = '/kaggle/input/osic-pulmonary-fibrosis-progression/train/'

    def __init__(self, df_list, dtype=torch.double, img_dir=DEFAULT_IMG_DIR):
        super().__init__()
        self.df_list = df_list
        self.dtype = dtype
        self.img_dir = img_dir
        self.seq_list = self.preprocess()
        if not self.seq_list:
            raise ValueError(
                'OsicDataset is empty: no data frame has enough rows to form '
                'a window of sequence_len rows'
            )
        columns = self.seq_list[0].columns
        # shift() appends 'FVC(t+1)' last, so the final column is the target.
        self.target = columns[-1]
        # Skip the first column and the target column.
        self.features = columns[1:-1]
        self.transforms = transforms.Compose([
            transforms.ToTensor(),
        ])

    def __len__(self):
        """Return the number of windows across all data frames."""
        return len(self.seq_list)

    def __getitem__(self, idx):
        """Return the ``idx``-th sample as a dict with keys ``X``, ``img``, ``y``.

        All three tensors are converted to ``self.dtype``.

        Raises:
            ValueError: If the patient's image folder has no ``*.dcm`` files.
        """
        window = self.seq_list[idx]

        # Convert straight to the requested dtype. Going through float32
        # first (as `.float()` does) would discard precision before a later
        # cast to double.
        X = torch.tensor(
            window[self.features].to_numpy(dtype=np.float64),
            dtype=self.dtype,
        )
        y = torch.tensor(float(window[self.target].iloc[-1]), dtype=self.dtype)

        patient_id = window['Patient'].iloc[0]
        pattern = f"{self.img_dir.rstrip('/')}/{patient_id}/*.dcm"
        img_paths = glob.glob(pattern)
        if not img_paths:
            raise ValueError(f'no DICOM files found matching {pattern!r}')

        # A different slice may be drawn every time the sample is requested.
        img = self.get_img(np.random.choice(img_paths))
        img = self.transforms(img)

        return {
            'X': X,
            'img': img.to(self.dtype),
            'y': y,
        }

    def preprocess(self):
        """Clean every data frame and cut it into fixed-length windows.

        For each frame: drop rows containing NaN, standardise ``FVC`` with
        the module-level ``fvc_mean`` / ``fvc_std``, append the next-step
        columns via :meth:`shift`, reset the index, then slice into windows
        of ``sequence_len`` rows.

        Returns:
            A flat list of window data frames.
        """
        seq_list = []
        for df in self.df_list:
            # Explicit copy: never write into the caller's frame (and avoid
            # pandas' SettingWithCopyWarning on the dropna() result).
            df_clean = df.dropna().copy()
            df_clean['FVC'] = (df_clean['FVC'] - fvc_mean) / fvc_std
            df_clean = self.shift(df_clean).reset_index(drop=True)
            seq_list.extend(self._windows(df_clean, sequence_len))
        return seq_list

    @staticmethod
    def _windows(df, length):
        """Return every contiguous ``length``-row slice of ``df``, in order.

        A frame with ``n`` rows yields ``n - length + 1`` windows (none when
        ``n < length``), so the window ending on the last row is included.
        """
        return [
            df.iloc[start:start + length]
            for start in range(len(df) - length + 1)
        ]

    def shift(self, x: pd.DataFrame):
        """Return a copy of ``x`` with next-step ``Weeks`` / ``FVC`` columns.

        Adds ``Weeks(t+1)`` and ``FVC(t+1)`` (each the original column
        shifted up by one row) and drops the final row, which has no
        successor. The input frame is left unmodified.
        """
        shifted = x.copy()
        shifted['Weeks(t+1)'] = shifted['Weeks'].shift(-1)
        shifted['FVC(t+1)'] = shifted['FVC'].shift(-1)
        return shifted.iloc[:-1]

    def get_img(self, path):
        """Read the DICOM file at ``path`` and return a 224x224 array."""
        ds = dcmread(path)
        # NOTE(review): the usual DICOM rescale is
        # `pixel * RescaleSlope + RescaleIntercept`; this expression subtracts
        # the intercept and divides by slope * 1000 instead. Kept as written --
        # confirm it is the intended normalisation.
        pxls = (ds.pixel_array - ds.RescaleIntercept) / (ds.RescaleSlope * 1000)
        return cv2.resize(pxls, (224, 224))
Any help in this regard would be greatly appreciated.