Loading the data.
Due to the resource constraints, the data and knowledge features have already been computed.
There are 1696 training data, 425 test data
Shape of X: (4, 3000) = (#channels, n)
Shape of beat feature: (4, 3000) = (#channels, n)
Shape of rhythm feature: (4, 60) = (#channels, M)
Shape of frequency feature: (4, 1) = (#channels, 1)
There are 1696 training samples
ie: len(train_dict[‘Y’] = 1696 training samples
There are 425 test samples
ie: len(test_dict[‘Y’] = 425
Shape of X,
ie: train_dict[‘X’][:, 0,:]: (4, 3000) = (#channels, n)
Shape of beat feature,
ie: train_dict[‘K_beat’][:, 0, :]: (4, 3000) = (#channels, n)
Shape of rhythm feature,
ie: train_dict[‘K_rhythm’][:, 0, :]: (4, 60) = (#channels, M)
Shape of frequency feature:
ie: train_dict[‘K_freq’][:, 0, :]: (4, 1) = (#channels, 1)
We need to define a ECGDataset class, and then define the DataLoader as well.
I have for i in range(len(batch[i])):
, but this gives me an error because the 2nd i is not initialized. If I change all instances of batch[i] to batch. I get an error because a tensor is expected where theres a tuple. I tried converting batch to a tensor in every way mentioned in convert-a-tuple-into-tensor and they all resulted in errors.
from torch.utils.data import Dataset
class ECGDataset(Dataset):
def __init__(self, data_dict):
"""
TODO: init the Dataset instance.
"""
# code
self.dataset = data_dict
# END
def __len__(self):
"""
TODO: Denotes the total number of samples
"""
# CODE
return len(self.dataset['Y'])
#return len(self.y)
# END
def __getitem__(self, i):
"""
TODO: Generates one sample of data as?
return the ((X, K_beat, K_rhythm, K_freq), Y) for the i-th data.
Do not return ((X, K_beat, K_rhythm, K_freq), Y)
Be careful which dimension you are indexing.
"""
# CODE
return ((torch.tensor(self.dataset['X'][:, i, :], dtype = float),
torch.tensor(self.dataset['K_beat'][:, i, :], dtype = float),
torch.tensor(self.dataset['K_rhythm'][:, i, :], dtype = float),
torch.tensor(self.dataset['K_freq'][:, i, :], dtype = float)),
torch.tensor(self.dataset['Y'][i], dtype=torch.long))
# END
Return a DataLoader instance basing on a Dataset instance, with batch_size specified.
Note that since the data has already been shuffled, we set shuffle=False`
from torch.utils.data import DataLoader
def load_data(dataset, batch_size=128):
def my_collate(batch):
"""
param: batch: this is essentially [dataset[i] for i in [...]]
batch[i] should be ((Xi, Ki_beat, Ki_rhythm, Ki_freq), Yi)
TODO: write a collate function such that it outputs ((X, K_beat, K_rhythm, K_freq), Y)
each output variable is a batched version of what's in the input *batch*, essentially
[dataset[i] for i in [...]]
each output variable should be either float tensor, except Y is long tensor.
"""
from torch.utils.data import Dataset
class ECGDataset(Dataset):
def __init__(self, data_dict):
"""
TODO: init the Dataset instance.
"""
# code
self.dataset = data_dict
# END
def __len__(self):
"""
TODO: Denotes the total number of samples
"""
# CODE
return len(self.dataset['Y'])
#return len(self.y)
# END
def __getitem__(self, i):
"""
TODO: Generates one sample of data as?
return the ((X, K_beat, K_rhythm, K_freq), Y) for the i-th data.
Do not return ((X, K_beat, K_rhythm, K_freq), Y)
Be careful which dimension you are indexing.
"""
# CODE
return ((torch.tensor(self.dataset['X'][:, i, :], dtype = float),
torch.tensor(self.dataset['K_beat'][:, i, :], dtype = float),
torch.tensor(self.dataset['K_rhythm'][:, i, :], dtype = float),
torch.tensor(self.dataset['K_freq'][:, i, :], dtype = float)),
torch.tensor(self.dataset['Y'][i], dtype=torch.long))
# If applicable, channel dim precedes batch dim
# e.g. the shape of each Xi is (# channels, n). In the output, X should be of shape (# channels, batch_size, n)
"""
# CODE
# collect X1, X2, ... , X(LEN(BATCH), each w size = (# channels, n) = (4, 3000) into X.shape:(4, LEN(BATCH), 3000)
# create empty float tensors:
# X.shape:(4, 0, 3000),
# K_beat.shape(4, 0, 3000),
# K_rhythm(4, 0, 60),
# K_freq.shape(4, 0, 1)
# create empty long tensor Y(bs,)
# append batch to tensors until len(batch)
X = torch.empty((4, 0, 3000), dtype=torch.float)
K_beat = torch.empty((4, 0, 3000), dtype=torch.float)
K_rhythm = torch.empty((4, 0, 60), dtype=torch.float)
K_freq = torch.empty((4, 0, 1), dtype=torch.float)
Y = torch.empty((4, len(batch), 3000), dtype=torch.long)
#batch_after = []
for i in range(len(batch[i])):
(X, K_beat, K_rhythm, K_freq), Y = torch.cat((((X,
K_beat,
K_rhythm,
K_freq
), Y
), batch[i]
), dim = 1
)
# END
# ANOTHER APPROACH
#(list_X, list_K_beat, list_K_rhythm, list_K_freq), list_Y = zip(batch)
# #Create all individual tensors
# X = torch.tensor(list_X)
# K_beat = torch.tensor(list_K_beat)
# K_rhythm = torch.tensor(list_K_rhythm)
# K_freq = torch.tensor(list_K_freq)
# Y = torch.tensor(list_Y)
# => ValueError: too many values to unpack (expected 2)
return (X, K_beat, K_rhythm, K_freq), Y
return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False, collate_fn=my_collate)
train_loader = load_data(ECGDataset(train_dict))
test_loader = load_data(ECGDataset(test_dict))
I tried this and got this error:
78 for i in range(len(batch)):
---> 79 (X, K_beat, K_rhythm, K_freq), Y = torch.cat((torch.tensor(((X,
80 K_beat,
81 K_rhythm,
82 K_freq
83 ), Y
84 )), batch
85 ), dim = 1
86 )
87 # END
89 return (X, K_beat, K_rhythm, K_freq), Y
ValueError: only one element tensors can be converted to Python scalars.
I dont know what that means. Where am I converting a tensor to a scalar when I want to convert something into a tensor?