Hello
I have multiple files in my directory whose names begin with either P or C. I am trying to train an RNN to predict values of C given a sequence of P.
Each file contains one signal. I break the signal into smaller parts, each with dimension (sequence length, 1), as there is only one feature. Ideally my output dimension should be something like (num_batches, batch_size, seq_length, features). However, as I have multiple files, I get something like (num_files, num_batches, batch_size, seq_length, features).
Here’s my code
class MyDataset(Dataset):
    """Dataset of paired ``P*.mat`` / ``C*.mat`` signal files.

    One item corresponds to one file pair.  ``__getitem__`` loads the
    ``'P_noise'`` array from the P file and the ``'Cm'`` array from the C
    file, cuts the P signal into overlapping windows of ``seq_length``
    samples (stride 1), and pairs each window with the ``Cm`` sample at the
    window's last position.

    Because every item already holds all windows of one file, a
    ``DataLoader`` stacks whole files along a new leading dimension.
    """

    def __init__(self, PATH, seq_length):
        """Collect the ``.mat`` files found directly inside ``PATH``.

        Args:
            PATH: Directory scanned (non-recursively) for ``.mat`` files.
            seq_length: Number of consecutive samples per input window.
        """
        self.seq_length = seq_length
        self.c_paths = []
        self.p_paths = []
        for entry in os.scandir(PATH):
            if not entry.name.endswith('.mat'):
                continue
            # Classify by the leading letter: a substring test would put a
            # name such as "P_Ch1.mat" into both lists.
            if entry.name.startswith('C'):
                self.c_paths.append(entry.path)
            elif entry.name.startswith('P'):
                self.p_paths.append(entry.path)
        # os.scandir yields entries in arbitrary order; sort both lists so
        # that p_paths[i] and c_paths[i] line up.
        # NOTE(review): assumes the part of the name after the P/C prefix
        # sorts the same way for both kinds of file -- confirm.
        self.c_paths.sort()
        self.p_paths.sort()

    def __getitem__(self, index):
        """Return all (window, target) pairs of the ``index``-th file pair.

        Returns:
            Tuple ``(x, y)`` of tensors; ``x`` is reshaped to
            ``(-1, seq_length, 1)`` and ``y`` to ``(-1, 1)``.
        """
        p_noise = sio.loadmat(self.p_paths[index])['P_noise']
        cm = sio.loadmat(self.c_paths[index])['Cm']

        # NOTE(review): the window ending on the very last sample is not
        # generated (the range stops one short); kept so shapes are unchanged.
        starts = range(len(p_noise) - self.seq_length)
        windows = [p_noise[s:s + self.seq_length] for s in starts]
        # The target is the Cm sample aligned with the last sample of a window.
        targets = [cm[s + self.seq_length - 1] for s in starts]

        inputs = torch.from_numpy(
            np.array(windows).reshape((-1, self.seq_length, 1)))
        outputs = torch.from_numpy(np.array(targets).reshape((-1, 1)))

        # Kept for backward compatibility: the most recently loaded item
        # stays available as attributes.
        self.x = inputs
        self.y = outputs
        return self.x, self.y

    def __len__(self):
        """Return the number of C files found."""
        return len(self.c_paths)
# Build the dataset from the 'Dataset' directory and pull one batch to
# inspect the resulting tensor shapes.
PATH = 'Dataset'
dataset = MyDataset(PATH, seq_length=400)
dataloader = DataLoader(dataset=dataset, batch_size=2, shuffle=False)
datatiter = iter(dataloader)
# Use the builtin next(): an iterator's .next() method is not part of the
# Python 3 iterator protocol, so calling it can raise AttributeError.
data = next(datatiter)
x, y = data
x.shape, y.shape
Here’s the output:
(torch.Size([2, 99600, 400, 1]), torch.Size([2, 99600, 1]))