Dataset and Dataloader for a Siamese Neural Network Approach


I want to implement the Siamese neural network approach with PyTorch. The approach requires two separate inputs (left and right). My data is split into train and test sets, and I would like to use the entire dataset for model training.

For this purpose, I created a custom dataset class. In order to use all the data, there is a separate dataset and dataloader instance for each combination of left/right and train/test. However, this means the dataset has to be stored twice in the data loaders, which leads to memory problems.

Is there a way to do this with just one pair of loaders (one for train, one for test)? The source code is roughly as follows.

Thanks for your help.

import torch
from torch.utils.data import DataLoader, Dataset

class MyDataset(Dataset):
    def __init__(self, np_X, np_Y):
        self.np_X = np_X
        self.np_Y = np_Y
        self.len = len(self.np_X)

    def __getitem__(self, index):
        to_X = torch.tensor(self.np_X[index])
        to_Y = torch.tensor(self.np_Y[index])
        return to_X, to_Y

    def __len__(self):
        return self.len

### Load Data from File System
my_X_train, my_X_test, my_Y_train, my_Y_test = LoadData()

### Dataset

# left
train_set_left = MyDataset(np_X=my_X_train, np_Y=my_Y_train)
test_set_left = MyDataset(np_X=my_X_test, np_Y=my_Y_test)

# right
train_set_right = MyDataset(np_X=my_X_train, np_Y=my_Y_train)
test_set_right = MyDataset(np_X=my_X_test, np_Y=my_Y_test)

### DataLoader

# left
train_loader_left = DataLoader(dataset=train_set_left, batch_size=self._batch_size, shuffle=True)
test_loader_left = DataLoader(dataset=test_set_left, batch_size=self._batch_size, shuffle=False)

# right
train_loader_right = DataLoader(dataset=train_set_right, batch_size=self._batch_size, shuffle=True)
test_loader_right = DataLoader(dataset=test_set_right, batch_size=self._batch_size, shuffle=False)

The training loop for a single epoch is as follows:

for i, (data_left, data_right) in enumerate(zip(self.data_loader_left, self.data_loader_right)):
    # Code


Based on your code snippet, it seems like train_set_left and train_set_right are defined identically.

It should be possible to return both your left and right inputs from the __getitem__ of your custom Dataset. Is there any reason why that cannot be done?

I don't think that this will solve the problem. Since it is the same NumPy array, __getitem__(index) would return the same sample for both inputs, but I need a different one. In my current setup, this is fulfilled via two loaders and shuffle=True. Actually, I need something like __getitem__(index1, index2).
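One way to approximate __getitem__(index1, index2) with a single dataset is to draw a random partner index inside __getitem__ itself, so the left and right samples differ while the underlying arrays are stored only once. This is just a sketch under the assumption that any random pairing is acceptable; the class name PairDataset is made up for illustration:

```python
import numpy as np
import torch
from torch.utils.data import Dataset

class PairDataset(Dataset):
    """Sketch: returns a (left, right) pair per index, where the
    right sample comes from a random second index."""

    def __init__(self, np_X, np_Y):
        self.np_X = np_X
        self.np_Y = np_Y

    def __getitem__(self, index):
        # left sample: the requested index
        left_X = torch.tensor(self.np_X[index])
        left_Y = torch.tensor(self.np_Y[index])
        # right sample: a randomly drawn partner index
        j = np.random.randint(len(self.np_X))
        right_X = torch.tensor(self.np_X[j])
        right_Y = torch.tensor(self.np_Y[j])
        return left_X, left_Y, right_X, right_Y

    def __len__(self):
        return len(self.np_X)
```

With this, one train loader and one test loader would suffice, since the NumPy arrays are referenced (not copied) by the single dataset instance.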

I found a possible workaround. I omitted the creation of the datasets and dataloaders for one input line (i.e. the right one: train_set_right, test_set_right, train_loader_right, test_loader_right). In my training loop I iterate twice over the same dataloader, which returns different samples for each input line. My first training experiments look OK. Does anyone know if this has any side effects?

Training loop:

for i, (data_left, data_right) in enumerate(zip(self.data_loader_left, self.data_loader_left)):
    # Code
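This works because zip calls iter() on the DataLoader twice, and each call to a DataLoader's __iter__ returns a fresh iterator with its own shuffle order. A small sketch to convince yourself (the toy tensor is made up for illustration):

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

data = torch.arange(8).float().unsqueeze(1)  # 8 samples, 1 feature each
loader = DataLoader(TensorDataset(data), batch_size=2, shuffle=True)

# zip(loader, loader) creates two independent iterators,
# each with its own shuffle order.
pairs = list(zip(loader, loader))
print(len(pairs))  # 4: one pair per batch (8 samples / batch_size 2)
```

Note that a (left, right) pair can occasionally contain the same sample, since the two shuffle orders are independent.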

This is fine, but I don't think you are going to get all possible combinations of data_left and data_right; you will only get n random pairs, where n is the number of batches in your DataLoader. I'm unsure if that is what you want.

If you want all possible combinations of data_left and data_right, you should use two nested for loops:

for data_left in self.data_loader:
    for data_right in self.data_loader:
        pass # Logic to process a pair of data here
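The nested loops above visit n² batch pairs instead of n. A runnable sketch with toy data (the tensor and sizes are made up for illustration):

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

data = torch.arange(6).float().unsqueeze(1)  # 6 samples, 1 feature each
loader = DataLoader(TensorDataset(data), batch_size=2, shuffle=True)

count = 0
for data_left in loader:
    for data_right in loader:  # a fresh (reshuffled) iterator each pass
        count += 1

print(count)  # 9 = 3 x 3 batch pairs
```

One caveat: with shuffle=True the inner loop reshuffles on every pass, so this yields all combinations of batches per epoch, not a deterministic Cartesian product of individual samples.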