I am trying to implement this paper.
I have written the following code, but since this is my first attempt, I am not sure it is correct.
class myDataset(Dataset):
    """Dataset that pairs entries of two sequence collections with a label."""

    ...  # remaining members are not shown in this listing

    def __getitem__(self, idx):
        """Return the sample stored at position ``idx``.

        Args:
            idx: Index into ``sequences_1``, ``sequences_2`` and ``labels``.

        Returns:
            An ``(item, label, vectorize)`` tuple, where ``item`` is
            ``sequences_1[idx] + sequences_2[idx]`` (concatenation when the
            sequences are lists -- TODO confirm the sequence type).
        """
        # Use a local instead of assigning to ``self``: storing the most
        # recently fetched sample on the dataset was a needless side effect.
        item = self.sequences_1[idx] + self.sequences_2[idx]
        return item, self.labels[idx], self.vectorize
def collate_fn(data):
    """Batch ``(item, label, vectorize)`` samples into float64 tensors.

    Args:
        data: Iterable of ``(item, label, vectorize)`` tuples, one per sample.

    Returns:
        ``(items, labels, vectorizes)``: two ``torch.float64`` tensors
        followed by the tuple of per-sample ``vectorize`` values.
    """
    items, labels, vectorizes = zip(*data)
    # Request float64 up front. ``torch.tensor`` defaults to float32 for
    # Python floats, so building first and calling ``.double()`` afterwards
    # silently rounds every value to float32 precision.
    items = torch.tensor(items, dtype=torch.float64)
    labels = torch.tensor(labels, dtype=torch.float64)
    return items, labels, vectorizes
def fetch_dataloader(vectorize, data, *, split=7000, batch_size=10,
                     random_seed=42):
    """Build the train/validation ``DataLoader`` pair for ``data``.

    Args:
        vectorize: Forwarded unchanged to ``myDataset``.
        data: Raw data forwarded unchanged to ``myDataset``.
        split: Number of samples held out for validation.
        batch_size: Batch size used by both loaders.
        random_seed: Seed for the train/validation split, so the same
            samples are held out on every run.

    Returns:
        A dict with the training loader under ``"train"`` and the held-out
        loader under ``"test"``.

    Raises:
        ValueError: If ``split`` is negative or exceeds the dataset size.
    """
    dataset = myDataset(vectorize, data)
    dataset_size = len(dataset)

    # ``random_split`` only checks that the lengths sum to the dataset size,
    # so a too-large ``split`` would otherwise hand it a negative length.
    if not 0 <= split <= dataset_size:
        raise ValueError(
            "split must be between 0 and the dataset size ({}), got {}".format(
                dataset_size, split
            )
        )

    generator = torch.Generator().manual_seed(random_seed)
    trainset, valset = random_split(
        dataset, [dataset_size - split, split], generator=generator
    )

    train_loader = DataLoader(
        trainset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn
    )
    # Evaluation does not benefit from shuffling; keep a stable order.
    validation_loader = DataLoader(
        valset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn
    )
    return {"train": train_loader, "test": validation_loader}
class autoencoder(nn.Module):
    """Two-layer classifier mapping 686 input features to 2 class probabilities.

    ``forward`` returns softmax probabilities, not raw logits. A loss that
    applies log-softmax itself (such as ``nn.CrossEntropyLoss``) should
    therefore not be fed these outputs directly.
    """

    def __init__(self):
        super().__init__()
        # The attribute name and layer positions are unchanged, so the
        # parameter keys in ``state_dict()`` stay the same.
        self.encoder_softmax = nn.Sequential(
            nn.Linear(686, 256),
            nn.ReLU(True),  # positional ``True`` is ``inplace=True``
            nn.Linear(256, 2),
            # Explicit ``dim``: the implicit choice is deprecated and picks
            # dim 0 for 1-D and 3-D inputs, normalizing over the wrong axis.
            nn.Softmax(dim=-1),
        )

    def forward(self, x):
        """Return class probabilities for ``x``.

        Args:
            x: Tensor whose last dimension has size 686.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 2 that sums to 1.
        """
        return self.encoder_softmax(x)
def train(iterations=10, learning_rate=1e-3):
    """Train the ``autoencoder`` classifier, checkpointing after every epoch.

    Args:
        iterations: Number of passes over the training loader.
        learning_rate: Step size for Adam. The default is Adam's documented
            default of 1e-3; the previously hard-coded 0.98 is orders of
            magnitude larger and makes the updates far too big to converge.

    Returns:
        The trained network, located on the selected device.
    """
    data = InitializeDataLoader(num_test=7000)
    train_dl = fetch_dataloader('tri', data)['train']

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)
    net = autoencoder().to(device)

    # The network already ends in a softmax. ``nn.CrossEntropyLoss`` applies
    # log-softmax internally, so feeding it probabilities squashes them a
    # second time. Use NLL on the log-probabilities instead.
    criterion = nn.NLLLoss()
    optimizer = torch.optim.Adam(
        net.parameters(), lr=learning_rate, weight_decay=1e-5)

    checkpoint_path = "./checkpoint/current_checkpoint.pt"
    best_model_path = "./best_model/best_model.pt"

    for epoch in range(iterations):
        print("train_dl len: ", len(train_dl))
        net.train()
        running_loss = 0.0

        for inputs, labels, _ in train_dl:
            # The collate function yields float64 tensors; the loss needs
            # integer class indices and the model float32 inputs.
            labels = labels.long().to(device)
            inputs = inputs.float().to(device)

            optimizer.zero_grad()
            probabilities = net(inputs)
            # Clamp before the log so a probability that underflows to 0
            # cannot produce an infinite loss.
            log_probabilities = torch.log(probabilities.clamp_min(1e-12))
            train_loss = criterion(log_probabilities, labels)
            train_loss.backward()
            optimizer.step()
            running_loss += train_loss.item()

        # ``max(..., 1)`` avoids a ZeroDivisionError on an empty loader.
        epoch_loss = running_loss / max(len(train_dl), 1)
        print("epoch : {}/{}, loss = {:.6f}".format(
            epoch + 1, iterations, epoch_loss))

        # Save the current epoch, model weights and optimizer state.
        checkpoint = {
            'epoch': epoch + 1,
            'state_dict': net.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        save_ckp(checkpoint, False, checkpoint_path, best_model_path)

    return net
I have omitted some parts of the code that didn’t seem important. I would be happy to share them too, if it helps.