The reason I beleive it is not learning is that it always predicts 0.
class LSTM(nn.Module):
def __init__(self, num_emb, output_size, num_layers=1, hidden_size=128):
super(LSTM, self).__init__()
self.hidden_size = hidden_size
self.num_layers = num_layers
# Create an embedding for each token
self.embedding = nn.Embedding(num_emb, 500)
self.lstm = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size,
num_layers=num_layers, batch_first=True, dropout=0.5)
self.fc_out = nn.Linear(hidden_size, output_size)
def forward(self, input_seq):
input_embs = self.embedding(input_seq)
self.memory_a = False
if self.memory_a:
output, (hidden_out, mem_out) = self.lstm(input_embs, (self.hidden, self.memory))
else:
hidden = torch.zeros(self.num_layers, 512, self.hidden_size, device=device)
memory = torch.zeros(self.num_layers, 512, self.hidden_size, device=device)
output, (hidden_out, mem_out) = self.lstm(input_embs, (hidden, memory))
self.hidden = hidden_out
self.memory = mem_out
self.memory_a = True
return self.fc_out(output)
def objective(trial):
input_size = 500
hidden_size = 500
output_size = 1
num_layers = 2
model = LSTM(num_emb=32000, output_size=1,
num_layers=2, hidden_size=input_size).to(device)
# Hyperparameters to tune
learning_rate = trial.suggest_categorical('learning_rate', [1e-5, 1e-4, 1e-3, 1e-2])
beta_1 = trial.suggest_categorical('beta_1', [0.85, 0.9, 0.95])
beta_2 = trial.suggest_categorical('beta_2', [0.9, 0.95, 0.995, 0.996, 0.997, 0.998, 0.999])
epsilon = trial.suggest_categorical('epsilon', [1e-8, 1e-7, 1e-6, 1e-5, 1e-4])
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, betas=(beta_1, beta_2), eps=epsilon)
loss_fn = nn.BCEWithLogitsLoss()
train_acc = 0
test_acc = 0
bs = 512
for epoch in range(10):
model.train()
training_loss_logger = []
training_acc_logger = []
steps = 0
for batch_idx, batch in tqdm(enumerate(train_loader, 1), desc="Training", total=len(train_loader)):
text, _, labels = batch
text, labels = text.to(device), labels.to(device)
pred = model(text)
loss = loss_fn(pred[:, -1, :], labels.float().unsqueeze(1))
optimizer.zero_grad()
loss.backward()
optimizer.step()
training_loss_logger.append(loss.item())
train_acc += (pred[:, -1, :].argmax(1) == labels).sum()
steps += bs
train_acc = (train_acc/steps).item()
training_acc_logger.append(train_acc)
print(f"VAL LOSS: {np.mean((training_acc_logger))}")
model.eval()
test_loss_logger = []
test_acc_logger = []
steps = 0
with torch.no_grad():
for batch_idx, batch in tqdm(enumerate(val_loader, 1), desc="Testing", total=len(val_loader)):
text, _, labels = batch
text, labels = text.to(device), labels.to(device)
bs = labels.shape[0]
pred = model(text)
loss = loss_fn(pred[:, -1, :], labels.float().unsqueeze(1))
test_loss_logger.append(loss.item())
test_acc += (pred[:, -1, :].argmax(1) == labels).sum()
steps += bs
test_acc = (test_acc/steps).item()
test_acc_logger.append(test_acc)
print(f"VAL LOSS: {np.mean((test_loss_logger))}")
print(f"VAL ACC: {test_acc}")
print(f"pred: {pred[:, -1, :].argmax(1)}")
print(f"labels: {labels}")
trial.set_user_attr("val_loss", np.mean((test_loss_logger)))
trial.set_user_attr("accuracy", test_acc)
trial.set_user_attr("model", model)
trial.set_user_attr("hyperparameters", {'learning_rate': learning_rate, 'beta_1': beta_1, 'beta_2': beta_2, 'epsilon': epsilon})
print(f"Used Hyperparameters: 'learning_rate': {learning_rate}, 'beta_1': {beta_1}, 'beta_2': {beta_2}, 'epsilon': {epsilon}")
return np.mean((test_loss_logger))