I have attached the code below.
No matter the size of the model or which hyperparameters I change, it doesn't seem to learn, and I can't work out why. Any help or guidance would be appreciated.
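For anyone who wants to run the snippets: PfamDataset and FullyConnected are my own classes and aren't shown here. Below are hypothetical, simplified stand-ins (random data, a plain MLP) whose shapes match how the training loops use X and y; they are placeholders, not my real implementations.

import torch
import torch.nn as nn
from torchsummary import summary  # used by the model summary below

# Hypothetical stand-in dataset: random encoded sequences with one-hot labels.
# Shapes inferred from the loops below: X is (max_seq_len,) floats, y is a
# (num_classes,) one-hot float vector (CrossEntropyLoss accepts probability
# targets in PyTorch >= 1.10).
class PfamDataset(torch.utils.data.Dataset):
    def __init__(self, split, num_classes, mapping, max_seq_len, size=1024):
        self.num_classes = num_classes
        self.max_seq_len = max_seq_len
        self.size = size

    def __len__(self):
        return self.size

    def __getitem__(self, idx):
        x = torch.randint(1, 22, (self.max_seq_len,)).float()  # token ids as floats
        label = torch.randint(0, self.num_classes, ())
        y = nn.functional.one_hot(label, self.num_classes).float()
        return x, y

# Hypothetical stand-in model: a small MLP over the encoded sequence.
class FullyConnected(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_classes):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_classes),
        )

    def forward(self, x):
        return self.net(x)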
DOESN’T WORK:::
def training_loop(
    num_classes: int,
    model_selection: list,
    num_epochs: int = 10,
    lr: float = 0.1,
    batch_size: int = 64,
    max_seq_len: int = 128,
):
    # Device
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("Device is", device, end="\n\n")

    # Mapping
    amino_acids = ["A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                   "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y", "X"]
    mapping = {aa: i + 1 for i, aa in enumerate(amino_acids)}
    mapping.update({'X': 21, 'U': 21, 'B': 21, 'O': 21, 'Z': 21})

    # Creating the datasets
    train_dataset = PfamDataset("train", 100, mapping, max_seq_len)
    validation_dataset = PfamDataset("validation", 100, mapping, max_seq_len)

    # Creating the dataloaders
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                                   num_workers=0, shuffle=True)
    validation_dataloader = torch.utils.data.DataLoader(validation_dataset,
                                                        batch_size=batch_size,
                                                        num_workers=0)

    # Initiating the model
    if model_selection[0] == "fc":
        model = FullyConnected(max_seq_len, model_selection[1], num_classes).to(device)
    print("Model Design (TF Format)")
    summary(model, input_size=(1, max_seq_len))
    print("\n\n")

    # Defining the loss function and optimizer
    loss_function = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)

    print("Beginning training...")
    for epoch in range(1, num_epochs + 1):
        # ---------- Training ----------
        model.train()
        train_loss = 0.0
        num_correct = 0
        total_examples = 0
        for X, y in train_dataloader:
            X = X.to(device)
            y = y.to(device)
            y_pred = model(X)
            optimizer.zero_grad()
            loss = loss_function(y_pred, y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            num_correct += (y_pred.argmax(axis=1) == y.argmax(axis=1)).sum().item()
            total_examples += X.shape[0]
        train_accuracy = num_correct / total_examples
        train_loss = train_loss / len(train_dataloader.dataset)

        # ---------- Evaluating ----------
        model.eval()
        eval_loss = 0.0
        num_correct = 0
        total_examples = 0
        for X, y in validation_dataloader:
            X = X.to(device)
            y = y.to(device)
            y_pred = model(X)
            loss = loss_function(y_pred, y)
            eval_loss += loss.item()
            num_correct += (y_pred.argmax(axis=1) == y.argmax(axis=1)).sum().item()
            total_examples += X.shape[0]
        eval_accuracy = num_correct / total_examples
        eval_loss = eval_loss / len(validation_dataloader.dataset)

        if epoch == 1 or epoch % 1 == 0:
            print('Epoch %3d/%3d, train loss: %3.2f, train acc: %3.2f, val loss: %3.2f, val acc: %3.2f' %
                  (epoch, num_epochs, train_loss, train_accuracy, eval_loss, eval_accuracy))

training_loop(model_selection=["fc", 128], num_classes=100, max_seq_len=128, num_epochs=50)
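To confirm it genuinely isn't learning (rather than just learning very slowly), a quick check is to measure how much the weights actually move across an epoch. snapshot and weight_drift below are hypothetical helpers, not part of my code:

# Hypothetical diagnostic helpers: compare parameters before/after training.
def snapshot(model):
    return [p.detach().clone() for p in model.parameters()]

def weight_drift(model, before):
    # Total L2 distance the parameters have moved since the snapshot.
    return sum((p.detach() - q).norm().item()
               for p, q in zip(model.parameters(), before))

# Usage (around one epoch of training):
#   before = snapshot(model)
#   ... train for an epoch ...
#   print("weight drift:", weight_drift(model, before))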
However, if I create the datasets, dataloaders, model, etc. outside of the function and then run the loop, it works. Why is this?
WORKS:::
num_classes = 100
max_seq_len = 128
batch_size = 64
device = "cuda" if torch.cuda.is_available() else "cpu"

# Mapping
amino_acids = ["A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
               "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y", "X"]
mapping = {aa: i + 1 for i, aa in enumerate(amino_acids)}
mapping.update({'X': 21, 'U': 21, 'B': 21, 'O': 21, 'Z': 21})
# Creating the datasets and dataloaders (now outside the function)
train_dataset = PfamDataset("train", 100, mapping, max_seq_len)
validation_dataset = PfamDataset("validation", 100, mapping, max_seq_len)
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                               num_workers=0, shuffle=True)
validation_dataloader = torch.utils.data.DataLoader(validation_dataset,
                                                    batch_size=batch_size,
                                                    num_workers=0)
model = FullyConnected(max_seq_len, 128, num_classes).to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)
def train_evaluate(
    model,
    optimizer,
    loss_function,
    train_dataloader,
    val_dataloader,
    epochs=10,
    device=device,
):
    history = {'loss': [], 'accuracy': [],
               'validation_loss': [], 'validation_accuracy': []}
    for epoch in range(1, epochs + 1):
        # --- TRAIN AND EVALUATE ON TRAINING SET -----------------------------
        model.train()
        train_loss = 0.0
        num_train_correct = 0
        num_train_examples = 0
        for X, y in train_dataloader:
            X = X.to(device)
            y = y.to(device)
            y_pred = model(X)
            optimizer.zero_grad()
            loss = loss_function(y_pred, y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            num_train_correct += (y_pred.argmax(axis=1) == y.argmax(axis=1)).sum().item()
            num_train_examples += X.shape[0]
        train_acc = num_train_correct / num_train_examples
        train_loss = train_loss / len(train_dataloader.dataset)

        # --- EVALUATE ON VALIDATION SET -------------------------------------
        model.eval()
        val_loss = 0.0
        num_val_correct = 0
        num_val_examples = 0
        for X, y in val_dataloader:
            X = X.to(device)
            y = y.to(device)
            y_pred = model(X)
            loss = loss_function(y_pred, y)
            val_loss += loss.item()
            num_val_correct += (y_pred.argmax(axis=1) == y.argmax(axis=1)).sum().item()
            num_val_examples += X.shape[0]
        val_acc = num_val_correct / num_val_examples
        val_loss = val_loss / len(val_dataloader.dataset)

        if epoch == 1 or epoch % 1 == 0:
            print('Epoch %3d/%3d, train loss: %5.2f, train acc: %5.2f, val loss: %5.2f, val acc: %5.2f' %
                  (epoch, epochs, train_loss, train_acc, val_loss, val_acc))

        history['loss'].append(train_loss)
        history['accuracy'].append(train_acc)
        history['validation_loss'].append(val_loss)
        history['validation_accuracy'].append(val_acc)

    return history
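The working version is then invoked along these lines (the exact call isn't shown above; this mirrors the function-based call, using the dataloaders defined earlier):

history = train_evaluate(model, optimizer, loss_function,
                         train_dataloader, validation_dataloader,
                         epochs=50, device=device)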