Hi, I am more or less new to PyTorch, so any recommendations would be appreciated!

I am working on a project where I am trying to reach an F1 score of 0.89 and am stuck at 0.87. I am classifying "words" (not real words, just sequences of letters) of 4 letters each into two classes, active and inactive, i.e. binary classification. Each letter was converted to a one-hot vector, and the four vectors were concatenated with `torch.cat` into a single "4-hot" input vector. I have implemented both `CrossEntropyLoss` and `BCEWithLogitsLoss` (the latter is shown in the example below) to optimize my model. Every 10 epochs I compute the F1 score on my validation set to make sure the optimization is doing its job: if the F1 score has not improved, I reset the model to its state 10 epochs earlier and decrease the learning rate.

The provided data has 100000 active cases and 4000 inactive cases, so for both losses I initially used class weights of 1/100 and 1/4.

I have varied many things, e.g. increasing the number of layers, increasing the size of the layers, and changing the weights, but no matter what I do the model reaches 0.87 (or worse) and then gets stuck.

What could I change to improve performance?

class NN(nn.Module):
    """Fully connected network: two ReLU hidden layers and a linear output.

    The output layer applies no activation, so ``forward`` returns raw
    logits.
    """

    def __init__(self, input_dim, output_dim, layer_size):
        """Create the three linear layers.

        Args:
            input_dim: Number of features in each input row.
            output_dim: Number of values produced per input row.
            layer_size: Width of both hidden layers.
        """
        super().__init__()
        self.f1 = nn.Linear(input_dim, layer_size)
        self.f2 = nn.Linear(layer_size, layer_size)
        self.f3 = nn.Linear(layer_size, output_dim)

    def forward(self, input):
        """Return the logits for ``input``; the last dimension must be ``input_dim``."""
        s1 = F.relu(self.f1(input))
        s2 = F.relu(self.f2(s1))
        s3 = self.f3(s2)
        return s3

# Mini-batch size, network input width, network output width, hidden-layer width.
batch_size, input_dim, output_dim, layer_size = 32, 84, 2, 100


def get_batch_paired(batch_size, tensor_1, tensor_2):
    """Draw one random mini-batch of aligned rows from two NumPy arrays.

    The same randomly chosen row indices are applied to both arrays, so
    row ``k`` of the first result still corresponds to row ``k`` of the
    second. Rows are drawn without replacement within a batch.

    Args:
        batch_size: Number of rows to draw. If it exceeds the number of
            rows in ``tensor_1``, every row is returned (in random order).
        tensor_1: NumPy array indexed along its first axis.
        tensor_2: NumPy array with at least as many rows as ``tensor_1``.

    Returns:
        A pair of ``torch.float32`` tensors holding the selected rows of
        ``tensor_1`` and ``tensor_2``.
    """
    # Permuting the row count directly is equivalent to permuting arange(n).
    chosen = np.random.permutation(tensor_1.shape[0])[:batch_size]
    tensor_data_1 = torch.from_numpy(tensor_1[chosen].astype(np.float32))
    tensor_data_2 = torch.from_numpy(tensor_2[chosen].astype(np.float32))
    return tensor_data_1, tensor_data_2


def save_checkpoint(new_score_track, new_learning_rate, new_model_state_dict, PATH):
    """Serialize the tracked score, learning rate and model weights to ``PATH``.

    The three values are stored with ``torch.save`` in a dict under the keys
    ``'score_track'``, ``'learning_rate'`` and ``'net_state_dict'``.
    """
    torch.save(
        {
            'score_track': new_score_track,
            'learning_rate': new_learning_rate,
            'net_state_dict': new_model_state_dict,
        },
        PATH,
    )


####### DIVIDING TRAIN DATA INTO TRAIN AND VALIDATION

from sklearn.utils import shuffle

# Shuffle inputs and targets with the same permutation so rows stay paired.
shuffled_x, shuffled_y = shuffle(x_train_set, y_train_set)

# The first 70% of the shuffled rows are used for training, the rest for validation.
train_input_set, validation_input_set = np.split(
    shuffled_x, [int(.7 * shuffled_x.shape[0])]
)
train_target_set, validation_target_set = np.split(
    shuffled_y, [int(.7 * shuffled_y.shape[0])]
)

train_size = train_input_set.shape[0]

############################## OPTIMIZING ##############################

# `weight` has one entry per output column and is broadcast over the batch.
# NOTE(review): which column is "active" and which is "inactive" is not
# visible here -- confirm the weights are in the intended order.
loss_fnct = nn.BCEWithLogitsLoss(weight=torch.Tensor([1/100, 1/4]))

# Number of mini-batches per epoch, rounded up so a partial batch still counts.
num_iterations = int(np.ceil(train_size / batch_size))

print("Number of Iterations Per Epoch:", num_iterations)

####### OPTIM

it = 0      # total number of optimizer steps taken
epoch = 0   # number of completed epochs (used for logging only)
flag = True  # when True, (re)load the checkpoint at the start of the next epoch

start = time.time()

for j in range(1000):
    if flag:
        # Restore the last saved model, learning rate and best score, and
        # build a fresh optimizer so the restored learning rate takes effect.
        checkpoint = torch.load('test_5.pth')
        net.load_state_dict(checkpoint['net_state_dict'])
        lr = checkpoint['learning_rate']
        score_track = checkpoint['score_track']
        net.eval()

        # RESET LEARNING RATE WITHOUT RESETTING MODEL
        # (uncomment to override the learning rate stored in the checkpoint)
        # if j == 0:
        #     lr = 0.001

        optimizer = optim.Adam(net.parameters(), lr=lr)
        flag = False

    # NOTE(review): the original indentation was lost; this assumes the
    # condition guards only the two list resets -- confirm.
    if j % 5 == 1 or j == 0:
        epoch_loss_progress = []
        epoch_it_progress = []

    ####### TRAIN #######
    net.train()
    for _ in range(num_iterations):
        train_input, train_target = get_batch_paired(
            batch_size, train_input_set, train_target_set
        )

        # TRAINING FORWARD
        train_predict = net(train_input)

        # TRAINING LOSS
        train_target = train_target.view(batch_size, 2)
        train_predict = train_predict.view(batch_size, 2)
        loss = loss_fnct(train_predict, train_target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        it += 1

    ####### SAVING GOOD MODEL/LOADING PREVIOUS MODEL #######
    # Validation needs no gradients; no_grad avoids building an autograd
    # graph over the whole validation set.
    net.eval()
    validation_input_set_tensor = torch.from_numpy(validation_input_set)
    with torch.no_grad():
        validation_logits = net(validation_input_set_tensor.float())

    # Predicted class = index of the largest logit in each row, taken with
    # one batched topk call instead of one call per row.
    _, top_index = validation_logits.topk(1, dim=1)
    validation_prediction_set = top_index.squeeze(1).numpy()

    # Column 1 of the target array is used as the binary label.
    validation_target_score_set = validation_target_set[:, 1]

    end = time.time()
    f1_validation_score = f1_score(validation_target_score_set.astype(int),
                                   validation_prediction_set.astype(int))
    print('Epoch:', epoch, '\nScore:', f1_validation_score, '\nTime:', end - start)

    # Every 10th epoch (except the first) either keep or roll back the model.
    if score_track < f1_validation_score and j % 10 == 0 and j > 0:
        # Improved: save the current weights as the new best checkpoint.
        print('############################', '\nMODEL UPDATED',
              '\nscore_track:', score_track, '\nLr:', lr)
        save_checkpoint(f1_validation_score, lr, net.state_dict(), 'test_5.pth')
        print('new score_track:', f1_validation_score, '\n############################')
        flag = True
    elif j % 10 == 0 and j > 0:
        # Not improved: re-save the previous checkpoint with a third of the
        # learning rate; it is reloaded at the top of the next epoch.
        print('############################', '\nMODEL RESET TO EPOCH BEFORE',
              '\nscore_track:', score_track, '\nEpoch_score:', f1_validation_score)
        save_checkpoint(checkpoint['score_track'], lr / 3,
                        checkpoint['net_state_dict'], 'test_5.pth')
        print('New Lr:', lr / 3, '\n############################')
        flag = True

    epoch += 1