How can I add a dense layer to my PyTorch LSTM model when optimizing with Hyperopt?

I have a simple LSTM model that I want to run through Hyperopt to find optimal hyperparameters. I can already run my model and optimize my learning rate, batch size, and even the hidden dimension and number of layers, but I don't know how I can change my model structure inside my objective function. What I now want to do is optionally add dense layers, depending on the number of layers my LSTM has. So, for example, my model could have 2 LSTM layers and 2 dense layers, or 1 LSTM layer and 3 dense layers, or only 4 LSTM layers.
The only solution I could think of would be to implement different classes and let Hyperopt choose one, but that would require me to implement a lot of different classes, and I am sure there is a nicer way to do this.

    class Model_GRU(nn.Module):
        """Sequence classifier: a stacked GRU followed by a linear head.

        Args:
            n_features: Size of the feature vector at each time step.
            n_classes: Number of output classes (width of the logits).
            n_hidden: Hidden size of every GRU layer.
            n_layers: Number of stacked GRU layers.
        """

        def __init__(self, n_features, n_classes, n_hidden, n_layers):
            # nn.Module must be initialised before any submodule or parameter
            # is assigned, otherwise attribute registration raises.
            super().__init__()
            # NOTE(review): the original ``nn.GRU(`` call was truncated; the
            # arguments are reconstructed from the constructor parameters.
            # ``batch_first=True`` is an assumption (input laid out as
            # (batch, seq, feature)) -- confirm against the data pipeline.
            self.gru = nn.GRU(
                input_size=n_features,
                hidden_size=n_hidden,
                num_layers=n_layers,
                batch_first=True,
            )
            weight = torch.zeros(n_layers, n_hidden)
            self.weight = nn.Parameter(weight)
            self.classifier = nn.Linear(n_hidden, n_classes)

        def init_hidden(self, batch_size=1):
            """Return zero-filled ``(hidden_state, cell_state)`` tensors.

            Both tensors have shape ``(num_layers, batch_size, hidden_size)``
            and are created on the same device/dtype as the module's
            parameters. The tuple form is kept for compatibility; a GRU only
            consumes the first element.

            Args:
                batch_size: Number of sequences in the batch. Previously this
                    was read from a global variable of the same name.
            """
            shape = (self.gru.num_layers, batch_size, self.gru.hidden_size)
            hidden_state = self.weight.new_zeros(shape)
            cell_state = self.weight.new_zeros(shape)
            return (hidden_state, cell_state)

        def forward(self, x):
            """Return class logits for a batch of sequences.

            Args:
                x: Batched 3-D input; dimension 0 is taken as the batch
                    dimension (see the ``batch_first`` note in ``__init__``).

            Returns:
                Tensor of shape ``(batch, n_classes)``.
            """
            self.hidden = self.init_hidden(x.size(0))
            # nn.GRU returns (output, h_n); there is no cell state to unpack.
            _, hidden = self.gru(x, self.hidden[0])
            # Classify from the final hidden state of the top GRU layer.
            out = hidden[-1]
            return self.classifier(out)

# Hyperopt search space
# The entries must live in a dict bound to `space`, which is the name handed
# to `fmin`; the original paste had lost the opening and closing braces.
space = {
    # Log-uniform over [0.001, 1].
    'learning_rate': hp.loguniform('learning_rate', np.log(0.001), np.log(1)),
    'n_hidden': hp.choice('n_hidden', [4,8,16,32,64,128]),
    'batch_size': hp.choice('batch_size',[2,4,8,16]),
    # Log-uniform over [0.00001, 0.1].
    'weight_decay': hp.loguniform('weight_decay', np.log(0.00001), np.log(0.1)),
    # quniform yields floats, quantised to the given step.
    'eta_min': hp.quniform('eta_min', 0.00001,0.01,0.00001), 
    'T_mult': hp.choice('T_mult', [1,2,3]),
    'T_0': hp.quniform('T_0', 10,500,20),
}

# Hyperopt optimisation run
# Search `space` with the TPE algorithm for `num_eval` evaluations of
# `objective`, recording every evaluation in `trials_obj`.
num_eval = 3
best_param = fmin(
    fn=objective,
    space=space,
    algo=tpe.suggest,
    max_evals=num_eval,
    trials=trials_obj,
)

# Objective function
def objective(params):
    global best_val_loss
    print("New Params")
    input_dim = feature_dim+features_added
    output_dim = 3
    lr = params['learning_rate']
    n_epochs = 4
    iterations_per_epoch = 190
    best_val_loss = 100
    patience, trials = 30, 0
    batch_size = params['batch_size']
    weight_decay= params['weight_decay']

    T_0 = int(params['T_0'])
    T_mult= params['T_mult']
    eta_min= params['eta_min']

    model = Model_GRU(input_dim, output_dim, n_hidden, n_layers)

    criterion = nn.CrossEntropyLoss()
    opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    sched = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(opt, T_0=T_0, T_mult=T_mult, eta_min=eta_min, last_epoch=-1)

    for fold, (train_idx, test_idx) in enumerate(kfold.split(sequences, y_kfold)):