Time series forecasting using a seq2seq (encoder-decoder) model

I am trying to build a multistep time series forecasting model; the code is below. It runs fine, but I am getting the same values in the output when predicting. Could someone please look into my code and let me know what went wrong?

The input sequence length is 180 and the target sequence length is 60.
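
For reference, the training dataset `train` used further down is built roughly like this (a minimal sketch; `series` stands for my raw 1-D numpy array, and the names here are illustrative):

import numpy as np
import torch
import torch.utils.data as data_utils

IN_LEN, OUT_LEN = 180, 60  # input and target window lengths

# Slide a window over the series: 180 input steps, then the next 60 steps as the target
windows = [(series[i:i + IN_LEN], series[i + IN_LEN:i + IN_LEN + OUT_LEN])
           for i in range(len(series) - IN_LEN - OUT_LEN + 1)]
X = torch.tensor(np.array([w[0] for w in windows]), dtype=torch.float32)
y = torch.tensor(np.array([w[1] for w in windows]), dtype=torch.float32)
train = data_utils.TensorDataset(X, y)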

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data_utils
import matplotlib.pyplot as plt
from IPython.display import clear_output

# Encoder-decoder model
MAX_OUTPUT = 1

class Forecast_EncoderDecoder(nn.Module):

    def __init__(self, input_size, hidden_size, output_size, dropout, verbose=False):
        super(Forecast_EncoderDecoder, self).__init__()

        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout = nn.Dropout(dropout)

        # Note: dropout on a single-layer nn.GRU has no effect (PyTorch emits a warning)
        self.encoder_rnn_cell = nn.GRU(input_size, hidden_size, dropout=dropout)
        self.decoder_rnn_cell = nn.GRU(output_size, hidden_size, dropout=dropout)
        self.h2o = nn.Linear(hidden_size, output_size)
        self.verbose = verbose
    def forward(self, input, max_output=MAX_OUTPUT, batch_size=1, device='cpu', ground_truth=None):

        # Encoder: the initial hidden state defaults to zeros of shape (1, batch_size, hidden_size)
        out, hidden = self.encoder_rnn_cell(input)

        # Decoder: start from the final encoder state, with a zero vector as the first input
        decoder_state = hidden
        decoder_input = torch.zeros(1, batch_size, self.output_size).to(device)

        outputs = []

        for i in range(max_output):
            out, decoder_state = self.decoder_rnn_cell(decoder_input, decoder_state)
            out = self.h2o(decoder_state)

            outputs.append(out)

            # Teacher forcing: feed the ground truth as the next decoder input when it is
            # provided; otherwise feed back the model's own (detached) prediction
            next_input = out
            if ground_truth is not None:
                next_input = ground_truth[i].unsqueeze(0)
            decoder_input = next_input.detach()

        return outputs
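
As a sanity check on shapes (not part of the training code, just a sketch with dummy data), the encoder consumes a (seq_len, batch, 1) tensor and each decoder step emits a (1, batch, 1) tensor:

# Shape check: a dummy batch of 4 sequences of length 180
net_check = Forecast_EncoderDecoder(input_size=1, hidden_size=256, output_size=1, dropout=0.25)
x = torch.zeros(180, 4, 1)
outs = net_check(x, max_output=60, batch_size=4)
print(len(outs), outs[0].shape)  # 60 decoder steps, each of shape (1, 4, 1)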
# Train one batch
def train_batch(net, opt, criterion, batch_size, device='cpu', teacher_force=False):

    net.train().to(device)
    opt.zero_grad()
    # `train` is the windowed TensorDataset built earlier
    train_loader = data_utils.DataLoader(train, batch_size=batch_size, shuffle=True)
    train_data, target_data = next(iter(train_loader))

    # Rearrange (batch, seq) -> (seq_len, batch, 1), the layout nn.GRU expects.
    # (A plain reshape((180, batch_size, 1)) would silently scramble the batch.)
    train_data_ = train_data.transpose(0, 1).unsqueeze(2).to(device)
    target_data_ = target_data.transpose(0, 1).unsqueeze(2).to(device)

    total_loss = 0

    outputs = net(train_data_, target_data_.shape[0], batch_size, device,
                  ground_truth=target_data_ if teacher_force else None)

    # Accumulate gradients step by step, then take a single optimizer step.
    # Note: MSELoss already averages over the batch, so dividing by batch_size
    # here (and again in the return) only rescales the reported loss.
    for index, output in enumerate(outputs):
        loss = criterion(output, target_data_[index].unsqueeze(0)) / batch_size
        loss.backward(retain_graph=True)
        total_loss += loss

    opt.step()
    return total_loss.item() / batch_size
# Training setup
def train_setup(net, lr=0.01, n_batches=100, batch_size=10, momentum=0.9, display_freq=5, device='cpu'):

    net = net.to(device)
    criterion = nn.MSELoss()
    opt = optim.Adam(net.parameters(), lr=lr)  # note: the momentum argument is not used by Adam
    # Teacher forcing is enabled for the first third of the batches
    teacher_force_upto = n_batches // 3

    loss_arr = np.zeros(n_batches + 1)

    for i in range(n_batches):
        # Running average of the batch losses seen so far
        loss_arr[i + 1] = (loss_arr[i] * i + train_batch(net, opt, criterion, batch_size,
                           device=device, teacher_force=i < teacher_force_upto)) / (i + 1)

        if i % display_freq == display_freq - 1:
            clear_output(wait=True)

            print('Iteration', i, 'Loss', loss_arr[i + 1])
            plt.figure()
            plt.plot(loss_arr[1:i + 2], '-*')
            plt.xlabel('Iteration')
            plt.ylabel('Loss')
            plt.show()
            print('\n\n')

    torch.save(net, 'model.pt')
    return loss_arr
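
Since torch.save is given the whole module (not just its state_dict), the saved model can be loaded back directly, e.g.:

net = torch.load('model.pt')  # restores the full module object saved above
net.eval()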

net = Forecast_EncoderDecoder(1, 256, 1, 0.25)
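
(`device_gpu` is not defined in the snippet above; I set it earlier along these lines:)

device_gpu = torch.device('cuda' if torch.cuda.is_available() else 'cpu')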

%%time
train_setup(net, lr=0.001, n_batches=150, batch_size=5, display_freq=10, device=device_gpu)

For prediction, the input is a sequence of length 180 with a batch size of 1, i.e. the input shape is (180, 1, 1).

net.eval().to(device='cpu')

predictions = []
for i in range(len(test_df)):
    # Each call decodes a single step (max_output=1) from the same 180-step input window
    outputs = net(test_X, 1)
    predictions.append(outputs[0][0].cpu().detach().numpy())
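
For comparison, the whole 60-step horizon can also be produced in one call, letting the decoder feed back its own predictions (a sketch using the same forward signature as above):

# Decode all 60 steps autoregressively in a single forward pass
with torch.no_grad():
    outputs = net(test_X, max_output=60, batch_size=1)  # list of 60 tensors, each (1, 1, 1)
forecast = torch.cat(outputs, dim=0).squeeze().numpy()  # shape (60,)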

I am unable to figure out what went wrong.