Error when the input and output sequence lengths differ in a seq2seq Transformer

Hi everyone,
Why do I get the error below when I run this code?

import torch
import torch.nn as nn

# Minimal reproduction: run nn.TransformerDecoder on a memory and a target
# whose middle dimensions differ (808 vs 810) and report what happens.
d_model = 4
nhead = 2
# NOTE: batch_first is left at its default (False), so the decoder reads every
# tensor as (seq_len, batch, d_model). The tensors below are therefore treated
# as sequence length 1 with batch sizes 808 and 810, and that batch mismatch is
# what raises the RuntimeError during cross-attention. Pass batch_first=True to
# nn.TransformerDecoderLayer to have them read as (batch, seq_len, d_model).
decoder_layer = nn.TransformerDecoderLayer(d_model=d_model, nhead=nhead)
transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=6)
memory = torch.rand(1, 808, d_model)
tgt = torch.rand(1, 810, d_model)

print(f"memory shape: {memory.shape}")
print(f"tgt shape: {tgt.shape}")

# The `try:` header was missing from the snippet, which left the indented
# lines and the `except` clause below as a syntax error.
try:
    out = transformer_decoder(tgt, memory)
    print(f"output shape: {out.shape}")
except RuntimeError as e:
    print(f"RuntimeError: {e}")

RuntimeError: shape ‘[1, 1620, 2]’ is invalid for input of size 3232

It is related to the sequence lengths of the input (808) and the output (810).
If we change the input length to 810, everything works.
In my project (seq2seq with a Transformer) my input and output have the same length, but we have to add [SOS] and [EOS] tokens at the beginning and at the end of the output sequence, which leads to len(output) = len(input) + 2.

This is my own code for the decoder:

class TransformerDecoderLayer(nn.Module):
    """One decoder layer: self-attention, encoder-decoder attention, feed-forward.

    NOTE(review): this listing looks abridged. ``nn.Module.__init__`` is never
    called before the sub-modules are assigned, ``self.addnorm`` is created but
    never used in ``forward``, and ``y3``, ``y5`` and ``y7`` are read without
    being assigned anywhere in the visible code. Presumably the omitted lines
    apply ``self.addnorm`` after each sub-layer -- confirm against the full
    source before relying on this class.
    """
    def __init__(self, config):
        # NOTE(review): no super().__init__() call is visible here; nn.Module
        # requires it before sub-modules can be assigned -- TODO confirm it
        # exists in the real code.
        self.attention = MultiHeadAttention(config)
        self.feed_forward = FeedForward(config)
        self.addnorm = AddNorm(config)
        self.encoder_decoder_attention = MultiHeadAttention(config)
    def forward(self,encoder_output,outputs):
        """Run the layer on the decoder-side ``outputs`` and the ``encoder_output``."""
        # y=self.embedding_layer_output(outputs)
        # Self-attention over the decoder-side input, called with mask=True.
        y2 =self.attention(hidden_state=outputs,mask=True)
        # NOTE(review): y3 is not defined in the visible code (y2 is unused).
        y4 =self.encoder_decoder_attention(encoder_output,y3)
        # NOTE(review): y5 is not defined in the visible code (y4 is unused).
        y6 =self.feed_forward(y5)
        # NOTE(review): y7 is not defined in the visible code (y6 is unused).
        return y7

class TransformerDecoder(nn.Module):
    """Stack of ``TransformerDecoderLayer`` modules applied one after another.

    Args:
        config: configuration object handed to every ``TransformerDecoderLayer``.
    """
    def __init__(self,config):
        # nn.Module must be initialised before any sub-module is assigned.
        super().__init__()
        # NOTE(review): the original read self.num_layer without ever setting
        # it. This assumes the layer count is stored on the config as
        # `num_layer` -- confirm the attribute name against the config class.
        self.num_layer = config.num_layer
        self.decoderLayers = nn.ModuleList([TransformerDecoderLayer(config) for _ in range(self.num_layer)])
    def forward(self,memory,tgt,mask=None):
        """Pass ``tgt`` through every decoder layer, attending to ``memory``.

        Args:
            memory: encoder output, given to each layer as its first argument.
            tgt: decoder-side input; each layer's result feeds the next layer.
            mask: accepted for interface compatibility; not used by the
                visible code.

        Returns:
            The result of the last decoder layer.
        """
        # The loop body and the initial value of `output` were missing from
        # the snippet; each layer's forward takes (encoder_output, outputs).
        output = tgt
        for layer in self.decoderLayers:
            output = layer(memory, output)
        return output

If I run the example above with my own code, there is no error:

# Same experiment as the nn.TransformerDecoder example, run through the
# custom TransformerDecoder defined above.
# NOTE(review): `config` is not defined in the visible code -- it must be
# created before this snippet runs.
decoder_layer = TransformerDecoderLayer(config)
transformer_decoder = TransformerDecoder(config)
memory = torch.rand(1, 808, 768)
tgt = torch.rand(1, 810, 768)

print(f"memory shape: {memory.shape}")
print(f"tgt shape: {tgt.shape}")

# The `try:` header was missing from the snippet, which left the indented
# lines and the `except` clause below as a syntax error.
try:
    # The custom decoder takes (memory, tgt), the reverse of the argument
    # order used by nn.TransformerDecoder.
    out = transformer_decoder(memory,tgt)
    print(f"output shape: {out.shape}")
except RuntimeError as e:
    print(f"RuntimeError: {e}")

memory shape: torch.Size([1, 808, 768])
tgt shape: torch.Size([1, 810, 768])
output shape: torch.Size([1, 810, 768])