AssertionError: For batched (3-D) `query`, expected `key` and `value` to be 3-D but found 2-D and 2-D tensors respectively

I am developing a Seq2Seq architecture for a language translation task and am testing it on random tensors before moving to the real data. The encoder works fine, but I am hitting the error above in the decoder, which I don't understand, because the same code runs fine when it is not placed inside an nn.Module class (roughly the standalone sketch below).
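For context, the standalone version I am referring to is roughly the following minimal sketch (tensor sizes are assumed to match the test code further down; here the decoder receives the embedded target and the memory, both 3-D):

import torch
import torch.nn as nn

embedding = nn.Embedding(5000, 100)
decoder_layer = nn.TransformerDecoderLayer(d_model=100, nhead=5)
decoder = nn.TransformerDecoder(decoder_layer, num_layers=5)

tgt = torch.arange(100, 200).unsqueeze(0)   # (1, 100) token ids
tgt_emb = embedding(tgt)                    # (1, 100, 100) after embedding
memory = torch.randn(1, 100, 100)           # stand-in for the encoder output (assumed shape)
out = decoder(tgt_emb, memory)              # no assertion is raised here

Here is the class-based version: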

import torch
import torch.nn as nn

class Encoder(nn.Module):
    def __init__(self, vocab_size, embedding_dim, num_heads):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=embedding_dim, nhead=num_heads)

    def forward(self, x):
        encoder_stack = nn.TransformerEncoder(self.encoder_layer, num_layers=6)
        x = self.embedding(x)
        x = encoder_stack(x)
        return x

class Decoder(nn.Module):
    def __init__(self, vocab_size, embedding_dim, num_heads):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        decoder_layer = nn.TransformerDecoderLayer(d_model=embedding_dim, nhead=num_heads)
        self.decoder = nn.TransformerDecoder(decoder_layer, num_layers=5)

    def forward(self, memory, trg):
        embeddings = self.embedding(trg)
        output = self.decoder(memory, trg)
        return output

src = torch.arange(0,100).unsqueeze(0)
encoder = Encoder(5000, 100, 5)
mem = encoder(src)
mem.shape
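# -> torch.Size([1, 100, 100]): the batched encoder output is 3-D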

embedding = nn.Embedding(5000, 100)
tgt = torch.arange(100, 200).unsqueeze(0)

encoder = Encoder(5000, 100, 5)
encoder(src)

decoder = Decoder(5000, 100, 5)
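# the AssertionError from the title is raised by the call below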
decoder.forward(mem, tgt)