Dimension error for transformer model

I'm trying to use a transformer model (based heavily on the PyTorch tutorial code, which I hope to adapt to my own problem) on the same chess data I was using before (which I was able to debug). However, I am getting a dimension error.

import math

import torch
from torch import nn, Tensor
from torch.nn import TransformerEncoder, TransformerEncoderLayer


class TransformerModel(nn.Module):

    def __init__(self, ntoken: int, d_model: int, nhead: int, d_hid: int,
                 nlayers: int, dropout: float = 0.5):
        super().__init__()
        self.model_type = 'Transformer'
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        encoder_layers = TransformerEncoderLayer(d_model, nhead, d_hid, dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        self.encoder = nn.Embedding(ntoken, d_model)
        self.d_model = d_model
        self.decoder = nn.Linear(d_model, 1)  # one scalar per token instead of ntoken logits

        self.init_weights()

    def init_weights(self) -> None:
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, src: Tensor, src_mask: Tensor) -> Tensor:
        """
        Args:
            src: Tensor, shape [seq_len, batch_size]
            src_mask: Tensor, shape [seq_len, seq_len]

        Returns:
            output Tensor of shape [seq_len, batch_size, 1]
        """
        src = self.encoder(src) * math.sqrt(self.d_model)
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src, src_mask)
        output = self.decoder(output)
        return output

def generate_square_subsequent_mask(sz: int) -> Tensor:
    """Generates an upper-triangular matrix of -inf, with zeros on diag."""
    return torch.triu(torch.ones(sz, sz) * float('-inf'), diagonal=1)
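
For completeness, PositionalEncoding is the tutorial's class, which I haven't touched:

class PositionalEncoding(nn.Module):

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        # Precompute the sin/cos position table once, shaped [max_len, 1, d_model]
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x: Tensor) -> Tensor:
        # x: [seq_len, batch_size, d_model]; add the encoding for the first seq_len positions
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)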

import torch.optim as optim

Instantiate the model, the Adam optimizer, and the loss function (MSE, as the traceback below shows):

model = TransformerModel(ntoken=64 * 12 + 6, d_model=200, nhead=2, d_hid=200,
                         nlayers=6, dropout=0.2).cuda()
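
For reference, a dummy forward pass gives the shapes that show up in the error (the sizes 32 and 774 here are my reconstruction from the traceback below, not literally my training loop):

src = torch.randint(0, 64 * 12 + 6, (32, 774)).cuda()  # hypothetical batch: 32 samples x 774 tokens
src_mask = generate_square_subsequent_mask(32).cuda()  # the encoder treats dim 0 as seq_len by default
output = model(src, src_mask)
print(output.shape)   # torch.Size([32, 774, 1]) -- one value per token
target = torch.randn(32, 1).cuda()  # one value per sample, like my real targets
print(target.shape)   # torch.Size([32, 1])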

Here is the error:
/home/ec2-user/.local/lib/python3.7/site-packages/torch/nn/modules/loss.py:536: UserWarning: Using a target size (torch.Size([32, 1])) that is different to the input size (torch.Size([32, 774, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
  return F.mse_loss(input, target, reduction=self.reduction)
Traceback (most recent call last):
  File "transformernneval.py", line 112, in <module>
    loss = criterion(output, target)
  File "/home/ec2-user/.local/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/ec2-user/.local/lib/python3.7/site-packages/torch/nn/modules/loss.py", line 536, in forward
    return F.mse_loss(input, target, reduction=self.reduction)
  File "/home/ec2-user/.local/lib/python3.7/site-packages/torch/nn/functional.py", line 3291, in mse_loss
    expanded_input, expanded_target = torch.broadcast_tensors(input, target)
  File "/home/ec2-user/.local/lib/python3.7/site-packages/torch/functional.py", line 74, in broadcast_tensors
    return _VF.broadcast_tensors(tensors)  # type: ignore[attr-defined]
RuntimeError: The size of tensor a (774) must match the size of tensor b (32) at non-singleton dimension 1
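
The mismatch reproduces with plain tensors too, independent of the model (a minimal sketch of what the warning describes):

import torch
import torch.nn.functional as F

output = torch.randn(32, 774, 1)  # model output: one value per token
target = torch.randn(32, 1)       # target: one value per sample
# Broadcasting right-aligns the shapes: (32, 774, 1) vs (1, 32, 1),
# so dimension 1 pits 774 against 32 and mse_loss raises the same RuntimeError.
loss = F.mse_loss(output, target)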