Transformer with masking returns NaN values

I have the following C++ (LibTorch) code that applies a mask to a TransformerEncoder:

#include <torch/torch.h>
#include <iostream>
#include <vector>

/// Minimal module wrapping a LibTorch Transformer encoder stack, used to
/// reproduce the masking behaviour.
///
/// The encoder is stored through the `torch::nn::TransformerEncoder` module
/// holder (a shared-pointer wrapper around `TransformerEncoderImpl`) rather
/// than as a bare `Impl` object: the `Impl` has no default constructor, and
/// `register_module` / `operator->` are designed to work with the holder.
struct TestTransformer : torch::nn::Module {
    /// Builds the encoder from TransformerEncoderLayerOptions(420, 10)
    /// (d_model = 420, nhead = 10) stacked 4 times, and registers it so that
    /// `to(device)` and parameter traversal reach it.
    TestTransformer() {
        transEncoder = register_module(
            "transEncoder",
            torch::nn::TransformerEncoder(torch::nn::TransformerEncoderOptions(
                torch::nn::TransformerEncoderLayerOptions(420, 10), 4)));
    }

    /// Runs the encoder stack.
    ///
    /// @param x   Input sequence, forwarded as the encoder's `src`.
    /// @param m1  Forwarded as the encoder's second argument (`src_mask`);
    ///            pass an undefined `torch::Tensor()` for "no mask".
    /// @param m2  Forwarded as the encoder's third argument
    ///            (`src_key_padding_mask`).
    /// @return    The encoder output.
    torch::Tensor forward(torch::Tensor x, torch::Tensor m1, torch::Tensor m2) {
        return transEncoder->forward(x, m1, m2);
    }

    // Starts empty (nullptr holder); the constructor assigns the registered module.
    torch::nn::TransformerEncoder transEncoder{nullptr};
};

// Reproduction driver: builds the model, moves it and a random input to
// "cuda:0", runs one forward pass with a key-padding mask, then prints the
// output and whether it contains any NaN.
int main() {
    torch::Device device("cuda:0");
    TestTransformer m;
    m.to(device);
    // Random input of size {10, 1, 420}; the last dimension matches the 420
    // used when the encoder layers are configured.
    torch::Tensor t = torch::randn({10, 1, 420});
    t = t.to(device);

    // NOTE(review): the second argument of torch::tensor is a TensorOptions,
    // not a shape — {1, 10} was presumably meant to produce a (1, 10) mask,
    // which would need an explicit reshape({1, 10}); confirm.
    // NOTE(review): the values are integer literals and no dtype is given; the
    // error quoted after this snippet asks for a Bool mask, so passing
    // torch::kBool as the options argument is presumably what is needed —
    // confirm.
    torch::Tensor mask2 = torch::tensor({1, 1, 1, 1, 1, 0, 0, 0, 0, 0}, {1, 10});
    mask2 = mask2.to(device);

    // mask2 goes in as the third forward() argument (m2).
    // NOTE(review): nullptr is passed for m1; an undefined torch::Tensor() is
    // presumably the intended "no mask" value — confirm this converts as
    // expected.
    torch::Tensor out2 = m.forward(t, nullptr, mask2);
    // Copy the result back to the CPU for printing.
    std::cout << out2.cpu().to(torch::kFloat) << '\n';
    // Prints a single byte-typed value: nonzero if any output element is NaN.
    std::cout << torch::any(torch::isnan(out2.cpu())).to(torch::kByte) << '\n';
}

I get the following error:

expected mask dtype to be Bool but got Float

However, if I change the mask's dtype to bool, the output contains NaNs.

This only happens when I run on the GPU, not on the CPU.

Is there a fix for this?