Dropout before Linear gives NaN

Here are two Sequential modules; the only difference is that contrastive_seq_b keeps the Dropout before the final Linear layer:

    contrastive_seq_a = torch.nn.Sequential(
        # torch.nn.Linear(2 * 46 * 296, 500),
        torch.nn.Linear(2 * 26 * 296, 500),
        torch.nn.BatchNorm1d(500),
        torch.nn.ReLU(),
        torch.nn.Dropout(self.config.dropout),
        torch.nn.Linear(500, 10),
        # torch.nn.BatchNorm1d(10),
        # torch.nn.ReLU(),
        # torch.nn.Dropout(self.config.dropout),
        torch.nn.Linear(10, 2),
        # torch.nn.Sigmoid()
    )


    contrastive_seq_b = torch.nn.Sequential(
        # torch.nn.Linear(2 * 46 * 296, 500),
        torch.nn.Linear(2 * 26 * 296, 500),
        torch.nn.BatchNorm1d(500),
        torch.nn.ReLU(),
        torch.nn.Dropout(self.config.dropout),
        torch.nn.Linear(500, 10),
        # torch.nn.BatchNorm1d(10),
        # torch.nn.ReLU(),
        torch.nn.Dropout(self.config.dropout),
        torch.nn.Linear(10, 2),
        # torch.nn.Sigmoid()
    )

When I use contrastive_seq_b, I get an error after several iterations:

"RuntimeError: cuda runtime error (59) : device-side assert triggered at /data/TENCENT.COM/nealchai/download/pytorch/aten/src/THC/generic/THCTensorScatterGather.cu:75"

The training data is fine. When I print the network parameters, they become NaN at the point the error occurs.
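
A minimal sketch of that parameter check (the name `model` is just a placeholder here for the full network, which is not shown above):

import torch

def report_nan_params(model: torch.nn.Module) -> None:
    # scan every parameter and report any that contain NaN values
    for name, param in model.named_parameters():
        if torch.isnan(param).any():
            print("NaN in parameter:", name)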
My loss function is ContrastiveLoss:

class ContrastiveLoss(torch.nn.Module):
    """
    Contrastive loss function.
    Based on:
    """

    def __init__(self, margin=1.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def check_type_forward(self, in_types):
        assert len(in_types) == 3

        x0_type, x1_type, y_type = in_types
        # print("dim:{},{}".format(x0_type.dim(), x0_type.shape))
        assert x0_type.size() == x1_type.shape
        assert x1_type.size()[0] == y_type.shape[0]
        assert x1_type.size()[0] > 0
        assert x0_type.dim() == 2
        assert x1_type.dim() == 2
        assert y_type.dim() == 1

    def forward(self, x0, x1, y):
        self.check_type_forward((x0, x1, y))

        # Euclidean distance
        diff = x0 - x1
        dist_sq = torch.sum(torch.pow(diff, 2), 1)
        dist = torch.sqrt(dist_sq)

        mdist = self.margin - dist
        dist = torch.clamp(mdist, min=0.0)
        loss = y * dist_sq + (1 - y) * torch.pow(dist, 2)
        loss = torch.sum(loss) / 2.0 / x0.size()[0]
        return loss
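
For completeness, this is roughly how the loss is called (a minimal sketch with dummy tensors; the batch size and values are made up, only the 2-dimensional output size matches the network above):

# smoke test with dummy data; embeddings have size 2 like the network output
loss_fn = ContrastiveLoss(margin=1.0)
x0 = torch.randn(8, 2, requires_grad=True)
x1 = torch.randn(8, 2, requires_grad=True)
y = torch.randint(0, 2, (8,)).float()  # 1 = similar pair, 0 = dissimilar pair
loss = loss_fn(x0, x1, y)
loss.backward()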