The output is being broadcast

I’m training my model, but the prediction seems to be broadcast over the whole tensor:


import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=4, padding=1)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=4)
        #self.conv2_drop = nn.Dropout2d()
        #self.fc1 = nn.Linear(32 * 193 * 7, args.output_dim * 2)
        #self.fc2 = nn.Linear(args.output_dim * 2, args.output_dim)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
        #x = x.view(-1, 1369720)
        #x = F.relu(self.fc1(x))
        #x = F.dropout(x,p = 0.4, training=self.training)
        #x = self.fc2(x)
        #return F.log_softmax(x, dim=1)
        return x


class Combine(nn.Module):
    def __init__(self):
        super(Combine, self).__init__()
        self.cnn = CNN()
        self.rnn = nn.LSTM(
            input_size=56856, 
            hidden_size=args.unit_dim, 
            num_layers=args.layer_dim,
            batch_first=True)
        self.linear = nn.Linear(args.unit_dim, args.output_dim)

    def forward(self, x):
        # x: [batch_size, C, H, W]
        c_out = self.cnn(x)
        batch_size, C, H, W = c_out.size()
        # Use the width of the CNN feature map as the time axis for the LSTM
        timesteps = W
        r_in = c_out.view(batch_size, timesteps, -1)
        # Initialize hidden and cell states with zeros; they are passed in
        # without requiring gradients, so nothing flows into the initial states
        h0 = torch.zeros(args.layer_dim, r_in.size(0), args.unit_dim).cuda()
        c0 = torch.zeros(args.layer_dim, r_in.size(0), args.unit_dim).cuda()
        r_out, (h_n, c_n) = self.rnn(r_in, (h0, c0))
        r_out2 = self.linear(r_out[:, -1, :])
        
        return F.log_softmax(r_out2, dim=1)
        #return r_out2



model = Combine()
if args.cuda:
    model.cuda()

optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
# Note: criterion is defined but never used below; the training loop calls
# F.nll_loss directly, which is the correct pairing with the model's
# log_softmax output (nn.CrossEntropyLoss would apply log_softmax a second time).
criterion = nn.CrossEntropyLoss()
#optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
def train(epoch):
    model.train()
    correct = 0
    for batch_idx, (data, target,length) in enumerate(train_loader):
        
        # Add a channel dimension: [B, H, W] -> [B, 1, H, W]
        data = np.expand_dims(data, axis=1)
        data = torch.FloatTensor(data)
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        
        output = model(data)
    
        # Targets are one-hot encoded, so convert them to class indices for NLL loss
        loss = F.nll_loss(output, torch.max(target.data, 1)[1])
        
        loss.backward()
        optimizer.step()
        
        
        pred = output.data.max(
            1, keepdim=True)[1]  # get the index of the max log-probability
        print('prediction: ', pred)
        print('target: ', target.data.max(1, keepdim=True)[1])
        correct += (pred.cpu() == torch.max(target.data, 1)[1].cpu()).sum()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))


def test():
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    for data, target, length in validation_loader:
        
        data = np.expand_dims(data, axis=1)
        data = torch.FloatTensor(data)
        print(target.size())
        
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        output = model(data)
        test_loss += F.nll_loss(
            output, torch.max(target.data, 1)[1],
            reduction='sum').item()  # sum up batch loss
        pred = output.data.max(
            1, keepdim=True)[1]  # get the index of the max log-probability
        total += target.size(0)
        correct += (pred.cpu() == target.data.max(1, keepdim=True)[1].cpu()).sum()
        
        print(pred)
        print( torch.max(target.data, 1)[1])
    test_loss /= len(validation_loader.dataset)
    print(
        '\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct, total,
            100. * correct / total))
    torch.save({
            'epoch': args.epochs,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': test_loss,
            }, 'drive/My Drive/weights.pth')


for epoch in range(1, args.epochs + 1):
    train(epoch)
    test()

And here’s the output:

prediction:  tensor([[4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4]], device='cuda:0')
target:  tensor([[ 5],
        [ 6],
        [14],
        [ 3],
        [ 6],
        [ 9],
        [10],
        [12],
        [ 3],
        [12],
        [12],
        [12],
        [ 6],
        [ 6],
        [ 5],
        [10],
        [ 3],
        [ 1],
        [13],
        [ 5],
        [11],
        [ 6],
        [ 2],
        [ 1],
        [ 0],
        [ 5],
        [ 5],
        [ 0],
        [ 6]], device='cuda:0')
Train Epoch: 1 [0/148 (0%)]	Loss: 2.725649
prediction:  (all 29 predictions are 4 again, identical to the first batch)
target:  tensor([[ 7],
        [ 5],
        [ 1],
        [ 9],
        [13],
        [11],
        [ 0],
        [ 9],
        [ 4],
        [ 3],
        [10],
        [ 1],
        [10],
        [ 2],
        [12],
        [ 1],
        [ 9],
        [ 0],
        [10],
        [ 8],
        [13],
        [ 7],
        [ 8],
        [ 9],
        [ 4],
        [ 9],
        [13],
        [ 7],
        [11]], device='cuda:0')
Train Epoch: 1 [29/148 (20%)]	Loss: 2.724051
prediction:  (all 29 predictions are 4 again, identical to the first batch)
target:  tensor([[ 7],
        [ 9],
        [ 5],
        [ 3],
        [ 7],
        [ 0],
        [13],
        [13],
        [ 2],
        [ 7],
        [14],
        [ 2],
        [11],
        [ 7],
        [14],
        [14],
        [13],
        [ 2],
        [ 8],
        [ 9],
        [ 4],
        [11],
        [ 0],
        [ 4],
        [12],
        [ 6],
        [14],
        [13],
        [ 2]], device='cuda:0')
Train Epoch: 1 [58/148 (40%)]	Loss: 2.707235
prediction:  (all 29 predictions are 4 again, identical to the first batch)
target:  tensor([[ 2],
        [14],
        [ 8],
        [11],
        [ 6],
        [ 7],
        [ 4],
        [14],
        [ 8],
        [ 9],
        [12],
        [ 2],
        [ 3],
        [ 2],
        [ 3],
        [ 1],
        [10],
        [12],
        [ 0],
        [ 3],
        [12],
        [10],
        [10],
        [ 1],
        [ 4],
        [13],
        [ 8],
        [ 8],
        [12]], device='cuda:0')
Train Epoch: 1 [87/148 (60%)]	Loss: 2.686659

Can anyone please tell me what’s wrong with my code?

Could you print the shape of r_out before passing it to the linear layer?

r_out is: (tensor([[[ 0.0263, -0.0128, -0.0243,  ..., -0.0243, -0.0108,  0.0277],
     [ 0.0374, -0.0202, -0.0350,  ..., -0.0389, -0.0158,  0.0412],
     [ 0.0428, -0.0245, -0.0397,  ..., -0.0464, -0.0181,  0.0478],
     ...,
     [ 0.0823, -0.0406, -0.0463,  ..., -0.0200, -0.0264,  0.0489],
     [ 0.0881, -0.0432, -0.0463,  ..., -0.0086, -0.0246,  0.0496],
     [ 0.0923, -0.0446, -0.0466,  ...,  0.0030, -0.0218,  0.0502]],

    [[ 0.0264, -0.0130, -0.0243,  ..., -0.0244, -0.0109,  0.0279],
     [ 0.0375, -0.0203, -0.0352,  ..., -0.0390, -0.0160,  0.0412],
     [ 0.0429, -0.0244, -0.0396,  ..., -0.0464, -0.0184,  0.0474],
     ...,
     [ 0.0835, -0.0419, -0.0457,  ..., -0.0199, -0.0265,  0.0497],
     [ 0.0886, -0.0442, -0.0456,  ..., -0.0093, -0.0238,  0.0505],
     [ 0.0927, -0.0456, -0.0455,  ...,  0.0015, -0.0202,  0.0511]],

    [[ 0.0263, -0.0127, -0.0244,  ..., -0.0243, -0.0107,  0.0278],
     [ 0.0376, -0.0198, -0.0356,  ..., -0.0390, -0.0157,  0.0411],
     [ 0.0433, -0.0240, -0.0403,  ..., -0.0463, -0.0180,  0.0476],
     ...,
     [ 0.0835, -0.0395, -0.0475,  ..., -0.0185, -0.0253,  0.0494],
     [ 0.0876, -0.0420, -0.0475,  ..., -0.0072, -0.0233,  0.0504],
     [ 0.0913, -0.0440, -0.0485,  ...,  0.0036, -0.0202,  0.0512]],

    ...,

    [[ 0.0265, -0.0130, -0.0243,  ..., -0.0241, -0.0107,  0.0279],
     [ 0.0379, -0.0203, -0.0350,  ..., -0.0385, -0.0157,  0.0414],
     [ 0.0434, -0.0246, -0.0397,  ..., -0.0455, -0.0183,  0.0477],
     ...,
     [ 0.0823, -0.0416, -0.0494,  ..., -0.0207, -0.0257,  0.0505],
     [ 0.0871, -0.0436, -0.0497,  ..., -0.0094, -0.0233,  0.0528],
     [ 0.0911, -0.0451, -0.0493,  ...,  0.0019, -0.0201,  0.0527]],

    [[ 0.0265, -0.0133, -0.0241,  ..., -0.0238, -0.0101,  0.0279],
     [ 0.0376, -0.0205, -0.0352,  ..., -0.0381, -0.0145,  0.0416],
     [ 0.0432, -0.0249, -0.0397,  ..., -0.0453, -0.0169,  0.0482],
     ...,
     [ 0.0844, -0.0405, -0.0468,  ..., -0.0213, -0.0263,  0.0502],
     [ 0.0888, -0.0421, -0.0473,  ..., -0.0109, -0.0249,  0.0511],
     [ 0.0924, -0.0438, -0.0476,  ...,  0.0009, -0.0214,  0.0524]],

    [[ 0.0263, -0.0129, -0.0245,  ..., -0.0243, -0.0108,  0.0277],
     [ 0.0375, -0.0202, -0.0354,  ..., -0.0389, -0.0158,  0.0410],
     [ 0.0428, -0.0243, -0.0399,  ..., -0.0466, -0.0182,  0.0472],
     ...,
     [ 0.0829, -0.0420, -0.0473,  ..., -0.0196, -0.0278,  0.0496],
     [ 0.0871, -0.0443, -0.0473,  ..., -0.0086, -0.0253,  0.0511],
     [ 0.0909, -0.0455, -0.0477,  ...,  0.0026, -0.0223,  0.0528]]],
   device='cuda:0', grad_fn=<CudnnRnnBackward>), (tensor([[[ 0.0200,  0.6911, -0.6832,  ..., -0.0039,  0.3721,  0.2176],
     [ 0.0397,  0.7053, -0.6309,  ...,  0.0570,  0.3670,  0.2009],
     [ 0.0348,  0.6988, -0.6598,  ...,  0.0264,  0.3939,  0.1875],
     ...,
     [ 0.0281,  0.6888, -0.5802,  ...,  0.0085,  0.4267,  0.2134],
     [ 0.0193,  0.6896, -0.6339,  ...,  0.0306,  0.3935,  0.1763],
     [ 0.0362,  0.6898, -0.6385,  ...,  0.0406,  0.3817,  0.1875]],

    [[-0.1313, -0.0347,  0.1207,  ..., -0.1216, -0.0032,  0.1396],
     [-0.1303, -0.0374,  0.1208,  ..., -0.1178, -0.0191,  0.1357],
     [-0.1278, -0.0331,  0.1149,  ..., -0.1220, -0.0053,  0.1441],
     ...,
     [-0.1424, -0.0219,  0.1096,  ..., -0.1120, -0.0118,  0.1349],
     [-0.1239, -0.0300,  0.1122,  ..., -0.1167, -0.0069,  0.1394],
     [-0.1242, -0.0261,  0.1106,  ..., -0.1144, -0.0133,  0.1327]],

    [[ 0.0923, -0.0446, -0.0466,  ...,  0.0030, -0.0218,  0.0502],
     [ 0.0927, -0.0456, -0.0455,  ...,  0.0015, -0.0202,  0.0511],
     [ 0.0913, -0.0440, -0.0485,  ...,  0.0036, -0.0202,  0.0512],
     ...,
     [ 0.0911, -0.0451, -0.0493,  ...,  0.0019, -0.0201,  0.0527],
     [ 0.0924, -0.0438, -0.0476,  ...,  0.0009, -0.0214,  0.0524],
     [ 0.0909, -0.0455, -0.0477,  ...,  0.0026, -0.0223,  0.0528]]],
   device='cuda:0', grad_fn=<CudnnRnnBackward>), tensor([[[ 3.5384e-01,  1.0493e+00, -2.2504e+00,  ..., -8.9872e-03,
       9.8480e-01,  1.0078e+00],
     [ 5.4717e-01,  1.0476e+00, -2.1838e+00,  ...,  1.3820e-01,
       9.4552e-01,  1.0322e+00],
     [ 6.0499e-01,  1.0538e+00, -2.4513e+00,  ...,  6.8737e-02,
       1.0223e+00,  9.5667e-01],
     ...,
     [ 5.0454e-01,  1.0370e+00, -2.2494e+00,  ...,  2.2548e-02,
       9.9235e-01,  1.0062e+00],
     [ 2.7099e-01,  1.0496e+00, -2.2780e+00,  ...,  8.2996e-02,
       9.9398e-01,  9.6303e-01],
     [ 4.7176e-01,  1.0196e+00, -2.3526e+00,  ...,  9.2828e-02,
       9.9370e-01,  1.0548e+00]],

    [[-2.7644e-01, -7.5817e-02,  3.0098e-01,  ..., -2.6721e-01,
      -7.3597e-03,  3.1150e-01],
     [-2.7400e-01, -8.2529e-02,  3.0173e-01,  ..., -2.5754e-01,
      -4.3918e-02,  3.0245e-01],
     [-2.6914e-01, -7.1011e-02,  2.8584e-01,  ..., -2.6781e-01,
      -1.2103e-02,  3.2070e-01],
     ...,
     [-2.9461e-01, -4.7481e-02,  2.6912e-01,  ..., -2.4372e-01,
      -2.7129e-02,  3.0116e-01],
     [-2.5839e-01, -6.4935e-02,  2.7611e-01,  ..., -2.5553e-01,
      -1.5848e-02,  3.0867e-01],
     [-2.6162e-01, -5.6842e-02,  2.7326e-01,  ..., -2.5025e-01,
      -3.0504e-02,  2.9524e-01]],

    [[ 1.8682e-01, -8.5577e-02, -8.7757e-02,  ...,  6.2227e-03,
      -4.3620e-02,  1.0997e-01],
     [ 1.8689e-01, -8.7545e-02, -8.5590e-02,  ...,  3.0909e-03,
      -4.0423e-02,  1.1175e-01],
     [ 1.8515e-01, -8.4282e-02, -9.1440e-02,  ...,  7.4527e-03,
      -4.0361e-02,  1.1215e-01],
     ...,
     [ 1.8369e-01, -8.6465e-02, -9.2885e-02,  ...,  3.8705e-03,
      -4.0138e-02,  1.1528e-01],
     [ 1.8638e-01, -8.3885e-02, -8.9513e-02,  ...,  1.8936e-03,
      -4.2861e-02,  1.1478e-01],
     [ 1.8395e-01, -8.7310e-02, -9.0022e-02,  ...,  5.3197e-03,
      -4.4569e-02,  1.1564e-01]]], device='cuda:0',
   grad_fn=<CudnnRnnBackward>)))

I meant the shape of the tensor.
You can get it using print(r_out.shape).
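For example, right before the linear layer in your Combine.forward:

    r_out, (h_n, c_n) = self.rnn(r_in, (h0, c0))
    print(r_out.shape)  # torch.Size([batch, timesteps, hidden_size])
    r_out2 = self.linear(r_out[:, -1, :])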

Aha, I’m sorry!
r_out shape is: torch.Size([29, 44, 103])

I assume the number of input features to the linear layer is 103.
In that case, your output is not being broadcast: slicing r_out[:, -1, :] gives [29, 103], the linear layer maps that to [29, num_classes], and taking the argmax with keepdim=True yields the [29, 1] predictions you see. Apparently your model is currently overfitting to class 4 and predicting it for every sample.

Aha, thank you so much! Then I should add dropout to avoid the overfitting, right?

It might help.
However, this also looks as if you are dealing with an imbalanced dataset. Is that the case, i.e. do you have a lot more class 4 samples than the others?
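If the dataset does turn out to be imbalanced, one common remedy is to weight the loss by inverse class frequency. A minimal sketch, assuming 15 classes and placeholder counts (use the real per-class counts from your dataset):

    import torch
    import torch.nn.functional as F

    # Hypothetical per-class sample counts; replace with your real ones
    counts = torch.tensor([10.] * 15)
    class_weights = counts.sum() / (len(counts) * counts)

    # Dummy log-probabilities and integer targets, shaped like your batches
    log_probs = torch.log_softmax(torch.randn(29, 15), dim=1)
    targets = torch.randint(0, 15, (29,))

    # F.nll_loss pairs with log_softmax output and accepts per-class weights
    loss = F.nll_loss(log_probs, targets, weight=class_weights)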

I’m dealing with a video dataset; each class has 10 observations.