ValueError: Expected input batch_size (324) to match target batch_size (4)

Hi.
The error is resolved, but now I am getting only 0 as the output.
my model is:
class Net(nn.Module):
    """Two-conv CNN for binary classification of (batch, 1, 67, 50) inputs.

    Reconstructed from the forum paste, which replaced the code fence with
    curly quotes and flattened the class-body indentation.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.mp = nn.MaxPool2d(2)
        # 20 channels * 13 * 9 spatial after two conv+pool stages on 67x50 input.
        self.fc = nn.Linear(2340, 2)

    def forward(self, x):
        in_size = x.size(0)  # batch size, reused for the flatten below
        x = F.relu(self.mp(self.conv1(x.float())))
        x = F.relu(self.mp(self.conv2(x.float())))
        x = x.view(in_size, -1)  # flatten to (batch, 2340)
        x = self.fc(x)
        return F.log_softmax(x, dim=1)

I don’t think this could happen as the probabilities for both classes have to sum to 1.. Both log probabilities thus cannot be zero as seen here:

model = Net()
x = torch.randn(1, 1, 67, 50)
out = model(x)
print(out)
# > tensor([[-0.5469, -0.8645]], grad_fn=<LogSoftmaxBackward0>)
print(torch.exp(out))
# > tensor([[0.5787, 0.4213]], grad_fn=<ExpBackward0>)

yes and during testing i am getting only zeros as output(output class can be 0 or 1)
# Evaluation loop (reconstructed: the forum paste lost its code fence and
# indentation).  `model` and `test_loader` are defined elsewhere in the script.
preds = []
with torch.no_grad():
    correct = 0
    total = 0
    for i, batch in enumerate(test_loader):
        # batch = tuple(t.to(device) for t in batch)
        batch = tuple(batch)
        b_input_ids, b_labels = batch

        # NOTE(review): [None, ...] prepends a batch dimension; this is only
        # correct if b_input_ids is a single unbatched sample — confirm the
        # loader's batch size before relying on it.
        outputs = model(b_input_ids[None, ...])
        prediction = torch.argmax(outputs, dim=1)
        print(prediction, " correct=", b_labels)

        preds.append(prediction)
        total += b_labels.size(0)
        correct += (prediction == b_labels).sum().item()

it is printing-

tensor([0]) correct= tensor([1])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([1])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([1])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([0])
tensor([0]) correct= tensor([1])
tensor([0]) correct= tensor([0])…

Your model seems to be overfitting to the majority class so you could use a weighted loss or e.g. WeightedRandomSampler to balance the data.

Hi sir, please help me with this error:

runcell(0, ‘C:/Users/Ioana PMEC/Documents/TIDAIM/lab6/cnn2 (1).py’)
cpu
Traceback (most recent call last):

File “C:\Users\Ioana PMEC\Documents\TIDAIM\lab6\cnn2 (1).py”, line 136, in
loss = criterion(outputs, targets)

File “C:\Anaconda\lib\site-packages\torch\nn\modules\module.py”, line 1102, in _call_impl
return forward_call(*input, **kwargs)

File “C:\Anaconda\lib\site-packages\torch\nn\modules\loss.py”, line 1150, in forward
return F.cross_entropy(input, target, weight=self.weight,

File “C:\Anaconda\lib\site-packages\torch\nn\functional.py”, line 2846, in cross_entropy
return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)

ValueError: Expected input batch_size (30) to match target batch_size (10).

The code I am using is:

if __name__ == '__main__':
  import torch
  import torchvision
  import torchvision.transforms as transforms
  import matplotlib.pyplot as plt
  import numpy as np
  import torch.nn as nn
  import torch.nn as nn
  import torchvision.transforms as ToTensor 
  from torch.utils.data import DataLoader
  from torch import optim
  from torch.utils.data import Dataset
  from torchvision import datasets
  from torch.autograd import Variable
  import torch.nn.functional as F
  import torch.optim as optim
  import pandas as pd
  import os 
  import csv
  from csv import DictReader
  from sklearn.utils import shuffle
  import matplotlib.pyplot as plt
  from torch.utils.data import Dataset, DataLoader
  from sklearn.model_selection import train_test_split
  import torch.utils.data as data
  
  
#path = r"E:\Andreea\an 2\tidaim\tidaim\tidaim\Benign\data\train"
#filedirectory = []
#for files in os.listdir(path):
#    filedirectorys = filedirectory.append(os.path.join(path,files))
#filedirectory


#dataset = ImageFolder(''E:\Andreea\tidaim\Benign\data\train\')

  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  print(device)
  
  transforms_train = transforms.Compose([transforms.Resize((68, 68)),
                                         transforms.RandomRotation(10.),
                                         transforms.ToTensor()])

  transforms_test = transforms.Compose([transforms.Resize((68, 68)),
                                        transforms.ToTensor()])
            
  
  transforms_validation = transforms.Compose([transforms.Resize((68, 68)),
                                        transforms.ToTensor()])
            
         

          

  BATCH_SIZE = 10
  LEARNING_RATE = 0.1
  TRAIN_DATA_PATH = "./train"
  TEST_DATA_PATH = "./test"
  VALIDATION_DATA_PATH = "./validation"
  TRANSFORM_IMG = transforms.Compose([
      transforms.Resize(128),
      transforms.CenterCrop(256),
      transforms.ToTensor(),
      transforms.Normalize(mean=[0.485, 0.456, 0.406],
                           std=[0.229, 0.224, 0.225] )
      ])
#dataset = ImageFolder(''E:\Andreea\tidaim\Benign\data\train\')
#transform_train = transforms.Compose([
 #       transforms.ToTensor(),
  #  ])

#transform_test = transforms.Compose([
 #       transforms.ToTensor(),
  #  ])
                      
  train_data = torchvision.datasets.ImageFolder(root=TRAIN_DATA_PATH, transform=transforms_train)
  train_data_loader = data.DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True,  num_workers=4)
  test_data = torchvision.datasets.ImageFolder(root=TEST_DATA_PATH, transform=transforms_test)
  test_data_loader  = data.DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=4) 
  validation_data = torchvision.datasets.ImageFolder(root=VALIDATION_DATA_PATH, transform=transforms_validation)
  validation_loader = data.DataLoader(validation_data, batch_size=BATCH_SIZE, shuffle=True,  num_workers=4)

  net = nn.Sequential(nn.Linear(4624, 64),
                      nn.ReLU(),
                      nn.Linear(64, 128),
                      nn.ReLU(),
                      nn.Linear(128,3),
                      )

  class MLP(nn.Module):
      """Two-layer fully connected classifier: 4624 -> n_hidden -> 3 logits."""

      def __init__(self, n_hidden):
          super(MLP, self).__init__()
          # Hidden projection followed by a 3-way output head.
          self.linear1 = torch.nn.Linear(4624, n_hidden)
          self.linear2 = torch.nn.Linear(n_hidden, 3)
          self.activation = torch.nn.ReLU()

      def forward(self, x):
          """Return raw class logits for a flattened (N, 4624) batch."""
          hidden = self.activation(self.linear1(x))
          return self.linear2(hidden)

  net = MLP(64)


  criterion = nn.CrossEntropyLoss()
  optimizer = optim.Adam(net.parameters(), lr=1e-3)
  num_epochs = 50     
        


  # Training loop over the full dataset for `num_epochs` epochs.
  for epoch in range(num_epochs): 
      net.train()
      train_loss=0
      total= 0
      correct = 0
      
      for batch_idx, (inputs, targets) in enumerate(train_data_loader):
             correct =  0 
             
             # BUG (cause of "Expected input batch_size (30) to match target
             # batch_size (10)"): for 3-channel 68x68 images this view folds
             # the channel dimension into the batch dimension (10*3*68*68
             # elements -> 30 rows of 4624).  Flatten with
             # inputs.view(inputs.size(0), -1) instead and adapt the first
             # Linear layer's in_features to match.
             inputs = inputs.view(-1, 4624)
             optimizer.zero_grad()
             outputs = net(inputs)

             loss = criterion(outputs, targets)
             loss.backward()
             optimizer.step()

             train_loss += loss.item()
             _, predicted = torch.max(outputs.data, 1)
             total += targets.size(0)
             # NOTE(review): the accuracy update is commented out and `correct`
             # is reset every batch, so the accuracy printed below is always 0.
             #correct = correct + predicted.eq(targets.data).cpu().sum 

  # Printed once after all epochs (uses the last loop's epoch/batch_idx values).
  print('Results after epoch %d' % (epoch + 1))
  
  print('Training loss: %.3f | Training Acc: %3.f%% )%d%d)'
          % (train_loss / (batch_idx + 1), 100. *float(correct) / total, correct, total))

  # Validation pass: model in eval mode, no parameter updates.
  net.eval()

  valid_loss = 0
  correct = 0
  total = 0 

  for batch_idx, (inputs, targets) in enumerate(validation_loader):
      inputs, targets = Variable(inputs), Variable(targets)
      # NOTE(review): same reshaping concern as the training loop —
      # view(-1, 4624) can change the effective batch size; prefer
      # inputs.view(inputs.size(0), -1) with a matching in_features.
      inputs = inputs.view(-1, 4624)
    
      outputs = net(inputs)
    
      loss = criterion(outputs, targets)
    
      valid_loss += loss.item()
      _, predicted = torch.max(outputs.data, 1)
      total += targets.size(0)
      correct += predicted.eq(targets.data).cpu().sum()
    
  # NOTE(review): `* float(correct)/ total` below is a syntax error (stray `*`)
  # — the format string expects four plain values.
  print('Validation loss: %.3f | Validation Acc: %.3f%% (%d/%d)' 
        %(valid_loss / (batch_idx + 1), * float(correct)/ total, correct, total))
   "

I really don’t know how to solve this issue.

Could you print the shape of input before the view operation as I guess you might be changing the batch size by using view(-1, 4624). If you want to flatten the input tensor use input = input.view(input.size(0), -1) and check if you are running into shape mismatches in the first linear layer. In that case, adapt the in_features of this layer to match the activation shape.

Done it. Thanks a lot

Sir i am new to pytorch.
My code is working fine for batch size=1 but when i am trying to change the batch size to 32 …i get error------>RuntimeError: Given groups=1, weight of size [10, 1, 5, 5], expected input[1, 32, 67, 50] to have 1 channels, but got 32 channels instead

Blockquote

class Net(nn.Module):
    """Two-conv CNN for binary classification; expects (batch, 1, 67, 50) input."""

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.mp = nn.MaxPool2d(2)
        # 20 channels * 13 * 9 spatial = 2340 features for a 67x50 input.
        self.fc = nn.Linear(2340, 2)

    def forward(self, x):
        # FIX: this line was commented out in the post while `in_size` was
        # still used below, which raises NameError; restore the batch-size
        # capture so the flatten works.
        in_size = x.size(0)
        x = F.relu(self.mp(self.conv1(x.float())))
        x = F.relu(self.mp(self.conv2(x.float())))
        x = x.view(in_size, -1)  # flatten the tensor
        x = self.fc(x)
        return F.log_softmax(x, dim=1)

print(X_train.shape,y_train.shape)
print(X_test.shape,y_test.shape)

torch.Size([8054, 67, 50]) torch.Size([8054])
torch.Size([3968, 67, 50]) torch.Size([3968])

Please help me resolve this — I don't know how to change the in_features / out_features values.

In above code i changed conv1 to self.conv1 = nn.Conv2d(32, 10, kernel_size=5)
Now I am getting error →

ValueError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_10968/672008910.py in
21 print(b_labels.shape)
22 b_labels = b_labels.view(batch_size)
—> 23 loss = loss_fn(outputs,b_labels.long())
24
25 #loss =F.nll_loss(outputs,b_labels.long())

~\anaconda3\envs\for_CharBert\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
→ 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []

~\anaconda3\envs\for_CharBert\lib\site-packages\torch\nn\modules\loss.py in forward(self, input, target)
1148
1149 def forward(self, input: Tensor, target: Tensor) → Tensor:
→ 1150 return F.cross_entropy(input, target, weight=self.weight,
1151 ignore_index=self.ignore_index, reduction=self.reduction,
1152 label_smoothing=self.label_smoothing)

~\anaconda3\envs\for_CharBert\lib\site-packages\torch\nn\functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)
2844 if size_average is not None or reduce is not None:
2845 reduction = _Reduction.legacy_get_string(size_average, reduce)
→ 2846 return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
2847
2848

ValueError: Expected input batch_size (1) to match target batch_size (32).

The reported input shape of [1, 32, 67, 50] doesn’t match your use case which is to increase the batch size to 32.
nn.Conv2d layers expect an input in the shape [batch_size, channels, height, width] so dim0 should have the size of 32 not dim1.
I’m also unsure how you are passing the data to the model. [8054, 67, 50] seems to be missing the channel dimension, so you could try to use x = x.unsqueeze(0), pass the data tensor to e.g. a TensorDataset and then to a DataLoader which would create the batches.

Sir i unsqueezed both x & y…
dimensions now are–>
print(X_train.shape,y_train.shape)->torch.Size([1, 8054, 67, 50]) torch.Size([1, 8054])
print(X_test.shape,y_test.shape)-> torch.Size([1, 3968, 67, 50]) torch.Size([1, 3968])

Changed this to->

class Net(nn.Module):
    """CNN variant with a wider first conv (1 -> 32 channels).

    Reconstructed from the forum paste, which flattened the class-body
    indentation.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 20, kernel_size=5)
        self.mp = nn.MaxPool2d(2)
        # 20 * 13 * 9 = 2340 features after two conv+pool stages on 67x50 input.
        self.fc = nn.Linear(2340, 2)

    def forward(self, x):
        in_size = x.size(0)
        x = F.relu(self.mp(self.conv1(x.float())))
        x = F.relu(self.mp(self.conv2(x.float())))
        x = x.view(in_size, -1)  # flatten to (batch, 2340)
        x = self.fc(x)
        return F.log_softmax(x, dim=1)

Now error is

RuntimeError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_1468/1708038120.py in
17 optimizer.zero_grad()
18
—> 19 outputs = model(b_input_ids[None, …])
20 print(b_input_ids.shape)
21 print(b_labels.shape)

~\anaconda3\envs\for_CharBert\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
→ 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []

~\AppData\Local\Temp/ipykernel_1468/3913028989.py in forward(self, x)
10 def forward(self, x):
11 in_size = x.size(0)
—> 12 x = F.relu(self.mp(self.conv1(x.float())))
13 x = F.relu(self.mp(self.conv2(x.float())))
14 x = x.view(in_size, -1) # flatten the tensor

~\anaconda3\envs\for_CharBert\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
→ 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []

~\anaconda3\envs\for_CharBert\lib\site-packages\torch\nn\modules\conv.py in forward(self, input)
444
445 def forward(self, input: Tensor) → Tensor:
→ 446 return self._conv_forward(input, self.weight, self.bias)
447
448 class Conv3d(_ConvNd):

~\anaconda3\envs\for_CharBert\lib\site-packages\torch\nn\modules\conv.py in _conv_forward(self, input, weight, bias)
440 weight, bias, self.stride,
441 _pair(0), self.dilation, self.groups)
→ 442 return F.conv2d(input, weight, bias, self.stride,
443 self.padding, self.dilation, self.groups)
444

RuntimeError: Expected 4-dimensional input for 4-dimensional weight [32, 1, 5, 5], but got 5-dimensional input of size [1, 1, 8054, 67, 50] instead

On changing line —> 19 outputs = model(b_input_ids[None, …])
to —> 19 outputs = model(b_input_ids)

This error appears->

RuntimeError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_1468/693832708.py in
17 optimizer.zero_grad()
18
—> 19 outputs = model(b_input_ids)
20 print(b_input_ids.shape)
21 print(b_labels.shape)

~\anaconda3\envs\for_CharBert\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
→ 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []

~\AppData\Local\Temp/ipykernel_1468/3913028989.py in forward(self, x)
10 def forward(self, x):
11 in_size = x.size(0)
—> 12 x = F.relu(self.mp(self.conv1(x.float())))
13 x = F.relu(self.mp(self.conv2(x.float())))
14 x = x.view(in_size, -1) # flatten the tensor

~\anaconda3\envs\for_CharBert\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
→ 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []

~\anaconda3\envs\for_CharBert\lib\site-packages\torch\nn\modules\conv.py in forward(self, input)
444
445 def forward(self, input: Tensor) → Tensor:
→ 446 return self._conv_forward(input, self.weight, self.bias)
447
448 class Conv3d(_ConvNd):

~\anaconda3\envs\for_CharBert\lib\site-packages\torch\nn\modules\conv.py in _conv_forward(self, input, weight, bias)
440 weight, bias, self.stride,
441 _pair(0), self.dilation, self.groups)
→ 442 return F.conv2d(input, weight, bias, self.stride,
443 self.padding, self.dilation, self.groups)
444

RuntimeError: Given groups=1, weight of size [32, 1, 5, 5], expected input[1, 8054, 67, 50] to have 1 channels, but got 8054 channels instead

In my previous post the code contains a mistake, as you should unsqueeze dim1 (not dim0 as you already have the batch dimension) so change it to x = x.unsqueeze(1).

PS: you can post code snippets by wrapping them into three backticks ```, which would make debugging easier.

1 Like

Data Generator & augmentation

# Data generator & augmentation (Keras).  The forum software replaced the
# straight quotes with curly ones, breaking the syntax; restored here.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
IMAGE_SIZE = (256, 256, 3)
IMAGE_SIZE[:-1]  # (height, width) — spatial dims without the channel count

train_ds = datagen.flow_from_dataframe(dataframe=df_train,
                                       directory='Images',
                                       x_col='Image_name',
                                       y_col='Plane',
                                       subset='training',
                                       class_mode='categorical',
                                       color_mode='grayscale',
                                       seed=43,
                                       target_size=IMAGE_SIZE[:-1],
                                       batch_size=32)

valid_ds = datagen.flow_from_dataframe(dataframe=df_train,
                                       directory='Images',
                                       x_col='Image_name',
                                       y_col='Plane',
                                       subset='validation',
                                       class_mode='categorical',
                                       color_mode='grayscale',
                                       seed=43,
                                       target_size=IMAGE_SIZE[:-1],
                                       batch_size=32)

# Test split has no labels: class_mode=None.
test_ds = datagen.flow_from_dataframe(dataframe=df_test,
                                      directory='Images',
                                      x_col='Image_name',
                                      class_mode=None,
                                      color_mode='grayscale',
                                      target_size=IMAGE_SIZE[:-1],
                                      batch_size=32)
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
class FeedforwardNeuralNetModel(nn.Module):
    """Simple MLP: Linear -> Sigmoid -> Linear.

    NOTE(review): in the original post the dunder underscores were eaten by
    the forum markdown (`def init` / `super().init()`); they are restored
    here.  The first Linear layer also hard-coded 2562563 — the mangled form
    of `256*256*3` — instead of using the constructor arguments; wiring it to
    `input_dim`/`hidden_dim` keeps the call site's behaviour while fixing the
    mangling.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super(FeedforwardNeuralNetModel, self).__init__()
        # Linear function
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        # Non-linearity
        self.sigmoid = nn.Sigmoid()
        # Linear function (readout)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # Linear function  # LINEAR
        out = self.fc1(x)
        # Non-linearity  # NON-LINEAR
        out = self.sigmoid(out)
        # Linear function (readout)  # LINEAR
        out = self.fc2(out)
        return out

# Model hyperparameters.  `input_dim` was mangled by the forum markdown
# (asterisks eaten): 2562563 is `256*256*3`, a flattened 256x256 RGB image.
input_dim = 256*256*3
hidden_dim = 100
output_dim = 10

model = FeedforwardNeuralNetModel(input_dim, hidden_dim, output_dim)
criterion = nn.CrossEntropyLoss()  # multi-class cross-entropy loss
learning_rate = 0.1

# Plain stochastic gradient descent over all model parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
print(model.parameters())
print(len(list(model.parameters())))

FC 1 Parameters

print(list(model.parameters())[0].size())

FC 1 Bias Parameters

print(list(model.parameters())[1].size())

FC 2 Parameters

print(list(model.parameters())[2].size())

FC 2 Bias Parameters

print(list(model.parameters())[3].size())
# Training loop (indentation reconstructed — the forum paste flattened it).
iter = 0
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # BUG (cause of "Expected input batch_size (3200) to match target
        # batch_size (100)"): view(-1, 256*256) folds extra dimensions into
        # the batch dimension.  Flatten with images.view(images.size(0), -1)
        # instead and make the model's first layer match that feature count.
        images = images.view(-1, 256*256).requires_grad_()

        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()

        # Forward pass to get output/logits
        outputs = model(images)

        # Calculate Loss: softmax --> cross entropy loss
        loss = criterion(outputs, labels)

        # Getting gradients w.r.t. parameters
        loss.backward()

        # Updating parameters
        optimizer.step()

        iter += 1

        if iter % 500 == 0:
            # Calculate accuracy over the full test set.
            correct = 0
            total = 0
            for images, labels in test_loader:
                # Same (suspect) flattening as the training pass above.
                images = images.view(-1, 256*256).requires_grad_()

                # Forward pass only to get logits/output
                outputs = model(images)

                # Get predictions from the maximum value
                _, predicted = torch.max(outputs.data, 1)

                # Total number of labels
                total += labels.size(0)

                # Total correct predictions
                correct += (predicted == labels).sum()

            accuracy = 100 * correct / total

            # Print loss
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))

This is my sample code, and I am getting the error: ValueError: Expected input batch_size (3200) to match target batch_size (100).

I would guess this view operation is wrong:

images = images.view(-1, 256*256)

as was already discussed in previous posts in this topic, e.g. here.

PS: you can post code snippets by wrapping them into three backticks ```, which makes debugging easier.

Hi Sir, I Have a similar error but the .py files are different:
the code has been taken from GitHub: GitHub - Leo-Q-316/ImGAGN: Imbalanced Network Embedding vi aGenerative Adversarial Graph Networks
and the data has been transformed as described on GitHub. The only problem I faced was with the features.cora part.

so I have this error in the following code: ValueError: Expected input batch_size (18) to match target batch_size (15).

from __future__ import division
from __future__ import print_function

import time
import argparse
import numpy as np
import scipy.sparse as sp
import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

from utils import load_data, accuracy, add_edges
from models import GCN
from models import Generator

# Training settings
parser = argparse.ArgumentParser()
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='Disables CUDA training.')
parser.add_argument('--fastmode', action='store_true', default=False,
                    help='Validate during training pass.')
parser.add_argument('--seed', type=int, default=42, help='Random seed.')
parser.add_argument('--epochs', type=int, default=100,
                    help='Number of epochs to train.')
parser.add_argument('--hidden', type=int, default=128,
                    help='Number of hidden units.')
parser.add_argument('--dropout', type=float, default=0.5,
                    help='Dropout rate (1 - keep probability).')
parser.add_argument('--epochs_gen', type=int, default=10,
                    help='Number of epochs to train for gen.')
parser.add_argument('--ratio_generated', type=float, default=1,
                    help='ratio of generated nodes.')
parser.add_argument('--dataset', choices=['cora', 'citeseer','pubmed', 'dblp', 'wiki'], default='cora')

args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()


np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

dataset = args.dataset
path = "../Dataset/" + dataset+"/"

if dataset=='wiki':
    num = 3
else:
    num = 10

# Specfic Parameters to get the best result
if dataset=='wiki':
    lr=0.001
elif dataset=='dblp':
    lr=0.0009
else:
    lr=0.01

if dataset == 'cora':
    weight_decay = 0.0008
elif dataset == 'citeseer':
    weight_decay = 0.0005
elif dataset == 'pubmed':
    weight_decay = 0.00008
elif dataset == 'dblp':
    weight_decay = 0.003
elif dataset == 'wiki':
    weight_decay = 0.0005



def train(features, adj):
    """One ImGAGN training step for the GCN discriminator.

    Relies on module-level globals (model, optimizer, args, labels, idx_train,
    idx_val, idx_test, minority, majority, num_real, num_false) and updates
    the best-so-far test metrics in place.
    """
    global max_recall, test_recall, test_f1, test_AUC, test_acc, test_pre
    model.train()
    optimizer.zero_grad()
    output, output_gen, output_AUC = model(features, adj)
    # Real/fake labels for the generator head.  NOTE(review): this tensor has
    # num_real + num_false entries and is paired with output_gen[idx_train]
    # below — if len(idx_train) differs, F.nll_loss raises the
    # "Expected input batch_size ... to match target batch_size" error seen in
    # this thread; verify num_real/num_false against idx_train.
    labels_true = torch.cat((torch.LongTensor(num_real).fill_(0), torch.LongTensor(num_false).fill_(1)))

    if args.cuda:
        labels_true=labels_true.cuda()

    # Push minority and majority embeddings apart (negated mean distance).
    loss_dis = - euclidean_dist(features[minority], features[majority]).mean()
    loss_train = F.nll_loss(output[idx_train], labels[idx_train]) \
                 + F.nll_loss(output_gen[idx_train], labels_true) \
                +loss_dis

    loss_train.backward()
    optimizer.step()


    if not args.fastmode:
        model.eval()
        output, output_gen, output_AUC = model(features, adj)


    recall_val, f1_val, AUC_val, acc_val, pre_val = accuracy(output[idx_val], labels[idx_val], output_AUC[idx_val])
    # NOTE(review): the "train" metrics are also computed from idx_val —
    # looks like a copy/paste slip in the original; confirm intent.
    recall_train, f1_train, AUC_train, acc_train, pre_train = accuracy(output[idx_val], labels[idx_val], output_AUC[idx_val])

    # Track test metrics at the best (recall + accuracy)/2 seen on validation.
    if max_recall < (recall_val + acc_val)/2:
        output, output_gen, output_AUC = model(features, adj)
        recall_tmp, f1_tmp, AUC_tmp, acc_tmp, pre_tmp = accuracy(output[idx_test], labels[idx_test], output_AUC[idx_test])
        test_recall = recall_tmp
        test_f1 = f1_tmp
        test_AUC = AUC_tmp
        test_acc = acc_tmp
        test_pre = pre_tmp
        max_recall = (recall_val + acc_val)/2

    return recall_val, f1_val, acc_val, recall_train, f1_train, acc_train


def euclidean_dist(x, y):
    """Pairwise Euclidean distances between the rows of x (m, d) and y (n, d).

    Returns an (m, n) tensor where entry (i, j) is ||x_i - y_j||_2, clamped
    at sqrt(1e-12) for numerical stability.
    """
    m, n = x.size(0), y.size(0)
    xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, n)
    yy = torch.pow(y, 2).sum(1, keepdim=True).expand(n, m).t()
    dist = xx + yy
    # FIX: addmm_(beta, alpha, mat1, mat2) with positional beta/alpha is
    # deprecated and removed in current PyTorch; use keyword arguments.
    # Computes dist = 1*dist + (-2) * (x @ y.t()), i.e. ||a||^2 + ||b||^2 - 2ab.
    dist.addmm_(x, y.t(), beta=1, alpha=-2)
    dist = dist.clamp(min=1e-12).sqrt()  # for numerical stability
    return dist


# ratio_arr = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
# for ratio in ratio_arr:
adj, adj_real, features, labels, idx_temp, idx_test, generate_node, minority, majority, minority_all = load_data(args.ratio_generated, path=path, dataset=dataset)
# Model and optimizer
model = GCN(nfeat=features.shape[1],
    nhid=args.hidden,
    nclass=labels.max().item() + 1,
    dropout=args.dropout,
    generate_node= generate_node,
    min_node = minority)
optimizer = optim.Adam(model.parameters(),lr=lr, weight_decay=weight_decay)

# num_real = features.shape[0]
num_false = labels.shape[0]- features.shape[0] #diff bw lengths of first row

print(num_false)
model_generator = Generator(minority_all.shape[0])
optimizer_G = torch.optim.Adam(model_generator.parameters(),
                       lr=lr, weight_decay=weight_decay)

max_recall = 0
test_recall = 0
test_f1 = 0
test_AUC = 0
test_acc=0
test_pre =0

if args.cuda:
    model.cuda()
    features = features.cuda()
    adj = adj.cuda()
    labels = labels.cuda()
    idx_temp = idx_temp.cuda()
    idx_test = idx_test.cuda()
    model_generator.cuda()


for epoch_gen in range(args.epochs_gen):
    part = epoch_gen % num
    range_val_maj = range(int(part*len(majority)/num), int((part+1)*len(majority)/num))
    range_val_min = range(int(part * len(minority) / num), int((part + 1) * len(minority) / num))

    range_train_maj = list(range(0,int(part*len(majority)/num)))+ list(range(int((part+1)*len(majority)/num),len(majority)))
    range_train_min = list(range(0,int(part*len(minority)/num)))+ list(range(int((part+1)*len(minority)/num),len(minority)))

    idx_val = torch.cat((majority[range_val_maj], minority[range_val_min]))
    idx_train = torch.cat((majority[range_train_maj], minority[range_train_min]))
    idx_train = torch.cat((idx_train, generate_node))
    num_real = features.shape[0] - len(idx_test) -len(idx_val)

    # Train model
    model_generator.train()
    optimizer_G.zero_grad()
    z = Variable(torch.FloatTensor(np.random.normal(0, 1, (generate_node.shape[0], 100))))
    if args.cuda:
        z=z.cuda()

    adj_min = model_generator(z)
    gen_imgs1 = torch.mm(F.softmax(adj_min[:,0:minority.shape[0]], dim=1), features[minority])
    gen_imgs1_all = torch.mm(F.softmax(adj_min, dim=1), features[minority_all])

    matr = F.softmax(adj_min[:,0:minority.shape[0]], dim =1).data.cpu().numpy()
    pos=np.where(matr>1/matr.shape[1])
    adj_temp = sp.coo_matrix((np.ones(pos[0].shape[0]),(generate_node[pos[0]].numpy(), minority_all[pos[1]].numpy())),
                             shape=(labels.shape[0], labels.shape[0]),
                             dtype=np.float32)

    adj_new = add_edges(adj_real, adj_temp)
    if args.cuda:
        adj_new=adj_new.cuda()

    t_total = time.time()
    # model.eval()
    output, output_gen, output_AUC = model(torch.cat((features, gen_imgs1.data),0), adj)

    labels_true = torch.LongTensor(num_false).fill_(0)
   
    labels_min = torch.LongTensor(num_false).fill_(1)
    if args.cuda:
        labels_true = labels_true.cuda()
        labels_min = labels_min.cuda()

    g_loss = F.nll_loss(output_gen[generate_node], labels_true) \
             + F.nll_loss(output[generate_node], labels_min) \
             + euclidean_dist(features[minority], gen_imgs1).mean()

    print(output[idx_train].size())
    print(labels[idx_train].size())
    print(output_gen[idx_train].size())
    print(labels_true.size()) 
    print(num_false)
    print(num_real)   
         
    g_loss.backward()
    optimizer_G.step()

    for epoch in range(args.epochs):
        recall_val, f1_val, acc_val, recall_train, f1_train, acc_train = train(torch.cat((features, gen_imgs1.data.detach()),0), adj_new)
    print("Epoch:", '%04d' % (epoch_gen + 1),
          "train_recall=", "{:.5f}".format(recall_train), "train_f1=", "{:.5f}".format(f1_train),"train_acc=", "{:.5f}".format(acc_train),
          "val_recall=", "{:.5f}".format(recall_val), "val_f1=", "{:.5f}".format(f1_val),"val_acc=", "{:.5f}".format(acc_val))



print("Test Recall: ", test_recall)
print("Test Accuracy: ", test_acc)
print("Test F1: ", test_f1)
print("Test precision: ", test_pre)
print("Test AUC: ", test_AUC)


I can see that the target_batch size = num_Real+num_false where :
features = sp.csr_matrix(idx_features_labels[:, 0:-1], dtype=np.float32) #reverse order of features cora ndarray
labels = idx_features_labels[:, -1]

PS: this is required for a group project which is due on the 24th, so I would be grateful if you could help out.

Check which line of code is raising the shape mismatch as your current script calculates the loss in a few places. Once isolated, check if the input tensor matches the output tensor in its batch size. If not, then check the model implementation and in particular its forward method to narrow down where the batch size changes. On the other hand, if the input matches the output in the batch size, make sure the target also has the same batch size. If not, check how the target is created and why the batch size is different.

Hi, @ptrblck! I am kinda in the same situation. I am trying a very basic model:

class Conv2DModel(nn.Module):
    """Two 2-D convolutions, max-pool, then two linear layers; returns
    per-class log-probabilities.  Prints intermediate shapes for debugging."""

    def __init__(self, n_input=1, n_output=35, stride=16, n_channel=32):
        super(Conv2DModel, self).__init__()
        self.conv1 = nn.Conv2d(n_input, n_channel, kernel_size=(1, 80), stride=stride)
        self.conv2 = nn.Conv2d(n_channel, n_channel, kernel_size=(1, 3))
        self.fc1 = nn.Linear(1976, n_channel)
        self.fc2 = nn.Linear(n_channel, n_output)

    def forward(self, x):
        out = self.conv1(x)
        print(out.shape)
        out = F.relu(out)
        print(out.shape)
        out = self.conv2(out)
        print(out.shape)
        out = F.relu(out)
        print(out.shape)
        out = F.max_pool2d(out, 2)
        out = torch.flatten(out, 1)
        print(out.shape)
        out = F.relu(self.fc1(out))
        out = self.fc2(out)
        return F.log_softmax(out, dim=1)

The error:

ValueError                                Traceback (most recent call last)
Input In [174], in <cell line: 9>()
      9 with tqdm(total=n_epoch) as pbar:
     10     for epoch in range(1, n_epoch + 1):
---> 11         train(model, epoch, log_interval)
     12         test(model, epoch)
     13         scheduler.step()

Input In [173], in train(model, epoch, log_interval)
     15 output = model(data)
     17 # negative log-likelihood for a tensor of size (batch x 1 x n_output)
---> 18 loss = F.nll_loss(output.squeeze(), target)
     20 optimizer.zero_grad()
     21 loss.backward()

File ~/anaconda3/envs/user/lib/python3.8/site-packages/torch/nn/functional.py:2671, in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
   2669 if size_average is not None or reduce is not None:
   2670     reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2671 return torch._C._nn.nll_loss_nd(input, target, weight, _Reduction.get_enum(reduction), ignore_index)

ValueError: Expected input batch_size (32) to match target batch_size (256).

I don’t know what your input shapes are so could you post a minimal, executable code snippet please?

input is torch.Size([1, 256, 8000]).

optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1) 

def train(model, epoch, log_interval):
    """Run one training epoch, logging every `log_interval` batches.

    Uses globals: train_loader, device, transform, optimizer, pbar,
    pbar_update, losses.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):

        data = data.to(device)
        target = target.to(device)
        # apply transform and model on whole batch directly on device
        data = transform(data)
        # NOTE(review): this swaps dim0 and dim1.  For the (1, 256, 8000)
        # input reported in this thread it produces (256, 1, 8000), so the
        # conv model sees 256 "samples" while `target` keeps the loader's
        # batch size — the likely cause of the batch-size mismatch error.
        # Confirm the intended layout is (batch, channel, time).
        data = torch.transpose(data, 1, 0)
        print(data.shape)


        output = model(data)

        # negative log-likelihood for a tensor of size (batch x 1 x n_output)
        loss = F.nll_loss(output.squeeze(), target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # print training stats
        if batch_idx % log_interval == 0:
            print(f"Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} ({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}")

        # update progress bar
        pbar.update(pbar_update)
        # record loss
        losses.append(loss.item())
log_interval = 20
n_epoch = 2

pbar_update = 1 / (len(train_loader) + len(test_loader))
losses = []

# The transform needs to live on the same device as the model and the data.
transform = transform.to(device)
with tqdm(total=n_epoch) as pbar:
    for epoch in range(1, n_epoch + 1):
        train(model, epoch, log_interval)
        test(model, epoch)
        scheduler.step()