Shape and batches

Hi, I’m a bit confused about an error i have.
[torch.FloatTensor of size 3x1x296]

Traceback (most recent call last):
File “”, line 168, in
File “”, line 139, in train
output = model(data)
File “/home/david/anaconda3/lib/python3.5/site-packages/torch/nn/modules/”, line 206, in call
result = self.forward(*input, **kwargs)
File “”, line 118, in forward
x = F.relu(self.fc1(x))
File “/home/david/anaconda3/lib/python3.5/site-packages/torch/nn/modules/”, line 206, in call
result = self.forward(*input, **kwargs)
File “/home/david/anaconda3/lib/python3.5/site-packages/torch/nn/modules/”, line 54, in forward
return self._backend.Linear()(input, self.weight, self.bias)
File "/home/david/anaconda3/lib/python3.5/site-packages/torch/nn/functions/", line 10, in forward
(0, 1, input, weight.t())
RuntimeError: matrices expected, got 3D, 2D tensors at /b/wheel/pytorch-src/torch/lib/TH/generic/THTensorMath.c:1232

my tensor is as i would have hope 3x1x196 which is the expected size with 3 being the batch size.

i think it’s the batch of 3 which is causing the 3d, 2d tensor confusion but not sure how to get around this. is the input to NN the whole batch or just one record ?

full code below.

from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import as utils_data
import numpy
from random import randrange
# fix random seed for reproducibility

# Training settings
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                help='input batch size for training (default: 64)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                help='input batch size for testing (default: 1000)')
parser.add_argument('--epochs', type=int, default=10, metavar='N',
                help='number of epochs to train (default: 10)')
parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                help='learning rate (default: 0.01)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                help='SGD momentum (default: 0.5)')
parser.add_argument('--no-cuda', action='store_true', default=False,
                help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                help='random seed (default: 1)')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                help='how many batches to wait before logging training status')
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

if args.cuda:

kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
class MyDataset():
    def __init__(self):
        dataset = numpy.genfromtxt("vectorFile.csv", delimiter=",")

        self.features = dataset[:,0:148]
        self.labels = dataset[:,149]
        self.size = len(dataset)

    def __getitem__(self, index):
    # choose random index in features
        firstPick = randrange(0,self.size)
        firstPlayer = self.features[firstPick][0]#dealer or not ?
        while True:
            secondPick = randrange(0,self.size)
            if firstPlayer==self.features[secondPick][0]:

        if self.labels[firstPick]>self.labels[secondPick]:
            #print('adding to outputAnswer')
            target = numpy.array([(1,0)])
            #print('adding to outputAnswer')
            target = numpy.array([(1,0)])

        target = torch.from_numpy(target).float()

        data = numpy.array([(self.features[firstPick], self.features[secondPick])])
        data = data.reshape((1, 296))
        data = torch.from_numpy(data).float()

        #print('data', data)
        #print('target', target)
        return data, target

    def __len__(self):
        return self.size

train_loader =, batch_size=3)

class Net(nn.Module):
def __init__(self):
    super(Net, self).__init__()
    #self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
    #self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
    #self.conv2_drop = nn.Dropout2d()
    self.fc1 = nn.Linear(296, 50)
    self.fc2 = nn.Linear(50, 2)

def forward(self, x):
    #x = F.relu(F.max_pool2d(self.conv1(x), 2))
    #x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
    #x = x.view(-1, 320)
    x = F.relu(self.fc1(x))
    #x = F.dropout(x,
    x = self.fc2(x)
    return F.log_softmax(x)

model = Net()
if args.cuda:

optimizer = optim.SGD(model.parameters(),, momentum=args.momentum)

def train(epoch):
    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
            data, target = Variable(data), Variable(target)
            output = model(data)
            loss = F.nll_loss(output, target)
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, batch_idx * len(data), len(train_loader.dataset),
            100. * batch_idx / len(train_loader),[0]))

def test():
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
            data, target = Variable(data, volatile=True), Variable(target)
            output = model(data)
            test_loss += F.nll_loss(output, target, size_average=False).data[0] # sum up batch loss
            pred =[1] # get the index of the max log-probability
            correct += pred.eq(

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(train_loader.dataset),
    100. * correct / len(train_loader.dataset)))

for epoch in range(1, args.epochs + 1):

It’s the ×1× in the middle, Linear takes batch x features. Try to drop the extra dimension, eg in
data = data.reshape((1, 296))

Best regards


thanks, that looks to have fixed that bit. I think where i have a lack of knowledge is that i’m confused around what needs to be passed around in the tensor at which points. i’m thinking of batch size, channels, rows and columns. the ‘1’ here seemed ok to me as either channel or row but in fact neither was needed !