Convert numpy to PyTorch Dataset

Hi All,

I have a numpy array of modified MNIST images with the same dimensions as a working dataset (N x 28 x 28), and a label array of shape (N,).

I want to convert this to a PyTorch Dataset, so I did:

train = torch.utils.data.TensorDataset(img, labels.view(-1))
train_loader = torch.utils.data.DataLoader(train, batch_size=64, shuffle=False)

This causes an AssertionError about the dimensions when I try to optimize my model, and the error gives no indication of what is wrong: when I load the data from PyTorch instead, my tensors and train.dataset.train_data.dim() have the same dimensions.

I tried this too,

train = torch.utils.data.TensorDataset(img, labels)
train_loader = torch.utils.data.DataLoader(train, batch_size=64, shuffle=False)

No dice

Can you show the output of the following?

print(img.size(), label.size())

I think adding a dimension to the labels will work:

train = torch.utils.data.TensorDataset(img, labels.unsqueeze(1))
torch.Size([18000, 28, 28]) torch.Size([18000])

unsqueeze(1) gave the same error

---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
<ipython-input-18-f3ca3f765752> in <module>()
      1 for epoch in range(1, 101):
----> 2     train(epoch)
      3     test(epoch, valid_loader)

<ipython-input-17-f91e8ba0f29c> in train(epoch)
      6         data, target = Variable(data), Variable(target)
      7         optimizer.zero_grad()
----> 8         output = model(data)
      9         loss = F.nll_loss(output, target)
     10         loss.backward()

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/modules/module.pyc in __call__(self, *input, **kwargs)
    208 
    209     def __call__(self, *input, **kwargs):
--> 210         result = self.forward(*input, **kwargs)
    211         for hook in self._forward_hooks.values():
    212             hook_result = hook(self, input, result)

<ipython-input-15-7f886ceeb28f> in forward(self, x)
     10 
     11     def forward(self, x):
---> 12         x = F.relu(F.max_pool2d(self.conv1(x), 2))
     13         x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), ))
     14 #         x = F.relu(F.max_pool2d(self.conv3(x), 2))

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/modules/module.pyc in __call__(self, *input, **kwargs)
    208 
    209     def __call__(self, *input, **kwargs):
--> 210         result = self.forward(*input, **kwargs)
    211         for hook in self._forward_hooks.values():
    212             hook_result = hook(self, input, result)

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/modules/conv.pyc in forward(self, input)
    233     def forward(self, input):
    234         return F.conv2d(input, self.weight, self.bias, self.stride,
--> 235                         self.padding, self.dilation, self.groups)
    236 
    237 

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/functional.pyc in conv2d(input, weight, bias, stride, padding, dilation, groups)
     35     f = ConvNd(_pair(stride), _pair(padding), _pair(dilation), False,
     36                _pair(0), groups)
---> 37     return f(input, weight, bias) if bias is not None else f(input, weight)
     38 
     39 

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/_functions/conv.pyc in forward(self, input, weight, bias)
     30         self.save_for_backward(input, weight, bias)
     31         if k == 3:
---> 32             input, weight = _view4d(input, weight)
     33         output = self._update_output(input, weight, bias)
     34         if k == 3:

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/_functions/conv.pyc in _view4d(*tensors)
    171     output = []
    172     for t in tensors:
--> 173         assert t.dim() == 3
    174         size = list(t.size())
    175         size.insert(2, 1)

AssertionError: 

I think I found the problem.
A conv layer takes its input as batch size x color depth (channels) x height x width, but your dataset doesn't have a color-depth dimension.

I hope the following works:

img.unsqueeze(1)

Doesn’t work, gives:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-24-f3ca3f765752> in <module>()
      1 for epoch in range(1, 101):
----> 2     train(epoch)
      3     test(epoch, valid_loader)

<ipython-input-23-f91e8ba0f29c> in train(epoch)
      6         data, target = Variable(data), Variable(target)
      7         optimizer.zero_grad()
----> 8         output = model(data)
      9         loss = F.nll_loss(output, target)
     10         loss.backward()

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/modules/module.pyc in __call__(self, *input, **kwargs)
    208 
    209     def __call__(self, *input, **kwargs):
--> 210         result = self.forward(*input, **kwargs)
    211         for hook in self._forward_hooks.values():
    212             hook_result = hook(self, input, result)

<ipython-input-21-7f886ceeb28f> in forward(self, x)
     10 
     11     def forward(self, x):
---> 12         x = F.relu(F.max_pool2d(self.conv1(x), 2))
     13         x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), ))
     14 #         x = F.relu(F.max_pool2d(self.conv3(x), 2))

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/modules/module.pyc in __call__(self, *input, **kwargs)
    208 
    209     def __call__(self, *input, **kwargs):
--> 210         result = self.forward(*input, **kwargs)
    211         for hook in self._forward_hooks.values():
    212             hook_result = hook(self, input, result)

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/modules/conv.pyc in forward(self, input)
    233     def forward(self, input):
    234         return F.conv2d(input, self.weight, self.bias, self.stride,
--> 235                         self.padding, self.dilation, self.groups)
    236 
    237 

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/functional.pyc in conv2d(input, weight, bias, stride, padding, dilation, groups)
     35     f = ConvNd(_pair(stride), _pair(padding), _pair(dilation), False,
     36                _pair(0), groups)
---> 37     return f(input, weight, bias) if bias is not None else f(input, weight)
     38 
     39 

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/_functions/conv.pyc in forward(self, input, weight, bias)
     31         if k == 3:
     32             input, weight = _view4d(input, weight)
---> 33         output = self._update_output(input, weight, bias)
     34         if k == 3:
     35             output, = _view3d(output)

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/_functions/conv.pyc in _update_output(self, input, weight, bias)
     86 
     87         self._bufs = [[] for g in range(self.groups)]
---> 88         return self._thnn('update_output', input, weight, bias)
     89 
     90     def _grad_input(self, input, weight, grad_output):

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/_functions/conv.pyc in _thnn(self, fn_name, input, weight, *args)
    145         impl = _thnn_convs[self.thnn_class_name(input)]
    146         if self.groups == 1:
--> 147             return impl[fn_name](self, self._bufs[0], input, weight, *args)
    148         else:
    149             res = []

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/_functions/conv.pyc in call_update_output(self, bufs, input, weight, bias)
    223         args = parse_arguments(self, fn.arguments[5:], bufs, kernel_size)
    224         getattr(backend, fn.name)(backend.library_state, input, output, weight,
--> 225                                   bias, *args)
    226         return output
    227     return call_update_output

TypeError: DoubleSpatialConvolutionMM_updateOutput received an invalid combination of arguments - got (int, torch.DoubleTensor, torch.DoubleTensor, torch.FloatTensor, torch.FloatTensor, torch.DoubleTensor, torch.DoubleTensor, long, long, int, int, int, int), but expected (int state, torch.DoubleTensor input, torch.DoubleTensor output, torch.DoubleTensor weight, [torch.DoubleTensor bias or None], torch.DoubleTensor finput, torch.DoubleTensor fgradInput, int kW, int kH, int dW, int dH, int padW, int padH)

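The numpy dtype of your image data is float64; just change it to float32 (for example, img = img.astype(np.float32) before converting to a tensor, or call .float() on the tensor).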

Float tensors and double tensors cannot be mixed in a single function call.