Convert numpy to PyTorch Dataset

dmadeka1 · February 27, 2017, 12:30am

Hi All,

I have a numpy array of modified MNIST, which has the dimensions of a working dataset (Nx28x28), and labels of shape (N,).

I want to convert this to a PyTorch Dataset, so I did:

train = torch.utils.data.TensorDataset(img, labels.view(-1))
train_loader = torch.utils.data.DataLoader(train, batch_size=64, shuffle=False)

This causes an AssertionError for the dimensions when I try to optimize my model, and there’s zero transparency, since if I load the data from PyTorch my tensors and the train.dataset.train_data.dim() are the same.

dmadeka1 · February 27, 2017, 12:30am

I tried this too,

train = torch.utils.data.TensorDataset(img, labels)
train_loader = torch.utils.data.DataLoader(train, batch_size=64, shuffle=False)

No dice

jhjungCode · February 27, 2017, 1:00am

Can you show the output of the following?

print(img.size(), label.size())

I think adding a dimension to the labels will work:

train = torch.utils.data.TensorDataset(img, labels.unsqueeze(1))

dmadeka1 · February 27, 2017, 1:10am

torch.Size([18000, 28, 28]) torch.Size([18000])

unsqueeze(1) gave the same error

---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
<ipython-input-18-f3ca3f765752> in <module>()
      1 for epoch in range(1, 101):
----> 2     train(epoch)
      3     test(epoch, valid_loader)

<ipython-input-17-f91e8ba0f29c> in train(epoch)
      6         data, target = Variable(data), Variable(target)
      7         optimizer.zero_grad()
----> 8         output = model(data)
      9         loss = F.nll_loss(output, target)
     10         loss.backward()

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/modules/module.pyc in __call__(self, *input, **kwargs)
    208 
    209     def __call__(self, *input, **kwargs):
--> 210         result = self.forward(*input, **kwargs)
    211         for hook in self._forward_hooks.values():
    212             hook_result = hook(self, input, result)

<ipython-input-15-7f886ceeb28f> in forward(self, x)
     10 
     11     def forward(self, x):
---> 12         x = F.relu(F.max_pool2d(self.conv1(x), 2))
     13         x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), ))
     14 #         x = F.relu(F.max_pool2d(self.conv3(x), 2))

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/modules/module.pyc in __call__(self, *input, **kwargs)
    208 
    209     def __call__(self, *input, **kwargs):
--> 210         result = self.forward(*input, **kwargs)
    211         for hook in self._forward_hooks.values():
    212             hook_result = hook(self, input, result)

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/modules/conv.pyc in forward(self, input)
    233     def forward(self, input):
    234         return F.conv2d(input, self.weight, self.bias, self.stride,
--> 235                         self.padding, self.dilation, self.groups)
    236 
    237 

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/functional.pyc in conv2d(input, weight, bias, stride, padding, dilation, groups)
     35     f = ConvNd(_pair(stride), _pair(padding), _pair(dilation), False,
     36                _pair(0), groups)
---> 37     return f(input, weight, bias) if bias is not None else f(input, weight)
     38 
     39 

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/_functions/conv.pyc in forward(self, input, weight, bias)
     30         self.save_for_backward(input, weight, bias)
     31         if k == 3:
---> 32             input, weight = _view4d(input, weight)
     33         output = self._update_output(input, weight, bias)
     34         if k == 3:

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/_functions/conv.pyc in _view4d(*tensors)
    171     output = []
    172     for t in tensors:
--> 173         assert t.dim() == 3
    174         size = list(t.size())
    175         size.insert(2, 1)

AssertionError:

jhjungCode · February 27, 2017, 1:28am

I think I found the problem:
a conv layer takes its input as batchNumber x colorDepth x height x width, but your dataset doesn’t have a colorDepth dimension.

I hope the following works:

img.unsqueeze(1)

dmadeka1 · February 27, 2017, 1:58am

Doesn’t work, gives:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-24-f3ca3f765752> in <module>()
      1 for epoch in range(1, 101):
----> 2     train(epoch)
      3     test(epoch, valid_loader)

<ipython-input-23-f91e8ba0f29c> in train(epoch)
      6         data, target = Variable(data), Variable(target)
      7         optimizer.zero_grad()
----> 8         output = model(data)
      9         loss = F.nll_loss(output, target)
     10         loss.backward()

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/modules/module.pyc in __call__(self, *input, **kwargs)
    208 
    209     def __call__(self, *input, **kwargs):
--> 210         result = self.forward(*input, **kwargs)
    211         for hook in self._forward_hooks.values():
    212             hook_result = hook(self, input, result)

<ipython-input-21-7f886ceeb28f> in forward(self, x)
     10 
     11     def forward(self, x):
---> 12         x = F.relu(F.max_pool2d(self.conv1(x), 2))
     13         x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), ))
     14 #         x = F.relu(F.max_pool2d(self.conv3(x), 2))

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/modules/module.pyc in __call__(self, *input, **kwargs)
    208 
    209     def __call__(self, *input, **kwargs):
--> 210         result = self.forward(*input, **kwargs)
    211         for hook in self._forward_hooks.values():
    212             hook_result = hook(self, input, result)

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/modules/conv.pyc in forward(self, input)
    233     def forward(self, input):
    234         return F.conv2d(input, self.weight, self.bias, self.stride,
--> 235                         self.padding, self.dilation, self.groups)
    236 
    237 

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/functional.pyc in conv2d(input, weight, bias, stride, padding, dilation, groups)
     35     f = ConvNd(_pair(stride), _pair(padding), _pair(dilation), False,
     36                _pair(0), groups)
---> 37     return f(input, weight, bias) if bias is not None else f(input, weight)
     38 
     39 

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/_functions/conv.pyc in forward(self, input, weight, bias)
     31         if k == 3:
     32             input, weight = _view4d(input, weight)
---> 33         output = self._update_output(input, weight, bias)
     34         if k == 3:
     35             output, = _view3d(output)

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/_functions/conv.pyc in _update_output(self, input, weight, bias)
     86 
     87         self._bufs = [[] for g in range(self.groups)]
---> 88         return self._thnn('update_output', input, weight, bias)
     89 
     90     def _grad_input(self, input, weight, grad_output):

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/_functions/conv.pyc in _thnn(self, fn_name, input, weight, *args)
    145         impl = _thnn_convs[self.thnn_class_name(input)]
    146         if self.groups == 1:
--> 147             return impl[fn_name](self, self._bufs[0], input, weight, *args)
    148         else:
    149             res = []

/home/dhruv/anaconda2/lib/python2.7/site-packages/torch/nn/_functions/conv.pyc in call_update_output(self, bufs, input, weight, bias)
    223         args = parse_arguments(self, fn.arguments[5:], bufs, kernel_size)
    224         getattr(backend, fn.name)(backend.library_state, input, output, weight,
--> 225                                   bias, *args)
    226         return output
    227     return call_update_output

TypeError: DoubleSpatialConvolutionMM_updateOutput received an invalid combination of arguments - got (int, torch.DoubleTensor, torch.DoubleTensor, torch.FloatTensor, torch.FloatTensor, torch.DoubleTensor, torch.DoubleTensor, long, long, int, int, int, int), but expected (int state, torch.DoubleTensor input, torch.DoubleTensor output, torch.DoubleTensor weight, [torch.DoubleTensor bias or None], torch.DoubleTensor finput, torch.DoubleTensor fgradInput, int kW, int kH, int dW, int dH, int padW, int padH)

jhjungCode · February 27, 2017, 4:41am

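The image data's numpy dtype is float64, while the model's weights are float32; just convert the images to float32 (e.g. img.float() on the tensor, or .astype(np.float32) on the numpy array before conversion).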

shicai · February 27, 2017, 9:23am

float tensors and double tensors cannot be mixed in one function call.