Tensor type mismatch when moving to GPU

I’m getting the following error when trying to move my network and tensors to GPU. Could someone tell me what I’m doing wrong? Thanks.

Traceback (most recent call last):
  File "/media/project/train.py", line 78, in train
    hypo = self.network(x)
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/module.py", line 206, in __call__
    result = self.forward(*input, **kwargs)
  File "model/network.py", line 35, in forward
    hidden = self.input_layer(x.view(1, input_size)).clamp(min=0)
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/module.py", line 206, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/linear.py", line 54, in forward
    return self._backend.Linear()(input, self.weight, self.bias)
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/_functions/linear.py", line 10, in forward
    output.addmm_(0, 1, input, weight.t())
TypeError: addmm_ received an invalid combination of arguments - got (int, int, torch.cuda.FloatTensor, torch.FloatTensor), but expected one of:
 * (torch.cuda.FloatTensor mat1, torch.cuda.FloatTensor mat2)
 * (torch.cuda.sparse.FloatTensor mat1, torch.cuda.FloatTensor mat2)
 * (float beta, torch.cuda.FloatTensor mat1, torch.cuda.FloatTensor mat2)
 * (float alpha, torch.cuda.FloatTensor mat1, torch.cuda.FloatTensor mat2)
 * (float beta, torch.cuda.sparse.FloatTensor mat1, torch.cuda.FloatTensor mat2)
 * (float alpha, torch.cuda.sparse.FloatTensor mat1, torch.cuda.FloatTensor mat2)
 * (float beta, float alpha, torch.cuda.FloatTensor mat1, torch.cuda.FloatTensor mat2)
      didn't match because some of the arguments have invalid types: (int, int, torch.cuda.FloatTensor, torch.FloatTensor)
 * (float beta, float alpha, torch.cuda.sparse.FloatTensor mat1, torch.cuda.FloatTensor mat2)
      didn't match because some of the arguments have invalid types: (int, int, torch.cuda.FloatTensor, torch.FloatTensor)


class Train(object):
def __init__(self, network, training, address):
	self.network    = network
	self.address    = address
	self.batch_size = training['batch_size']
	self.iterations = training['iterations']
	self.samples 	= training['samples']
	self.data       = training['data']
	self.lr 	    = training['lr']
	self.noisy_lr 	= training['nlr']
	self.cuda       = training['cuda']
	self.save       = training['save']
	self.scale	    = training['scale']
	self.limit      = training['limit']
	self.replace    = training['strategy']
	self.optimizer  = torch.optim.Adam(self.network.parameters(), lr=self.lr)

def tensor_to_Variable(self, t):
	if next(self.network.parameters()).is_cuda and not t.is_cuda:
		t = t.cuda()

	return Variable(t)

def train(self):
    if self.cuda:
	dh = DataHandler(self.data)
	loss_fn = torch.nn.MSELoss()
	losses    = []
	validate  = []
	val_size  = 100
	val_diff  = 1
	total_val = float(val_size * self.batch_size)
	hypos     = []
	labels    = []

	# training loop
	for i in range(self.iterations):
		x, y = dh.get_batch(self.batch_size)
		x = self.tensor_to_Variable(x)
		y = self.tensor_to_Variable(y)
		hypo = self.network(x)
		loss = loss_fn(hypo, y)
		if i % 100 == 0:
			num_correct = 0.0
			acc_diff = torch.FloatTensor([val_diff]).expand(self.batch_size)
			for j in range(val_size):
				val_x, val_y = dh.get_batch(self.batch_size)
				val_h = self.network(self.tensor_to_Variable(val_x))
				num_correct += torch.sum(torch.abs(val_h.data - val_y.data) < acc_diff)

			validate.append(num_correct / total_val)

class Feedforward(nn.Module):
def __init__(self, topology):
	super(Feedforward, self).__init__()
	self.input_dim     = topology['features']
	self.num_hidden    = topology['hidden_layers']
	self.hidden_dim    = topology['hidden_dim']
	self.output_dim    = topology['output_dim']
	self.input_layer   = nn.Linear(self.input_dim, self.hidden_dim)
	self.hidden_layer  = nn.Linear(self.hidden_dim, self.hidden_dim)
	self.output_layer  = nn.Linear(self.hidden_dim, self.output_dim)
	self.dropout_layer = nn.Dropout(p=0.2)

def forward(self, x):
	batch_size = x.size()[0]
	feat_size  = x.size()[1]
	input_size = batch_size * feat_size

	self.input_layer = nn.Linear(input_size, self.hidden_dim)
	hidden = self.input_layer(x.view(1, input_size)).clamp(min=0)

	for _ in range(self.num_hidden):
		hidden = self.dropout_layer(F.relu(self.hidden_layer(hidden)))
	output_size = batch_size * self.output_dim
	self.output_layer = nn.Linear(self.hidden_dim, output_size)
	return self.output_layer(hidden).view(output_size)


Your model is on the CPU and your input is on the GPU. The error says the input to linear received a mix of FloatTensor and cuda.FloatTensor.

Yeah, turns out the issue was that I was re-initializing the input and output layers in forward(), so they were moved off the GPU. Fixed it by just adding .cuda() each time I reinitialized them.

Is it necessary to account for batch size in forward()? Do I need to change the size of the input and output layers? When I didn’t do this, the dimensions were off, but someone else told me that it’d not necessary to change the dimensions. Is this true?

The input to a nn.Module is a Variable of shape batch_size * feature_size.

Let’s say you have a nn.Linear(3,5) module. This module take as input a tensor of shape batch_size * 3. So the definition of the module is independent of the batch size. Does this answer your question?

PS. That being said the batch_size has to be at least 1