Tensor type mismatch when moving to GPU

I’m getting the following error when trying to move my network and tensors to the GPU. Could someone tell me what I’m doing wrong? Thanks.

Traceback (most recent call last):
  File "/media/project/train.py", line 78, in train
    hypo = self.network(x)
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/module.py", line 206, in __call__
    result = self.forward(*input, **kwargs)
  File "model/network.py", line 35, in forward
    hidden = self.input_layer(x.view(1, input_size)).clamp(min=0)
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/module.py", line 206, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/linear.py", line 54, in forward
    return self._backend.Linear()(input, self.weight, self.bias)
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/_functions/linear.py", line 10, in forward
    output.addmm_(0, 1, input, weight.t())
TypeError: addmm_ received an invalid combination of arguments - got (int, int, torch.cuda.FloatTensor, torch.FloatTensor), but expected one of:
 * (torch.cuda.FloatTensor mat1, torch.cuda.FloatTensor mat2)
 * (torch.cuda.sparse.FloatTensor mat1, torch.cuda.FloatTensor mat2)
 * (float beta, torch.cuda.FloatTensor mat1, torch.cuda.FloatTensor mat2)
 * (float alpha, torch.cuda.FloatTensor mat1, torch.cuda.FloatTensor mat2)
 * (float beta, torch.cuda.sparse.FloatTensor mat1, torch.cuda.FloatTensor mat2)
 * (float alpha, torch.cuda.sparse.FloatTensor mat1, torch.cuda.FloatTensor mat2)
 * (float beta, float alpha, torch.cuda.FloatTensor mat1, torch.cuda.FloatTensor mat2)
      didn't match because some of the arguments have invalid types: (int, int, torch.cuda.FloatTensor, torch.FloatTensor)
 * (float beta, float alpha, torch.cuda.sparse.FloatTensor mat1, torch.cuda.FloatTensor mat2)
      didn't match because some of the arguments have invalid types: (int, int, torch.cuda.FloatTensor, torch.FloatTensor)



import logging

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

torch.manual_seed(1)


class Train(object):
    def __init__(self, network, training, address):
        self.network    = network
        self.address    = address
        self.batch_size = training['batch_size']
        self.iterations = training['iterations']
        self.samples    = training['samples']
        self.data       = training['data']
        self.lr         = training['lr']
        self.noisy_lr   = training['nlr']
        self.cuda       = training['cuda']
        self.save       = training['save']
        self.scale      = training['scale']
        self.limit      = training['limit']
        self.replace    = training['strategy']
        self.optimizer  = torch.optim.Adam(self.network.parameters(), lr=self.lr)
        logging.basicConfig(filename='gradient.log', level=logging.DEBUG)

    def tensor_to_Variable(self, t):
        if next(self.network.parameters()).is_cuda and not t.is_cuda:
            t = t.cuda()

        return Variable(t)

    def train(self):
        if self.cuda:
            self.network.cuda()
        dh = DataHandler(self.data)  # my own data-loading helper (not shown)
        loss_fn = torch.nn.MSELoss()
        losses    = []
        validate  = []
        val_size  = 100
        val_diff  = 1
        total_val = float(val_size * self.batch_size)
        hypos     = []
        labels    = []

        # training loop
        for i in range(self.iterations):
            x, y = dh.get_batch(self.batch_size)
            x = self.tensor_to_Variable(x)
            y = self.tensor_to_Variable(y)

            self.optimizer.zero_grad()
            hypo = self.network(x)
            loss = loss_fn(hypo, y)
            loss.backward()
            self.optimizer.step()

            if i % 100 == 0:
                losses.append(loss.data.tolist()[0])

                num_correct = 0.0
                acc_diff = torch.FloatTensor([val_diff]).expand(self.batch_size)

                for j in range(val_size):
                    val_x, val_y = dh.get_batch(self.batch_size)
                    val_h = self.network(self.tensor_to_Variable(val_x))
                    hypos.extend(val_h.data.tolist())
                    labels.extend(val_y.data.tolist())
                    num_correct += torch.sum(torch.abs(val_h.data - val_y.data) < acc_diff)

                validate.append(num_correct / total_val)

class Feedforward(nn.Module):
    def __init__(self, topology):
        super(Feedforward, self).__init__()
        self.input_dim     = topology['features']
        self.num_hidden    = topology['hidden_layers']
        self.hidden_dim    = topology['hidden_dim']
        self.output_dim    = topology['output_dim']
        self.input_layer   = nn.Linear(self.input_dim, self.hidden_dim)
        self.hidden_layer  = nn.Linear(self.hidden_dim, self.hidden_dim)
        self.output_layer  = nn.Linear(self.hidden_dim, self.output_dim)
        self.dropout_layer = nn.Dropout(p=0.2)

    def forward(self, x):
        batch_size = x.size()[0]
        feat_size  = x.size()[1]
        input_size = batch_size * feat_size

        self.input_layer = nn.Linear(input_size, self.hidden_dim)
        hidden = self.input_layer(x.view(1, input_size)).clamp(min=0)

        for _ in range(self.num_hidden):
            hidden = self.dropout_layer(F.relu(self.hidden_layer(hidden)))

        output_size = batch_size * self.output_dim
        self.output_layer = nn.Linear(self.hidden_dim, output_size)
        return self.output_layer(hidden).view(output_size)

Hi,

Your model is on the CPU while your input is on the GPU. The error shows that the linear layer received a mix of torch.cuda.FloatTensor and torch.FloatTensor: the input is a CUDA tensor but the weight matrix is not.
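
For reference, here is a minimal sketch of the general rule (a toy nn.Linear model, not the code above): the parameters and the input both have to be moved before the forward pass.

import torch
import torch.nn as nn
from torch.autograd import Variable

net = nn.Linear(10, 5)            # weights start out as torch.FloatTensor
x = Variable(torch.randn(4, 10))  # the input starts out on the CPU too

if torch.cuda.is_available():
    net.cuda()                    # parameters become torch.cuda.FloatTensor
    x = x.cuda()                  # the input has to follow, or addmm_ fails

out = net(x)                      # both operands now live on the same device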

Yeah, it turns out the issue was that I was re-initializing the input and output layers in forward(), so the freshly created layers lived on the CPU even though the rest of the network had been moved to the GPU. I fixed it by adding .cuda() each time I reinitialized them.
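
For anyone hitting the same thing, the workaround looks roughly like this (a hypothetical helper, not my exact code): a freshly constructed nn.Linear always starts on the CPU, so it has to be moved back whenever the input lives on the GPU.

import torch.nn as nn

def rebuild_layer(x, in_features, out_features):
    # a brand-new nn.Linear is allocated on the CPU, no matter
    # where the rest of the network's parameters live
    layer = nn.Linear(in_features, out_features)
    if x.is_cuda:
        layer = layer.cuda()  # move the fresh weights to the GPU as well
    return layer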

Is it necessary to account for the batch size in forward()? Do I need to change the size of the input and output layers? When I didn’t do this, the dimensions were off, but someone else told me that it’s not necessary to change the dimensions. Is this true?

The input to an nn.Module is a Variable of shape batch_size x feature_size.

Let’s say you have an nn.Linear(3, 5) module. This module takes as input a tensor of shape batch_size x 3 and produces an output of shape batch_size x 5. So the definition of the module is independent of the batch size. Does this answer your question?

PS: That being said, the batch_size has to be at least 1.
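
To illustrate the point (just a toy example): the same nn.Linear(3, 5) accepts batches of any size, because only the feature dimension is fixed by its definition.

import torch
import torch.nn as nn
from torch.autograd import Variable

layer = nn.Linear(3, 5)                      # 3 input features, 5 output features

small = layer(Variable(torch.randn(1, 3)))   # batch of 1  -> output of size (1, 5)
large = layer(Variable(torch.randn(64, 3)))  # batch of 64 -> output of size (64, 5)

So in the forward() above there is no need to rebuild input_layer or output_layer per batch; the layers defined in __init__ already handle any batch size.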