I’m getting the following error when trying to move my network and tensors to GPU. Could someone tell me what I’m doing wrong? Thanks.
Traceback (most recent call last):
File "/media/project/train.py", line 78, in train
hypo = self.network(x)
File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/module.py", line 206, in __call__
result = self.forward(*input, **kwargs)
File "model/network.py", line 35, in forward
hidden = self.input_layer(x.view(1, input_size)).clamp(min=0)
File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/module.py", line 206, in __call__
result = self.forward(*input, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/linear.py", line 54, in forward
return self._backend.Linear()(input, self.weight, self.bias)
File "/usr/local/lib/python2.7/dist-packages/torch/nn/_functions/linear.py", line 10, in forward
output.addmm_(0, 1, input, weight.t())
TypeError: addmm_ received an invalid combination of arguments - got (int, int, torch.cuda.FloatTensor, torch.FloatTensor), but expected one of:
* (torch.cuda.FloatTensor mat1, torch.cuda.FloatTensor mat2)
* (torch.cuda.sparse.FloatTensor mat1, torch.cuda.FloatTensor mat2)
* (float beta, torch.cuda.FloatTensor mat1, torch.cuda.FloatTensor mat2)
* (float alpha, torch.cuda.FloatTensor mat1, torch.cuda.FloatTensor mat2)
* (float beta, torch.cuda.sparse.FloatTensor mat1, torch.cuda.FloatTensor mat2)
* (float alpha, torch.cuda.sparse.FloatTensor mat1, torch.cuda.FloatTensor mat2)
* (float beta, float alpha, torch.cuda.FloatTensor mat1, torch.cuda.FloatTensor mat2)
didn't match because some of the arguments have invalid types: (int, int, torch.cuda.FloatTensor, torch.FloatTensor)
* (float beta, float alpha, torch.cuda.sparse.FloatTensor mat1, torch.cuda.FloatTensor mat2)
didn't match because some of the arguments have invalid types: (int, int, torch.cuda.FloatTensor, torch.FloatTensor)
torch.manual_seed(1)
class Train(object):
def __init__(self, network, training, address):
self.network = network
self.address = address
self.batch_size = training['batch_size']
self.iterations = training['iterations']
self.samples = training['samples']
self.data = training['data']
self.lr = training['lr']
self.noisy_lr = training['nlr']
self.cuda = training['cuda']
self.save = training['save']
self.scale = training['scale']
self.limit = training['limit']
self.replace = training['strategy']
self.optimizer = torch.optim.Adam(self.network.parameters(), lr=self.lr)
logging.basicConfig(filename='gradient.log',level=logging.DEBUG)
def tensor_to_Variable(self, t):
if next(self.network.parameters()).is_cuda and not t.is_cuda:
t = t.cuda()
return Variable(t)
def train(self):
if self.cuda:
self.network.cuda()
dh = DataHandler(self.data)
loss_fn = torch.nn.MSELoss()
losses = []
validate = []
val_size = 100
val_diff = 1
total_val = float(val_size * self.batch_size)
hypos = []
labels = []
# training loop
for i in range(self.iterations):
x, y = dh.get_batch(self.batch_size)
x = self.tensor_to_Variable(x)
y = self.tensor_to_Variable(y)
self.optimizer.zero_grad()
hypo = self.network(x)
loss = loss_fn(hypo, y)
loss.backward()
self.optimizer.step()
if i % 100 == 0:
losses.append(loss.data.tolist()[0])
num_correct = 0.0
acc_diff = torch.FloatTensor([val_diff]).expand(self.batch_size)
for j in range(val_size):
val_x, val_y = dh.get_batch(self.batch_size)
val_h = self.network(self.tensor_to_Variable(val_x))
hypos.extend(val_h.data.tolist())
labels.extend(val_y.data.tolist())
num_correct += torch.sum(torch.abs(val_h.data - val_y.data) < acc_diff)
validate.append(num_correct / total_val)
class Feedforward(nn.Module):
def __init__(self, topology):
super(Feedforward, self).__init__()
self.input_dim = topology['features']
self.num_hidden = topology['hidden_layers']
self.hidden_dim = topology['hidden_dim']
self.output_dim = topology['output_dim']
self.input_layer = nn.Linear(self.input_dim, self.hidden_dim)
self.hidden_layer = nn.Linear(self.hidden_dim, self.hidden_dim)
self.output_layer = nn.Linear(self.hidden_dim, self.output_dim)
self.dropout_layer = nn.Dropout(p=0.2)
def forward(self, x):
batch_size = x.size()[0]
feat_size = x.size()[1]
input_size = batch_size * feat_size
self.input_layer = nn.Linear(input_size, self.hidden_dim)
hidden = self.input_layer(x.view(1, input_size)).clamp(min=0)
for _ in range(self.num_hidden):
hidden = self.dropout_layer(F.relu(self.hidden_layer(hidden)))
output_size = batch_size * self.output_dim
self.output_layer = nn.Linear(self.hidden_dim, output_size)
return self.output_layer(hidden).view(output_size)