Thanks Adam! I incorporated what you wrote and reviewed the examples/mnist/main.py file, and I’m now close to having complete code. The forward pass of the model is raising an error, and I’m wondering if you or anyone else can offer a suggestion:
(py35) ~/parameter$ ./pytorch_mlp_param.py
Setting up data
Defining model
X_train.shape= (10000, 3)
Using CUDA, number of devices = 2
(Outer) Epoch 0 of 10000 :
Traceback (most recent call last):
File "./pytorch_mlp_param.py", line 160, in <module>
main()
File “./pytorch_mlp_param.py”, line 154, in main
train(model, epoch, trainloader, optimizer)
File “./pytorch_mlp_param.py”, line 101, in train
output = model(data)
File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/torch/nn/modules/module.py", line 202, in __call__
result = self.forward(*input, **kwargs)
File “./pytorch_mlp_param.py”, line 64, in forward
x = F.relu(self.hidden(x))
File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/torch/nn/modules/module.py", line 202, in __call__
result = self.forward(*input, **kwargs)
File “/opt/anaconda/envs/py35/lib/python3.5/site-packages/torch/nn/modules/linear.py”, line 54, in forward
return self.backend.Linear()(input, self.weight, self.bias)
File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/torch/nn/functions/linear.py", line 10, in forward
output.addmm(0, 1, input, weight.t())
TypeError: addmm received an invalid combination of arguments - got (int, int, torch.cuda.DoubleTensor, torch.cuda.FloatTensor), but expected one of:
- (torch.cuda.DoubleTensor mat1, torch.cuda.DoubleTensor mat2)
- (torch.cuda.sparse.DoubleTensor mat1, torch.cuda.DoubleTensor mat2)
- (float beta, torch.cuda.DoubleTensor mat1, torch.cuda.DoubleTensor mat2)
- (float alpha, torch.cuda.DoubleTensor mat1, torch.cuda.DoubleTensor mat2)
- (float beta, torch.cuda.sparse.DoubleTensor mat1, torch.cuda.DoubleTensor mat2)
- (float alpha, torch.cuda.sparse.DoubleTensor mat1, torch.cuda.DoubleTensor mat2)
- (float beta, float alpha, torch.cuda.DoubleTensor mat1, torch.cuda.DoubleTensor mat2)
- (float beta, float alpha, torch.cuda.sparse.DoubleTensor mat1, torch.cuda.DoubleTensor mat2)
.
.
If I print out “x” at the beginning of forward(), I find that, as I intended, it is
x = [torch.cuda.DoubleTensor of size 20x3 (GPU 0)]
(where 20 is the batch size). I don’t understand where the extra torch.cuda.FloatTensor it says it received comes from. Any thoughts?
.
.
.
.
For completeness, in case it helps, I’ll put the full code (with Keras references removed) below.
.
#! /usr/bin/env python
# Multilayer Perceptron to learn "f" in "y = f(x,p)", given lots of (x,y) pairs
# and a set of parameters p=[...] which affect f(x)
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
import torch.optim as optim
from torch.autograd import Variable
def myfunction(x, p=(1, 0)):
    """Evaluate the function to be learned: ``p[0] * sin(100 * p[1] * x)``.

    A sine wave in ``x`` whose amplitude is ``p[0]`` and whose frequency is
    scaled by ``p[1]``.

    Args:
        x: Scalar or NumPy array of sample positions.
        p: Indexable sequence whose first two entries are the amplitude and
            the frequency factor. Only ``p[0]`` and ``p[1]`` are read. The
            default is a tuple rather than a list so the shared default
            object can never be mutated between calls.

    Returns:
        The function value, with the same shape as ``x`` after NumPy
        broadcasting.
    """
    amplitude, frequency = p[0], p[1]
    return amplitude * np.sin(100 * frequency * x)
def myfunc_stacked(X):
    """Apply ``myfunction`` to every row of a stacked input array.

    Each row of ``X`` is read as ``[x, p1, p2]``: column 0 is the sample
    position and columns 1 and 2 are the two function parameters.

    Args:
        X: 2-D NumPy array with at least three columns.

    Returns:
        1-D NumPy array holding one function value per row of ``X``.
    """
    return np.array([myfunction(row[0], [row[1], row[2]]) for row in X])
def stack_params(X, p=None):
    """Attach two parameter columns to every sample position in ``X``.

    Args:
        X: 1-D sequence of sample positions.
        p: Optional indexable pair of parameters. When given, ``p[0]`` and
            ``p[1]`` are repeated for every sample. When ``None``, each
            sample gets its own pair drawn from ``np.random.rand``.

    Returns:
        NumPy array whose rows are ``[x, p0, p1]``.
    """
    count = len(X)
    if p is not None:
        # Same parameter pair copied alongside every sample.
        first = np.ones(count) * p[0]
        second = np.ones(count) * p[1]
    else:
        # Independent random parameters for every sample.
        first = np.random.rand(count)
        second = np.random.rand(count)
    rows = list(zip(X, first, second))
    return np.array(rows)
def gen_data(n=1000, n_params=2, rand_all=False):
    """Generate stacked inputs and matching targets for the function to learn.

    Sample positions are ``n`` evenly spaced points on ``[-1, 1]``.

    Args:
        n: Number of samples to generate.
        n_params: Length of the random parameter vector drawn when
            ``rand_all`` is false.
        rand_all: If true, no single parameter vector is drawn and
            ``stack_params`` is called with ``p=None`` (random parameters
            per sample). Otherwise one vector, uniform on ``[-0.5, 0.5)``,
            is shared by all samples.

    Returns:
        Tuple ``(X, Y, p)``: the stacked inputs, the target values, and the
        shared parameter vector (``None`` when ``rand_all`` is true).
    """
    grid = np.linspace(-1.0, 1.0, num=n)
    p = None if rand_all else np.random.random(n_params) - 0.5
    X = stack_params(grid, p)
    Y = myfunc_stacked(X)
    return X, Y, p
def make_model(X, n_hidden):
    """Build the multilayer perceptron, moving it to the GPU when available.

    The network has two hidden layers of width ``n_hidden`` (ReLU, then
    tanh) followed by a single linear output unit.

    Args:
        X: Array of training inputs; only ``X.shape[1]`` is used, to size
            the first layer.
        n_hidden: Number of units in each hidden layer.

    Returns:
        The constructed ``nn.Module``. If CUDA is available the device count
        is printed and the model is moved to the GPU.
    """
    n_inputs = X.shape[1]

    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.hidden = nn.Linear(n_inputs, n_hidden)
            self.hidden2 = nn.Linear(n_hidden, n_hidden)
            self.out = nn.Linear(n_hidden, 1)

        def forward(self, x):
            first = F.relu(self.hidden(x))
            second = F.tanh(self.hidden2(first))
            return self.out(second)

        # Note: "backward" is automatically defined by torch.autograd

    model = Net()
    if torch.cuda.is_available():
        print("Using CUDA, number of devices = ",torch.cuda.device_count())
        model.cuda()
    return model
def plot_prediction(X_test, Y_test, Y_pred, epoch, n_epochs, p_test):
    """Plot true versus predicted values and save the figure to disk.

    Both curves are drawn against column 0 of ``X_test`` with the y-axis
    fixed to ``[-1, 1]``. The figure is written to ``progress.png`` on each
    call and then closed.

    Args:
        X_test: 2-D array of test inputs; column 0 is used as the x-axis.
        Y_test: True target values (drawn in blue).
        Y_pred: Predicted values (drawn in red).
        epoch: Current epoch number, shown in the title.
        n_epochs: Total number of epochs, shown in the title.
        p_test: Parameters used for the test data, shown in the title.
    """
    fig = plt.figure()
    plt.clf()
    ax = plt.subplot(1, 1, 1)
    ax.set_ylim([-1, 1])
    title = "Epoch #" + str(epoch) + "/" + str(n_epochs) + ", p = " + str(p_test)
    ax.set_title(title)
    x_axis = X_test[:, 0]
    ax.plot(x_axis, Y_test, 'b-', label="True")
    ax.plot(x_axis, Y_pred, 'r-', label="Predicted")
    ax.legend()
    fig.savefig('progress.png')
    plt.close(fig)
def train(model, epoch, trainloader, criterion, optimizer, log_interval=100):
    """Run one training epoch over ``trainloader``.

    Args:
        model: The ``nn.Module`` to train.
        epoch: Current epoch number (only used in the progress message).
        trainloader: Iterable of ``(data, target)`` batches. It must support
            ``len()`` and expose a ``dataset`` attribute, both of which are
            used for the progress message.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        optimizer: Optimizer updating the model's parameters.
        log_interval: Print a progress line every this many batches.
    """
    model.train()
    use_cuda = torch.cuda.is_available()
    for batch_idx, (data, target) in enumerate(trainloader):
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        # Give the target the same shape as the output (e.g. (batch,) ->
        # (batch, 1)) so the loss compares element by element instead of
        # relying on a size mismatch being tolerated or broadcast.
        loss = criterion(output, target.view_as(output))
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            # Newer PyTorch returns a 0-dim loss read with .item(); older
            # releases return a 1-element tensor read with .data[0].
            loss_value = loss.item() if hasattr(loss, "item") else loss.data[0]
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(trainloader.dataset),
                100. * batch_idx / len(trainloader), loss_value))
def predict(model, testloader, X_test, Y_test, epoch, n_epochs, p_test):
    """Evaluate the model on the test loader and plot the predictions.

    Args:
        model: The trained ``nn.Module``.
        testloader: Iterable of ``(data, target)`` batches; the targets are
            ignored here because ``Y_test`` supplies the true values.
        X_test: Test inputs, passed through to ``plot_prediction``.
        Y_test: True target values, passed through to ``plot_prediction``.
        epoch: Current epoch number (for the plot title).
        n_epochs: Total number of epochs (for the plot title).
        p_test: Parameters used for the test data (for the plot title).
    """
    model.eval()
    print(" Plotting....")
    use_cuda = torch.cuda.is_available()
    batches = []
    for data, _ in testloader:
        if use_cuda:
            data = data.cuda()
        data = Variable(data, volatile=True)
        output = model(data)
        # Strip the autograd wrapper and copy to host memory before
        # converting; a GPU tensor cannot be turned into a NumPy array.
        batches.append(output.data.cpu().numpy())
    # Flatten the per-batch outputs into one 1-D array, one value per test
    # sample, so it can be plotted against the x column of X_test.
    Y_pred = np.concatenate(batches).ravel() if batches else np.array([])
    plot_prediction(X_test, Y_test, Y_pred, epoch, n_epochs, p_test)
def _make_loader(X, Y, batch_size, shuffle):
    """Wrap NumPy inputs and targets in a DataLoader of float32 tensors."""
    # NumPy arrays default to float64, which torch.from_numpy turns into
    # DoubleTensors; the model's weights are FloatTensors, so cast here to
    # avoid the Double/Float mismatch inside nn.Linear.
    inputs = torch.from_numpy(X).float()
    # One target column per sample so targets line up with the model's
    # single-unit output.
    targets = torch.from_numpy(Y).float().view(-1, 1)
    dataset = torch.utils.data.TensorDataset(inputs, targets)
    return torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=shuffle, num_workers=2)


def main():
    """Generate data, build the model, and run the training loop.

    Training data use random parameters per sample; test data use a single
    shared parameter vector. Every ``predict_every`` epochs the model is
    evaluated on the test set and a progress plot is written.
    """
    np.random.seed(2)

    # parameters for 'size' of run
    n_hidden = 100
    batch_size = 20
    n_train = 10000
    n_test = 1000

    print("Setting up data")
    X_train, Y_train, p_train = gen_data(n=n_train, rand_all=True)
    X_test, Y_test, p_test = gen_data(n=n_test)
    trainloader = _make_loader(X_train, Y_train, batch_size=batch_size, shuffle=True)
    # Test samples are fed one at a time, in order, so predictions line up
    # with X_test for plotting.
    testloader = _make_loader(X_test, Y_test, batch_size=1, shuffle=False)

    print("Defining model")
    model = make_model(X_train, n_hidden)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.MSELoss()

    n_epochs = 10000
    predict_every = 20
    for epoch in range(n_epochs):
        print("(Outer) Epoch ",epoch," of ",n_epochs,":")
        train(model, epoch, trainloader, criterion, optimizer)
        if epoch % predict_every == 0:
            predict(model, testloader, X_test, Y_test, epoch, n_epochs, p_test)
# Script entry point: run main() only when executed directly, not on import.
if __name__ == "__main__":
    main()