Hi! I'm new to PyTorch and I'm learning linear regression with it.
Below is my Jupyter notebook for building the model.
In In[11] (the last block shown below), I ran into these problems:
- A warning: "The .grad attribute of a Tensor that is not a leaf Tensor is being accessed…"
- TypeError: unsupported operand type(s) for *: 'float' and 'NoneType' in line 17 of In[19].
I've been reading the PyTorch docs and threads on similar problems, but I'm still totally confused about what happened.
Any tips on how I can solve these problems?
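If it helps, here is a tiny standalone snippet that seems to reproduce both symptoms for me (my guess, not verified, is that my notebook hits the same thing):

import torch

# on my machine device resolves to "cuda:0", so .to(device) really copies the tensor;
# on a CPU-only machine this .to() is a no-op and may not reproduce the warning
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
t = torch.zeros(1, requires_grad=True).to(device)
print(t.is_leaf)            # False on GPU: .to() returned a new, non-leaf tensor
(t * 2).sum().backward()
print(t.grad)               # None, together with the "not a leaf Tensor" warning
# so an update such as 0.02 * t.grad / 10 raises
# TypeError: unsupported operand type(s) for *: 'float' and 'NoneType'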
import libs
In[1]
import torch
import random
import matplotlib.pyplot as plt
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
generate data and labels
Generate data with $y = xw + b + \epsilon$.
Assuming there are $n$ samples and each sample has $m$ dimensions, $x$ is an $n \times m$ matrix, $w$ is an $m \times 1$ vector, and $b$ and $\epsilon$ are scalars.
In[2]
def generate_data(real_w, real_b, num_of_samples, num_of_demensions_per_sample):
    """
    Generate data with y = xw + b + ϵ.
    real_w: weight <- torch.tensor
        real_w.shape = Size([1, num_of_demensions_per_sample]) or Size([num_of_demensions_per_sample, ]);
        it is reshaped to a column vector below
    real_b: bias <- torch.tensor or int
    num_of_samples <- int
    num_of_demensions_per_sample <- int
    return: x, y -> (torch.tensor, torch.tensor)
        x.shape = Size([num_of_samples, num_of_demensions_per_sample])
        y.shape = Size([num_of_samples, 1])
    """
    real_w = real_w.reshape((-1, 1))  # reshape returns a new tensor, so assign the result
    x = torch.normal(0, 1, (num_of_samples, num_of_demensions_per_sample)).to(device)
    # generate an n * m matrix whose elements follow the standard normal distribution
    y = (torch.matmul(x, real_w) + real_b).to(device)  # calculate y without random noise
    y += torch.normal(0, 0.01, y.shape).to(device)  # add random noise (normally distributed) to y
    return x, y.reshape((-1, 1))  # return the samples together with their labels
In[3]
real_w = torch.tensor([3.2, 6.4, -1.5, 5]).to(device)
real_b = torch.tensor([4.7]).to(device)
num_of_samples = 1000
num_of_demensions_per_sample = 4
x, y = generate_data(real_w, real_b, num_of_samples, num_of_demensions_per_sample)
print(f"real_w.T = {real_w.T}, real_b = {real_b}")
print(f"x.shape = {x.shape}, y.shape = {y.shape}.")
data iter
receive x and y, then return iterable batches of data
In[4]
def data_iter(x, y, batch_size):
    """
    Receive x and y, then yield iterable batches of data.
    x: examples and their dimensions <- torch.tensor
        x.shape = Size([num_of_samples, num_of_demensions_per_sample])
    y: examples' labels <- torch.tensor
        y.shape = Size([num_of_samples, 1])
    batch_size: size of each batch <- int
    yield: (x_batch, y_batch)
        x_batch.shape = Size([batch_size, num_of_demensions_per_sample])
        y_batch.shape = Size([batch_size, 1])
    """
    num_of_examples = len(x)
    indexes = list(range(num_of_examples))  # list of indexes from 0 to num_of_examples - 1
    random.shuffle(indexes)
    for i in range(0, num_of_examples, batch_size):
        batch_indexes = torch.tensor(indexes[i: min(i + batch_size, num_of_examples)])
        yield x[batch_indexes], y[batch_indexes]
In[5]
# test
batch_size = 10
for sample, label in data_iter(x, y, batch_size):
    print(sample)
    print(label)
    break
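(As an aside, I guess the shuffling could also be done with torch.randperm instead of random.shuffle; this variant is just a sketch and unrelated to my errors:)

def data_iter_randperm(x, y, batch_size):
    # same batching idea, but shuffling with a random permutation of indexes
    perm = torch.randperm(len(x))
    for i in range(0, len(x), batch_size):
        batch_indexes = perm[i: i + batch_size]
        yield x[batch_indexes], y[batch_indexes]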
build model
In[6]
# params
w = torch.normal(0, 0.01, size=(num_of_demensions_per_sample, 1), requires_grad=True).to(device)
b = torch.zeros(1, requires_grad=True).to(device)
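While debugging I wondered whether the .to(device) calls above matter, since .to() returns new tensors. A variant that creates the parameters directly on the device (just my guess, not verified as the fix) would be:

# create the parameters on the device from the start, so no later .to() is needed
w = torch.normal(0, 0.01, size=(num_of_demensions_per_sample, 1), device=device, requires_grad=True)
b = torch.zeros(1, device=device, requires_grad=True)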
In[7]
# y
def estimated(x, w, b):
    """
    Returns y according to y = xw + b.
    x <- torch.tensor
        x.shape = Size([a_certain_number, num_of_demensions_per_sample])
    w <- torch.tensor
        w.shape = Size([num_of_demensions_per_sample, 1])
    b <- torch.tensor
        b.shape = Size([1, ])
    return: y -> torch.tensor
        y.shape = Size([a_certain_number, 1])
    """
    return (torch.matmul(x, w) + b).to(device)
loss function
In[8]
def squared_loss(estimation, label):
    """
    Loss function between the estimated value and the truth.
    estimation <- torch.tensor
        estimation.shape = Size([a_certain_number, 1])
    label <- torch.tensor
        label.shape == estimation.shape
    return: l(estimation, label) -> torch.tensor
        l.shape == estimation.shape
    """
    return (0.5 * (estimation - label.reshape_as(estimation)) ** 2).to(device)
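For reference, the loss I intend per element is $\ell(\hat{y}, y) = \frac{1}{2}(\hat{y} - y)^2$; the factor $\frac{1}{2}$ just makes the derivative come out as $\hat{y} - y$.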
In[9]
# optimizer
def msgd(params, lr, batch_size):
    """
    Mini-batch stochastic gradient descent.
    params: [w, b] <- [torch.tensor, torch.tensor]
        w.shape = Size([num_of_demensions_per_sample, 1])
        b.shape = Size([1, ])
    lr: learning rate
        0 < lr < 1
    batch_size <- int
    """
    with torch.no_grad():  # no need for grad when updating params
        for param in params:
            param -= lr * param.grad / batch_size  # TODO: PROBLEM OCCURRED HERE
            param.grad.zero_()
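What I intend msgd to implement is the update $(w, b) \leftarrow (w, b) - \frac{\eta}{|B|} \sum_{i \in B} \partial_{(w, b)} \ell^{(i)}(w, b)$, where $\eta$ is the learning rate and $B$ is the current mini-batch (the division by batch_size pairs with l.sum().backward() in the training loop below).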
train
In[10]
num_of_epochs = 10
net = estimated
loss = squared_loss
lr = 0.02
In[11]
for epoch in range(num_of_epochs):
    # train
    for example, label in data_iter(x, y, batch_size):
        l = loss(net(example, w, b), label)
        l.sum().backward()
        msgd([w, b], lr, batch_size)
    # estimate
    with torch.no_grad():
        train_loss = loss(net(x, w, b), y)
        print(f"epoch {epoch + 1:3d}, loss = {float(train_loss.mean()):.2f}")