I am trying to update weights manually without using optimizer, but somehow model weights are NoneType…
class TwoLayerNet(torch.nn.Module):
def __init__(self):
super(TwoLayerNet, self).__init__()
self.linear1 = torch.nn.Linear(784, 128)
self.linear2 = torch.nn.Linear(128, 128)
self.linear3 = torch.nn.Linear(128, 10)
self.a1_relu = None
self.a2_relu = None
def forward(self, k):
self.a1_relu = self.linear1(k).clamp(min=0)
self.a2_relu = self.linear2(self.a1_relu).clamp(min=0)
emb = self.linear3(self.a2_relu)
return emb
model = TwoLayerNet()
model.cuda()
criterion = torch.nn.MSELoss(reduction='sum')
x_train, y_train, x_val, y_val = get_data.get_mnist()
minibatch_size = 8
epochs = 100
av_loss = 0
ll = list(range(0, x_train.shape[0], minibatch_size))
learning_rate = 0.5
model.train()
for e in range(epochs):
for i in ll:
k = x_train[i:i + minibatch_size]
y = y_train[i:i + minibatch_size]
k = torch.tensor(k, device="cuda", dtype=torch.float32, requires_grad=True)
y = torch.tensor(y, device="cuda", dtype=torch.float32, requires_grad=True)
model.zero_grad()
y_pred = model.forward(k)
y_pred = torch.argmax(y_pred, dim=1)
loss = criterion(y_pred, y)
av_loss += loss.item()
loss.backward()
with torch.no_grad():
model.linear1.weight -= learning_rate * model.linear1.weight.grad
model.linear2.weight -= learning_rate * model.linear2.weight.grad
model.linear3.weight -= learning_rate * model.linear3.weight.grad
here the weights of .grad values are None. Why So? Is there some problem in the way I compute loss?
Thank you!
P.S.I am updating them manually on purpose, I know about optimizers.