Hi,
I'm looking for some help to understand why the optimizer step always produces the same parameters regardless of the number of gradient accumulation steps I use.
Thanks!
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from torch import nn, optim, rand, sum as tsum, reshape, save
import torch


class TinyModel(torch.nn.Module):
    def __init__(self):
        super(TinyModel, self).__init__()
        self.linear1 = torch.nn.Linear(10, 2)
        # explicit dim (dim=None only raises a deprecation warning; the input is 1-D)
        self.softmax = torch.nn.Softmax(dim=0)

    def forward(self, x):
        x = self.linear1(x)
        x = self.softmax(x)
        return x


torch.manual_seed(10)
LEARNING_RATE = 1e-03

model = TinyModel()
# print(list(model.parameters()))
criterion = nn.MSELoss()  # the loss definition was missing from my snippet; assuming MSE here so it runs
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# the range() argument is the number of accumulation steps; 1 here, 10 in the other run
for i in range(1):
    x = torch.tensor([10, 20, 30, 40, 50, 60, 70, 80, 90, 100], dtype=torch.float)
    y = model(x)
    labels = torch.tensor([0.2, 0.8], dtype=torch.float)
    loss = criterion(y, labels)
    loss.backward()  # no zero_grad(), so gradients accumulate across iterations
    # print(model.linear1.weight.grad)

optimizer.step()  # single update after accumulating
print(list(model.parameters()))
Running this results in:
[Parameter containing:
tensor([[-0.0275, -0.0118, -0.1196, 0.0717, -0.1819, -0.0568, 0.1216, 0.2958,
0.0735, -0.1083],
[ 0.0313, -0.0344, 0.1301, 0.0372, 0.1249, 0.3077, -0.1303, -0.0102,
0.0737, -0.0011]], requires_grad=True), Parameter containing:
tensor([-0.0313, -0.2789], requires_grad=True)]
regardless of whether I use no accumulation at all or 10 accumulation steps.
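
For clarity, by "10 accumulations" I mean running several backward passes before a single optimizer.step(), roughly like the sketch below. ACCUM_STEPS and the zero_grad() placement are just how I would write it, reusing the model, x, labels, criterion, and optimizer defined above:

ACCUM_STEPS = 10  # the value I vary between runs

optimizer.zero_grad()
for i in range(ACCUM_STEPS):
    y = model(x)
    loss = criterion(y, labels)
    loss.backward()  # each backward() adds into .grad, so gradients sum up
optimizer.step()  # one parameter update built from the accumulated gradients
print(list(model.parameters()))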