Hi, I have a very simple linear network:
class Net(nn.Module):
    """Single fully connected layer.

    The layer takes ``int(floor(hidden * measurement_rate))`` input features
    and produces ``hidden`` output features.
    """

    def __init__(self, measurement_rate, hidden=block_size**2):
        """Create the linear layer.

        Args:
            measurement_rate: Multiplier applied to ``hidden`` to obtain the
                input width (the product is floored to an integer).
            hidden: Number of output features of the layer.
        """
        super().__init__()
        n_inputs = int(np.floor(hidden * measurement_rate))
        self.fc = nn.Linear(n_inputs, hidden)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.fc(x)
def weights_init(m):
    """Re-initialise the weight of an ``nn.Linear`` module from N(0, 0.01**2).

    Written to be passed to ``Module.apply``. Modules that are not
    ``nn.Linear`` are left untouched, and so is the bias of a linear layer.

    Args:
        m: The module visited by ``Module.apply``.
    """
    if isinstance(m, nn.Linear):
        # nn.init.normal_ fills the parameter in place under no_grad, which
        # avoids mutating it through the discouraged ``.data`` attribute.
        nn.init.normal_(m.weight, mean=0.0, std=0.01)
def _evaluate(model, loader, criterion, device):
    """Return the per-sample mean loss of ``model`` over ``loader``.

    The model is switched to eval mode for the pass and put back into
    whatever mode it was in before. Gradients are not tracked, since no
    backward pass is run on these losses.
    """
    was_training = model.training
    model.eval()
    total = 0.0
    with torch.no_grad():
        for label, data in loader:
            output = model(label.to(device))
            # Weight each batch mean by its batch size so the final division
            # by the number of samples gives a per-sample mean.
            total += criterion(output, data.to(device)).item() * data.shape[0]
    model.train(mode=was_training)
    return total / len(loader.sampler)


# Build the model, apply the custom weight initialisation and move it to the
# target device.
model = Net(measurement_rate, block_size**2)
model.apply(weights_init)
model.to(device)

learning_rate = 0.001
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
scheduler = StepLR(optimizer, step_size=200000, gamma=0.5)
criterion = nn.MSELoss()

start_time = time.time()
n_epochs = 1000000
print_every = 500
# float('inf') instead of np.Inf: the capitalised alias was removed in NumPy 2.0.
test_loss_min = float('inf')

for epoch in range(n_epochs):
    print('Epoch:', epoch, 'LR:', scheduler.get_last_lr())
    train_loss = 0
    model.train()
    # Each batch is unpacked as (label, data): ``label`` is fed to the model
    # and ``data`` is the regression target.
    for iteration, (label, data) in enumerate(Train_Loader):
        optimizer.zero_grad()
        output = model(label.to(device))
        loss = criterion(output, data.to(device))
        # Stop as soon as the loss is NaN/Inf. Continuing would push NaN into
        # every weight and overwrite the saved checkpoint with it.
        if not torch.isfinite(loss):
            raise RuntimeError(
                'Non-finite training loss ({}) at epoch {}, iteration {}. '
                'Check the inputs and targets for NaN/Inf values, scale the '
                'data to a small range, or lower the learning rate.'.format(
                    loss.item(), epoch, iteration))
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * data.shape[0]

        # NOTE(review): the pasted code lost its indentation; the periodic
        # evaluation is assumed to sit inside the batch loop - confirm.
        if iteration % print_every == 0:
            test_loss = _evaluate(model, Test_Loader, criterion, device)
            if test_loss <= test_loss_min:
                print('\t Test loss decreased ({:.6f}-->{:.6f}). '.format(test_loss_min, test_loss))
                test_loss_min = test_loss

    # NOTE(review): assumed to run once per epoch; if the step_size of 200000
    # was meant in iterations, move this call into the batch loop - confirm.
    scheduler.step()
    train_loss = train_loss / len(Train_Loader.sampler)
    print(len(Train_Loader.sampler))
    # Printing does not modify the tensor, so no deep copy is needed.
    print('weight', model.fc.weight.detach())
    print('Epoch: {} \tTraining loss:{:.6f}. Saving Model...'.format(epoch + 1, train_loss))
    torch.save(model.state_dict(), ('/content/drive/My Drive/dataset/model_step1.pt'))

end_time = time.time()
print('Duration: {}'.format(end_time - start_time))
I printed the weights, and all of them are NaN.
The loss is also NaN.
How can I fix this problem?