All parameter gradients in my model are None

I started a new project on my own, from the dataloader to the trainer. However, when I run it, the results are weird. After debugging, I found that the gradients of all data and parameters are None, although some of them have "requires_grad=True". I have no idea how to fix this, or even how to search or ask for help. Can anyone give me a tip? I'd appreciate it very much.

This is the training part:

import torch
import numpy as np
import copy
import sys

import warnings
warnings.filterwarnings('ignore')

from torch.utils.data import Dataset       
from torch.utils.data import DataLoader    
from torch.utils.data import random_split

from Dataloader.Dataloader import MudDataset
from Model.DenseNet6 import DenseNet6 as MyModel
#### fixing ####
# from config import cfg
"""
    1. Prepare dataset
        tools: Dataset and DataLoader
        
    2. Design model using Class
        inherit from nn.Module
        
    3. Construct loss and optimizer
        using PyTorch API
        
    4. Training cycle
        forward, backward, update
"""
dir = 'D:/project/ML4mud/new.xlsx'
model_dir = 'D:/project/ML4mud/run'
device = 'cuda'
val_rate = 0.1
batch_size = 4
num_workers = 2
# lr = 1e-4
lr = 0.2
epochs = 10000
muddataset = MudDataset(dir)
n_val = round(val_rate * len(muddataset))
train_dataset, val_dataset = random_split(muddataset,
                                          [len(muddataset) - n_val, n_val])  # sizes must sum to len(muddataset)

train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=num_workers)

val_loader = DataLoader(dataset=val_dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=num_workers)

model = MyModel()
model = torch.nn.DataParallel(model)
model.to(device)

criterion = torch.nn.MSELoss()
# criterion = torch.nn.BCELoss()
# criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), weight_decay=0.01, lr=lr)

best_loss = torch.tensor(float('inf'), device=device)

if __name__ == "__main__":
    # train
    model.train()
    for epoch in range(epochs):
        print('epoch:', epoch)
        for i, sample in enumerate(train_loader):

            data, labels = sample   # Tensor type

            data = data.to(device)
            labels = labels.unsqueeze(1).to(device)

            pred = model(data)
            loss = criterion(pred, labels)

            print('loss:', loss.item())
            print("pred:", pred, "label:", labels)

            optimizer.zero_grad()
            loss.backward()

            # for name, param in model.named_parameters():
            #     print(name, param.grad.abs().sum())

            optimizer.step()

            # torch.optim.lr_scheduler.StepLR(optimizer, 10, gamma=0.9, last_epoch=-1)

            if best_loss > loss:
                best_loss = loss.detach()  # detach so the graph from this step can be freed
                best_model = copy.deepcopy(model.state_dict())
                ### model.load_state_dict(best_model)
                print('best loss is:', best_loss.item())
                print('saving best weights...')
                torch.save(best_model, model_dir + '/best_model.pth')

        torch.save(model.state_dict(), model_dir + '/last_model.pth')


        # for name, parms in model.named_parameters():
        #     print('-->name:', name, '-->grad_requirs:', parms.requires_grad, '--weight', torch.mean(parms.data),
        #           ' -->grad_value:', torch.mean(parms.grad))




    checkpoint = torch.load(model_dir + '/best_model.pth')  # load the best checkpoint saved above

    model.load_state_dict(checkpoint)

    # eval
    model.eval()
    for i, sample in enumerate(val_loader, 0):
        data, labels = sample
        data = data.to(device)
        labels = labels.unsqueeze(1).to(device)

        with torch.no_grad():
            pred = model(data)
            loss = criterion(pred, labels)

            print('loss:', loss)
            print('prediction:', pred, 'Err:', loss/labels)



Here is my model:

import torch

class DenseNet6(torch.nn.Module):
    def __init__(self):
        super(DenseNet6, self).__init__()
        self.linear_10_10 = torch.nn.Linear(10, 10)
        self.linear_13_10 = torch.nn.Linear(13, 10)
        # self.linear_double = torch.nn.Linear(10, 20)
        self.linear_10_5 = torch.nn.Linear(10, 5)
        self.linear_5_5 = torch.nn.Linear(5, 5)
        self.linear_5_1 = torch.nn.Linear(5, 1)
        self.linear_10_1 = torch.nn.Linear(10, 1)
        self.linear_1_1 = torch.nn.Linear(1, 1)
        self.relu = torch.nn.ReLU()
        self.bn10 = torch.nn.BatchNorm1d(10)
        self.bn5 = torch.nn.BatchNorm1d(5)
        self.bn1 = torch.nn.BatchNorm1d(1)



    def forward(self, x):
        x = self.relu(self.linear_13_10(x))
        x = self.relu(self.linear_10_5(x))
        x = self.relu(self.linear_5_1(x))
        x = self.linear_1_1(x)
        return x

And the dataloader is below:

import torch
import numpy as np
from torch.utils.data import Dataset       
from torch.utils.data import DataLoader     
from config import cfg
import pandas as pd

class MudDataset(Dataset):
    def __init__(self, filepath):

        data = []
        with pd.ExcelFile(filepath) as sheet:
            sheet_names = sheet.sheet_names
            for i in range(len(sheet.sheet_names)):
                cur_sheet = sheet.parse(sheet_name=sheet_names[i])
                for j in range(cur_sheet.shape[0]):
                    data.append(cur_sheet.values[j])

        self.len = len(data)

        for i in range(self.len):
            data[i][0] = self.type_encode(data[i][0])

        type_data = np.zeros((self.len, len(data[0][0])))
        prop_data = np.zeros((self.len, len(data[0]) - 1))

        for i in range(self.len):
            type_data[i] = data[i][0]
            prop_data[i] = data[i][1:]

        np_data = np.concatenate((type_data, prop_data), axis = 1)

        self.data = torch.from_numpy(np.float32(np_data[:,:-1]))
        self.label = torch.from_numpy(np.float32(np_data[:,-1]))


        # print("Data prepared ready")

    def __getitem__(self, index):  
        return self.data[index], self.label[index]


    def __len__(self):  
        return self.len

    def type_encode(self, type:str):
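        # one-hot encode the categorical type, e.g. 'c' -> [0., 0., 1., 0., 0., 0., 0.]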
        mapping = {'a':0,
                   'b':1,
                   'c':2,
                   'd':3,
                   'e':4,
                   'f':5,
                   'g':6}
        self.map_len = len(mapping)
        encoding = np.zeros(self.map_len)
        encoding[mapping[type]] = 1

        return encoding

Thanks!

Your model works as expected and all used parameters will get a valid gradient:

import torch
from Model.DenseNet6 import DenseNet6

model = DenseNet6()
x = torch.randn(1, 13)  # one dummy sample with 13 features
out = model(x)
out.mean().backward()

for name, param in model.named_parameters():
    print(name, param.grad)

All parameters of layers that were not used in the forward method will have the expected None gradient.
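If you want to re-enable the gradient check from your commented-out loop, a small sketch that skips these unused layers avoids calling .abs() on a None gradient:

for name, param in model.named_parameters():
    if param.grad is None:
        # layers never called in forward (linear_10_10, linear_5_5, linear_10_1, bn10, bn5, bn1)
        print(name, 'grad is None (unused in forward)')
    else:
        print(name, 'grad abs sum:', param.grad.abs().sum().item())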

In your code snippet you are using model = MyModel(), so I'm unsure if this is a copy/paste issue or if you are using another model.
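As a side note, if those unused layers are just leftovers rather than intentional, you could also trim the model down to the layers the forward method actually uses, so that every parameter gets a gradient; assuming that's the case, a minimal sketch would be:

import torch

class DenseNet6(torch.nn.Module):
    def __init__(self):
        super(DenseNet6, self).__init__()
        # keep only the layers used in forward
        self.linear_13_10 = torch.nn.Linear(13, 10)
        self.linear_10_5 = torch.nn.Linear(10, 5)
        self.linear_5_1 = torch.nn.Linear(5, 1)
        self.linear_1_1 = torch.nn.Linear(1, 1)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        x = self.relu(self.linear_13_10(x))
        x = self.relu(self.linear_10_5(x))
        x = self.relu(self.linear_5_1(x))
        x = self.linear_1_1(x)
        return x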