The gradients of all params in my model are None

I started a new project on my own, writing everything from the dataloader to the trainer. However, when I run it, the results are weird. After debugging, I found that the grad of all data and params is None, although some of them have “requires_grad=True”. I have no idea how to fix this, or even how to search or ask for help. Can anyone give me a tip? I’d appreciate it very much.

this is the train part:

import torch
import numpy as np
import copy
import sys

import warnings

from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split

from Dataloader.Dataloader import MudDataset
from Model.DenseNet6 import DenseNet6 as MyModel
#### fixxing ####
# from config import cfg

# Training script outline:
#     1. Prepare dataset
#         tools: Dataset and DataLoader
#     2. Design model using Class
#         inherit from nn.Module
#     3. Construct loss and optimizer
#         using PyTorch API
#     4. Training cycle
#         forward, backward, update

dir = 'D:/project/ML4mud/new.xlsx'
# NOTE(review): model_dir has no trailing separator, so the checkpoint paths
# below become '.../runbest_model.pth' etc. Kept as-is; confirm this is intended.
model_dir = 'D:/project/ML4mud/run'
device = 'cuda'
val_rate = 0.1
batch_size = 4
num_workers = 2
# lr = 1e-4
lr = 0.2
epochs = 10000
muddataset = MudDataset(dir)

# The validation size is derived from the training size so that the two
# lengths always sum to len(muddataset); rounding both independently can be
# off by one, which random_split rejects.
train_size = round((1 - val_rate) * len(muddataset))
val_size = len(muddataset) - train_size
train_dataset, val_dataset = random_split(muddataset,
                                          [train_size, val_size])

# NOTE(review): the original DataLoader calls were truncated; batch_size and
# num_workers are the otherwise-unused settings defined above, the shuffle
# flags are an assumption -- confirm.
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=num_workers)

val_loader = DataLoader(dataset=val_dataset,
                        batch_size=batch_size,
                        shuffle=False,
                        num_workers=num_workers)

model = MyModel()
model = torch.nn.DataParallel(model)
# DataParallel requires the wrapped module's parameters to live on the GPU
# before the first forward pass.
model.to(device)

criterion = torch.nn.MSELoss()
# criterion = torch.nn.BCELoss(size_average=True)
# criterion = torch.nn.CrossEntropyLossLoss(size_average=True)
optimizer = torch.optim.SGD(model.parameters(), weight_decay=0.01, lr=lr)

best_loss = torch.Tensor(1).cuda()
best_loss[0] = np.inf

if __name__ == "__main__":
    # train
    for epoch in range(epochs):
        print('epoch:', epoch)
        model.train()
        for data, labels in train_loader:   # Tensor type

            data = data.to(device)
            # MSELoss expects the target to have the same (batch, 1) shape
            # as the prediction.
            labels = labels.unsqueeze(1).to(device)

            pred = model(data)
            loss = criterion(pred, labels)

            print('loss:', loss.item())
            print("pred:", pred, "label:", labels)

            # Gradients only exist after backward(); without these three
            # calls every param.grad stays None and the weights never change.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # for name, param in model.named_parameters():
            #     print(name, param.grad.abs().sum())

            # torch.optim.lr_scheduler.StepLR(optimizer, 10, gamma=0.9, last_epoch=-1)

            if best_loss > loss:
                # detach() so the stored best loss does not keep the
                # autograd graph of this batch alive.
                best_loss = loss.detach()
                best_model = copy.deepcopy(model.state_dict())
                ### model.load_state_dict(best_model)
                print('best loss is:', best_loss.item())
                print('saving best weights...')
                torch.save(best_model, model_dir + 'best_model.pth')

        # NOTE(review): the original save calls were garbled; saving the
        # latest weights once per epoch is an assumption -- confirm.
        torch.save(model.state_dict(), model_dir + 'last_model.pth')

        # for name, parms in model.named_parameters():
        #     print('-->name:', name, '-->grad_requirs:', parms.requires_grad, '--weight', torch.mean(parms.data),
        #           ' -->grad_value:', torch.mean(parms.grad))

    # The original loaded 'model.pth', which this script never writes; load
    # the best checkpoint saved above and actually apply it to the model.
    checkpoint = torch.load(model_dir + 'best_model.pth')
    model.load_state_dict(checkpoint)

    # eval
    model.eval()
    for data, labels in val_loader:
        # Same device placement and target shape as in training; otherwise the
        # loss mixes CPU/GPU tensors and broadcasts (batch,) against (batch, 1).
        data = data.to(device)
        labels = labels.unsqueeze(1).to(device)

        with torch.no_grad():
            pred = model(data)
            loss = criterion(pred, labels)

            print('loss:', loss)
            print('prediction:', pred, 'Err:', loss/labels)

Here is my model:

import torch

class DenseNet6(torch.nn.Module):
    """Small fully connected regressor mapping 13 input features to 1 output.

    Only ``linear_13_10``, ``linear_10_5``, ``linear_5_1`` and ``linear_1_1``
    take part in ``forward``. The remaining layers are registered (and thus
    appear in ``named_parameters`` / ``state_dict``) but are never called, so
    their parameters keep ``grad is None`` after a backward pass.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn

        # Registration order is kept stable: it determines the order of
        # named_parameters()/state_dict() and of the random initialisation.
        self.linear_10_10 = nn.Linear(in_features=10, out_features=10)
        self.linear_13_10 = nn.Linear(in_features=13, out_features=10)
        # self.linear_double = torch.nn.Linear(10, 20)
        self.linear_10_5 = nn.Linear(in_features=10, out_features=5)
        self.linear_5_5 = nn.Linear(in_features=5, out_features=5)
        self.linear_5_1 = nn.Linear(in_features=5, out_features=1)
        self.linear_10_1 = nn.Linear(in_features=10, out_features=1)
        self.linear_1_1 = nn.Linear(in_features=1, out_features=1)

        self.relu = nn.ReLU()

        self.bn10 = nn.BatchNorm1d(10)
        self.bn5 = nn.BatchNorm1d(5)
        self.bn1 = nn.BatchNorm1d(1)

    def forward(self, x):
        """Run 13 -> 10 -> 5 -> 1 with ReLU after each, then a final 1 -> 1 linear."""
        hidden = x
        for layer in (self.linear_13_10, self.linear_10_5, self.linear_5_1):
            hidden = self.relu(layer(hidden))
        return self.linear_1_1(hidden)

and the dataloader is below:

import torch
import numpy as np
from import Dataset       
from import DataLoader     
from config import cfg
import pandas as pd

# Dataset built from every sheet of an Excel workbook: each row's first field
# is one-hot encoded via type_encode, the remaining fields are kept as numbers,
# and the last column of the combined array is used as the label.
# NOTE(review): several lines of this listing are truncated or garbled (see
# the notes below); the class does not parse as shown.
class MudDataset(Dataset):
    # filepath: path handed to pd.ExcelFile; all sheets in it are parsed.
    def __init__(self, filepath):

        data = []
        with pd.ExcelFile(filepath) as sheet:
            sheet_names = sheet.sheet_names
            for i in range(len(sheet.sheet_names)):
                cur_sheet = sheet.parse(sheet_name=sheet_names[i])
                for j in range(cur_sheet.shape[0]):
                    # NOTE(review): the body of this row loop is missing from
                    # the listing; presumably it appended row j of cur_sheet
                    # to `data` as a list -- confirm against the original.

        self.len = len(data)

        # Replace the first field of every row with its one-hot encoding.
        for i in range(self.len):
            data[i][0] = self.type_encode(data[i][0])

        # Separate buffers for the one-hot part and the remaining row fields.
        type_data = np.zeros((self.len, len(data[0][0])))
        prop_data = np.zeros((self.len, len(data[0]) - 1))

        for i in range(self.len):
            type_data[i] = data[i][0]
            prop_data[i] = data[i][1:]

        # NOTE(review): the next line looks like two statements fused together;
        # presumably `np_data = np.concatenate(...)` followed by
        # `self.data = torch.from_numpy(np.float32(np_data[:,:-1]))` -- confirm.
        np_data = np.concatenate((type_data, prop_data), axis = 1) = torch.from_numpy(np.float32(np_data[:,:-1]))
        # The last column of the combined array is the regression target.
        self.label = torch.from_numpy(np.float32(np_data[:,-1]))

        # print("Data prepared ready")

    # Returns the (features, label) pair for one sample.
    # NOTE(review): `return[index]` is most likely a garbled
    # `return self.data[index]`; as written it returns the list [index].
    def __getitem__(self, index):  
        return[index], self.label[index]

    # Number of rows collected in __init__.
    def __len__(self):  
        return self.len

    # One-hot encodes a type key: a zero vector of length len(mapping) with a
    # 1 at mapping[type]. Also stores len(mapping) on self.map_len.
    # NOTE(review): the `mapping` literal is truncated after its first entry,
    # so the full set of type keys is unknown from this listing.
    def type_encode(self, type:str):
        mapping = {'a':0,
        self.map_len = len(mapping)
        encoding = np.zeros(self.map_len)
        encoding[mapping[type]] = 1

        return encoding


Your model works as expected and all used parameters will get a valid gradient:

model = DenseNet6()
x = torch.randn(1, 13)
out = model(x)
# .grad is only populated by a backward pass; without this call every
# parameter (used or not) would still print None.
out.mean().backward()

# Layers used in forward() now show a gradient tensor; unused ones stay None.
for name, param in model.named_parameters():
    print(name, param.grad)

All parameters of layers which were not used in the forward method will have the expected None gradient.

In your code snippet you are using model = MyModel() so I’m unsure if this is a copy/paste issue or if you are using another model.