I started a new project on my own, from the dataloader to the trainer. However, when I run it, the results are weird. After debugging, I found that the grad of all data and params is None, although some of them have “requires_grad=True”. I have no idea how to fix this, or even how to search or ask for help. Can anyone give me a tip? I’d appreciate it very much.
This is the training part:
import torch
import numpy as np
import copy
import sys
import warnings
warnings.filterwarnings('ignore')
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from Dataloader.Dataloader import MudDataset
from Model.DenseNet6 import DenseNet6 as MyModel
#### fixxing ####
# from config import cfg
"""
1. Prepare dataset
tools: Dataset and DataLoader
2. Design model using Class
inherit from nn.Module
3. Construct loss and optimizer
using PyTorch API
4. Training cycle
forward, backward, update
"""
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# NOTE(review): `dir` shadows the builtin of the same name; the name is kept
# unchanged so that anything referring to it keeps working.
dir = 'D:/project/ML4mud/new.xlsx'      # spreadsheet read by MudDataset
model_dir = 'D:/project/ML4mud/run'     # where checkpoints are written
# Fall back to the CPU when CUDA is unavailable instead of failing later.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
val_rate = 0.1
batch_size = 4
num_workers = 2
# lr = 1e-4
lr = 0.2
epochs = 10000

# ---------------------------------------------------------------------------
# Data
# ---------------------------------------------------------------------------
muddataset = MudDataset(dir)
# Derive the training size from the validation size so the two lengths always
# add up to len(muddataset). Rounding both fractions independently can be off
# by one (e.g. 15 samples -> 14 + 2), which makes random_split raise.
val_size = round(val_rate * len(muddataset))
train_size = len(muddataset) - val_size
train_dataset, val_dataset = random_split(muddataset, [train_size, val_size])
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
)

# ---------------------------------------------------------------------------
# Model, loss and optimizer
# ---------------------------------------------------------------------------
model = MyModel()
model = torch.nn.DataParallel(model)
model.to(device)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), weight_decay=0.01, lr=lr)
# One-element tensor holding the lowest training loss seen so far; created on
# `device` directly rather than through an unconditional .cuda() call.
best_loss = torch.full((1,), float('inf'), device=device)
if __name__ == "__main__":
    # Local import: this script block is the only user of os.
    import os

    # Build checkpoint paths with a real path separator. Plain string
    # concatenation produced ".../runbest_model.pth" next to the run folder.
    os.makedirs(model_dir, exist_ok=True)
    best_path = os.path.join(model_dir, 'best_model.pth')
    last_path = os.path.join(model_dir, 'last_model.pth')

    # ----------------------------- train -----------------------------
    model.train()
    # Start from the initial weights so `best_model` is always defined, even
    # if no batch ever improves on the initial best loss (e.g. NaN losses).
    best_model = copy.deepcopy(model.state_dict())
    for epoch in range(epochs):
        print('epoch:', epoch)
        for data, labels in train_loader:
            data = data.to(device)
            # Labels come out of the dataset as shape (batch,); add a trailing
            # dimension so they match the (batch, 1) model output.
            labels = labels.unsqueeze(1).to(device)

            pred = model(data)
            loss = criterion(pred, labels)
            print('loss:', loss.item())
            print("pred:", pred, "label:", labels)

            optimizer.zero_grad()
            loss.backward()
            # To inspect gradients, iterate model.named_parameters() here:
            # .grad is populated on parameters after backward().
            optimizer.step()

            if loss.item() < best_loss.item():
                # detach() so the stored best loss does not keep the autograd
                # graph of this batch alive.
                best_loss = loss.detach()
                best_model = copy.deepcopy(model.state_dict())

    print('best loss is:', best_loss.item())
    print('saving best weights...')
    torch.save(best_model, best_path)
    torch.save(model.state_dict(), last_path)

    # Reload the best weights that were just written. The original loaded
    # 'model.pth', a file this script never creates.
    checkpoint = torch.load(best_path, map_location=device)
    model.load_state_dict(checkpoint)

    # ----------------------------- eval ------------------------------
    model.eval()
    with torch.no_grad():
        for data, labels in val_loader:
            # Same device placement and label shape as in training; without
            # the unsqueeze MSELoss would broadcast (batch, 1) against (batch,).
            data = data.to(device)
            labels = labels.unsqueeze(1).to(device)
            pred = model(data)
            loss = criterion(pred, labels)
            print('loss:', loss)
            print('prediction:', pred, 'Err:', loss/labels)
Here is my model:
import torch
class DenseNet6(torch.nn.Module):
    """Small fully connected network mapping 13 input features to 1 output.

    The forward pass uses the chain 13 -> 10 -> 5 -> 1 -> 1 with a ReLU after
    each of the first three linear layers. The remaining layers created in
    ``__init__`` are registered on the module but not used by ``forward``.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn
        # Layers are created in the same order as before so that parameter
        # registration (and therefore state_dict key order) is unchanged.
        self.linear_10_10 = nn.Linear(10, 10)
        self.linear_13_10 = nn.Linear(13, 10)
        # self.linear_double = nn.Linear(10, 20)
        self.linear_10_5 = nn.Linear(10, 5)
        self.linear_5_5 = nn.Linear(5, 5)
        self.linear_5_1 = nn.Linear(5, 1)
        self.linear_10_1 = nn.Linear(10, 1)
        self.linear_1_1 = nn.Linear(1, 1)
        self.relu = nn.ReLU()
        self.bn10 = nn.BatchNorm1d(10)
        self.bn5 = nn.BatchNorm1d(5)
        self.bn1 = nn.BatchNorm1d(1)

    def forward(self, x):
        """Return the network output for a batch ``x`` with 13 features per row."""
        # NOTE(review): the last ReLU acts on a single unit; if that unit goes
        # negative for every sample the output becomes constant -- worth
        # checking if training stalls.
        activated_layers = (self.linear_13_10, self.linear_10_5, self.linear_5_1)
        for layer in activated_layers:
            x = self.relu(layer(x))
        return self.linear_1_1(x)
and the dataloader is below:
import torch
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from config import cfg
import pandas as pd
class MudDataset(Dataset):
    """Dataset built from every sheet of an Excel workbook.

    Each spreadsheet row is expected to start with a type letter (one of the
    keys of ``TYPE_MAPPING``) followed by numeric columns. The type letter is
    one-hot encoded and prepended to the numeric columns; the last column
    becomes the label and everything before it becomes the feature vector.
    """

    # Type letter -> index of the 1 in the one-hot encoding.
    TYPE_MAPPING = {'a': 0,
                    'b': 1,
                    'c': 2,
                    'd': 3,
                    'e': 4,
                    'f': 5,
                    'g': 6}

    def __init__(self, filepath):
        """Read all sheets of ``filepath`` and build float32 tensors.

        Args:
            filepath: path of the Excel workbook to load.

        Raises:
            ValueError: if the workbook contains no data rows.
            KeyError: if a row's type letter is not in ``TYPE_MAPPING``.
        """
        rows = []
        with pd.ExcelFile(filepath) as workbook:
            for sheet_name in workbook.sheet_names:
                # Take `.values` once per sheet: on a mixed-dtype frame every
                # access builds a fresh array, so doing it per row is quadratic.
                rows.extend(workbook.parse(sheet_name=sheet_name).values)
        if not rows:
            raise ValueError(f"no data rows found in {filepath!r}")

        self.len = len(rows)
        # One-hot encoded type column, shape (len, number of types).
        type_data = np.stack([self.type_encode(row[0]) for row in rows])
        # Remaining columns converted to floats, shape (len, columns - 1).
        prop_data = np.array([row[1:] for row in rows], dtype=np.float64)
        np_data = np.concatenate((type_data, prop_data), axis=1)

        self.data = torch.from_numpy(
            np.ascontiguousarray(np_data[:, :-1], dtype=np.float32))
        self.label = torch.from_numpy(
            np.ascontiguousarray(np_data[:, -1], dtype=np.float32))
        # print("Data prepared ready")

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.data[index], self.label[index]

    def __len__(self):
        """Return the number of rows loaded from the workbook."""
        return self.len

    def type_encode(self, type: str):
        """Return the one-hot encoding of a type letter as a float array.

        Also stores the number of known types in ``self.map_len``.

        Raises:
            KeyError: if ``type`` is not a key of ``TYPE_MAPPING``.
        """
        mapping = self.TYPE_MAPPING
        self.map_len = len(mapping)
        encoding = np.zeros(self.map_len)
        encoding[mapping[type]] = 1
        return encoding
Thanks!