ptrblck, thank you for your advice!
I updated my code according to your advice, but the result is almost the same: the best MSE is 0.12xx. You can try it.
Also, initializing the batchnorm weights with nn.init.normal_ comes from https://discuss.pytorch.org/t/keras-gives-better-performance-in-both-training-speed-and-result-generalization-than-pytorch-in-simple-mlp-help/102904 — it really works.
If I don't set that particular random seed and initialize the linear weights with nn.init.normal_, the best MSE is only 0.13xx–0.14xx.
import numpy as np
import gc
import pandas as pd
from pandas import Series,DataFrame
import os,glob
import torch
from torch.utils.data.dataset import TensorDataset
from torch import optim,nn
from torch.utils.data import DataLoader
from torch.utils.data.dataset import random_split
import functools
# Module-level configuration, read by the model class and the training loop below.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
with_bn = True  # insert BatchNorm1d after each hidden-layer ReLU
# hyper parameters
lr = 1.e-3        # Adam learning rate
n_epochs = 500    # number of training epochs
bs = 32           # mini-batch size
valid_pct = 0.15  # split fraction used below when carving train/validation subsets
lam = 2.e-3       # L1 regularization strength (default for l1_penalty)
def load_data_t(filename:str, x_cols:list, y_col:str = None,shuffle=True):
    """Load a tab-separated file and return feature (and optional target) tensors.

    Args:
        filename: path to a TSV file readable by pandas.
        x_cols: list of feature column names.
        y_col: optional target column name; when given, (features, target) is returned.
        shuffle: shuffle rows deterministically (fixed seed, so feature and
            target permutations stay aligned).

    Returns:
        Float tensor of features, or a (features, target) pair when y_col is given.
    """
    d = pd.read_csv(filename, sep='\t')
    x = get_data_t(d, x_cols, shuffle)
    if y_col is not None:
        # Fix: forward `shuffle` to the target extraction as well. The original
        # always shuffled the target (default True), so calling with
        # shuffle=False returned misaligned feature/target rows.
        return x, get_data_t(d, y_col, shuffle)
    return x

def get_data_t(df,cols,shuffle=True):
    """Extract `cols` from `df` as a float32 tensor, optionally shuffled.

    The RNG is reseeded with a fixed seed on every call, so two successive
    calls (features, then target) apply the identical permutation.
    """
    x = np.array(df[cols].values)
    if shuffle:
        np.random.seed(202)  # fixed seed keeps x/y permutations in sync
        np.random.shuffle(x)
    return torch.from_numpy(x).float()
def l1_penalty(model, l1_lambda=lam) -> torch.Tensor:
    """Return l1_lambda times the sum of absolute values of all parameters."""
    per_param = [param.abs().sum() for param in model.parameters()]
    return l1_lambda * torch.stack(per_param).sum()
def l2_penalty(model, l2_lambda=1.5e-2) -> torch.Tensor:
    """Return l2_lambda times the sum of squared values of all parameters."""
    per_param = [torch.sum(torch.pow(param, 2.0)) for param in model.parameters()]
    return l2_lambda * torch.stack(per_param).sum()
def make_train_step(model, optimizer,loss_fn, penalty_fn = None):
    """Build a closure that performs a single optimization step.

    The returned function takes (x, y), computes loss_fn(y, model(x)) plus an
    optional penalty_fn(model) regularization term, backpropagates the total,
    steps the optimizer, and returns (loss_value, penalty).
    """
    def train_step(x, y):
        optimizer.zero_grad()
        prediction = model(x)
        data_loss = loss_fn(y, prediction)
        reg = penalty_fn(model) if penalty_fn else 0
        # Backprop the regularized objective, then apply the update.
        (data_loss + reg).backward()
        optimizer.step()
        return data_loss.item(), reg
    return train_step
def fit(dl:DataLoader,model, loss_fn, penalty_fn= None, optimizer=None):
    """Run one training epoch over `dl`.

    Args:
        dl: DataLoader yielding (x, y) batches.
        model: module to train (switched to train mode here).
        loss_fn: loss callable, invoked as loss_fn(y, yhat).
        penalty_fn: optional regularizer, invoked as penalty_fn(model).
        optimizer: optimizer stepping the model's parameters.

    Returns:
        (mean_loss, last_penalty): mean of per-batch losses (0 if the loader
        is empty) and the penalty from the last batch (0 if none ran).
    """
    losses = []
    penalty = 0  # fix: was undefined (NameError) when the loader yielded no batches
    model.train()
    train_step = make_train_step(model, optimizer, loss_fn, penalty_fn)
    optimizer.zero_grad()
    # Move each batch to wherever the model's parameters live. The original
    # used the module-level `device`, which crashed on CUDA machines because
    # the model itself was never moved there.
    dev = next(model.parameters()).device
    for x_batch, y_batch in dl:
        loss, penalty = train_step(x_batch.to(dev), y_batch.to(dev))
        losses.append(loss)
    if losses:
        return sum(losses) / len(losses), penalty
    return 0, penalty
def validate(val_loader,model,loss_fn):
    """Compute the mean per-batch validation loss (0 if the loader is empty).

    Runs the model in eval mode under torch.no_grad(); parameters and
    gradients are untouched.
    """
    model.eval()
    # Use the device the model actually lives on (the original relied on the
    # module-level `device` even though the model was never moved there).
    dev = next(model.parameters()).device
    val_losses = []
    with torch.no_grad():
        for x_val, y_val in val_loader:
            yhat = model(x_val.to(dev))
            val_losses.append(loss_fn(y_val.to(dev), yhat).item())
    # Fix: the original built a (loss, penalty) tuple from a leaked module
    # global `penalty`, and indexed the plain int 0 (TypeError) when the
    # loader was empty. Return the scalar mean directly.
    if val_losses:
        return sum(val_losses) / len(val_losses)
    return 0
def predict(val_loader,model):
    """Run the model over every feature-only batch in `val_loader`.

    Returns one tensor of concatenated predictions, or None when the loader
    yields no batches.
    """
    chunks = []
    with torch.no_grad():
        for [x_val] in val_loader:
            model.eval()  # original switches mode lazily, once per batch
            chunks.append(model(x_val.to(device)))
    return torch.cat(chunks, 0) if chunks else None
class StreamRegression(nn.Module):
    """Three-hidden-layer MLP regressor: Linear -> ReLU [-> BatchNorm] -> Dropout
    per hidden layer, then a final Linear to one output.

    When the module-level `with_bn` flag is set, BatchNorm layers are inserted
    and all weights are re-initialized from N(0, 0.05) under a fixed seed
    (biases zeroed).
    """
    def __init__(self,n_feature,n_hidden1,n_hidden2,n_hidden3):
        super(StreamRegression, self).__init__()
        self.layer = nn.Sequential()
        widths = [n_feature, n_hidden1, n_hidden2, n_hidden3]
        # Sub-module names (including the 'relue 1' typo and the embedded
        # spaces) are preserved byte-for-byte: they define state_dict keys.
        relu_names = ['relue 1', 'relu 2', 'relu 3']
        for i in range(3):
            self.layer.add_module(f'l{i + 1}', nn.Linear(widths[i], widths[i + 1]))
            self.layer.add_module(relu_names[i], nn.ReLU())
            if with_bn:
                self.layer.add_module(f'bn {i + 1}', nn.BatchNorm1d(widths[i + 1]))
            self.layer.add_module(f'd{i + 1}', nn.Dropout())
        self.layer.add_module('l4', nn.Linear(n_hidden3, 1))
        if with_bn:
            torch.manual_seed(6)  # magic number: seed that gave the best MSE
            for name, param in self.layer.named_parameters():
                if 'weight' in name:
                    nn.init.normal_(param, std=0.05)
                else:  # bias
                    nn.init.zeros_(param)

    def forward(self,x):
        return self.layer(x)
# ---------------------------------------------------------------------------
# Training script: load the data, fit the MLP, checkpoint the best model by
# validation loss, then predict the test set and write the submission file.
# ---------------------------------------------------------------------------
x_cols = ['V0', 'V1', 'V2', 'V3', 'V4', 'V6', 'V7', 'V8', 'V10', 'V12', 'V13',
          'V15', 'V16', 'V18', 'V19', 'V20', 'V21', 'V23', 'V24', 'V25',
          'V26', 'V29', 'V30', 'V31', 'V32', 'V33', 'V35', 'V36', 'V37']
X_train, Y_train = load_data_t("steamprediction/dataset/zhengqi_train.txt", x_cols, "target")
train_ds = TensorDataset(X_train, Y_train.reshape(-1, 1))
train_l = len(train_ds)
# NOTE(review): `fit_l` (the training subset) is round(train_l * valid_pct),
# i.e. only ~15% of the rows are fitted and ~85% validated. If a 15%
# *validation* split was intended, these two should be swapped — confirm
# before changing, since it alters the reported MSE.
fit_l = round(train_l * valid_pct)
valid_l = train_l - fit_l
fit_ds = torch.utils.data.Subset(train_ds, range(0, fit_l))
val_ds = torch.utils.data.Subset(train_ds, range(fit_l, train_l))
fit_loader = DataLoader(dataset=fit_ds, batch_size=bs, shuffle=False)
val_loader = DataLoader(dataset=val_ds, batch_size=bs, shuffle=False)

model = StreamRegression(29, 128, 64, 16)
# Fix: the model was never moved to `device`, so CUDA runs crashed when
# batches were sent to the GPU while the parameters stayed on the CPU.
model.to(device)
print("Model's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())

# MSE loss for regression; Adam with the module-level learning rate.
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)
print("Optimizer's state_dict:")
for var_name in optimizer.state_dict():
    print(var_name, "\t", optimizer.state_dict()[var_name])

print('{:<7}{:>10}{:>10}{:>10}'.format('No', 'train_loss', 'penalty', 'val_loss'))
best_loss = float('INF')
for epoch in range(n_epochs):
    # One epoch of training (with L1 regularization), then validation.
    train_loss, penalty = fit(fit_loader, model, loss_fn, l1_penalty, optimizer)
    val_loss = validate(val_loader, model, loss_fn)
    print(f'{epoch:<7}{train_loss:>10.3f}{penalty:>10.3f}{val_loss:>10.3f}')
    if best_loss > val_loss:
        best_loss = val_loss
        # Saving the whole module (not just state_dict) keeps the original
        # behavior; loading it requires this class definition to be importable.
        torch.save(model, 'torch_model.pkl')
        print(f"validate-loss declined to {best_loss}, save model!")
print(f"The final validate-loss is {best_loss}")

# Inference on the test set with the best checkpoint.
X_test = load_data_t("steamprediction/dataset/zhengqi_test.txt", x_cols, shuffle=False)
test_ds = TensorDataset(X_test)
test_loader = DataLoader(dataset=test_ds, batch_size=bs, shuffle=False)
model = torch.load('torch_model.pkl')
result = predict(test_loader, model)
# Fix: predictions may live on the GPU; .numpy() requires a CPU tensor.
result_df = pd.DataFrame(result.cpu().numpy())
result_df.to_csv("pytorch_result.txt", sep='\t', index=False, header=None)