I migrated a script from keras to pytorch . but i found pytorch one can’t converge as the original one. pytorch script got the best mse = 0.1250, but the keras script can get the best mse = 0.1066
btw, i remove batchnormal from both script( with_bn = True), then they got almost the same best mse score 0.14xx. So I guess there are something wrong with BatchNorm1d module in pytorch.
Can you help me?
Pytorch script
import numpy as np
import gc
import pandas as pd
from pandas import Series,DataFrame
import os,glob
import torch
from torch.utils.data.dataset import TensorDataset
from torch import optim,nn
from torch.utils.data import DataLoader
from torch.utils.data.dataset import random_split
import functools
device = 'cuda' if torch.cuda.is_available() else 'cpu'
with_bn = True
# hyper parameter
lr = 1.e-3
n_epochs = 500
bs = 32
valid_pct = 0.15
lam = 2.e-3
def load_data_t(filename:str, x_cols:list, y_col:str = None,shuffle=True):
d = pd.read_csv(filename,sep='\t')
x = get_data_t(d, x_cols,shuffle)
if y_col is not None:
return x, get_data_t(d,y_col)
else :
return x
def get_data_t(df,cols,shuffle=True):
x = df[cols]
x = np.array(x.values)
if shuffle :
np.random.seed(202)
np.random.shuffle(x)
x = torch.from_numpy(x)
return x.float()
def l1_penalty(model, l1_lambda=lam) -> torch.Tensor:
"""Returns the L1 penalty of the params."""
ss = [ p.abs().sum() for n, p in model.named_parameters() ]
l1_norm = torch.stack(ss).sum()
return l1_lambda*l1_norm
def l2_penalty(model, l2_lambda=1.5e-2) -> torch.Tensor:
ss = [p.pow(2.0).sum() for p in model.parameters()]
l2_norm = torch.stack(ss).sum()
return l2_lambda*l2_norm
def make_train_step(model, optimizer,loss_fn, penalty_fn = None):
# Builds function that performs a step in the train loop
def train_step(x, y):
optimizer.zero_grad()
# Makes predictions
yhat = model(x)
# Computes loss
loss = loss_fn(y, yhat)
penalty = penalty_fn(model) if penalty_fn else 0
# Computes gradients
total_loss = loss+penalty
total_loss.backward()
# Updates parameters and zeroes gradients
optimizer.step()
# Returns the loss
return loss.item(),penalty
# Returns the function that will be called inside the train loop
return train_step
def fit(dl:DataLoader,model, loss_fn, penalty_fn= None, optimizer=None):
losses = []
# Sets model to TRAIN mode
model.train()
train_step = make_train_step(model, optimizer, loss_fn, penalty_fn)
optimizer.zero_grad()
for x_batch, y_batch in dl:
x_batch = x_batch.to(device)
y_batch = y_batch.to(device)
loss,penalty = train_step(x_batch, y_batch)
losses.append(loss)
if len(losses) :
return sum(losses)/len(losses),penalty
else:
return 0, penalty
def validate(val_loader,model,loss_fn):
val_losses= []
with torch.no_grad():
for x_val, y_val in val_loader:
x_val = x_val.to(device)
y_val = y_val.to(device)
model.eval()
yhat = model(x_val)
val_loss = loss_fn(y_val, yhat)
val_losses.append(val_loss.item())
if len(val_losses) :
val_loss = sum(val_losses)/len(val_losses),penalty
else:
val_loss = 0
return val_loss[0]
def predict(val_loader,model):
ret = None
with torch.no_grad():
for [x_val] in val_loader:
x_val = x_val.to(device)
model.eval()
yhat = model(x_val)
ret = torch.cat((ret,yhat),0) if ret is not None else yhat
return ret
class StreamRegression(nn.Module):
def __init__(self,n_feature,n_hidden1,n_hidden2,n_hidden3):
super(StreamRegression, self).__init__()
self.layer = nn.Sequential()
self.layer.add_module('l1', nn.Linear(n_feature, n_hidden1) )
self.layer.add_module('relue 1', nn.ReLU() )
if with_bn:
self.layer.add_module('bn 1', nn.BatchNorm1d(n_hidden1) )
self.layer.add_module( 'd1', nn.Dropout() )
self.layer.add_module( 'l2', nn.Linear(n_hidden1, n_hidden2) )
self.layer.add_module( 'relu 2', nn.ReLU() )
if with_bn:
self.layer.add_module( 'bn 2', nn.BatchNorm1d(n_hidden2) )
self.layer.add_module( 'd2', nn.Dropout() )
self.layer.add_module( 'l3', nn.Linear(n_hidden2, n_hidden3) )
self.layer.add_module( 'relu 3', nn.ReLU() )
if with_bn:
self.layer.add_module( 'bn 3', nn.BatchNorm1d(n_hidden3) )
self.layer.add_module( 'd3', nn.Dropout() )
self.layer.add_module( 'l4', nn.Linear(n_hidden3, 1) )
if with_bn:
s = 6 # magic number
np.random.seed(s)
torch.manual_seed(s)
for name, param in self.layer.named_parameters():
if "weight" in name: # weight matrix
nn.init.normal_(param, std=0.05)
else: # bias
nn.init.zeros_(param)
def forward(self,x):
x = self.layer(x)
return x
x_cols = ['V0', 'V1', 'V2', 'V3', 'V4', 'V6', 'V7', 'V8', 'V10', 'V12', 'V13',
'V15', 'V16', 'V18', 'V19', 'V20', 'V21', 'V23', 'V24', 'V25',
'V26', 'V29', 'V30', 'V31', 'V32', 'V33', 'V35', 'V36', 'V37']
X_train, Y_train = load_data_t("steamprediction/dataset/zhengqi_train.txt",x_cols,"target")
train_ds = TensorDataset(X_train,Y_train.reshape(-1,1))
train_l = len(train_ds)
fit_l = round(train_l * valid_pct) # - round(train_l * valid_pct) % bs
valid_l = train_l - fit_l
# fit_ds, val_ds = random_split(train_ds, [fit_l, valid_l])
fit_ds = torch.utils.data.Subset(train_ds,range(0,fit_l))
val_ds = torch.utils.data.Subset(train_ds,range(fit_l,train_l))
fit_loader = DataLoader(dataset=fit_ds, batch_size=bs, shuffle=False)
val_loader = DataLoader(dataset=val_ds, batch_size=bs, shuffle=False)
model = StreamRegression(29,128,64,16)
print("Model's state_dict:")
for param_tensor in model.state_dict():
print(param_tensor, "\t", model.state_dict()[param_tensor].size())
# Defines a MSE loss function
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)
print("Optimizer's state_dict:")
for var_name in optimizer.state_dict():
print(var_name, "\t", optimizer.state_dict()[var_name])
# For each epoch...
print('{:<7}{:>10}{:>10}{:>10}'.format('No','train_loss','penalty','val_loss'))
best_loss = float('INF')
for epoch in range(n_epochs):
# Performs one train step and returns the corresponding loss
train_loss, penalty = fit(fit_loader,model, loss_fn, l1_penalty,optimizer)
val_loss = validate(val_loader,model,loss_fn)
print(f'{epoch:<7}{train_loss:>10.3f}{penalty:>10.3f}{val_loss:>10.3f}')
if best_loss > val_loss :
best_loss = val_loss
torch.save(model,'torch_model.pkl')
print(f"validate-loss declined to {best_loss}, save model!")
print(f"The final validate-loss is {best_loss}")
X_test = load_data_t("steamprediction/dataset/zhengqi_test.txt",x_cols,shuffle=False)
test_ds = TensorDataset(X_test)
test_loader = DataLoader(dataset=test_ds, batch_size= bs, shuffle=False)
model = torch.load('torch_model.pkl')
result = predict(test_loader,model)
result_df = pd.DataFrame(result.numpy())
result_df.to_csv("pytorch_result.txt", sep='\t',index=False, header=None)
Keras script
import numpy as np
import gc
import pandas as pd
from pandas import Series,DataFrame
import os,glob
from keras import optimizers
import keras
from keras.models import Sequential
from keras.layers import Dense,BatchNormalization,Dropout
from keras.optimizers import SGD,Adam
from keras import regularizers
from keras.layers.advanced_activations import LeakyReLU
from keras.callbacks import ModelCheckpoint,Callback
with_bn = True
def to_array(data):
x = [np.array(data.loc[i]) for i in range (len(data))]
return np.array(x)
# gamm = 3.e-2
# reg = regularizers.l2(gamm)
lam = 2.e-3
reg = regularizers.l1(lam)
def get_lr_metric(optimizer):
def lr(y_true, y_pred):
return optimizer.lr
return lr
optimizer = Adam() # 实际上的 lr = 0.001
lr_metric = get_lr_metric(optimizer)
model = Sequential()
model.add(Dense(input_shape=(29,),units=128, activation='relu', kernel_regularizer=reg, name = 'Dense1'))
if with_bn:
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(units=64, activation='relu', kernel_regularizer=reg, name = 'Dense4'))
if with_bn:
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(units=16, activation='relu', kernel_regularizer=reg, name = 'Dense5'))
if with_bn:
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(units=1, activation='linear', kernel_regularizer=reg, name = 'Dense6'))
# sgd = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-06)
model.compile(loss='MSE', optimizer=optimizer, metrics=['MSE',lr_metric])
model.summary()
pd_data = pd.read_csv("steamprediction/dataset/zhengqi_train.txt", sep='\t')
Y_train_set = pd_data['target']
# del pd_data['target']
pd_data = pd_data[['V0', 'V1', 'V2', 'V3', 'V4', 'V6', 'V7', 'V8', 'V10', 'V12', 'V13',
'V15', 'V16', 'V18', 'V19', 'V20', 'V21', 'V23', 'V24', 'V25',
'V26', 'V29', 'V30', 'V31', 'V32', 'V33', 'V35', 'V36', 'V37']]
X_train = to_array(pd_data)
Y_train = to_array(Y_train_set)
np.random.seed(202)
np.random.shuffle(X_train)
np.random.seed(202)
np.random.shuffle(Y_train)
callbacks_list = [
ModelCheckpoint(
filepath='my_model.h5',
monitor="val_loss",
save_best_only=True,
verbose=1,
)
]
history = model.fit(X_train, Y_train, epochs=500, batch_size=32, verbose=2, validation_split=0.15, shuffle=False, callbacks=callbacks_list)
import pandas as pd
import keras
from keras.models import Sequential,Model
from keras.models import load_model
pd_test = pd.read_csv("steamprediction/dataset/zhengqi_test.txt", sep='\t')
pd_test = pd_test[['V0', 'V1', 'V2', 'V3', 'V4', 'V6', 'V7', 'V8', 'V10', 'V12', 'V13',
'V15', 'V16', 'V18', 'V19', 'V20', 'V21', 'V23', 'V24', 'V25',
'V26', 'V29', 'V30', 'V31', 'V32', 'V33', 'V35', 'V36', 'V37']]
print(pd_test)
# stock_model = load_model("my_model.h5",custom_objects=[lr_metric])
stock_model = load_model("my_model.h5",custom_objects={lr_metric.__name__:lr_metric})
stock = stock_model.predict_on_batch(pd_test)
stock_re = DataFrame(stock)
stock_re.to_csv("keras_result.txt", sep='\t',index=False, header=None)
print(stock)
You can get the training data and test data from https://drive.google.com/drive/folders/1O88OKsVIqYMAuDgePnIYBXf0r2TF3MTh?usp=sharing