Keras vs. PyTorch LSTM Implementation different results

alekos2 · December 5, 2022, 5:20pm

The data is taken from: MasterCard Stock Data - Latest and Updated | Kaggle

I am trying to write a PyTorch (PT) implementation of a Keras model. However, the Keras model scores an RMSE of 6.70, while the PyTorch model scores 7.60.

Seeds are set the same for everything:

def set_global_seed(seed: int) -> None:
    """Seed every random number generator used by the experiment.

    Seeds Python's ``random`` module, NumPy, PyTorch (via both
    ``torch.manual_seed`` and ``torch.cuda.manual_seed``) and TensorFlow, and
    sets the cuDNN backend flags to deterministic / non-benchmarking.

    Args:
        seed: The value handed to each library's seeding function.
    """
    # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up,
    # so setting it here presumably only affects child processes - confirm.
    os.environ["PYTHONHASHSEED"] = str(seed)

    # Python and NumPy generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators and cuDNN determinism flags.
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

    # TensorFlow / Keras generator.
    tf.random.set_seed(seed)

I’ve checked that the two models receive exactly the same data. All the hyperparameters are hardcoded to the Keras default values. The optimization of the two networks should therefore also be the same. I don’t know what I’m missing.

The code is attached here but can also be found here: https://drive.google.com/file/d/1mWL40Qdak3EUFUIBdeBTp-bE3wLH3EiU/view?usp=sharing

Keras:

# Keras reference model: one LSTM layer feeding a single-unit Dense head,
# trained with RMSprop on mean squared error.
tf_model = Sequential(
    [
        LSTM(units=HIDDEN_UNITS, activation="tanh", input_shape=(n_steps, features)),
        Dense(units=1),
    ]
)
tf_model.compile(optimizer="RMSprop", loss="mse")
tf_model.fit(
    X_train,
    y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    verbose=False,
)

PT:

def pt_train(dltrn, dltst, model, criterion, optimizer, epochs=None):
    """Train ``model`` on ``dltrn`` and return its predictions on ``dltst``.

    Args:
        dltrn: Iterable yielding ``(x, y)`` training batches of tensors.
        dltst: Iterable yielding ``(x, y)`` test batches; only ``x`` is used.
        model: The ``nn.Module`` to optimise (modified in place).
        criterion: Loss callable, invoked as ``criterion(prediction, target)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epochs: Number of passes over ``dltrn``. Defaults to the module-level
            ``EPOCHS`` constant when omitted.

    Returns:
        A NumPy array with the test-batch predictions concatenated along the
        first axis.

    Raises:
        ValueError: If ``dltst`` yields no batches (from ``np.concatenate``).
    """
    if epochs is None:
        epochs = EPOCHS

    model.train()
    for _ in range(epochs):
        for x, y in dltrn:
            # Gradients must be cleared before EVERY batch. Clearing them only
            # once per epoch lets them accumulate across mini-batches, so each
            # step uses the sum of all previous batch gradients in the epoch.
            optimizer.zero_grad()
            out = model(x.float())
            # Conventional (prediction, target) argument order.
            # NOTE(review): ``out`` and ``y`` are assumed to have the same
            # shape; a mismatch would make the loss broadcast - confirm.
            loss = criterion(out, y.float())
            loss.backward()
            optimizer.step()

    model.eval()
    yh = []
    with torch.no_grad():
        for xx, _ in dltst:
            # .cpu() is a no-op on CPU tensors and keeps .numpy() valid when
            # the model lives on an accelerator.
            yh.append(model(xx.float()).cpu().numpy())

    return np.concatenate(yh)

class Custom(nn.Module):
    """Single-layer LSTM with a linear head, initialised like a Keras model.

    Args:
        i: Number of input features per time step.
        h: Number of LSTM hidden units.
        o: Number of output features of the linear head.
    """

    def __init__(self, i=1, h=HIDDEN_UNITS, o=1):
        super().__init__()
        self.lstm = nn.LSTM(i, h, bidirectional=False, batch_first=True, dropout=0.0, bias=True)
        self.fc = nn.Linear(h, o)
        # Initialisation order is kept (head first, then LSTM) so the random
        # draws under a fixed seed are consumed in the same order as before.
        self._init(self.fc)
        self._init(self.lstm)

    def _init(self, module):
        """Re-initialise ``module`` following the Keras layer defaults.

        LSTM: Glorot-uniform input kernel, orthogonal recurrent kernel, zero
        biases except the forget gate, whose bias starts at 1 (Keras
        ``unit_forget_bias=True``). Linear: Glorot-uniform weight, zero bias.
        Only layer 0 of an LSTM is handled.
        """
        if isinstance(module, nn.LSTM):
            nn.init.xavier_uniform_(module.weight_ih_l0.data)
            nn.init.orthogonal_(module.weight_hh_l0.data)
            # ``nn.LSTM.bias`` is a bool flag, not a tensor, so test its truth
            # value; ``is not None`` would always pass and then fail on the
            # missing bias tensors when the LSTM was built with bias=False.
            if module.bias:
                module.bias_ih_l0.data.zero_()
                module.bias_hh_l0.data.zero_()
                # PyTorch packs the gate biases as (input | forget | cell |
                # output). The two bias vectors are summed, so setting the
                # forget slice of one of them to 1 yields a forget bias of 1.
                hidden = module.hidden_size
                module.bias_ih_l0.data[hidden:2 * hidden].fill_(1.0)
        elif isinstance(module, nn.Linear):
            nn.init.xavier_uniform_(module.weight.data)
            if module.bias is not None:
                module.bias.data.zero_()

    def forward(self, x):
        """Return one prediction per sequence, shaped ``(-1, 1, 1)``.

        ``x`` is fed to a ``batch_first`` LSTM; the hidden state of the last
        time step is passed through the linear head.
        """
        hs, _ = self.lstm(x)
        lhs = hs[:, -1, :]  # hidden state at the final time step
        out = self.fc(lhs)
        return out.reshape(-1, 1, 1)

# PyTorch counterpart of the Keras model, trained with MSE loss.
pt_model = Custom()
criterion = nn.MSELoss()
# RMSprop configured with the Keras default values
# (alpha here plays the role of Keras's rho).
optimizer = torch.optim.RMSprop(
    pt_model.parameters(),
    lr=0.001,
    alpha=0.9,
    eps=1e-7,
    momentum=0.0,
)
pt_preds = pt_train(dltrn, dltst, pt_model, criterion, optimizer)