Hi all. I just moved from tensorflow to pytorch and I am really new to pytorch. I just wrote my just model but it doesn’t learn anything. Can someone help me out with that? Any information is appreciated. Here is my code. I omitted the data_loader file and run file. But basically in my run file, I run model = RETAIN(config) and model.run()
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import pdb
from ops import *
class RETAIN(nn.Module):
def init(self, config):
super(RETAIN, self).init()
for name in config.dict:
setattr(self,name,getattr(config,name))
self.W_emb = nn.Parameter(torch.randn(self.num_features,self.embed_size).type(torch.FloatTensor), requires_grad=True)
self.alpha_rnn = nn.LSTM(self.embed_size,self.hidden_units,self.num_layers,batch_first=True)
self.alpha_weight = nn.Parameter(torch.randn(self.hidden_units,1).type(torch.FloatTensor), requires_grad=True)
self.alpha_bias = nn.Parameter(torch.randn(1).type(torch.FloatTensor), requires_grad=True)
self.beta_weight = nn.Parameter(torch.randn(self.hidden_units,self.hidden_units).type(torch.FloatTensor), requires_grad=True)
self.beta_bias = nn.Parameter(torch.randn(self.hidden_units).type(torch.FloatTensor), requires_grad=True)
self.beta_rnn = nn.LSTM(self.embed_size,self.hidden_units,self.num_layers,batch_first=True)
self.out_weight = nn.Parameter(torch.randn(self.hidden_units,1).type(torch.FloatTensor), requires_grad=True)
self.out_bias = nn.Parameter(torch.randn(1).type(torch.FloatTensor), requires_grad=True)
self.softmax = nn.Softmax()
self.tanh = nn.Tanh()
self.sigmoid = nn.Sigmoid()
# l = nn.ModuleList([self.alpha_rnn, self.beta_rnn])
self.sigmoid_cross_entropy_with_logits = lambda logits,labels: nn.BCELoss()(self.sigmoid(logits),labels)
self.optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
def forward(self,x,y):
inp = Variable(torch.from_numpy(x).type(torch.FloatTensor))
label = Variable(torch.from_numpy(y).type(torch.FloatTensor))
# Embed the input
embedded_x = torch.matmul(inp,self.W_emb)
# Reverse the data:
idx = Variable(torch.arange(self.steps-1,-1,-1).type(torch.LongTensor))
reversed_x = torch.index_select(embedded_x,1,idx)
h0 = Variable(torch.zeros(self.num_layers,reversed_x.size()[0],reversed_x.size()[2]))
c0 = Variable(torch.zeros(self.num_layers,reversed_x.size()[0],reversed_x.size()[2]))
# Get alpha attention
alpha_output,_ = self.alpha_rnn(reversed_x,(h0,c0))
alpha_att = torch.matmul(alpha_output,self.alpha_weight) + self.alpha_bias
alpha_att = torch.squeeze(alpha_att,2)
alpha_att = self.softmax(alpha_att)
alpha_att = torch.unsqueeze(alpha_att,2)
alpha_att = torch.index_select(alpha_att,1,idx)
# Get beta attention
beta_output,_ = self.beta_rnn(reversed_x,(h0,c0))
beta_att = torch.matmul(beta_output,self.beta_weight) + self.beta_bias
beta_att = self.tanh(beta_att)
beta_att = torch.index_select(beta_att,1,idx)
c_i = torch.sum(alpha_att * (beta_att*embedded_x),1)
logits = torch.matmul(c_i,self.out_weight) + self.out_bias
preds = self.sigmoid(logits)
preds = preds.data.numpy()
roc, auc = ROC_AUC(preds, y)
preds = preds >= 0.5
acc = accuracy(preds, y)
loss = torch.sum(self.sigmoid_cross_entropy_with_logits(logits,label))
return loss, loss.data.numpy(), auc, acc
def get_batch(self):
index = np.random.choice(len(self.train_x),self.batch_size,replace=False)
batch_x = [self.train_x[i] for i in index]
batch_y = [self.train_y[i] for i in index]
return np.array(batch_x), np.array(batch_y)
def run(self):
eval_loss_min = float('inf')
eval_auc_min = float('inf')
eval_acc_min = float('inf')
step_min = 0
for i in range(self.total_iter):
data, label = self.get_batch()
self.optimizer.zero_grad()
loss, train_loss, train_auc, train_acc = self.forward(data,label)
loss.backward()
self.optimizer.step()
if (i+1)%self.check_iter == 0:
_, eval_loss, eval_auc, eval_acc = self.forward(self.eval_x,self.eval_y)
print("-----------------------------------------------------------------------------")
print self.task
if eval_loss < eval_loss_min:
eval_loss_min = eval_loss
eval_auc_min = eval_auc
eval_acc_min = eval_acc
step_min=i+1
print("MIN_test_loss is updated, lr: %f" %self.lr)
print("Step:%6d, Train loss: %.3f, Train AUC: %.3f, Train Accuracy: %.3f" \
% (i+1, train_loss, train_auc, train_acc))
print("Step:%6d, Eval loss: %.3f, Eval Auc: %.3f, Eval Accuracy: %.3f" \
% (i+1, eval_loss, eval_auc, eval_acc))