One weird point: even though the decoder of Tacotron2 also contains RNN-family layers, this NaN error never occurs there. Only the output of the LSTM in SimpleSERClassifier shows NaN values.
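A forward hook can at least pin down which module first emits a NaN; nan_probe below is just a debugging helper I use, not part of the model:

def nan_probe(module, inputs, output):
    # nn.LSTM returns (output, (h_n, c_n)); check only the output tensor.
    out = output[0] if isinstance(output, tuple) else output
    if torch.isnan(out).any():
        raise RuntimeError('NaN in output of ' + module.__class__.__name__)

# Attach to every submodule before training, e.g.:
# for _, m in model.named_modules():
#     m.register_forward_hook(nan_probe)

The full (trimmed) script: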
import torch
from torch import nn
from torch.distributions import normal
import argparse
class Loss(nn.Module):
    def __init__(self):
        super(Loss, self).__init__()

    def forward(self, SER_est_label, SERlabel):
        SER_loss = nn.CrossEntropyLoss()(SER_est_label, SERlabel)
        return SER_loss
class SimpleSERClassifier(nn.Module):
    def __init__(self, hparams):
        super(SimpleSERClassifier, self).__init__()
        self.inputDim = hparams.SERinputDim
        self.outputDim = hparams.SERoutputDim
        self.numNode = hparams.SERnumNode
        self.numLayer = hparams.SERnumLayer
        self.bidirectional = True
        self.numDirection = 2 if self.bidirectional else 1
        self.numLinear = self.numDirection * int(self.numNode / 2)
        self.numLSTMLayer = 1  # the LSTM below is single-layer; SERnumLayer is stored but unused here
        self.batch_size = hparams.batch_size
        self.fc = nn.Sequential(
            nn.Linear(self.inputDim, self.numNode), nn.ReLU(True), nn.Dropout(0.5),
            nn.Linear(self.numNode, self.numNode), nn.ReLU(True), nn.Dropout(0.5)
        )
        self.LSTM = nn.LSTM(input_size=self.numNode,
                            hidden_size=int(self.numNode / 2),
                            num_layers=1,
                            bidirectional=self.bidirectional)
        self.outfc = nn.Sequential(
            nn.Linear(self.numLinear, self.outputDim))
    def init_hidden(self):
        # Hidden and cell states are drawn fresh from N(0, 1) on every call.
        n = normal.Normal(0.0, 1.0)
        h = n.sample([self.numDirection, self.batch_size, int(self.numNode / 2)])
        c = n.sample([self.numDirection, self.batch_size, int(self.numNode / 2)])
        return (h, c)
    def forward(self, inSequence, h, c):
        fcout = self.fc(inSequence)
        if torch.cuda.is_available():
            h, c = h.cuda(), c.cuda()
        # (batch, time, feature) -> (time, batch, feature) for the LSTM.
        fcout = fcout.permute(1, 0, 2)
        self.LSTM.flatten_parameters()
        lstmout, (h, c) = self.LSTM(fcout, (h, c))  # lstmout is where the NaNs appear
        output = lstmout.permute(1, 0, 2)
        output = self.outfc(output)
        return output
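# --------------------------------------------------
# Hypothetical smoke test, not called from train(): feed one random batch
# through the classifier and report whether the output contains NaN.
# The sequence length of 10 is an arbitrary illustrative value.
# --------------------------------------------------
def smoke_test(hparams):
    model = SimpleSERClassifier(hparams)
    x = torch.randn(hparams.batch_size, 10, hparams.SERinputDim)  # (batch, time, feature)
    if torch.cuda.is_available():
        model, x = model.cuda(), x.cuda()
    h, c = model.init_hidden()  # forward() moves h and c to the GPU itself
    out = model(x, h, c)
    print('NaN in output:', torch.isnan(out).any().item())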
def train(hparams):
    torch.manual_seed(hparams.seed)
    torch.cuda.manual_seed(hparams.seed)

    model = SimpleSERClassifier(hparams).cuda()
    learning_rate = hparams.learning_rate
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    if hparams.fp16_run:
        from apex import amp
        model, optimizer = amp.initialize(
            model, optimizer, opt_level='O2')

    criterion = Loss()
    # prepare_dataloaders comes from the rest of my training code (omitted here).
    train_loader, valset, collate_fn = prepare_dataloaders(hparams)

    # Load checkpoint if one exists
    iteration = 0
    epoch_offset = 0

    model.train()
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, hparams.epochs):
        for i, batch in enumerate(train_loader):
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate
            model.zero_grad()

            x, y = batch  # assuming collate_fn yields (features, labels)
            h, c = model.init_hidden()
            y_pred = model(x, h, c)
            # CrossEntropyLoss expects predictions first, then targets.
            loss = criterion(y_pred, y)

            if hparams.fp16_run:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            optimizer.step()
            iteration += 1
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # --------------------------------------------------
    # Classifier params
    # --------------------------------------------------
    parser.add_argument('--SERinputDim', type=int, default=32)
    parser.add_argument('--SERoutputDim', type=int, default=4)
    parser.add_argument('--SERnumNode', type=int, default=512)
    parser.add_argument('--SERnumLayer', type=int, default=3)
    parser.add_argument('--SERbidirectional', type=bool, default=True)
    # --------------------------------------------------
    parser.add_argument('--epochs', type=int, default=100, metavar='N',
                        help='number of epochs to train (default: 100)')
    # These two were missing from the parser but are used above;
    # the defaults here are just placeholders.
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--fp16_run', type=bool, default=False)
    parser.add_argument('--cudnn_enabled', type=bool, default=True)
    parser.add_argument('--cudnn_benchmark', type=bool, default=False)
    parser.add_argument('--seed', type=int, default=1234)
    parser.add_argument('--learning_rate', type=float, default=1e-3)
    args = parser.parse_args()

    torch.backends.cudnn.enabled = args.cudnn_enabled
    torch.backends.cudnn.benchmark = args.cudnn_benchmark
    train(args)
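Two standard PyTorch knobs that may help narrow this down (neither is in the script above; max_norm=1.0 is just an illustrative value):

# Localize the op that produces NaN/Inf in the backward pass (slow; debugging only).
torch.autograd.set_detect_anomaly(True)

# Common mitigation for exploding LSTM gradients, especially with fp16:
# clip between loss.backward() and optimizer.step().
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)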