I am training an RNN-T model, but after training it only predicts the blank token. Here is my code:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl
# assuming `asr` is NVIDIA NeMo's asr collection, which provides RNNTLossPytorch
from nemo.collections import asr
import nemo.collections.asr.losses.rnnt_pytorch  # makes asr.losses.rnnt_pytorch resolvable

class Encoder(pl.LightningModule):
    def __init__(self, inputs, hidden_num, layers):
        super(Encoder, self).__init__()
        self.lstm = nn.LSTM(inputs, hidden_num, layers, bidirectional=True, batch_first=True)
        self.linear = nn.Linear(hidden_num * 2, hidden_num)
        self.batch_norm = nn.BatchNorm2d(1)

    def forward(self, x):
        # x: (batch, frames, feats); add a channel dim for BatchNorm2d
        x = self.batch_norm(x.unsqueeze(1))
        x = x.squeeze(1)
        x, _ = self.lstm(x)
        x = self.linear(x)
        return x

class Decoder(pl.LightningModule):
    def __init__(self, hidden_num):
        super(Decoder, self).__init__()
        self.layer_norm = nn.LayerNorm(80)
        self.embedding = nn.Embedding(80, 80)
        self.lstm = nn.LSTM(80, hidden_num, 8, dropout=.1)
        self.linear = nn.Linear(hidden_num, hidden_num)
        self.dropout = nn.Dropout(.1)  # was missing, but forward() uses it
        self.embed = nn.Embedding(4048, 80)  # note: currently unused

    def forward(self, x, h=None):
        embedded = self.embedding(x)  # embedded shape: [1, batch size, emb dim]
        embedded = self.layer_norm(embedded)
        embedded = self.dropout(embedded)
        if h is None:
            output, h = self.lstm(embedded)
        else:
            output, h = self.lstm(embedded, h)
        return output, h

class Joint(pl.LightningModule):
    def __init__(self, hidden_num):
        super(Joint, self).__init__()
        self.dec = nn.Linear(hidden_num, hidden_num)
        self.enc = nn.Linear(hidden_num, hidden_num)
        self.joint = nn.Linear(hidden_num, 52)  # 52-symbol vocabulary incl. blank

    def forward(self, x1, x2):
        # x1: encoder output, assumed (B, T, H); x2: decoder output, assumed (B, U, H)
        input_length = x1.size(1)
        target_length = x2.size(1)
        encoder_outputs = x1.unsqueeze(2)
        decoder_outputs = x2.unsqueeze(1)
        encoder_outputs = encoder_outputs.repeat([1, 1, target_length, 1])
        decoder_outputs = decoder_outputs.repeat([1, input_length, 1, 1])
        x = torch.tanh(self.enc(encoder_outputs) + self.dec(decoder_outputs))  # F.tanh is deprecated
        return self.joint(x)  # (B, T, U, 52)
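
# Worked shape example for Joint (hypothetical sizes, not from my run):
# with hidden_num=128, x1 (4, 100, 128) and x2 (4, 20, 128) broadcast to
# (4, 100, 20, 128) each, so self.joint produces (4, 100, 20, 52): one
# 52-way distribution per (time step, label step) pair of the RNN-T lattice.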

class RNNT(pl.LightningModule):
    def __init__(self, inputs, hidden_num, layers, learning_rate=1e-3):
        super(RNNT, self).__init__()
        self.encoder = Encoder(inputs, hidden_num, layers)
        self.decoder = Decoder(hidden_num)
        self.joint = Joint(hidden_num)
        self.learning_rate = learning_rate
        self.loss = asr.losses.rnnt_pytorch.RNNTLossPytorch(51, "mean")  # blank id = 51

    def forward(self, x1, x2, h=None):
        x1 = self.encoder(x1)
        x2, h = self.decoder(x2, h)
        # note: softmax is applied here, and its output is what the loss receives
        return torch.softmax(self.joint(x1, x2), dim=-1), h

    def training_step(self, batch, batch_idx):
        x1, y, x2, _ = batch.values()
        y_hat, h = self(x1, x2)
        b, s, t, d = y_hat.shape
        loss = self.loss(y_hat, y[:, :-1].int().cuda(),
                         (torch.ones(b) * s).int().cuda(),
                         (torch.ones(b) * (t - 1)).int().cuda())
        self.log("train_loss", loss, prog_bar=True, on_step=True)
        return {"loss": loss}

    def validation_step(self, batch, batch_idx):
        x1, y, x2, _ = batch.values()
        y_hat, h = self(x1, x2)
        b, s, t, d = y_hat.shape
        loss = self.loss(y_hat, y[:, :-1].int().cuda(),
                         (torch.ones(b) * s).int().cuda(),
                         (torch.ones(b) * (t - 1)).int().cuda())
        self.log("val_loss", loss, prog_bar=True, on_step=True)
        return {"loss": loss}

    def on_train_epoch_end(self):
        torch.save(self.state_dict(), "model.pth")
        torch.save(self, "model.pt")

    def configure_optimizers(self):
        self.optim = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        scheduler = {
            'scheduler': torch.optim.lr_scheduler.ReduceLROnPlateau(
                self.optim, mode='min', factor=0.1, patience=5, min_lr=1e-6),
            'monitor': 'train_loss',  # the metric to monitor
            'interval': 'epoch',      # how often to check the metric
            'frequency': 1            # how often to apply the scheduler
        }
        return [self.optim], [scheduler]
I am using 80 filter banks, which gives me an (s, 80) feature matrix for every audio sample. After a few steps of training, the model makes the probability of blank and space very high. I tried adding a penalty, but it did not work, because the model always pushes one character's probability far above the others: if I take argmax(-1), the model predicts the same character at every position of every sample.
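
Roughly, the feature extraction looks like this (a sketch using torchaudio's Kaldi-compatible fbank; "sample.wav" is a placeholder and my exact parameters may differ):

import torchaudio

waveform, sample_rate = torchaudio.load("sample.wav")
feats = torchaudio.compliance.kaldi.fbank(
    waveform, num_mel_bins=80, sample_frequency=sample_rate
)
print(feats.shape)  # (s, 80): one 80-dim log-mel vector per frame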
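
The collapse is easy to see with a quick greedy check (a sketch on random dummy inputs; all sizes are hypothetical):

model = RNNT(inputs=80, hidden_num=128, layers=2).eval()
x1 = torch.randn(4, 100, 80)        # (B, frames, 80) filter bank features
x2 = torch.randint(0, 80, (4, 20))  # (B, U) previous-label ids for the decoder
with torch.no_grad():
    y_hat, _ = model(x1, x2)        # (B, T, U, 52) softmax output
preds = y_hat.argmax(-1)            # most likely symbol at every lattice point
print(preds.unique())               # after a few training steps this is a single id for me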