Hi, I am trying to train an LSTM autoencoder on variable-length sequences. I am feeding the sequences to the network one at a time, not in batches (so I can't use pack_padded_sequence).
I have manually padded the sequences with 0s up to the maximum sequence length, and I am feeding the padded sequences to the LSTM layer. My first question is: does the LSTM layer recognise that each incoming sequence is padded, or do I need to feed each sequence without its padding, example by example?
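For context, this is a minimal sketch of how I understand pack_padded_sequence would be used; the lstm, seq and true_len names and the length of 37 are only illustrative, not part of my actual code:

import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=190, hidden_size=128, batch_first=True)
seq = torch.randn(1, 56, 190)            # one zero-padded sequence, max_seq_len = 56
true_len = torch.tensor([37])            # its real (unpadded) length
packed = nn.utils.rnn.pack_padded_sequence(seq, true_len, batch_first=True)
packed_out, (h_n, c_n) = lstm(packed)    # the LSTM only processes the first true_len timesteps
out, out_lens = nn.utils.rnn.pad_packed_sequence(packed_out, batch_first=True)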
My second question is: would training in batches speed up learning? If so, how can I implement it, and what batch size would you suggest?
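For the batching part, this is roughly what I have in mind, using the standard torch.utils.data Dataset/DataLoader API with the X_train and seq_lengths_train variables from the code below (batch_size=32 is just a guess on my side):

from torch.utils.data import TensorDataset, DataLoader

train_ds = TensorDataset(X_train, torch.as_tensor(seq_lengths_train))  # sequences are already padded
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)

for batch_seqs, batch_lens in train_loader:
    # batch_seqs: (batch_size, max_seq_len, n_features)
    # batch_lens: true (unpadded) lengths of the sequences in this batch
    ...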
Thank you. Here is my code (X is an array of shape (n_sequences, max_seq_len, n_features) containing the already padded sequences):
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# RANDOM_SEED, device, X and trials_array (the true sequence lengths) are defined earlier.

X_train, X_val, seq_lengths_train, seq_lengths_val = train_test_split(
    X, trials_array,
    test_size=0.30,
    random_state=RANDOM_SEED
)
X_val, X_test, seq_lengths_val, seq_lengths_test = train_test_split(
    X_val, seq_lengths_val,
    test_size=0.5,
    random_state=RANDOM_SEED
)
# Flatten to 2-D (n_seq * max_seq_len, n_features) so StandardScaler can fit per feature
X_train = X_train.reshape(-1, 190)
X_test = X_test.reshape(-1, 190)
X_val = X_val.reshape(-1, 190)

scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Back to 3-D (n_seq, max_seq_len=56, n_features=190)
X_train = X_train.reshape(-1, 56, 190)
X_test = X_test.reshape(-1, 56, 190)
X_val = X_val.reshape(-1, 56, 190)
X_train = torch.from_numpy(X_train).double()
X_val = torch.from_numpy(X_val).double()
X_test = torch.from_numpy(X_test).double()
n_seq, max_seq_len, n_features = X_train.shape # n_seq = 7000, max_seq_len = 56, n_features = 190
class Encoder(nn.Module):
    def __init__(self, seq_len, n_features, embedding_dim=64):
        super(Encoder, self).__init__()
        self.seq_len, self.n_features = seq_len, n_features
        self.embedding_dim, self.hidden_dim = embedding_dim, 2 * embedding_dim
        self.rnn1 = nn.LSTM(
            input_size=n_features,
            hidden_size=self.hidden_dim,
            num_layers=1,
            batch_first=True
        )
        self.rnn2 = nn.LSTM(
            input_size=self.hidden_dim,
            hidden_size=embedding_dim,
            num_layers=1,
            batch_first=True
        )

    def forward(self, x):
        # One sequence at a time: (1, seq_len, n_features)
        x = x.reshape((1, self.seq_len, self.n_features))
        x, (_, _) = self.rnn1(x)
        x, (hidden_n, _) = self.rnn2(x)
        # Use the last hidden state of the second LSTM as the embedding
        return hidden_n.reshape((1, self.embedding_dim))
class Decoder(nn.Module):
    def __init__(self, seq_len, input_dim=64, n_features=1):
        super(Decoder, self).__init__()
        self.seq_len, self.input_dim = seq_len, input_dim
        self.hidden_dim, self.n_features = 2 * input_dim, n_features
        self.rnn1 = nn.LSTM(
            input_size=input_dim,
            hidden_size=input_dim,
            num_layers=1,
            batch_first=True
        )
        self.rnn2 = nn.LSTM(
            input_size=input_dim,
            hidden_size=self.hidden_dim,
            num_layers=1,
            batch_first=True
        )
        self.output_layer = nn.Linear(self.hidden_dim, n_features)

    def forward(self, x):
        # Repeat the embedding along the time axis: (1, seq_len, input_dim)
        x = x.repeat(self.seq_len, 1)
        x = x.reshape((1, self.seq_len, self.input_dim))
        x, (hidden_n, cell_n) = self.rnn1(x)
        x, (hidden_n, cell_n) = self.rnn2(x)
        x = x.reshape((self.seq_len, self.hidden_dim))
        # Project each timestep back to the original feature dimension
        x = self.output_layer(x)
        return x
class RecurrentAutoencoder(nn.Module):
    def __init__(self, seq_len, n_features, embedding_dim=64):
        super(RecurrentAutoencoder, self).__init__()
        self.encoder = Encoder(seq_len, n_features, embedding_dim).to(device)
        self.decoder = Decoder(seq_len, embedding_dim, n_features).to(device)

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
model = RecurrentAutoencoder(max_seq_len, n_features, 128)
model = model.to(device)
def train_model(model, train_dataset, val_dataset, seq_lengths_train, seq_lengths_val, n_epochs):
    # seq_lengths_train / seq_lengths_val are currently unused: every padded
    # sequence is fed to the model in full, one at a time.
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.L1Loss(reduction='sum').to(device)
    history = dict(train=[], val=[])
    for epoch in range(1, n_epochs + 1):
        model = model.double()
        model = model.train()
        train_losses = []
        for seq_true in train_dataset:
            optimizer.zero_grad()
            seq_true = seq_true.to(device)
            seq_pred = model(seq_true)
            loss = criterion(seq_pred, seq_true)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())
        val_losses = []
        model = model.eval()
        with torch.no_grad():
            for seq_true in val_dataset:
                seq_true = seq_true.to(device)
                seq_pred = model(seq_true)
                loss = criterion(seq_pred, seq_true)
                val_losses.append(loss.item())
        train_loss = np.mean(train_losses)
        val_loss = np.mean(val_losses)
        history['train'].append(train_loss)
        history['val'].append(val_loss)
        print(f'Epoch {epoch}: train loss {train_loss} val loss {val_loss}')
    return model.eval(), history
model, history = train_model(
    model,
    X_train,
    X_val,
    seq_lengths_train,
    seq_lengths_val,
    n_epochs=150
)
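In case it helps frame the second question, this is roughly how I imagine a batched version of the training loop would look. It assumes the hard-coded batch dimension of 1 in the Encoder/Decoder reshape calls is generalised to x.size(0), and batch_size=32 is only a placeholder:

from torch.utils.data import TensorDataset, DataLoader

model = model.double()                       # X_train is a double tensor
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.L1Loss(reduction='sum').to(device)
train_loader = DataLoader(TensorDataset(X_train), batch_size=32, shuffle=True)

for epoch in range(1, 151):
    model.train()
    for (batch,) in train_loader:
        batch = batch.to(device)             # (batch_size, max_seq_len, n_features)
        optimizer.zero_grad()
        seq_pred = model(batch)              # needs batch-aware reshapes in the model
        loss = criterion(seq_pred, batch)
        loss.backward()
        optimizer.step()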