I’m trying to use the unfold function to split an input sequence into chunks, feed each chunk into a small LSTM, and use the outputs as a compressed version of the original sequence.
For example, I have a sequence of shape (100, 60): 100 time steps, each frame with 60 dimensions.
I use unfold(0, 10, 10) to split the sequence into 10 small sequences, then feed these 10 small sequences into an LSTM separately. Each small sequence produces 10 outputs, and I use torch.mean() to merge the outputs of each small sequence. This leaves me with a sequence of 10 time steps, which I feed into another LSTM layer for classification.
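To make the intended split concrete, here is a minimal sketch of what unfold does to one (100, 60) sequence (dummy tensor, illustrative names only):

import torch

seq = torch.randn(100, 60)       # 100 time steps, 60 dims per frame
chunks = seq.unfold(0, 10, 10)   # (10, 60, 10): 10 windows, window length moved to the last dim
chunks = chunks.transpose(1, 2)  # (10, 10, 60): (window, step within window, feature)
print(chunks.shape)              # torch.Size([10, 10, 60])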
With this operation I want to compress the sequence length. I implemented it, but the loss does not change during training. Any help would be appreciated.
The data and code are below:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import time
from torch.optim.lr_scheduler import StepLR
import argparse
import os
import numpy as np
import scipy.io as scio
from pathlib import Path
num_total = 861
num_train = 431
num_test = 430
slen = 100
class LSTMTCN(nn.Module):
    # small LSTM that compresses the input sequence window by window;
    # instantiated below as LSTMTCN(100, 1, 10, 10, 100, 3, 100)
    def __init__(self, input_dim, dim, window_size, step_size, hidden_dim, batch_size, output_dim=1, num_layers=2):
        super(LSTMTCN, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.num_layers = num_layers
        self.window_size = window_size
        self.step_size = step_size
        # 60 is the feature dimension of each frame
        self.lstm = nn.LSTM(60, self.hidden_dim, self.num_layers, batch_first=True)

    def init_hidden(self):
        # this is what we'll initialise our hidden state as
        return (torch.zeros(self.num_layers, self.batch_size, self.hidden_dim),
                torch.zeros(self.num_layers, self.batch_size, self.hidden_dim))
    def forward(self, input):
        # input: (batch, seq_len, feat), here (B, 100, 60)
        # unfold(0, 10, 10): window_size 10, stride 10 along the time axis;
        # after the transposes and reshape the tensor is (10, 10*B, 60),
        # i.e. (steps per window, windows*batch, feature)
        self.tensor_unfolded = input.transpose(0, 1).unfold(0, 10, 10).transpose(3, 2).reshape(-1, 10, input.shape[-1]).transpose(0, 1)
        self.batch_size_after = self.tensor_unfolded.shape[1]
        # note: self.lstm was built with batch_first=True, so it reads this
        # (10, 10*B, 60) tensor as (batch, seq, feature)
        lstm_out, self.hidden = self.lstm(self.tensor_unfolded)
        # number of windows produced by the unfold: (100 - 10) / 10 + 1 = 10
        self.step_after_stride = int((input.size()[1] - self.window_size) / self.step_size) + 1
        # merge the outputs inside each window with a mean -> (B, 10, hidden_dim)
        lstm_out = torch.mean(lstm_out.reshape(self.window_size, input.shape[0], self.step_after_stride, self.hidden_dim).permute(1, 2, 0, 3), 2)
        # only the pooled per-window outputs are returned; the next LSTM
        # consumes them as a shorter sequence
        return lstm_out
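For comparison, here is a minimal batch-first sketch of the same compression idea (my own rewrite, not the code above): each window becomes one item of an enlarged batch, the small LSTM runs over the 10 steps inside a window, and the mean over those steps yields one vector per window.

def compress_windows(x, lstm, window_size=10, step_size=10):
    # x: (B, T, F), e.g. (B, 100, 60); lstm must be batch_first=True
    B, T, F = x.shape
    n_win = (T - window_size) // step_size + 1
    # (B, n_win, F, window_size) -> (B*n_win, window_size, F)
    win = x.unfold(1, window_size, step_size).permute(0, 1, 3, 2).reshape(-1, window_size, F)
    out, _ = lstm(win)                # (B*n_win, window_size, hidden)
    out = out.mean(dim=1)             # merge the outputs inside each window
    return out.reshape(B, n_win, -1)  # (B, n_win, hidden)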
class RecogNet(nn.Module):
    def __init__(self):
        super(RecogNet, self).__init__()
        self.lstm = nn.LSTM(100, 100, 3, batch_first=True, dropout=0.5)
        # window_size=10, step_size=10, hidden_dim=100
        self.lstm_skb = LSTMTCN(100, 1, 10, 10, 100, 3, 100)
        self.linear = nn.Sequential(nn.Linear(100, 27), nn.ELU())

    def forward(self, inputs):
        # a plain LSTM over the raw sequence works very well:
        # features, _ = self.lstm(inputs)
        # out = self.linear(features[:, -1, :])
        # here the small LSTM is embedded to first compress the sequence
        features_skb = self.lstm_skb(inputs)   # (B, 10, 100)
        features, _ = self.lstm(features_skb)  # (B, 10, 100)
        out = self.linear(features[:, -1, :])  # classify from the last time step
        return out
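A quick shape check on random data (hypothetical snippet, run on CPU) to confirm the end-to-end dimensions:

net = RecogNet()
dummy = torch.randn(2, 100, 60)  # (batch, time steps, features)
print(net(dummy).shape)          # expected: torch.Size([2, 27])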
def train_model(model_ft, criterion, MSEdis, optimizer, scheduler, num_epochs=60):
    since = time.time()
    model_ft.train(True)
    dset_sizes = len(dsets)
    for epoch in range(num_epochs):
        print('Data Size', dset_sizes)
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        dset_loaders = DataLoader(dataset=dsets, num_workers=4, batch_size=batch_size, shuffle=True)
        running_loss = 0.0
        running_corrects = 0
        count = 0
        # iterate over data
        for data in dset_loaders:
            # get the inputs; shape of inputs is (batch_size, 100, 60)
            inputs, labels = data
            labels, inputs = labels.cuda(), inputs.cuda()
            # zero the parameter gradients
            optimizer.zero_grad()
            # forward, backward, optimize
            out_class = model_ft(inputs)
            loss = criterion(out_class, labels)
            loss.backward()
            optimizer.step()
            count += 1
            if count % 10 == 0 or inputs.size()[0] < batch_size:
                print('Epoch:{}: loss_out_class:{:.3f}'.format(epoch, loss.item()))
                allloss.append(loss.item())
            # statistics
            running_loss += loss.item()
            running_corrects += torch.sum(torch.argmax(out_class, 1) == labels.data)
        # step the LR scheduler once per epoch, after the optimizer updates
        scheduler.step()
        epoch_loss = running_loss / dset_sizes
        epoch_acc = running_corrects.to(dtype=torch.float) / (dset_sizes * 1.0)
        print('Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))
        if (epoch + 1) % 50 == 0:
            model_out_path = dir_file + "/ClassLSTM_epoch_{}.pth".format(epoch)
            torch.save(model_ft, model_out_path)
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    return allloss
def loading_crosssubj_data():
    train_data = np.zeros((num_train, slen, 60))
    test_data = np.zeros((num_test, slen, 60))
    train_label = np.zeros((num_train,), dtype='uint8')
    test_label = np.zeros((num_test,), dtype='uint8')
    label = np.zeros((num_total,), dtype='uint8')
    files = os.listdir('data/')
    num = len(files)
    training_num = 0
    testing_num = 0
    xx = 1
    for i in range(num):
        skelet = scio.loadmat(Path('data/', files[i]))
        skelet = skelet['skeleton']
        # report files with an empty skeleton and skip them
        if len(skelet) == 0:
            print(files[i])
            continue
        xx = xx + 1
        temp = files[i].split('_')[0].split('a')[1]
        label[i] = int(temp) - 1
        skelet = skelet.transpose(2, 0, 1).reshape(100, 60)
        actor_id = int(files[i].split('_')[1].split('s')[1])
        # cross-subject split: odd-numbered subjects for training
        training_sub_list = [1, 3, 5, 7, 9]
        if actor_id in training_sub_list:
            train_data[training_num, :, :] = skelet
            train_label[training_num] = label[i]
            if training_num < 500:
                training_num = training_num + 1
                if training_num % 100 == 0:
                    print('Loading training samples = ', training_num)
        else:
            test_data[testing_num, :, :] = skelet
            test_label[testing_num] = label[i]
            if testing_num < 500:
                testing_num = testing_num + 1
                if testing_num % 100 == 0:
                    print('Loading testing samples = ', testing_num)
    return train_data, train_label, test_data, test_label
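For reference, the label and subject parsing above assumes file names of the form a<action>_s<subject>_..._skeleton.mat (my reading of the split logic); for example:

fname = 'a1_s3_t2_skeleton.mat'                   # hypothetical file name
action = int(fname.split('_')[0].split('a')[1])   # 1 -> label 0 after the "- 1"
subject = int(fname.split('_')[1].split('s')[1])  # 3 -> goes to the training split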
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="hidden Num")
parser.add_argument('--hidden_Num', type=int, default=75)
args = parser.parse_args()
Num = args.hidden_Num
dir_file = 'model/models_{}'.format(Num)
if not (os.path.exists(dir_file)):
os.makedirs(dir_file)
batch_size = 1
train_data, train_label, test_data, test_label = loading_crosssubj_data()
dsets = TensorDataset(torch.from_numpy(train_data).float(), torch.from_numpy(train_label).long())
allloss = []
model_fitting = RecogNet().cuda()
criterion_fun = nn.CrossEntropyLoss().cuda()
MSEdis = nn.MSELoss().cuda()
optimizer = optim.Adam(list(model_fitting.parameters()), lr=0.001)
scheduler = StepLR(optimizer, step_size=400, gamma=0.1)
# Train
allloss = train_model(model_fitting, criterion_fun, MSEdis, optimizer, scheduler, num_epochs=500)