Hello everyone,
I am trying to use TBPTT on a multivariate time series and I am running into a problem: my loss doesn't decrease, and I don't know what I am doing wrong.
Inputs shape (Batch_size,1270,6)
Output shape (Batch_size,1270)
There is a particularity with the Inputs:
- The 6 features correspond to A-B, A-C, A-D, where A is the time step.
- Between two inputs (Inputs[0] and Inputs[1]) the sequences don't have the same length, so I padded all the Inputs using torch.nn.utils.rnn.pad_sequence(Mise_en_donnees, padding_value=-1, batch_first=True). (I also tried padding_value=0, but it doesn't change anything.)
- All Inputs are normalized using get_mean_std (a small sketch of how I pad and normalize follows the function below):
def get_mean_std(loader, ignore_idx=-1.):
    channels_sum, channels_squared_sum, num_batches = 0, 0, 0
    for data in loader:
        a = torch.sum((data[:, 0] != ignore_idx)).item() - 1
        channels_sum += torch.mean(data[:a], dim=[0])
        channels_squared_sum += torch.mean(data[:a] ** 2, dim=[0])
        num_batches += 1
    mean = channels_sum / num_batches
    std = (channels_squared_sum / num_batches - mean ** 2) ** 0.5
    return mean, std
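For context, here is roughly how I build and normalize the padded tensor. The data below is random dummy data, only meant to illustrate the shapes (in reality Mise_en_donnees is my list of per-sample (length, 6) tensors):

import torch
from torch.utils.data import DataLoader

# dummy list of variable-length samples with 6 features each (random values, for illustration only)
Mise_en_donnees = [torch.randn(t, 6) for t in (1270, 900, 1100)]

# pad every sample to the longest length -> shape (3, 1270, 6)
Inputs = torch.nn.utils.rnn.pad_sequence(Mise_en_donnees, padding_value=-1, batch_first=True)

# statistics from get_mean_std above, then standardization
loader = DataLoader(Inputs, batch_size=2)
mean, std = get_mean_std(loader)
Inputs = (Inputs - mean) / std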
Here is my model:
# A classic conv block
class conv_block(nn.Module):
    def __init__(self, in_channels, out_channels, **kwargs):
        super(conv_block, self).__init__()
        self.relu = nn.LeakyReLU()
        self.conv = nn.Conv1d(in_channels, out_channels, **kwargs)
        self.batchnorm = nn.BatchNorm1d(out_channels)

    def forward(self, x):
        x = self.conv(x)
        x = self.batchnorm(x)
        return self.relu(x)
class Test(nn.Module):
    def __init__(self, in_channels, num_layers, hidden_size, p, out_size):
        super(Test, self).__init__()
        self.CNN = nn.Sequential(
            # I am trying to apply filters on every two columns (A-B A-C A-D) using groups
            conv_block(in_channels, 3, kernel_size=2, stride=1, padding=1, groups=3),  # padding_mode="reflect"
            conv_block(3, 32, kernel_size=2, stride=1, padding=0),
            # SqueezeExcitation(32, 16),  # I tried it but got the same results
            conv_block(32, 16, kernel_size=3, stride=1, padding=1),
            conv_block(16, 8, kernel_size=3, stride=1, padding=1),
        )
        self.rnn = nn.LSTM(8, hidden_size, num_layers)
        self.rnn1 = nn.LSTM(hidden_size, hidden_size, num_layers)
        # self.fc_hidden = nn.Linear(hidden_size * 2, hidden_size)  # in case of using bidirectional
        # self.fc_cell = nn.Linear(hidden_size * 2, hidden_size)
        self.dropout = nn.Dropout(p)
        self.num_layers = num_layers
        self.fc_f = nn.Linear(out_size * hidden_size, out_size)

    def forward(self, x, hidden, cell):
        x = x.permute(0, 2, 1)
        x = self.CNN(x)
        x = x.permute(2, 0, 1)
        x, (hidden, cell) = self.rnn(x)  # I tried bidirectional but got the same results
        # hidden = self.dropout(self.fc_hidden(torch.cat((hidden[0:self.num_layers], hidden[self.num_layers:2*self.num_layers]), dim=2)))
        # cell = self.dropout(self.fc_cell(torch.cat((cell[0:self.num_layers], cell[self.num_layers:2*self.num_layers]), dim=2)))
        x, (hidden, cell) = self.rnn1(x, (hidden, cell))
        # hidden = hidden.repeat(2, 1, 1)
        # cell = cell.repeat(2, 1, 1)
        x = x.permute(1, 0, 2)
        x = x.reshape(x.shape[0], -1)
        x = self.fc_f(x)  # final result
        return x, hidden, cell
# hyperparameters
in_channels = 6
num_layers = 64
hidden_size = 90
p = 0.2
out_size = tbptt_steps = 20  # truncated BPTT steps
split_dim = 1
nb_epoch = 100
learning_rate = 3e-4

Model = Test(in_channels, num_layers, hidden_size, p, out_size).to(device)
optimizer = optim.Adam(Model.parameters(), lr=learning_rate)
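Just to make sure the shapes line up, this is a quick sanity check on one dummy chunk of tbptt_steps time steps with in_channels features (random data, only to show the expected output shape):

# dummy chunk: (batch=5, tbptt_steps=20, in_channels=6)
dummy = torch.randn(5, tbptt_steps, in_channels).to(device)
out, h, c = Model(dummy, None, None)
print(out.shape)  # torch.Size([5, 20]) -> one prediction per time step of the chunk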
# I tried to test my model on the same few inputs
X = Inputs[:5, :500, :-1].to(device)
Y = Inputs[:5, :500, -1].to(device)
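For TBPTT, X and Y are then cut along the time dimension into chunks of tbptt_steps steps in the loop below; split behaves roughly like this (dummy tensor, only for illustration):

# a (5, 500, 5) tensor split into chunks of 20 time steps -> 25 chunks
chunks = torch.randn(5, 500, 5).split(tbptt_steps, dim=split_dim)
print(len(chunks), chunks[0].shape)  # 25 torch.Size([5, 20, 5])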
# training loop
hidden = None
cell = None
for ep in range(nb_epoch):
    Losses = 0
    for i, (x_, y_) in enumerate(zip(X.split(tbptt_steps, dim=split_dim), Y.split(tbptt_steps, dim=split_dim))):
        optimizer.zero_grad()
        # Model.train()
        # Detach the last hidden state, so the backprop graph will be cut
        if hidden is not None:
            hidden.detach_()
        if cell is not None:
            cell.detach_()
        # Forward pass
        y_pred, hidden, cell = Model(x_, hidden, cell)
        # print("predict", y_pred.shape, y_.shape)
        # Compute loss
        loss = nn.functional.mse_loss(y_, y_pred)
        # Backward pass
        loss.backward()
        Losses += loss.item()
        # Update weights
        optimizer.step()
        if i == 0:
            print("Epoch ", ep, " Loss ", loss.item())
    print("#################################################")
    print(Losses)
    print("#################################################")
There are two problems with this model:
- It doesn't take the padding_value into account.
- The loss is high and doesn't decrease.
I really hope the model is understandable and that we can correct it together.
As you can see I am not a professional in machine learning, but I am really eager to understand my errors.
Thank you very much for your help!