I am trying to use LSTM in pytorch to train on timeseries data in a federated way. I prepared the data , split and built the class LSTM for training as follows :
train_inputs = torch.tensor(train_seq,dtype=torch.float).tag("#iot", "#network", "#seq","#train")
train_labels = torch.tensor(train_labels, dtype=torch.float).tag("#iot", "#network","#label","#train")
print('train_inputs shape : {}'.format(train_inputs.shape))
print('train_labels shape : {}'.format(train_labels.shape))
test_inputs = torch.tensor(test_seq,dtype=torch.float).tag("#iot", "#network", "#seq", "#test")
test_labels = torch.tensor(test_labels, dtype=torch.float).tag("#iot", "#network","#label","#test")
print('test_inputs shape : {}'.format(test_inputs.shape))
print('test_labels shape : {}'.format(test_labels.shape))
# Send the training and test data to the gatways in equal proportion.
train_idx = int(len(X_train)/2)
test_idx = int(len(X_test)/2)
gatway1_train_dataset = sy.BaseDataset(train_inputs[:train_idx], train_inputs[:train_idx]).send(gatway1)
gatway2_train_dataset = sy.BaseDataset(train_inputs[train_idx:], train_inputs[train_idx:]).send(gatway2)
gatway1_test_dataset = sy.BaseDataset(test_inputs[:test_idx], test_inputs[:test_idx]).send(gatway1)
gatway2_test_dataset = sy.BaseDataset(test_inputs[test_idx:], test_inputs[test_idx:]).send(gatway2)
# Create federated datasets, an extension of Pytorch TensorDataset class
federated_train_dataset = sy.FederatedDataset([gatway1_train_dataset, gatway2_train_dataset])
federated_test_dataset = sy.FederatedDataset([gatway1_test_dataset, gatway2_test_dataset])
# Create federated dataloaders, an extension of Pytorch DataLoader class
federated_train_loader = sy.FederatedDataLoader(federated_train_dataset, shuffle=True, batch_size=BATCH_SIZE)
federated_test_loader = sy.FederatedDataLoader(federated_test_dataset, shuffle=False, batch_size=BATCH_SIZE)
The outputs shape are like follow :
train_inputs shape : torch.Size([815913, 30])
train_labels shape : torch.Size([815913])
test_inputs shape : torch.Size([149970, 30])
test_labels shape : torch.Size([149970])
gatway1_train_dataset : <syft.frameworks.torch.federated.dataset.BaseDataset object at 0x7fe636af91d0>
gatway2_train_dataset : <syft.frameworks.torch.federated.dataset.BaseDataset object at 0x7fe636b0ae90>
federated_train_dataset : FederatedDataset
Distributed accross: gatway1, gatway2
Number of datapoints: 815913
federated_test_dataset : FederatedDataset
Distributed accross: gatway1, gatway2
Number of datapoints: 149970
federated_train_loader : <syft.frameworks.torch.federated.dataloader.FederatedDataLoader object at 0x7fe6312bb210>
federated_test_loader : <syft.frameworks.torch.federated.dataloader.FederatedDataLoader object at 0x7fe6312bb310>
everything fine until now , I created my LSTM class with pne lstm and huber loss function :
class LSTM(nn.Module):
def __init__(self, input_size=1, hidden_layer_size=100, output_size=1):
super().__init__()
self.hidden_layer_size = hidden_layer_size
self.lstm = nn.LSTM(input_size, hidden_layer_size)
self.linear = nn.Linear(hidden_layer_size, output_size)
self.hidden_cell = (torch.zeros(1,1,self.hidden_layer_size),
torch.zeros(1,1,self.hidden_layer_size))
def forward(self, input_seq):
lstm_out, self.hidden_cell = self.lstm(input_seq.view(len(input_seq) ,1, -1), self.hidden_cell)
predictions = self.linear(lstm_out.view(len(input_seq), -1))
return predictions[-1]
and I run the training as follow :
def train(model, device, federated_train_loader, optimizer, epoch):
model.train()
# Iterate through each gateway's dataset
for idx, (seq, labels) in enumerate(federated_train_loader):
batch_idx = idx+1
# Send the model to the right gateway
model.send(seq.location)
# Move the data and target labels to the device (cpu/gpu) for computation
seq, labels = seq.to(device), labels.to(device)
# Clear previous gradients (if they exist)
optimizer.zero_grad()
# Make a prediction
print('seq shape : {}'.format(seq.shape))
output = model(seq)
# Calculate huber loss for regression problems
loss = loss_function(y_pred, labels)
# Calculate the gradients
loss.backward()
# Update the model weights
optimizer.step()
# Get the model back from the gateway
model.get()
if batch_idx==len(federated_train_loader) or (batch_idx!=0 and batch_idx % LOG_INTERVAL == 0):
# get the loss back
loss = loss.get()
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * BATCH_SIZE, len(federated_train_loader) * BATCH_SIZE,
100. * batch_idx / len(federated_train_loader), loss.item()))
%%time
import torch.optim as optim
# Initialize the model
model = LSTM()
#
loss_function = nn.SmoothL1Loss(size_average=None, reduce=None, reduction='mean')
#Initialize the Adam optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
for epoch in range(1, EPOCHS + 1):
# Train on the training data in a federated way
train(model, device, federated_train_loader, optimizer, epoch)
Although the seq shape is : seq shape : torch.Size([1000, 30])
but I am getting the error :
IndexError Traceback (most recent call last)
<timed exec> in <module>
<ipython-input-296-ba27722f70a1> in train(model, device, federated_train_loader, optimizer, epoch)
12 # Make a prediction
13 print('seq shape : {}'.format(seq.shape))
---> 14 output = model(seq)
15 # Calculate huber loss for regression problems
16 loss = loss_function(y_pred, labels)
~/anaconda3/envs/ftorch/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
539 result = self._slow_forward(*input, **kwargs)
540 else:
--> 541 result = self.forward(*input, **kwargs)
542 for hook in self._forward_hooks.values():
543 hook_result = hook(self, input, result)
<ipython-input-293-f3ee7a100904> in forward(self, input_seq)
16
17 def forward(self, input_seq):
---> 18 lstm_out, self.hidden_cell = self.lstm(input_seq.view(len(input_seq) ,1, -1), self.hidden_cell)
19 predictions = self.fc(lstm_out.view(len(input_seq), -1))
20 return predictions[-1]
~/anaconda3/envs/ftorch/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
539 result = self._slow_forward(*input, **kwargs)
540 else:
--> 541 result = self.forward(*input, **kwargs)
542 for hook in self._forward_hooks.values():
543 hook_result = hook(self, input, result)
~/anaconda3/envs/ftorch/lib/python3.7/site-packages/torch/nn/modules/rnn.py in forward(self, input, hx)
562 return self.forward_packed(input, hx)
563 else:
--> 564 return self.forward_tensor(input, hx)
565
566
~/anaconda3/envs/ftorch/lib/python3.7/site-packages/torch/nn/modules/rnn.py in forward_tensor(self, input, hx)
537 # type: (Tensor, Optional[Tuple[Tensor, Tensor]]) -> Tuple[Tensor, Tuple[Tensor, Tensor]]
538 batch_sizes = None
--> 539 max_batch_size = input.size(0) if self.batch_first else input.size(1)
540 sorted_indices = None
541 unsorted_indices = None
IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
Can you please point me out in the right direction, I checked the dimensions several times and I am missing something in the sequence shape, since it is a regression problem but in my preprocessing phase I converted the problem to a kind of classification problem by dividing the tensor to sequences and labels with a step of 30 as follow :
test_data_size = 150000
X_train = data[:-test_data_size]
X_test = data[-test_data_size:]
print('X_train shape :{}'.format(X_train.shape))
print('X_test shape : {}'.format(X_test.shape))
#Transform and normalize X_train
# scaler = MinMaxScaler(feature_range=(-1, 1))
# X_train_normalized = scaler.fit_transform(X_train.reshape(-1, 1))
# X_train_tensor = torch.FloatTensor(X_train_normalized).view(-1)
print('X_train_tensor shape {}'.format(X_train_tensor.shape))
# In our dataset it is convenient to use a sequence length of 30 min
# since we have data by minute send by IoT devices to the cloud
train_window = 30
#Create the sequence for the training dataset
def create_train_sequences(input_data, tw):
seq = []
labels = []
L = len(input_data)
for i in range(L-tw):
train_seq = input_data[i:i+tw]
train_label = input_data[i+tw:i+tw+1]
seq.append((train_seq))
labels.append((train_label))
return seq, labels
train_seq, train_labels = create_train_sequences(X_train, train_window)
train_seq = torch.FloatTensor(train_seq)
train_labels = torch.FloatTensor(train_labels).view(-1)
print('train_seq shape: {}'.format(train_seq.shape))
print('train_labels shape : {}'.format(train_labels.shape))
#print('train_seq shape: {}'.format(len(train_seq)))
#print('train_labels shape : {}'.format(len(train_labels)))
def create_test_sequences(input_data, tw):
seq = []
labels = []
L = len(input_data)
for i in range(L-tw):
test_seq = input_data[i:i+tw]
test_label = input_data[i+tw:i+tw+1]
seq.append((test_seq))
labels.append((test_label))
return seq, labels
test_seq, test_labels = create_test_sequences(X_test, train_window)
test_seq = torch.FloatTensor(test_seq)
test_labels = torch.FloatTensor(test_labels).view(-1)
print('test_seq shape: {}'.format(test_seq.shape))
print('test_labels shape: {}'.format(test_labels.shape))