Federated learning with LSTM for time series

jagoul · January 14, 2020, 8:30pm

I am trying to use LSTM in pytorch to train on timeseries data in a federated way. I prepared the data , split and built the class LSTM for training as follows :

train_inputs = torch.tensor(train_seq,dtype=torch.float).tag("#iot", "#network", "#seq","#train")
train_labels = torch.tensor(train_labels, dtype=torch.float).tag("#iot", "#network","#label","#train")
print('train_inputs shape : {}'.format(train_inputs.shape))
print('train_labels shape : {}'.format(train_labels.shape))

test_inputs = torch.tensor(test_seq,dtype=torch.float).tag("#iot", "#network", "#seq", "#test")
test_labels = torch.tensor(test_labels, dtype=torch.float).tag("#iot", "#network","#label","#test")
print('test_inputs shape : {}'.format(test_inputs.shape))
print('test_labels shape : {}'.format(test_labels.shape))

# Send the training and test data to the gatways in equal proportion.
train_idx = int(len(X_train)/2)
test_idx = int(len(X_test)/2)
gatway1_train_dataset = sy.BaseDataset(train_inputs[:train_idx], train_inputs[:train_idx]).send(gatway1)
gatway2_train_dataset = sy.BaseDataset(train_inputs[train_idx:], train_inputs[train_idx:]).send(gatway2)
gatway1_test_dataset = sy.BaseDataset(test_inputs[:test_idx], test_inputs[:test_idx]).send(gatway1)
gatway2_test_dataset = sy.BaseDataset(test_inputs[test_idx:], test_inputs[test_idx:]).send(gatway2)

# Create federated datasets, an extension of Pytorch TensorDataset class
federated_train_dataset = sy.FederatedDataset([gatway1_train_dataset, gatway2_train_dataset])
federated_test_dataset = sy.FederatedDataset([gatway1_test_dataset, gatway2_test_dataset])

# Create federated dataloaders, an extension of Pytorch DataLoader class
federated_train_loader = sy.FederatedDataLoader(federated_train_dataset, shuffle=True, batch_size=BATCH_SIZE)
federated_test_loader = sy.FederatedDataLoader(federated_test_dataset, shuffle=False, batch_size=BATCH_SIZE)

The outputs shape are like follow :

train_inputs shape : torch.Size([815913, 30])
train_labels shape : torch.Size([815913])
test_inputs shape : torch.Size([149970, 30])
test_labels shape : torch.Size([149970])
gatway1_train_dataset : <syft.frameworks.torch.federated.dataset.BaseDataset object at 0x7fe636af91d0>
gatway2_train_dataset : <syft.frameworks.torch.federated.dataset.BaseDataset object at 0x7fe636b0ae90>
federated_train_dataset : FederatedDataset
    Distributed accross: gatway1, gatway2
    Number of datapoints: 815913

federated_test_dataset : FederatedDataset
    Distributed accross: gatway1, gatway2
    Number of datapoints: 149970

federated_train_loader : <syft.frameworks.torch.federated.dataloader.FederatedDataLoader object at 0x7fe6312bb210>
federated_test_loader : <syft.frameworks.torch.federated.dataloader.FederatedDataLoader object at 0x7fe6312bb310>

everything fine until now , I created my LSTM class with pne lstm and huber loss function :

class LSTM(nn.Module):
    def __init__(self, input_size=1, hidden_layer_size=100, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size

        self.lstm = nn.LSTM(input_size, hidden_layer_size)

        self.linear = nn.Linear(hidden_layer_size, output_size)

        self.hidden_cell = (torch.zeros(1,1,self.hidden_layer_size),
                            torch.zeros(1,1,self.hidden_layer_size))

    def forward(self, input_seq):
        lstm_out, self.hidden_cell = self.lstm(input_seq.view(len(input_seq) ,1, -1), self.hidden_cell)
        predictions = self.linear(lstm_out.view(len(input_seq), -1))
        return predictions[-1]

and I run the training as follow :

def train(model, device, federated_train_loader, optimizer, epoch):
    model.train()
    # Iterate through each gateway's dataset
    for idx, (seq, labels) in enumerate(federated_train_loader):
        batch_idx = idx+1
        # Send the model to the right gateway
        model.send(seq.location)
        # Move the data and target labels to the device (cpu/gpu) for computation
        seq, labels = seq.to(device), labels.to(device)
        # Clear previous gradients (if they exist)
        optimizer.zero_grad()
        # Make a prediction
        print('seq shape : {}'.format(seq.shape))
        output = model(seq)
        # Calculate huber loss for regression problems
        loss = loss_function(y_pred, labels)
        # Calculate the gradients
        loss.backward()
        # Update the model weights
        optimizer.step()
        # Get the model back from the gateway
        model.get()
        if batch_idx==len(federated_train_loader) or (batch_idx!=0 and batch_idx % LOG_INTERVAL == 0):
            # get the loss back
            loss = loss.get()
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * BATCH_SIZE, len(federated_train_loader) * BATCH_SIZE,
                100. * batch_idx / len(federated_train_loader), loss.item()))

%%time
import torch.optim as optim

# Initialize the model
model = LSTM()

#
loss_function = nn.SmoothL1Loss(size_average=None, reduce=None, reduction='mean')

#Initialize the Adam optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

for epoch in range(1, EPOCHS + 1):
    # Train on the training data in a federated way
    train(model, device, federated_train_loader, optimizer, epoch)

Although the seq shape is : seq shape : torch.Size([1000, 30]) but I am getting the error :

IndexError                                Traceback (most recent call last)
<timed exec> in <module>

<ipython-input-296-ba27722f70a1> in train(model, device, federated_train_loader, optimizer, epoch)
     12         # Make a prediction
     13         print('seq shape : {}'.format(seq.shape))
---> 14         output = model(seq)
     15         # Calculate huber loss for regression problems
     16         loss = loss_function(y_pred, labels)

~/anaconda3/envs/ftorch/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    539             result = self._slow_forward(*input, **kwargs)
    540         else:
--> 541             result = self.forward(*input, **kwargs)
    542         for hook in self._forward_hooks.values():
    543             hook_result = hook(self, input, result)

<ipython-input-293-f3ee7a100904> in forward(self, input_seq)
     16 
     17     def forward(self, input_seq):
---> 18         lstm_out, self.hidden_cell = self.lstm(input_seq.view(len(input_seq) ,1, -1), self.hidden_cell)
     19         predictions = self.fc(lstm_out.view(len(input_seq), -1))
     20         return predictions[-1]

~/anaconda3/envs/ftorch/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    539             result = self._slow_forward(*input, **kwargs)
    540         else:
--> 541             result = self.forward(*input, **kwargs)
    542         for hook in self._forward_hooks.values():
    543             hook_result = hook(self, input, result)

~/anaconda3/envs/ftorch/lib/python3.7/site-packages/torch/nn/modules/rnn.py in forward(self, input, hx)
    562             return self.forward_packed(input, hx)
    563         else:
--> 564             return self.forward_tensor(input, hx)
    565 
    566 

~/anaconda3/envs/ftorch/lib/python3.7/site-packages/torch/nn/modules/rnn.py in forward_tensor(self, input, hx)
    537         # type: (Tensor, Optional[Tuple[Tensor, Tensor]]) -> Tuple[Tensor, Tuple[Tensor, Tensor]]
    538         batch_sizes = None
--> 539         max_batch_size = input.size(0) if self.batch_first else input.size(1)
    540         sorted_indices = None
    541         unsorted_indices = None

IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

Can you please point me out in the right direction, I checked the dimensions several times and I am missing something in the sequence shape, since it is a regression problem but in my preprocessing phase I converted the problem to a kind of classification problem by dividing the tensor to sequences and labels with a step of 30 as follow :

test_data_size = 150000

X_train = data[:-test_data_size]
X_test = data[-test_data_size:]
print('X_train shape :{}'.format(X_train.shape))
print('X_test shape : {}'.format(X_test.shape))

#Transform and normalize X_train
# scaler = MinMaxScaler(feature_range=(-1, 1))
# X_train_normalized = scaler.fit_transform(X_train.reshape(-1, 1))
# X_train_tensor = torch.FloatTensor(X_train_normalized).view(-1)
print('X_train_tensor shape {}'.format(X_train_tensor.shape))

# In our dataset it is convenient to use a sequence length of 30 min
# since we have data by minute send by IoT devices to the cloud
train_window = 30

#Create the sequence for the training dataset
def create_train_sequences(input_data, tw):
    seq = []
    labels = []
    L = len(input_data)
    for i in range(L-tw):
        train_seq = input_data[i:i+tw]
        train_label = input_data[i+tw:i+tw+1]
        seq.append((train_seq))
        labels.append((train_label))

    return seq, labels

train_seq, train_labels = create_train_sequences(X_train, train_window)
train_seq = torch.FloatTensor(train_seq)
train_labels = torch.FloatTensor(train_labels).view(-1)
print('train_seq shape: {}'.format(train_seq.shape))
print('train_labels shape : {}'.format(train_labels.shape))
#print('train_seq shape: {}'.format(len(train_seq)))
#print('train_labels shape : {}'.format(len(train_labels)))

def create_test_sequences(input_data, tw):
    seq = []
    labels = []
    L = len(input_data)
    for i in range(L-tw):
        test_seq = input_data[i:i+tw]
        test_label = input_data[i+tw:i+tw+1]
        seq.append((test_seq))
        labels.append((test_label))
    
    return seq, labels

test_seq, test_labels = create_test_sequences(X_test, train_window)
test_seq = torch.FloatTensor(test_seq)
test_labels = torch.FloatTensor(test_labels).view(-1)
print('test_seq shape: {}'.format(test_seq.shape))
print('test_labels shape: {}'.format(test_labels.shape))