I have a GRU model that I am applying to time-series data; the class looks like the following:
class GRUNet(nn.Module):
    """GRU stack followed by a linear head applied to the last time step.

    The recurrent layer is created with ``batch_first=True``, so inputs are
    indexed as (batch, time, feature).
    """

    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, drop_prob=0.2):
        """Build the GRU stack, the output layer and the activation.

        Args:
            input_dim: Feature size of each time step fed to the GRU.
            hidden_dim: Width of the GRU hidden state.
            output_dim: Size of the linear head's output.
            n_layers: Number of stacked GRU layers.
            drop_prob: Dropout probability forwarded to ``nn.GRU``.
        """
        super().__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.gru = nn.GRU(input_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x, h):
        """Run the GRU and project the last time step through the head.

        Args:
            x: Input sequence batch, indexed (batch, time, feature).
            h: Initial hidden state, e.g. from ``init_hidden``.

        Returns:
            A tuple ``(out, h)``: the head's output for the last time step
            and the hidden state returned by the GRU.
        """
        # Debug output kept from the original implementation.
        print('x inside forward {}'.format(x))
        out, h = self.gru(x, h)
        print('out shape :{}'.format(out.shape))
        # Only the final time step feeds the linear head.
        out = self.fc(self.relu(out[:, -1]))
        return out, h

    def init_hidden(self, batch_size):
        """Return a zero hidden state of shape (n_layers, batch_size, hidden_dim).

        The tensor is created from one of the model's own parameters, so it
        shares that parameter's dtype and device instead of relying on a
        module-level ``device`` variable.
        """
        weight = next(self.parameters()).data
        return weight.new_zeros((self.n_layers, batch_size, self.hidden_dim))
And my training function is:
def train(model, device, federated_train_loader, optimizer, epoch):
    """Run one training epoch over a federated data loader.

    For every batch the model is sent to the location that holds the batch,
    a fresh zero hidden state is created, and one optimizer step is taken.
    The loss is fetched and logged every ``LOG_INTERVAL`` batches and on the
    last batch.

    Args:
        model: Model exposing ``train``, ``send``, ``init_hidden`` and a
            forward call of the form ``model(seq, h) -> (output, h)``.
        device: Device the batch tensors are moved to.
        federated_train_loader: Iterable of ``(seq, labels)`` batches whose
            ``seq`` exposes a ``location`` attribute.
        optimizer: Optimizer updating the model's parameters.
        epoch: Epoch number, used only in the progress message.
    """
    model.train()
    n_batches = len(federated_train_loader)
    # Iterate through each gateway's dataset; batches are numbered from 1.
    for batch_idx, (seq, labels) in enumerate(federated_train_loader, start=1):
        # Send the model to the gateway that holds this batch.
        model.send(seq.location)
        # Move the data and target labels to the device (cpu/gpu) for computation.
        seq, labels = seq.to(device), labels.to(device)
        # Size the hidden state from the batch itself: the final batch of an
        # epoch can hold fewer than BATCH_SIZE samples, and the GRU requires
        # the hidden state's batch dimension to match its input.
        h = model.init_hidden(seq.shape[0])
        # Clear previous gradients (if they exist).
        optimizer.zero_grad()
        # Make a prediction.
        print('seq shape : {}'.format(seq.shape))
        print('labels shape : {}'.format(labels.shape))
        output, h = model(seq, h)
        # loss_function is defined elsewhere (described by the author as a
        # Huber loss for regression).
        # NOTE(review): confirm that `output` and `labels` have matching
        # shapes before they reach the loss.
        loss = loss_function(output, labels)
        # Calculate the gradients.
        loss.backward()
        # Update the model weights.
        optimizer.step()
        # NOTE(review): retrieving the model from the gateway is disabled
        # here; confirm whether it must be fetched back before the next
        # model.send(...) call.
        #model.get()
        if batch_idx == n_batches or batch_idx % LOG_INTERVAL == 0:
            # Get the loss back from the gateway before reading its value.
            loss = loss.get()
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * BATCH_SIZE, n_batches * BATCH_SIZE,
                100. * batch_idx / n_batches, loss.item()))
I instantiated the model and printed its structure and the tensor shapes as follows:
# Univariate input and output, two stacked GRU layers with a hidden size of 100.
model = GRUNet(
    input_dim=1,
    hidden_dim=100,
    output_dim=1,
    n_layers=2,
)
GRUNet(
(gru): GRU(1, 100, num_layers=2, batch_first=True, dropout=0.2)
(fc): Linear(in_features=100, out_features=1, bias=True)
(relu): ReLU()
)
seq shape : torch.Size([1024, 1, 1])
labels shape : torch.Size([1024, 1, 1])
x inside forward (Wrapper)>[PointerTensor | me:36457989435 -> gatway1:28694227328]
I got the following error at the end:
RuntimeError Traceback (most recent call last)
<timed exec> in <module>
<ipython-input-30-8013666c5ed1> in train(model, device, federated_train_loader, optimizer, epoch)
14 print('seq shape : {}'.format(seq.shape))
15 print('labels shape : {}'.format(labels.shape))
---> 16 output, h = model(seq, h)
17 # Calculate huber loss for regression problems
18 #labels =labels.view(-1)
~/anaconda3/envs/ftorch/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
539 result = self._slow_forward(*input, **kwargs)
540 else:
--> 541 result = self.forward(*input, **kwargs)
542 for hook in self._forward_hooks.values():
543 hook_result = hook(self, input, result)
<ipython-input-26-be5b95661398> in forward(self, x, h)
11 def forward(self, x, h):
12 print('x inside forward {}'.format(x))
---> 13 out, h = self.gru(x, h)
14 print('out shape :{}'.format(out.shape))
15 out = self.fc(self.relu(out[:,-1]))
~/anaconda3/envs/ftorch/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
539 result = self._slow_forward(*input, **kwargs)
540 else:
--> 541 result = self.forward(*input, **kwargs)
542 for hook in self._forward_hooks.values():
543 hook_result = hook(self, input, result)
~/anaconda3/envs/ftorch/lib/python3.7/site-packages/torch/nn/modules/rnn.py in forward(self, input, hx)
727 return self.forward_packed(input, hx)
728 else:
--> 729 return self.forward_tensor(input, hx)
730
731
~/anaconda3/envs/ftorch/lib/python3.7/site-packages/torch/nn/modules/rnn.py in forward_tensor(self, input, hx)
719 sorted_indices = None
720 unsorted_indices = None
--> 721 output, hidden = self.forward_impl(input, hx, batch_sizes, max_batch_size, sorted_indices)
722 return output, self.permute_hidden(hidden, unsorted_indices)
723
~/anaconda3/envs/ftorch/lib/python3.7/site-packages/torch/nn/modules/rnn.py in forward_impl(self, input, hx, batch_sizes, max_batch_size, sorted_indices)
696 hx = self.permute_hidden(hx, sorted_indices)
697
--> 698 self.check_forward_args(input, hx, batch_sizes)
699 result = self.run_impl(input, hx, batch_sizes)
700 output = result[0]
~/anaconda3/envs/ftorch/lib/python3.7/site-packages/torch/nn/modules/rnn.py in check_forward_args(self, input, hidden, batch_sizes)
168 def check_forward_args(self, input, hidden, batch_sizes):
169 # type: (Tensor, Tensor, Optional[Tensor]) -> None
--> 170 self.check_input(input, batch_sizes)
171 expected_hidden_size = self.get_expected_hidden_size(input, batch_sizes)
172
~/anaconda3/envs/ftorch/lib/python3.7/site-packages/torch/nn/modules/rnn.py in check_input(self, input, batch_sizes)
147 raise RuntimeError(
148 'input.size(-1) must be equal to input_size. Expected {}, got {}'.format(
--> 149 self.input_size, input.size(-1)))
150
151 def get_expected_hidden_size(self, input, batch_sizes):
RuntimeError: input.size(-1) must be equal to input_size. Expected 1, got 0
A small note regarding this implementation: when I use the GRU on its own and generate the input with torch.randn(1024, 1, 1),
it works. But when I use it on my dataset through the syft library for federated data, it doesn't work. Could that be the reason behind the error? I have also provided the shapes of my federated data below; the inputs have the same shape as the randomized tensor:
train_inputs shape : torch.Size([815942, 1, 1])
train_labels shape : torch.Size([815942, 1])
test_inputs shape : torch.Size([149999, 1, 1])
test_labels shape : torch.Size([149999, 1])
gatway1_train_dataset : <syft.frameworks.torch.fl.dataset.BaseDataset object at 0x7fd7023e42d0>
gatway2_train_dataset : <syft.frameworks.torch.fl.dataset.BaseDataset object at 0x7fd6d0e4bf90>
federated_train_dataset : FederatedDataset
Distributed accross: gatway1, gatway2
Number of datapoints: 815942
federated_test_dataset : FederatedDataset
Distributed accross: gatway1, gatway2
Number of datapoints: 149999
I have been stuck on this for a while now, and I have seen other GRU models working properly on federated data. Any clue? Much appreciated!