I am using a GRU in PyTorch. The output of the GRU is three-dimensional, but I need a two-dimensional tensor for the calculation that follows, so I was wondering what the best way is to extract a two-dimensional output from the final layer. I have a few ideas, marked in the `forward` method of the model below.
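For context, the three-dimensional shape is just the standard `nn.GRU` output layout with `batch_first=True`, namely `(batch, seq_len, num_directions * hidden_size)`. A minimal standalone illustration (the sizes mirror my toy example below):

```
import torch
import torch.nn as nn

# Standard GRU output layout with batch_first=True:
# (batch, seq_len, num_directions * hidden_size) -- three dimensions.
gru = nn.GRU(input_size=3, hidden_size=256, bidirectional=True, batch_first=True)
out, h = gru(torch.rand(3, 4, 3))  # batch=3, seq_len=4, emb_dim=3
print(out.shape)                   # torch.Size([3, 4, 512])
```

My model, with the three candidate reductions in `forward`: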

```
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import torch.nn.functional as F


class BaseModel(nn.Module):
    def __init__(
        self,
        embed_vecs,
        dropout=0.2,
        activation='relu',
    ):
        super().__init__()
        self.embedding = nn.Embedding(len(embed_vecs), embed_vecs.shape[1], padding_idx=0)
        self.embedding.weight.data = embed_vecs.clone()
        self.embed_drop = nn.Dropout(p=dropout)
        self.activation = getattr(F, activation)


class BiGRU(BaseModel):
    def __init__(
        self,
        embed_vecs,
        out_dim,
        rnn_dim=512,
        rnn_layers=1,
        dropout=0.2,
        activation='tanh',
        **kwargs
    ):
        super(BiGRU, self).__init__(embed_vecs, dropout, activation, **kwargs)
        assert rnn_dim % 2 == 0, '`rnn_dim` should be even.'
        # BiGRU: each direction gets rnn_dim // 2 hidden units,
        # so the concatenated output has rnn_dim features.
        emb_dim = embed_vecs.shape[1]
        self.rnn = nn.GRU(emb_dim, rnn_dim // 2, rnn_layers,
                          bidirectional=True, batch_first=True)
        self.res = nn.Linear(rnn_dim, out_dim)

    def forward(self, input):
        text, length, indices = self.sort_data_by_length(input['text'], input['length'])
        x = self.embedding(text)
        x = self.embed_drop(x)
        packed_inputs = pack_padded_sequence(x, length, batch_first=True)
        x, _ = self.rnn(packed_inputs)
        x = pad_packed_sequence(x)
        x = x[0].permute(1, 0, 2)  # (batch, seq_len, rnn_dim)
        # The output is three-dimensional; I need two dimensions for further calculation.
        # First method: sum over the feature dimension.
        x_sum = x.sum(dim=2)
        print("sum", x_sum.shape)
        # Second method: flatten the time and feature dimensions together.
        x_reshape = x.reshape(x.shape[0], -1)
        print("reshape", x_reshape.shape)
        # Third method: take the last timestep.
        x_last = x[:, -1]
        print("last", x_last.shape)
        return x_sum, x_reshape, x_last

    def sort_data_by_length(self, data, length):
        length = torch.as_tensor(length, dtype=torch.int64)
        length, sorted_indices = torch.sort(length, descending=True)
        sorted_indices = sorted_indices.to(data.device)
        data = data.index_select(0, sorted_indices)
        data_size = sorted_indices.size(-1)
        # `indices` maps the sorted batch back to its original order.
        indices = torch.empty(data_size, dtype=torch.long)
        indices[sorted_indices] = torch.arange(data_size)
        return data, length, indices


embed_vecs = torch.rand(40, 3)  # vocabulary of 40, embedding dim 3
model = BiGRU(embed_vecs, 4)
datamm = [[0, 12, 11, 1], [1, 2, 3, 0], [1, 1, 0, 0]]
text = torch.tensor(datamm, dtype=torch.long)  # nn.Embedding expects integer indices
print(text.shape)
inpi = {'text': text, 'length': torch.tensor([4, 3, 2])}
outputs = model(inpi)
```
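To compare the three ideas, here is a standalone sketch of what each reduction does to a dummy tensor standing in for the padded GRU output (batch=3, seq_len=4, rnn_dim=512, matching the toy run above):

```
import torch

# Dummy stand-in for the padded, batch-first GRU output.
x = torch.rand(3, 4, 512)  # (batch, seq_len, rnn_dim)

x_sum = x.sum(dim=2)                   # collapses the features   -> (3, 4)
x_reshape = x.reshape(x.shape[0], -1)  # flattens time + features -> (3, 2048)
x_last = x[:, -1]                      # keeps only the last step -> (3, 512)

print(x_sum.shape, x_reshape.shape, x_last.shape)
```

Note that only the third keeps a feature size of `rnn_dim`, which is what `self.res` expects; the reshape result additionally depends on `seq_len`, and the sum drops the features entirely. I also thought of a variant of the third method that gathers the last non-padded timestep per sequence rather than position `-1` (since shorter sequences end in padding); a rough sketch, not in my code:

```
# Hypothetical variant: pick the last valid (non-padded) timestep per sequence.
# `length` holds the true sequence lengths, as in my forward() above.
length = torch.tensor([4, 3, 2])
idx = (length - 1).view(-1, 1, 1).expand(-1, 1, x.size(2))  # (batch, 1, rnn_dim)
x_last_valid = x.gather(1, idx).squeeze(1)                  # (3, 512)
print(x_last_valid.shape)
```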

Which of these methods is best?