nn.GRU with mask

If you are using the packed sequence route, something like this might work:

import torch
import torch.nn as nn

# Demo: run an nn.GRU over a batch while skipping masked timesteps.
# `mask` is True where a timestep is masked (i.e. should be ignored).
# pack_padded_sequence keeps only the FIRST lengths[b] steps of sequence b,
# so the valid steps of every sequence are moved to the front (keeping their
# original temporal order) before packing.

# set variables
features = 16
hidden_dim = 32
seq_len = 128
batch_size = 64

model = nn.GRU(features, hidden_dim, bias=False)

# generate an input, shape (seq_len, batch_size, features)
dummy_input = torch.rand((seq_len, batch_size, features))

# generate a mask, True = masked timestep
mask = torch.rand((seq_len, batch_size, 1)) > 0.5  # 50% masked values
is_masked = mask.squeeze(2)  # (seq_len, batch_size)

# get lengths: the number of VALID (unmasked) timesteps per sequence.
# NOTE: pack_padded_sequence rejects a length of 0, so every sequence must
# keep at least one valid timestep.
lengths = torch.sum(~is_masked, 0)

# this only repeats the same mask across the features dimension
mask = mask.repeat(1, 1, features)

# apply the mask
dummy_input_masked = dummy_input.masked_fill(mask, 0.0)  # filling masked values with 0

# move the valid timesteps of each sequence to the front.
# Valid steps get keys 0..seq_len-1 and masked steps seq_len..2*seq_len-1, so
# the keys are unique per sequence and sorting them preserves temporal order
# within each group without needing a stable sort.
time_idx = torch.arange(seq_len).unsqueeze(1)  # (seq_len, 1)
sort_key = is_masked.long() * seq_len + time_idx  # (seq_len, batch_size)
order = torch.argsort(sort_key, dim=0)
batch_idx = torch.arange(batch_size).unsqueeze(0)  # (1, batch_size)
dummy_input_compact = dummy_input_masked[order, batch_idx]

# pack the sequence: only the leading lengths[b] (valid) steps are kept
dummy_input_packed = torch.nn.utils.rnn.pack_padded_sequence(
    dummy_input_compact, lengths, enforce_sorted=False
)

print(dummy_input_packed)

# make the hidden layer
h0 = torch.rand(1, batch_size, hidden_dim)

# put masked input and hidden layer into the model
# `output` is a PackedSequence holding one hidden state per valid timestep
output, hn = model(dummy_input_packed, h0)

print(output)