Could not calculate SHAP VALUES

I am training that model for classification problem of three classes , input sequence of length 341 of integers and output one class from {0,1,2}. I am trying to use shape analysis .
Shap explainer raised an error from lstm layer says
raise RuntimeError(msg.format(expected_hidden_size, list(hx.size())))
RuntimeError: Expected hidden[0] size (2, 32, 64), got [2, 16, 64]
and i tried to used different number of sequence to explain but it did not effect the input of lstm.

class PPS(nn.Module):
    def __init__(self):
        super(PPS, self).__init__()
        self.n_layers = 2
        self.seq_len = 341
        self.n_hidden = 64
        self.numb_label = 3

        self.lstm1 = nn.LSTM(
            input_size=self.seq_len,
            hidden_size=self.n_hidden,
            num_layers=1,
            bidirectional=True,
            batch_first=True, dropout=0.2)

        self.fc1 = nn.Linear(64 * 2, 32)
        self.fc2 = nn.Linear(32, 3)
        self.sof = nn.LogSoftmax(dim=-1)
        self.relu = nn.Sequential(nn.ReLU())

    def forward(self, x):
        h0 = torch.zeros(self.n_layers, BATCH_SIEZ, self.n_hidden)
        h0.requires_grad = True

        c0 = torch.zeros(self.n_layers, BATCH_SIEZ, self.n_hidden)
        c0.requires_grad = True

        h0, c0 = h0.cuda(), c0.cuda()
        print(x.shape)
        x = x.squeeze(-1).unsqueeze(1)
        print(x.shape)

        out, (_, _) = self.lstm1(x, (h0, c0))

        out = self.fc1(out)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.sof(out)
        return out

N_EPOCHES = 1  # 20
LEARNING_RATE = 0.0001
BATCH_SIEZ = 16

params = {'batch_size': BATCH_SIEZ,
          'num_workers': 2
      
# Configuration options
loss_function = nn.CrossEntropyLoss()

# Set fixed random number seed
torch.manual_seed(42)
trainloader = torch.utils.data.DataLoader(
    dataset, **params, shuffle=True, drop_last=True)
testloader = torch.utils.data.DataLoader(
    test_set, **params, shuffle=True, drop_last=True)
# Init the neural network
model = PPS()
model.to(device)
model.apply(init_weights)
# Initialize optimizer
optimizer = optim.RMSprop(model.parameters(), lr=LEARNING_RATE)

# Run the training loop for defined number of epochs
valid_losses = []
train_losses = []
avg_valid_losses = []
avg_train_losses = []
for epoch in range(0, N_EPOCHES):
    model.train()

    for i, data in enumerate(trainloader, 0):
        inputs, targets = data
        inputs = inputs.unsqueeze(-1)
        optimizer.zero_grad()
        # Perform forward pass
        inputs = inputs.cuda()
        targets = targets.cuda()
        outputs = model(inputs)
        outputs = outputs.squeeze()
        if CUDA:
            loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        epoch_loss += loss
    print("epoch", epoch + 1, "acc", sum(a) / len(dataset), "loss", epoch_loss / len(trainloader))

#shap Analysis
batch = next(iter(testloader))
sequences, _ = batch
background_sequences = sequences[:100].to(device)
test_sequences = protein_sequence[100:105].to(device)

sequences_to_explain = background_sequences[:32]
e = shap.DeepExplainer(model, sequences_to_explain)
shap_values = e.shap_values(sequences_to_explain)