Summary of LSTM Model

Hi!
I have a problem with the summary method. I am working with a basic LSTM model and I don't know how to fix the problem.
Model:

class LSTMModel(nn.Module):
    """Multi-layer LSTM followed by a linear head over the flattened sequence.

    The linear layer consumes the LSTM outputs of all time steps at once, so
    the model expects batched input of shape ``(batch, seq_len, input_dim)``
    with a fixed ``seq_len``.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the LSTM hidden state.
        layer_dim: Number of stacked LSTM layers.
        output_dim: Number of output features.
        seq_len: Sequence length the linear head is sized for. Defaults to
            64, the value that used to be hard-coded.
    """

    def __init__(self, input_dim: int, hidden_dim: int, layer_dim: int,
                 output_dim: int, seq_len: int = 64):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.seq_len = seq_len
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        # The head sees every time step, hence seq_len * hidden_dim inputs.
        self.fc = nn.Linear(seq_len * hidden_dim, output_dim)

    def forward(self, x):
        """Run the LSTM on ``x`` and map the flattened outputs to ``output_dim``.

        Args:
            x: Batched input of shape ``(batch, seq_len, input_dim)``.

        Returns:
            Tensor of shape ``(batch, output_dim)``.
        """
        # new_zeros inherits x's device and dtype, so the initial states match
        # the input on GPU or in float64; they never require grad.
        h0 = x.new_zeros(self.layer_dim, x.size(0), self.hidden_dim)
        c0 = x.new_zeros(self.layer_dim, x.size(0), self.hidden_dim)
        out, _ = self.lstm(x, (h0, c0))
        # (batch, seq_len, hidden_dim) -> (batch, seq_len * hidden_dim)
        out = torch.flatten(out, start_dim=1, end_dim=2)
        return self.fc(out)
# Build the model: input_dim=8, hidden_dim=18, layer_dim=7 (LSTM layers), output_dim=9.
model = LSTMModel(8, 18, 7, 9)
# NOTE(review): `summary` is presumably torchsummary.summary (import not shown);
# the size tuple (64, 8) carries no batch dimension -- confirm against the import.
summary(model,(64, 8))

I receive `AttributeError: 'tuple' object has no attribute 'size'`. So how can I produce the summary?
Can you help me with this? Thanks!

Your input seems to be missing the batch dimension, and after fixing it torchinfo.summary works:

class LSTMModel(nn.Module):
    """Multi-layer LSTM followed by a linear head over the flattened sequence.

    The linear layer consumes the LSTM outputs of all time steps at once, so
    the model expects batched input of shape ``(batch, seq_len, input_dim)``
    with a fixed ``seq_len``.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the LSTM hidden state.
        layer_dim: Number of stacked LSTM layers.
        output_dim: Number of output features.
        seq_len: Sequence length the linear head is sized for. Defaults to
            64, the value that used to be hard-coded.
    """

    def __init__(self, input_dim: int, hidden_dim: int, layer_dim: int,
                 output_dim: int, seq_len: int = 64):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.seq_len = seq_len
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        # The head sees every time step, hence seq_len * hidden_dim inputs.
        self.fc = nn.Linear(seq_len * hidden_dim, output_dim)

    def forward(self, x):
        """Run the LSTM on ``x`` and map the flattened outputs to ``output_dim``.

        Args:
            x: Batched input of shape ``(batch, seq_len, input_dim)``.

        Returns:
            Tensor of shape ``(batch, output_dim)``.
        """
        # new_zeros inherits x's device and dtype, so the initial states match
        # the input on GPU or in float64; they never require grad.
        h0 = x.new_zeros(self.layer_dim, x.size(0), self.hidden_dim)
        c0 = x.new_zeros(self.layer_dim, x.size(0), self.hidden_dim)
        out, _ = self.lstm(x, (h0, c0))
        # (batch, seq_len, hidden_dim) -> (batch, seq_len * hidden_dim)
        out = torch.flatten(out, start_dim=1, end_dim=2)
        return self.fc(out)

# Build the model: input_dim=8, hidden_dim=18, layer_dim=7 (LSTM layers), output_dim=9.
model = LSTMModel(8, 18, 7, 9)
# A 2-D input is treated by nn.LSTM as unbatched, which conflicts with the
# 3-D initial states that forward() builds.
x = torch.randn(64, 8)
out = model(x)
# RuntimeError: For unbatched 2-D input, hx and cx should also be 2-D but got (3-D, 3-D) tensors

# Add a leading batch dimension: (batch=1, seq_len=64, features=8).
x = torch.randn(1, 64, 8)
out = model(x) # works

# Pass the full input size, including the batch dimension, to torchinfo.
torchinfo.summary(model, (1, 64, 8), device="cpu")
# ==========================================================================================
# Layer (type:depth-idx)                   Output Shape              Param #
# ==========================================================================================
# LSTMModel                                [1, 9]                    --
# ├─LSTM: 1-1                              [1, 64, 18]               18,432
# ├─Linear: 1-2                            [1, 9]                    10,377
# ==========================================================================================
# Total params: 28,809
# Trainable params: 28,809
# Non-trainable params: 0
# Total mult-adds (M): 1.19
# ==========================================================================================
# Input size (MB): 0.00
# Forward/backward pass size (MB): 0.01
# Params size (MB): 0.12
# Estimated Total Size (MB): 0.13
# ==========================================================================================

Thanks! It works :blush:. Are there any differences between torchinfo.summary and torchsummary.summary? For torchsummary it does not work.

If you are referring to this repository (torchsummary), be aware of the note in its README:

Use the new and updated torchinfo.

since the last direct change to torchsummary was made ~3 years ago.