I’m working on a CNN LSTM model. While attempting to pass input through the LSTM, I get the following error:
TypeError: 'generator' object is not subscriptable
The entire problem stems from keeping the hidden state initialization outside the dataloader loop. As soon as I move it in, it works, which would mean that my hidden state would be reset every batch. This is clearly undesirable.
for loader in loaders:
hidden = model.hidden() # Fails
for X, y in loader:
hidden = model.hidden() # Works
# Reshape data
y_em, y_seg = y[0][1:], y[0][:1]
x_l, x_c, x_r = X[0].permute(1, 0, 2, 3), X[1], X[2].permute(1, 0, 2, 3)
Here’s the entire fit function for reference:
def fit(model, loaders, run, rm, criterion, optimizer, num_epochs, device='cpu'):
for epoch in range(num_epochs):
rm.begin_epoch()
for loader in loaders:
hidden = model.hidden()
for X, y in loader:
# Reshape data
y_em, y_seg = y[0][1:], y[0][:1]
x_l, x_c, x_r = X[0].permute(1, 0, 2, 3), X[1], X[2].permute(1, 0, 2, 3)
# Run model
pred_em, pred_seg, hidden = model(x_l, x_c, x_r, hidden)
# Convert segment prediction to single output
pred_seg = torch.tensor(
[torch.argmax(pred_seg, dim=0)]
).float().to(device)
# Calculate loss
loss_em = criterion(y_em, pred_em).view([1])
loss_seg = criterion(y_seg, pred_seg).view([1])
total_loss = sum(loss_em, loss_seg)
# Perform backprop
total_loss.backward(retain_graph=True)
optimizer.step()
hidden = (x.detach() for x in hidden)
# Track metrics
rm.track_metrics(
loss_seg,
loss_em,
pred_seg,
y_seg,
pred_em,
y_em
)
rm.end_epoch()
And this is the code of the model:
class CNN_LSTM(nn.Module):
def __init__(self, context_size):
super(CNN_LSTM, self).__init__()
# Context encoder
self.context_encoder_lr = ContextEncoder()
self.context_encoder_c = ContextEncoder()
# Hidden state generation
self.lstm = nn.LSTM(72, 36)
# Attention generation
self.attention = nn.Linear(36, 36)
# Output layers
self.emotions = nn.Linear((2 * context_size + 1) * 36, 13)
self.segment = nn.Linear((2 * context_size + 1) * 36, 2)
def hidden(self):
"""Reset hidden state with (h_n, c_n)"""
return (torch.zeros(1,1,36).to("cuda"), torch.zeros(1,1,36).to("cuda"))
def forward(self, x_left, x_center, x_right, hidden):
# Encode context
c_l = self.context_encoder_lr(x_left)
c_c = self.context_encoder_c(x_center)
c_r = self.context_encoder_lr(x_right)
# Stack contexts and unsqueeze batch size
c = torch.cat([c_l, c_c, c_r])
c = c.unsqueeze(1)
# Pass context through LSTM
x, hidden = self.lstm(c, hidden)
# Generate attention
attn = self.attention(x)
attn = F.softmax(attn, dim=1)
# Apply attention
x = x * attn
# Flatten layers
x = x.view(-1)
# Feed to output layers
out_emotions = self.emotions(x)
out_segment = self.segment(x)
return F.sigmoid(out_emotions), F.softmax(out_segment), hidden```