RuntimeError: For unbatched 2-D input, hx should also be 2-D but got 3-D tensor

class GRU_Model(nn.Module):
    """GRU encoder followed by a linear head applied to the last time step.

    Args:
        num_inputs: Number of features per time step (GRU ``input_size``).
        num_hidden: Size of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        num_outputs: Number of values produced by the linear head.
    """

    def __init__(self, num_inputs, num_hidden, num_layers, num_outputs):
        super().__init__()
        self.D = num_inputs
        self.M = num_hidden
        self.K = num_outputs
        self.L = num_layers

        self.gru = nn.GRU(
            input_size=self.D,
            hidden_size=self.M,
            num_layers=self.L,
            batch_first=True)

        self.fc = nn.Linear(self.M, self.K)

    def forward(self, X):
        """Run the GRU over ``X`` and project the last time step.

        Args:
            X: Either a batched ``(batch, seq_len, num_inputs)`` tensor or an
                unbatched ``(seq_len, num_inputs)`` tensor.

        Returns:
            A ``(batch, num_outputs)`` tensor for batched input, or a
            ``(num_outputs,)`` tensor for unbatched input.

        Raises:
            ValueError: If ``X`` is neither 2-D nor 3-D (for example a 4-D
                batch that still carries a trailing singleton axis).
        """
        # The initial hidden state must have the same "batchedness" as the
        # input: 3-D for batched input, 2-D for unbatched input.
        if X.dim() == 3:
            h0_shape = (self.L, X.size(0), self.M)
        elif X.dim() == 2:
            h0_shape = (self.L, self.M)
        else:
            raise ValueError(
                f"GRU_Model expects a (batch, seq_len, {self.D}) or "
                f"(seq_len, {self.D}) tensor, got shape {tuple(X.shape)}")

        # new_zeros inherits device and dtype from X, so no global `device`
        # is needed and the hidden state always lives next to the input.
        h0 = X.new_zeros(h0_shape)
        out, _ = self.gru(X, h0)

        # The ellipsis selects the last time step for both layouts.
        return self.fc(out[..., -1, :])
# Grab one batch from the training loader to inspect its shape.
dataiter = iter(train_loader)
# Advance the iterator with the built-in next(); Python 3 iterators expose
# __next__, not a .next() method.
sets, labels = next(dataiter)

# Shape and rank of a single sample.
print(sets[0].shape)
print(sets[0].ndim)

# Shape and rank of the whole batch.
print(sets.shape)
print(sets.ndim)

torch.Size([100, 2, 1])
3
torch.Size([32, 100, 2, 1])
4
from torchviz import make_dot

# Smoke-test the model on an all-zero dummy tensor and draw its autograd graph.
netD = model.to(device) # move the model to the same device as the input tensor
# NOTE(review): this dummy is 3-D, whereas the batch printed from train_loader
# earlier is 4-D ([32, 100, 2, 1]), so it does not have the rank of a real batch.
x = torch.zeros(100, 2, 1).to(device) # create a dummy input tensor on the same device
out = netD(x)
print("out shape:", out.shape)
# make_dot receives the output tensor plus the model's named parameters.
make_dot(out, params=dict(netD.named_parameters()))

![Capture|525x500](upload://3HdgozuIQJTTWKvnbLIKfLYYgLL.png)

![Capture|690x112](upload://vyXntHcf5QEtLXHDzFjuDeJ7srx.png)

Could anyone please help me resolve this error? Thanks in advance.

I'm unsure why the error is raised, as it seems you are passing a batched 3D input to the model. Are you seeing this issue with this code snippet or at another line of code?

I create the dataset as shown below. The rest of the code is uploaded here as-is.

# Build the classifier: 1 input feature per step, 5 hidden units, 1 GRU layer, 4 outputs.
model = GRU_Model(num_inputs=1, num_hidden=5, num_layers=1, num_outputs=4)
# Move the model's parameters to `device` (defined elsewhere).
model.to(device)
def data_pep(df, feat_col_idx, target_col_idx, seq_len=100):
    """Cut a 2-D array into consecutive, non-overlapping windows.

    Each window spans ``seq_len`` rows. Its target is taken from the last row
    of the window. Trailing rows that do not fill a whole window are dropped.

    Args:
        df: 2-D array supporting NumPy-style ``df[:, idx]`` indexing (despite
            the name, a pandas DataFrame does not support this indexing).
        feat_col_idx: Column index (or indices) of the feature column(s).
        target_col_idx: Column index (or indices) of the target column(s).
        seq_len: Number of rows per window; must be a positive integer.

    Returns:
        A tuple ``(X_out, y_out)`` of NumPy arrays. ``X_out`` has shape
        ``(n_windows, seq_len, n_features, 1)`` and ``y_out`` has shape
        ``(n_windows, n_targets, 1)``. Both are empty arrays when ``df`` has
        fewer than ``seq_len`` rows.

    Raises:
        ValueError: If ``seq_len`` is smaller than 1 (which would otherwise
            make the windowing loop run forever).

    Note:
        Every window keeps a trailing singleton axis, so a DataLoader batch of
        these windows is 4-D. ``torch.nn.GRU`` accepts only 2-D or 3-D input,
        so that axis has to be removed before feeding a recurrent layer.
    """
    if seq_len < 1:
        raise ValueError(f"seq_len must be a positive integer, got {seq_len}")

    X = df[:, feat_col_idx]
    y = df[:, target_col_idx]

    X_out = []
    y_out = []
    # end_idx is the exclusive end of each window; stepping by seq_len keeps
    # the windows non-overlapping and stops before a partial window.
    for end_idx in range(seq_len, len(df) + 1, seq_len):
        start_idx = end_idx - seq_len
        X_out.append(X[start_idx:end_idx].reshape(seq_len, -1, 1))
        # The target of a window is the value on its last row.
        y_out.append(y[end_idx - 1].reshape(-1, 1))

    return np.array(X_out), np.array(y_out)
from torch.utils.data import DataLoader, Dataset

class CustomDataset(Dataset):
    """Map-style dataset that pairs features and targets by position."""

    def __init__(self, X, y):
        # Both containers are stored by reference; nothing is copied or converted.
        self.X, self.y = X, y

    def __len__(self):
        """Return the number of samples, measured on the feature container."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at ``idx``."""
        return self.X[idx], self.y[idx]
def train_one_epoch(model, train_loader, optimizer, device, criterion):
    """Train model for one epoch and return the mean train_loss.

    Args:
        model: Module to optimise; it is switched to training mode.
        train_loader: Iterable of ``(inputs, labels)`` tensor batches that
            also exposes a ``dataset`` attribute supporting ``len()``.
        optimizer: Optimizer stepped once per batch.
        device: Device on which the batches are placed.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.

    Returns:
        The sum of all batch losses divided by ``len(train_loader.dataset)``.
        This is a per-sample mean only when ``criterion`` sums the loss over
        the batch (e.g. ``reduction='sum'``).
    """
    model.train()
    running_loss_train = 0
    for inputs, labels in train_loader:
        # Cast and move in one step. Unlike torch.cuda.FloatTensor, this works
        # for any `device`, including CPU-only machines.
        inputs = inputs.to(device=device, dtype=torch.float32)
        labels = labels.to(device=device, dtype=torch.long)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss_train += loss.item()
    train_loss = running_loss_train / len(train_loader.dataset)
    return train_loss
def validate(model, valid_loader, device, criterion):
    """Validate model and return the accuracy and mean loss.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        valid_loader: Iterable of ``(inputs, labels)`` tensor batches that
            also exposes a ``dataset`` attribute supporting ``len()``.
        device: Device on which the batches are placed.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.

    Returns:
        A tuple ``(val_acc, val_loss)``. ``val_acc`` is the number of samples
        whose arg-max prediction over dimension 1 equals the label, divided by
        ``len(valid_loader.dataset)``. ``val_loss`` is the sum of all batch
        losses divided by the same count, so it is a per-sample mean only when
        ``criterion`` sums the loss over the batch.
    """
    model.eval()
    correct = 0
    running_loss_val = 0
    with torch.no_grad():
        for inputs, labels in valid_loader:
            # Cast and move in one step. Unlike torch.cuda.FloatTensor, this
            # works for any `device`, including CPU-only machines.
            inputs = inputs.to(device=device, dtype=torch.float32)
            labels = labels.to(device=device, dtype=torch.long)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            pred = outputs.argmax(dim=1)
            correct += pred.eq(labels).sum().item()
            running_loss_val += loss.item()
    val_acc = correct / len(valid_loader.dataset)
    val_loss = running_loss_val / len(valid_loader.dataset)
    return val_acc, val_loss
def fit(model, train_loader, valid_loader, learning_rate, num_epochs):
    """Train ``model`` for up to ``num_epochs`` epochs with early stopping.

    The target device is read from the module-level ``device`` variable. One
    progress line is printed per epoch.

    Args:
        model: Module to train.
        train_loader: Loader passed to ``train_one_epoch``.
        valid_loader: Loader passed to ``validate``.
        learning_rate: Initial learning rate for Adam.
        num_epochs: Maximum number of epochs to run.
    """
    # Move the model first so the optimizer is built over the parameters as
    # they live on their final device.
    model = model.to(device)

    # Summed (not averaged) batch losses: the epoch helpers divide the running
    # total by the dataset size to obtain a per-sample mean.
    criterion = nn.CrossEntropyLoss(reduction='sum')
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    # NOTE(review): EarlyStopping is defined elsewhere; it is presumably
    # truthy once training should stop and checkpoints to `path` - confirm.
    es = EarlyStopping(mode='min', path='./x.pth', patience=10)
    # The learning rate is multiplied by 0.1 after every epoch.
    scheduler = ExponentialLR(optimizer, gamma=0.1)

    for epoch in range(1, num_epochs + 1):
        train_loss = train_one_epoch(model, train_loader, optimizer, device, criterion)
        val_acc, val_loss = validate(model, valid_loader, device, criterion)
        scheduler.step()
        print(f'Epoch {epoch:2}/{num_epochs}',
              f'train loss: {train_loss:.4f}',
              f'val loss: {val_loss:.4f}',
              f'val acc: {val_acc:.2%}',
              sep=' | '
             )
        if es(val_loss, model):
            break

Your initial code works fine for me:

# Minimal CPU reproduction: smallest possible model fed the same dummy input.
device = "cpu"
model = GRU_Model(1, 1, 1, 1)

netD = model.to(device) # move the model to the same device as the input tensor
x = torch.zeros(100, 2, 1).to(device) # create a dummy input tensor on the same device
out = netD(x)
print("out shape:", out.shape)
# out shape: torch.Size([100, 1])
# make_dot receives the output tensor plus the model's named parameters.
make_dot(out, params=dict(netD.named_parameters()))

and returns a valid output, so I would need more information on how to reproduce the issue and where the error is raised.

1 Like

@ptrblck Actually, I tried to find the problem but could not. So I rebuilt the model from scratch, and now it works. Thanks for your help.