RuntimeError: shape '[-1, 1881]' is invalid for input of size 1100

nikeair · October 12, 2021, 7:46am

I am trying to set up an FFNN for my external data. It has a shape of 256x11 (plus 1 for the label), and I am splitting it into 171x11 for training and 85x11 for testing.

I am getting the following error while executing:

Traceback (most recent call last):
  File "C:\Users\MyMachine\PycharmProjects\MaProject\My_FFNN_Ex.py", line 159, in <module>
    images = images.reshape(-1, input_size).to(device)
RuntimeError: shape '[-1, 1881]' is invalid for input of size 1100
<torch.utils.data.dataloader._SingleProcessDataLoaderIter object at 0x0000022D302AA940>
torch.Size([100, 11]) torch.Size([100])

How can I solve the problem?

My Code:

import matplotlib.pyplot as plt
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import Dataset
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler


# total length: 256* (11 + Label)
# Module-level copy of the raw CSV.
# NOTE(review): df_ip is not referenced again in the code shown in this listing.
df_ip = pd.read_csv('./data_lstm.csv')


class FeatureDatasetTrain(Dataset):
    """Training split: the first two thirds of the rows of a CSV file.

    Features are all columns of the file, standardised with a
    ``StandardScaler`` fitted on this split.  Labels are derived from the
    ``my_col`` column: 0 for values below 500, 1 for values in [500, 600)
    and 2 otherwise.  Labels are zero-based integers so they can be used
    directly as class indices for a three-class ``nn.CrossEntropyLoss``.
    """

    def __init__(self, file_name):
        """Load ``file_name`` and build the feature and label tensors.

        Args:
            file_name: Path of the CSV file to read.
        """
        # Read the file the caller asked for instead of a hard-coded path.
        df_input = pd.read_csv(file_name)

        # Set 2/3 of the dataset as training-data
        split = round(df_input.shape[0] / 3 * 2)
        x = df_input.values[:split, :]

        # Vectorised label generation.  Rows that match neither mask
        # (including NaN) keep the default class, like the final `else`
        # branch of a row-by-row comparison would.
        values = df_input['my_col']
        labels = pd.Series(2, index=df_input.index)
        labels.loc[values < 500.0] = 0
        labels.loc[(values >= 500.0) & (values < 600.0)] = 1
        y = labels.values[:split]

        sc = StandardScaler()
        x_train = sc.fit_transform(x)

        self.X_train = torch.tensor(x_train, dtype=torch.float32)
        # Class indices must be integer (long) tensors for CrossEntropyLoss.
        self.y_train = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        """Return the number of training samples."""
        return len(self.y_train)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair at position ``idx``."""
        return self.X_train[idx], self.y_train[idx]



# Test-data: the last 1/3 of the dataset
class FeatureDatasetTest(Dataset):
    """Test split: the rows after the first two thirds of a CSV file.

    Features are all columns of the file.  They are standardised with a
    ``StandardScaler`` fitted on the training rows (the first two thirds)
    so that test data is scaled with the same statistics as training data.
    Labels are derived from the ``my_col`` column: 0 for values below 500,
    1 for values in [500, 600) and 2 otherwise.
    """

    def __init__(self, file_name):
        """Load ``file_name`` and build the feature and label tensors.

        Args:
            file_name: Path of the CSV file to read.
        """
        # Read the file the caller asked for instead of a hard-coded path.
        df_input = pd.read_csv(file_name)

        # The test split starts exactly where the training split ends;
        # starting at split + 1 would silently drop one row.
        split = round(df_input.shape[0] / 3 * 2)
        features = df_input.values

        # Vectorised label generation.  Rows that match neither mask
        # (including NaN) keep the default class.
        values = df_input['my_col']
        labels = pd.Series(2, index=df_input.index)
        labels.loc[values < 500.0] = 0
        labels.loc[(values >= 500.0) & (values < 600.0)] = 1
        y = labels.values[split:]

        # Fit on the training rows only, then apply to the test rows.
        sc = StandardScaler()
        sc.fit(features[:split, :])
        x_test = sc.transform(features[split:, :])

        self.X_test = torch.tensor(x_test, dtype=torch.float32)
        # Integer class indices, comparable with argmax predictions.
        self.y_test = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        """Return the number of test samples."""
        return len(self.y_test)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair at position ``idx``."""
        return self.X_test[idx], self.y_test[idx]

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# Hyper-Parameter
# NOTE(review): the batch printed by this script has shape [100, 11], i.e.
# 11 values per sample, so this input_size does not match the data and
# reshape(-1, input_size) in the training loop cannot succeed.
input_size = 616 
hidden_size = 9
num_classes = 3 # number of labels
num_epochs = 10
batch_size = 100
learning_rate = 0.001


# Both datasets are built from the same CSV file.
train_dataset = FeatureDatasetTrain('./data_lstm.csv')
test_dataset = FeatureDatasetTest('./data_lstm.csv')


# Training batches are shuffled; test batches keep the file order.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle= True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle= False)


# Pull one batch to inspect its shape.
# NOTE(review): `.next()` depends on the DataLoader iterator providing that
# method; the builtin `next(examples)` is the portable spelling - confirm
# against the installed PyTorch version.
examples = iter(train_loader)
print(examples)
samples, labels = examples.next()
print(samples.shape, labels.shape)



class NeuralNet(nn.Module):
    """Feed-forward classifier: Linear -> ReLU -> Linear.

    The forward pass returns the raw output of the second linear layer;
    no softmax is applied.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Create the two linear layers.

        Args:
            input_size: Number of input features per sample.
            hidden_size: Width of the hidden layer.
            num_classes: Number of output scores per sample.
        """
        super().__init__()
        self.input_size = input_size
        self.l1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return the class scores for the batch ``x``."""
        hidden = self.relu(self.l1(x))
        return self.l2(hidden)

# Build the model, loss and optimizer, run the training loop, then measure
# accuracy on the test loader.
model = NeuralNet(input_size, hidden_size, num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

n_total_steps = len(train_loader)

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        #images = images.to(device)
        # reshape(-1, input_size) only works when the number of elements in
        # the batch is a multiple of input_size; this is the line named in
        # the traceback of this thread.
        images = images.reshape(-1, input_size).to(device)
        labels = labels.to(device)

        # forward
        outputs = model(images)
        # NOTE(review): this creates a loss module but never evaluates it on
        # (outputs, labels); `criterion` defined above is not used.
        loss = nn.NLLLoss()

        # backward
        # NOTE(review): there is no loss.backward() call, so step() runs
        # without freshly computed gradients.
        optimizer.zero_grad()
        optimizer.step()

        # NOTE(review): `loss` is an nn.NLLLoss module here, not a tensor, so
        # loss.item() would fail if this branch were ever executed.
        if (i+1) % 100 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

# test

# Gradients are not needed for evaluation.
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_loader:
        images = images.reshape(-1, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)

        # torch.max over dim 1 returns (values, indices); the index of the
        # largest score is taken as the predicted class.
        _, predicted = torch.max(outputs.data, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

    # The "10000 test images" text below is a fixed string; the number of
    # evaluated samples is n_samples.
    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network on the 10000 test images: {acc} %')

nikeair · October 13, 2021, 6:17am

Someone? I already found two mistakes with my training dataset (code edited), but this doesn’t fix my problem.

ptrblck · October 13, 2021, 6:38am

This line of code is raising the issue:

images = images.reshape(-1, input_size).to(device)

since the input_size is incompatible with the shape of images.
If you want to flatten the tensor, use x = x.view(x.size(0), -1) instead which will also keep the batch size equal.
In your code I can only find input_size = 616, which also doesn’t match the posted error message, so I guess you might have changed the code already.

nikeair · October 13, 2021, 7:00am

Thanks for your answer. Do you mean that I should delete the line with images = images.reshape(-1, input_size).to(device) and include x = x.view(x.size(0), -1) in my “FeatureDatasetTrain”- and “FeatureDatasetTest”-class?

Because this gives me a new error: TypeError: 'int' object is not callable

I changed the input_size to 171 x 11 (–> (256 / 3 x 2) x11)

ptrblck · October 13, 2021, 7:25am

x was just used as a placeholder so replace it with images.
The input shape would thus be [batch_size, 171*11=1881] in the first case and [batch_size, 1877.33] in the second one (you would have to round it to an integer though).
However, the error claims it’s finding 1100 input values so did you check the shape of images?

nikeair · October 13, 2021, 9:30am

Right now, the shape of “images” is [171,11]. I changed images = images.reshape(-1, input_size).to(device) to images = images.view(images.size(0), -1).to(device) in both of my loops. Now I am getting the error:

Traceback (most recent call last):
  File "D:\Users\Google Drive\PycharmProjects\\My_FFNN_Ex.py", line 184, in <module>
    outputs = model(images)
  File "F:\ProgramData\Anaconda3\envs\placeholder\lib\site-packages\torch\nn\modules\module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "D:\Users\Google Drive\PycharmProjects\My_FFNN_Ex.py", line 160, in forward
    out = self.l1(x)
  File "F:\ProgramData\Anaconda3\envs\placeholder\lib\site-packages\torch\nn\modules\module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "F:\ProgramData\Anaconda3\envs\placeholder\lib\site-packages\torch\nn\modules\linear.py", line 96, in forward
    return F.linear(input, self.weight, self.bias)
  File "F:\ProgramData\Anaconda3\envs\placeholder\lib\site-packages\torch\nn\functional.py", line 1847, in linear
    return torch._C._nn.linear(input, weight, bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (171x11 and 1881x9)

This is what my code looks like right now:

import matplotlib.pyplot as plt
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import Dataset
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

# Module-level copy of the raw CSV; its row count is used further down to
# derive the batch sizes.
df_ip = pd.read_csv('./data_lstm.csv')
# NOTE(review): df_timestamp is not referenced again in the code shown.
df_timestamp = df_ip['bucket']


class FeatureDatasetTrain(Dataset):
    """Training split: the first two thirds of the rows of a CSV file.

    Features are all columns of the file, standardised with a
    ``StandardScaler`` fitted on this split.  Labels are derived from the
    ``my_col`` column: 0 for values below 500, 1 for values in [500, 600)
    and 2 otherwise.  Labels are zero-based integers so they can be used
    directly as class indices for a three-class ``nn.CrossEntropyLoss``.
    """

    def __init__(self, file_name):
        """Load ``file_name`` and build the feature and label tensors.

        Args:
            file_name: Path of the CSV file to read.
        """
        # Read the file the caller asked for instead of a hard-coded path.
        df_input = pd.read_csv(file_name)

        # 2/3 of the data for training
        split = round(df_input.shape[0] / 3 * 2)
        x = df_input.values[:split, :]

        # Vectorised label generation.  Rows that match neither mask
        # (including NaN) keep the default class.
        values = df_input['my_col']
        labels = pd.Series(2, index=df_input.index)
        labels.loc[values < 500.0] = 0
        labels.loc[(values >= 500.0) & (values < 600.0)] = 1
        # Labels of the first 2/3 as y
        y = labels.values[:split]

        sc = StandardScaler()
        # Debug output: raw features before scaling.
        print(x.shape)
        print(x)
        x_train = sc.fit_transform(x)
        # Debug output: features after scaling.
        print(x_train.shape)
        print(x_train)

        # One row per training sample (the first 2/3 of the file).
        self.X_train = torch.tensor(x_train, dtype=torch.float32)
        # Class indices must be integer (long) tensors for CrossEntropyLoss.
        self.y_train = torch.tensor(y, dtype=torch.long)
        print(self.y_train.shape)

    def __len__(self):
        """Return the number of training samples."""
        return len(self.y_train)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair at position ``idx``."""
        return self.X_train[idx], self.y_train[idx]


class FeatureDatasetTest(Dataset):
    """Test split: the rows after the first two thirds of a CSV file.

    Features are all columns of the file.  They are standardised with a
    ``StandardScaler`` fitted on the training rows (the first two thirds)
    so that test data is scaled with the same statistics as training data.
    Labels are derived from the ``my_col`` column: 0 for values below 500,
    1 for values in [500, 600) and 2 otherwise.
    """

    def __init__(self, file_name):
        """Load ``file_name`` and build the feature and label tensors.

        Args:
            file_name: Path of the CSV file to read.
        """
        # Read the file the caller asked for instead of a hard-coded path.
        df_input = pd.read_csv(file_name)

        # Last 1/3 of the data for testing.  The split starts exactly where
        # the training split ends; starting at split + 1 would drop a row.
        split = round(df_input.shape[0] / 3 * 2)
        features = df_input.values

        # Vectorised label generation.  Rows that match neither mask
        # (including NaN) keep the default class.
        values = df_input['my_col']
        labels = pd.Series(2, index=df_input.index)
        labels.loc[values < 500.0] = 0
        labels.loc[(values >= 500.0) & (values < 600.0)] = 1
        # Labels of the last 1/3 as y
        y = labels.values[split:]

        # Fit on the training rows only, then apply to the test rows.
        sc = StandardScaler()
        sc.fit(features[:split, :])
        x_test = sc.transform(features[split:, :])

        self.X_test = torch.tensor(x_test, dtype=torch.float32)
        # Integer class indices, comparable with argmax predictions.
        self.y_test = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        """Return the number of test samples."""
        return len(self.y_test)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair at position ``idx``."""
        return self.X_test[idx], self.y_test[idx]

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# Hyper-Parameter
# NOTE(review): 171*11 is (rows in the training split) * (values per row).
# The first linear layer is built with this many input features, while the
# batches fed to it have 11 columns - see the mat1/mat2 error in the thread.
input_size = 171*11
hidden_size = 9
num_classes = 3 # number of labels
num_epochs = 10
#batch_size_full = 256    #round((df_ip.shape[0]/2*3))
# One batch per split: the batch sizes are derived from the row count.
batch_size_train = round((df_ip.shape[0]/3*2)) # = 171
batch_size_test = round(df_ip.shape[0] - round(df_ip.shape[0]/3*2)) # = 85
learning_rate = 0.001


# Both datasets are built from the same CSV file.
train_dataset = FeatureDatasetTrain('./data_lstm.csv')
test_dataset = FeatureDatasetTest('./data_lstm.csv')


# Training batches are shuffled; test batches keep the file order.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size_train, shuffle= True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size_test, shuffle= False)


# Pull one batch from the training loader.
# NOTE(review): `.next()` depends on the DataLoader iterator providing that
# method; the builtin `next(examples)` is the portable spelling - confirm
# against the installed PyTorch version.
examples = iter(train_loader)
samples, labels = examples.next()


class NeuralNet(nn.Module):
    """Feed-forward classifier: Linear -> ReLU -> Linear.

    The forward pass returns the raw output of the second linear layer;
    no softmax is applied.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Create the two linear layers.

        Args:
            input_size: Number of input features per sample.
            hidden_size: Width of the hidden layer.
            num_classes: Number of output scores per sample.
        """
        super().__init__()
        self.input_size = input_size
        self.l1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return the class scores for the batch ``x``."""
        activated = self.relu(self.l1(x))
        scores = self.l2(activated)
        return scores

# Build the model, loss and optimizer, run the training loop, then measure
# accuracy on the test loader.
# NOTE(review): the model is built with input_size = 171*11, but the batches
# below have 11 columns after view(); this is the mat1/mat2 shape mismatch
# reported in the thread.
model = NeuralNet(input_size, hidden_size, num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

n_total_steps = len(train_loader)

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Keep the batch dimension and flatten everything else.
        images = images.view(images.size(0), -1).to(device)
        # Debug output: shape of the batch that is fed to the model.
        print('->')
        print(images.size())
        print('<-')
        labels = labels.to(device)

        # forward
        outputs = model(images)
        # NOTE(review): this creates a loss module but never evaluates it on
        # (outputs, labels); `criterion` defined above is not used.
        loss = nn.NLLLoss()

        # backward
        # NOTE(review): there is no loss.backward() call, so step() runs
        # without freshly computed gradients.
        optimizer.zero_grad()
        optimizer.step()

        # NOTE(review): `loss` is an nn.NLLLoss module here, not a tensor, so
        # loss.item() would fail if this branch were ever executed.
        if (i+1) % 100 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

# test

# Gradients are not needed for evaluation.
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_loader:
        images = images.view(images.size(0), -1).to(device)
        labels = labels.to(device)
        outputs = model(images)

        # torch.max over dim 1 returns (values, indices); the index of the
        # largest score is taken as the predicted class.
        _, predicted = torch.max(outputs.data, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

    print(n_samples)
    print(n_correct)
    # The "10000 test images" text below is a fixed string; the number of
    # evaluated samples is n_samples.
    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network on the 10000 test images: {acc} %')

ptrblck · October 13, 2021, 7:34pm

Something still doesn’t seem to be right.
You are using a batch_size of 100, so the tensor shape should be [batch_size=100, *].
Where is the 171 coming from?

nikeair · October 14, 2021, 6:26am

the 171 is 2/3 of my dataset (round(256 lines / 3 x 2))

ptrblck · October 14, 2021, 5:22pm

This would thus be the number of samples in the batch, wouldn’t it?
If so, then you should not flatten this dimension with the features, but could use the input directly as [batch_size=171, nb_features=11] and set in_features=11 in the linear layer.

nikeair · November 2, 2021, 8:27am

You were right, now I am using a batch size of 171 and an input size of 11. My code is kind of working right now, but the accuracy is still pretty low. It is always between 45 and 57 percent. I played around with the hyper parameters as you can see in my code:

import matplotlib.pyplot as plt
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import Dataset
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

# Module-level copy of the raw CSV; its row count is used further down to
# derive the batch sizes.
df_ip = pd.read_csv('./data_lstm.csv')
# NOTE(review): df_timestamp is not referenced again in the code shown.
df_timestamp = df_ip['bucket_timeline']


class FeatureDatasetTrain(Dataset):
    """Training split: the first two thirds of the rows of a CSV file.

    Features are every column that remains after dropping ``id_imei`` and
    ``bucket_timeline``, standardised with a ``StandardScaler`` fitted on
    this split.  Labels are derived from the ``some_value_2`` column:
    0 for values below 500, 1 for values in [500, 600) and 2 otherwise.
    """

    def __init__(self, file_name):
        """Load ``file_name`` and build the feature and label tensors.

        Args:
            file_name: Path of the CSV file to read.
        """
        # Read the file the caller asked for instead of a hard-coded path.
        df_input = pd.read_csv(file_name)
        df_input = df_input.drop(['id_imei', 'bucket_timeline'], axis=1)

        # 2/3 of the dataset for training
        split = round(df_input.shape[0] / 3 * 2)
        x = df_input.values[:split, :]

        # Vectorised label generation by value ranges.  Rows that match
        # neither mask (including NaN) keep the default class, like the
        # final `else` branch of a row-by-row comparison would.
        values = df_input['some_value_2']
        labels = pd.Series(2, index=df_input.index)
        labels.loc[values < 500.0] = 0
        labels.loc[(values >= 500.0) & (values < 600.0)] = 1
        # label of the first 2/3 of the dataset as y
        y = labels.values[:split]

        sc = StandardScaler()
        x_train = sc.fit_transform(x)

        # One row per training sample (the first 2/3 of the file).
        self.X_train = torch.tensor(x_train, dtype=torch.float32)
        self.y_train = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        """Return the number of training samples."""
        return len(self.y_train)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair at position ``idx``."""
        return self.X_train[idx], self.y_train[idx]


class FeatureDatasetTest(Dataset):
    """Test split: the rows after the first two thirds of a CSV file.

    Features are every column that remains after dropping ``id_imei`` and
    ``bucket_timeline``.  They are standardised with a ``StandardScaler``
    fitted on the training rows (the first two thirds) so that test data is
    scaled with the same statistics as training data.  Labels are derived
    from the ``some_value_2`` column: 0 for values below 500, 1 for values
    in [500, 600) and 2 otherwise.
    """

    def __init__(self, file_name):
        """Load ``file_name`` and build the feature and label tensors.

        Args:
            file_name: Path of the CSV file to read.
        """
        # Read the file the caller asked for instead of a hard-coded path.
        df_input = pd.read_csv(file_name)
        df_input = df_input.drop(['id_imei', 'bucket_timeline'], axis=1)

        # 1/3 of the dataset for testing.  The split starts exactly where
        # the training split ends; starting at split + 1 would drop a row.
        split = round(df_input.shape[0] / 3 * 2)
        features = df_input.values

        # Generate labels by value ranges.  Rows that match neither mask
        # (including NaN) keep the default class.
        values = df_input['some_value_2']
        labels = pd.Series(2, index=df_input.index)
        labels.loc[values < 500.0] = 0
        labels.loc[(values >= 500.0) & (values < 600.0)] = 1
        # label of the last 1/3 of the dataset as y
        y = labels.values[split:]

        # Fit on the training rows only, then apply to the test rows.
        sc = StandardScaler()
        sc.fit(features[:split, :])
        x_test = sc.transform(features[split:, :])

        self.X_test = torch.tensor(x_test, dtype=torch.float32)
        # Same integer dtype as the training labels.
        self.y_test = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        """Return the number of test samples."""
        return len(self.y_test)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair at position ``idx``."""
        return self.X_test[idx], self.y_test[idx]

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# Hyper-Parameter
input_size = 11  # values per sample fed to the first linear layer
hidden_size = 70
num_classes = 3  # number of labels
num_epochs = 100
# One batch per split: the batch sizes are derived from the row count.
batch_size_train = round(df_ip.shape[0] / 3 * 2)  # 171
batch_size_test = df_ip.shape[0] - batch_size_train  # 85
# A step size of 4 is orders of magnitude too large for Adam and keeps the
# optimizer from converging; 0.001 is Adam's default learning rate.
learning_rate = 0.001


# Both datasets are built from the same CSV file.
train_dataset = FeatureDatasetTrain('./data_lstm.csv')
test_dataset = FeatureDatasetTest('./data_lstm.csv')

# Training batches are shuffled; test batches keep the file order.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size_train, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size_test, shuffle=False)



class NeuralNet(nn.Module):
    """Feed-forward classifier: Linear -> ReLU -> Linear.

    The forward pass returns the raw output of the second linear layer;
    no softmax is applied.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Create the two linear layers.

        Args:
            input_size: Number of input features per sample.
            hidden_size: Width of the hidden layer.
            num_classes: Number of output scores per sample.
        """
        super().__init__()
        self.input_size = input_size
        self.l1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return the class scores for the batch ``x``."""
        return self.l2(self.relu(self.l1(x)))


# Build the model, loss and optimizer, run the training loop, then measure
# accuracy on the test loader.
model = NeuralNet(input_size, hidden_size, num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

n_total_steps = len(train_loader)


model.train()
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Keep the batch dimension and flatten everything else.
        images = images.view(images.size(0), -1).to(device)
        # CrossEntropyLoss expects integer class indices as targets.
        labels = labels.to(device=device, dtype=torch.long)

        # forward
        outputs = model(images)
        loss = criterion(outputs, labels)

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report every 100 steps and on the last step of each epoch; with
        # only a few batches per epoch the 100-step rule alone never fires.
        if (i+1) % 100 == 0 or (i+1) == n_total_steps:
            print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

# test
model.eval()
# Gradients are not needed for evaluation.
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_loader:
        images = images.view(images.size(0), -1).to(device)
        labels = labels.to(device=device, dtype=torch.long)
        outputs = model(images)

        # torch.max over dim 1 returns (values, indices); the index of the
        # largest score is the predicted class.
        _, predicted = torch.max(outputs, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

    print(n_samples)
    print(n_correct)
    # Guard against an empty test loader instead of dividing by zero.
    acc = 100.0 * n_correct / n_samples if n_samples else 0.0
    # Report the real number of evaluated samples.
    print(f'Accuracy of the network on the {n_samples} test samples: {acc} %')

Thanks a lot for the help so far.