Teach CNN on own dataset

Hi!

I am new to Python and PyTorch. I am trying to build a CNN in PyTorch that can learn from my own dataset, but I keep getting errors. I've managed to train a CNN on the MNIST dataset, but I can't make it work with my own data.
The error is RuntimeError: shape '[10, 400]' is invalid for input of size 595360. Can anybody explain what I am doing wrong and what I should change?
My dataset has 445 files in total: 333 of them belong to one class and the other 112 to the second class. If I understood it correctly, my error is somewhere in the ConvNet class.
Thanks!

import sys
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.autograd import Variable
import torchvision.transforms as transforms
import torchvision.datasets
from torchvision import models
from torchsummary import summary
from bokeh.plotting import figure
from bokeh.io import show
from bokeh.models import LinearAxis, Range1d
import numpy as np

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
num_epochs = 4
BATCH_SIZE = 10
learning_rate = 0.001
TRAIN_DATA_PATH = 'D:/dataset/train_noise/'
TEST_DATA_PATH = "D:/dataset/obychennij_dataset_noise"
TRANSFORM_IMG = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(256),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225] ),
    ])

train_data = torchvision.datasets.ImageFolder(root=TRAIN_DATA_PATH, transform=TRANSFORM_IMG)
train_data_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True,  num_workers=0)
test_data = torchvision.datasets.ImageFolder(root=TEST_DATA_PATH, transform=TRANSFORM_IMG)
test_data_loader  = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=0) 

class ConvNet(nn.Module):
  def __init__(self):

    super(ConvNet, self).__init__()
    self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
    self.pool = nn.MaxPool2d(2, 2)
    self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
    self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=300)
    self.fc2 = nn.Linear(in_features=300, out_features=100)
    self.fc3 = nn.Linear(in_features=100, out_features=2)

  def forward(self, x):

    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    x = x.view(x.size(0), 16*5*5)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.fc3(x)
    return x

  def num_flat_features(self, x):

    size = x.size()[1:]
    num_features = 1
    for s in size:
      num_features *= s
    return num_features 

if __name__ == "__main__":

    print("Number of train samples: ", len(train_data))
    print("Number of test samples: ", len(test_data))
    print("Detected Classes are: ", train_data.class_to_idx)
    model = ConvNet().to(device)
    loss_func = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) 
    total_step = len(train_data_loader)
    loss_list = []
    acc_list = []
    for epoch in range(num_epochs):
        for i, (x,y) in enumerate(train_data_loader):
            b_x = x.to(device)   # batch x (image)
            b_y = y.to(device)   # batch y (target)
            output = model(b_x)[0]
            loss = loss_func(output, b_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if i % 50 == 0:
                # evaluate on a single batch from the test loader
                test_x, test_y = next(iter(test_data_loader))
                test_x, test_y = test_x.to(device), test_y.to(device)
                test_output = model(test_x)
                pred_y = torch.max(test_output, 1)[1]
                accuracy = (pred_y == test_y).sum().item() / float(test_y.size(0))
                print('Epoch: ', epoch, '| train loss: %.4f' % loss.item(), '| test accuracy: %.2f' % accuracy)

    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for x, y in test_data_loader:
            x, y = x.to(device), y.to(device)
            outputs = model(x)
            _, predicted = torch.max(outputs, 1)
            total += y.size(0)
            correct += (predicted == y).sum().item()

        print('Test Accuracy of the model on the test images: {} %'.format((correct / total) * 100))

The error is raised because the flattened activation contains more features than the first linear layer (self.fc1) expects. This line of code:

x = x.view(x.size(0), 16*5*5)

tries to create a tensor of shape [batch_size, 16*5*5=400], while (based on the error message) the activation contains 595360 values in total for the batch of 10, i.e. 59536 features per sample: with 256x256 inputs the activation after the second pooling layer has the shape [batch_size, 16, 61, 61], and 16*61*61 = 59536.
You could thus use:

x = x.view(x.size(0), -1)

to flatten the tensor and create self.fc1 as:

self.fc1 = nn.Linear(in_features=16*61*61, out_features=300)

or alternatively resize the input tensor to a smaller spatial size than 256x256.
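
For reference, a minimal sketch of the model with these two changes applied (assuming you keep the 256x256 input size from your transforms) could look like this:

class ConvNet(nn.Module):
  def __init__(self):
    super(ConvNet, self).__init__()
    self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
    self.pool = nn.MaxPool2d(2, 2)
    self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
    # 256 -> conv1 -> 252 -> pool -> 126 -> conv2 -> 122 -> pool -> 61,
    # so the flattened activation has 16 * 61 * 61 = 59536 features per sample
    self.fc1 = nn.Linear(in_features=16 * 61 * 61, out_features=300)
    self.fc2 = nn.Linear(in_features=300, out_features=100)
    self.fc3 = nn.Linear(in_features=100, out_features=2)

  def forward(self, x):
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    x = x.view(x.size(0), -1)  # flatten to [batch_size, 59536]
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.fc3(x)
    return x

You can also double-check the flattened size by printing x.shape right after the second pooling layer, or by passing a dummy tensor of shape [1, 3, 256, 256] through the conv layers.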

Thanks @ptrblck, that helped to resolve this error, but now I get another one :sweat_smile:
IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
The exception is thrown at loss = loss_func(output, b_y).
What does it mean?

I'm not sure why this indexing is needed here:

output = model(b_x)[0]

as it uses only the first sample in the batch and raises this error, since the batch dimension is missing afterwards.
Removing the indexing should solve the issue, but you should check why it was used in the first place.
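
I.e. the inner training step could look like this (a minimal sketch; loss_func is the nn.CrossEntropyLoss criterion from your script):

output = model(b_x)             # keep the full [batch_size, 2] output
loss = loss_func(output, b_y)   # CrossEntropyLoss expects [batch_size, num_classes] logits and [batch_size] targets
optimizer.zero_grad()
loss.backward()
optimizer.step()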