Cannot train CIFAR10 with automatically generated models.

Background

Hello, I am Dan . My language is not English. if my English is difficult to understand, please tell me that. I rewrite about this.

I’m trying to implement a program that automatically creates machine learning models using Pytorch with Colab. I have completed the code to automatically create the model, but I am getting an error code when evaluating the automatically generated model with CIFAR10.

ErroeCode

This error happen in loss = criterion(outputs, labels)

RuntimeError: 1only batches of spatial targets supported (3D tensors) but got targets of size: : [64]

The source code in Problem

I compile this code in Colab.
・“MakeModel” function automatically creates a model and returns nn.modules.container.Sequential.
・ net = MakeModel([] , True , 12) to create a four-layer model with three elements in each layer.
・Train_dataset , val_dataset = Make_CIFAR10_Dataset(1000) to divide the training dataset into 1000 and 49000 evaluation sets.

import torch
import torch.nn as nn
import torch.nn.functional as F
import sys
import random
import os
import numpy as np
import matplotlib.pyplot as plt
import torch.optim as optim

import torchvision
from torchvision import models
import torchvision.transforms as transforms
from torchvision import datasets

# Can all elements of a given list be converted to int type? Are the numbers within the range 0~4?
def CheckList(LIST):

    Objlis = []

    for List_Element in LIST:

        T_F_str = isinstance(List_Element, str)

        if (T_F_str == False):

            if (0 <= List_Element <= 4):
                Objlis.append(List_Element)
            else:
                sys.exit("Invalid Number was Enterd")

        else:
            try:
                List_Element = int(List_Element)

                if (0 <= List_Element <= 4):
                    Objlis.append(List_Element)
                else:
                    sys.exit("Invalid Number was Enterd")

            except ValueError:
                sys.exit("Invalid Element was Enterd")

    return Objlis

# Store the three elements and return nn sequential.
def Make_Layer(ORDER_LIS , INPUT_CHANNEL , OUTPUT_CHANNEL):

    # if ther is no conv , add the conv
    if (ORDER_LIS.count(0) == 0):
        Del_Insert_Idx = random.randrange(0,3)
        ORDER_LIS[Del_Insert_Idx] = 0

    Layer = []

    for Number in (ORDER_LIS):

        if Number == 0:
            Layer.append(nn.Conv2d(INPUT_CHANNEL , OUTPUT_CHANNEL , kernel_size=3, padding=1))

        elif Number == 1:
            Layer.append(nn.BatchNorm2d(OUTPUT_CHANNEL))

        elif Number == 2:
            Layer.append(nn.ReLU(inplace=True))

        elif Number == 3:
            Layer.append(nn.Sigmoid())


        elif Number == 4:
            # probability of an element to be zeroed. Default: 0.5
            Layer.append(nn.Dropout(0.5))

    Pytorch_Layer = nn.Sequential(*Layer)


    return Pytorch_Layer

def MakeModel(OBJLIS , TF_RANDOM , RANDOM_MODELSIZE):

    #Are there any problematic numbers/characters in ObjLis?
    ObjLis = CheckList(OBJLIS)

    #  Process to create a random model.
    if TF_RANDOM == True:
        random.seed(0)
        Criteria_List = [0,1,2,3,4]
        ObjLis = random.choices(Criteria_List, k = int(RANDOM_MODELSIZE))

    Model = []

    InputChannel = 3
    OutPutChannel = 64

    Counter = 0
    tmplis = []

    for OP_Number in ObjLis:

        # 3 is one layer, so add a layer to the model every multiple of 3
        if (Counter % 3 == 0) and (Counter != 0):

            OP_Layer = Make_Layer(tmplis , InputChannel , OutPutChannel)
            Model.append(OP_Layer)

            tmplis = []
            tmplis.append(OP_Number)

            InputChannel = OutPutChannel
            OutPutChannel = OutPutChannel * 2


        else:
            tmplis.append(OP_Number)

        Counter = Counter + 1


     #Handle list sizes that are not divisible by a multiple of 3.
    if 0< len(tmplis) < 3:
        InputChannel = OutPutChannel
        OutPutChannel = OutPutChannel * 2
        OP_Layer = (tmplis,InputChannel,OutPutChannel)
        Model.append(OP_Layer)

    Model = nn.Sequential(*Model)

    return Model

def Make_CIFAR10_Dataset(TrainSize):

    transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    trainval_dataset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)

    n_samples = len(trainval_dataset)
    Train_Per = (TrainSize) / n_samples
    train_size = int(len(trainval_dataset) * Train_Per)
    val_size = n_samples - train_size

    train_dataset, val_dataset = torch.utils.data.random_split(trainval_dataset, [train_size, val_size])`


    print("TrainSize is " + str(train_size))
    print("ValSize is " + str(val_size))

    return train_dataset , val_dataset
# GPUの使えるか?
!nvidia-smi
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

train_dataset , val_dataset = Make_CIFAR10_Dataset(1000)

net = MakeModel([] , True , 12)
net.to(device)

import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

List_Answer = [] 
EpochList = [10]
BatchList = [64]

for Epoch in EpochList:

    for Batch in BatchList:

        train_loader = torch.utils.data.DataLoader(train_dataset, Batch, shuffle =True)
        testloader = torch.utils.data.DataLoader(val_dataset, Batch,shuffle=False, num_workers=2)   

        for i in range(int(Epoch)):

            running_loss = 0.0

            for i, data in enumerate(train_loader, 0):

                # get the inputs; data is a list of [inputs, labels]
                inputs, labels = data[0].to(device), data[1].to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward + backward + optimize
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                # print statistics
                running_loss += loss.item()
                if i % 2000 == 1999:    # print every 2000 mini-batches
                    print('[%d, %5d] loss: %.3f' %
                    (epoch + 1, i + 1, running_loss / 2000))


        running_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for (images, labels) in testloader:
                outputs = net(images.to(device)) #for gpu
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels.to(device)).sum().item()
        print('Accuracy: {:.2f} %  TestData'.format(100 * float(correct/total)))
        List_Answer.append([Epoch , Batch , (100 * correct / total)])

        running_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for (images, labels) in train_loader:
                outputs = net(images.to(device)) #for gpu
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels.to(device)).sum().item()
        print('Accuracy: {:.2f} %  TrainData'.format(100 * float(correct/total)))
        List_Answer.append([Epoch , Batch , (100 * correct / total)])

Anyone know how to solve this problem?
Thank you very much in advance for your help!
Cheers!

This error is most likely raised by nn.CrossEntropyLoss when you are providing wrongly shaped tensors to it.
Here is a small example:

criterion = nn.CrossEntropyLoss()
output = torch.randn(2, 3, 4, requires_grad=True)
target = torch.randint(0, 3, (2, 4))

loss = criterion(output, target) # works

target = torch.randint(0, 3, (2,))
loss = criterion(output, target) # breaks

nn.CrossEntropyLoss expects a model output in the shape [batch_size, nb_classes, *] containing logits and a target in the shape [batch_size, *] containing the class indices in [0, nb_classes-1].
Note that the * stands for additional dimensions, so for e.g. a segmentation use case these would also be valid shapes:

output: [batch_size, nb_classes, height, width]
target: [batch_size, height, width]