I’m trying to build a CNN for a 100 class 3 channel images, but I’m getting the error: “RuntimeError: only batches of spatial targets supported (3D tensors) but got targets of dimension: 1”
I tried to flatten the output with dim_start=1, but the loss was increasing with each mini-batch until NaN.
Code:
import os
import sys
from google.colab import drive
import torch
from torch import nn
from torch.utils.data import DataLoader
import torchvision as tv
from torchvision.transforms import v2
DRIVE_DEFAULT_PATH = ‘/content/drive’
if not os.path.exists(DRIVE_DEFAULT_PATH):
drive.mount(DRIVE_DEFAULT_PATH)
DRIVE_DEFAULT_PATH = DRIVE_DEFAULT_PATH + ‘/MyDrive’
CLASS_DEFAULT_PATH = ‘/RNP’
ASSIGNMENT_PATH = ‘/Trabalho 01/Sports’
WORK_PATH = DRIVE_DEFAULT_PATH + CLASS_DEFAULT_PATH + ASSIGNMENT_PATH
def setLoader(path, batch_size, train):
if(train):
transforms = v2.Compose([v2.ToImage(),
v2.ToDtype(torch.float32, scale=True),
v2.Normalize(mean=[0.4713, 0.4699, 0.4548], std=[0.3081, 0.3020, 0.2961])])
return torch.utils.data.DataLoader(tv.datasets.ImageFolder(WORK_PATH + path, transform=transforms),
batch_size=batch_size,
shuffle=True,
num_workers=0)
class CNN(nn.Module):
def init(self):
super().init()
self.step1 = nn.Sequential(nn.Conv2d(3,400,3,padding=1),
nn.ReLU(),
#2nd
nn.Conv2d(400,400,5,padding=2),
nn.ReLU(),
nn.MaxPool2d(3),
#3rd
nn.Conv2d(400,400,3,padding=1),
nn.ReLU(),
nn.MaxPool2d(2),
#4th
nn.Conv2d(400,200,7),
nn.ReLU(),
nn.MaxPool2d(2),
#5th
nn.Conv2d(200,100,5,padding=2),
nn.ReLU(),
nn.MaxPool2d(5)
)
def forward(self, x):
return self.step1(x)
def train():
lr = 0.3
num_epochs = 1
model = CNN()
loss_fn = nn.CrossEntropyLoss()
optim = torch.optim.SGD(model.parameters(), lr=lr)
for epoch in range(num_epochs):
train_loader = setLoader(“/train”, 2, True)
for batch, (X,y) in enumerate(train_loader):
optim.zero_grad()
y_hat = model(X)
print(y_hat.shape)
print(‘-’)
print(y)
loss = loss_fn(y_hat,y)
print(f’Batch: {batch}, Loss: {loss}')
loss.backward()
optim.step()
train()