RuntimeError: Expected 4-dimensional input for 4-dimensional weight [64, 3, 7, 7], but got 3-dimensional input of size [1, 384, 384] instead

I have two scripts here: the training code and the prediction code.

model.py

import pandas as pd 
import numpy as np

import torch
import torch.nn as nn

import os

import cv2

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import albumentations as A

from torch.utils.data import TensorDataset, DataLoader,Dataset
from torchvision import models
from collections import defaultdict
from torch.utils.data.sampler import RandomSampler
import torch.optim as optim
from torch.optim import lr_scheduler
from sklearn import model_selection
from tqdm import tqdm
from sklearn.metrics import classification_report

# including both 2020 and 2019 data

# generate data from csv file
class Build_dataset(Dataset):
        def __init__(self, csv, split, mode, transform=None):
            self.csv = csv.reset_index(drop=True)
            self.split = split
            self.mode = mode
            self.transform = transform

        def __len__(self):
            return self.csv.shape[0]

        def __getitem__(self, index):
            row = self.csv.iloc[index]

            image = cv2.imread(row.filepath)
            # cv2 loads images as BGR; convert to RGB
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            if self.transform is not None:
                res = self.transform(image=image)
                image = res['image'].astype(np.float32)
            else:
                image = image.astype(np.float32)

            image = image.transpose(2, 0, 1)  # HWC -> CHW
            data = torch.tensor(image).float()

            if self.mode == 'test':
                return data
            else:
                return data, torch.tensor(self.csv.iloc[index].target).long()

# train for one epoch
def train_epoch(model, loader, optimizer, scheduler, loss_fn, device, n_examples):

    model = model.train()

    losses = []
    correct_predictions = 0

    for inputs, labels in tqdm(loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)

        _, preds = torch.max(outputs, dim=1)
        loss = loss_fn(outputs, labels)

        correct_predictions += torch.sum(preds == labels)
        losses.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    scheduler.step()

    return correct_predictions.double() / n_examples, np.mean(losses)

# evaluate on the validation set
def val_epoch(model, loader, loss_fn, device, n_examples):

    model = model.eval()

    losses = []
    correct_predictions = 0

    with torch.no_grad():
        for inputs, labels in tqdm(loader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, dim=1)
            loss = loss_fn(outputs, labels)

            correct_predictions += torch.sum(preds == labels)
            losses.append(loss.item())

    return correct_predictions.double() / n_examples, np.mean(losses)


def train(fold, model,device, num_epochs):
    
    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_valid = df[df.kfold == fold].reset_index(drop=True)
    # generate data
    dataset_train = Build_dataset(df_train,  'train', 'train', transform=transforms_train)
    dataset_valid = Build_dataset(df_valid, 'train', 'val', transform=transforms_val)
    
    #load data 
    train_loader = DataLoader(dataset_train, batch_size=64, sampler=RandomSampler(dataset_train), num_workers=4)
    valid_loader = DataLoader(dataset_valid, batch_size=32, shuffle=True, num_workers=4)
    
    dataset_train_size = len(dataset_train)
    
    dataset_valid_size = len(dataset_valid)
    
    optimizer = optim.Adam(model.parameters(), lr = 1e-4)

    model = model.to(device)
    
    scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
    
    loss_fn = nn.CrossEntropyLoss().to(device)
    
    history = defaultdict(list)
    
    best_accuracy = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1} / {num_epochs}')
        print ('-'*30)
        train_acc, train_loss = train_epoch(model, train_loader, optimizer, scheduler, loss_fn, device, dataset_train_size)
        print(f'Train loss {train_loss} accuracy {train_acc}')
        valid_acc, valid_loss = val_epoch(model, valid_loader, loss_fn, device,dataset_valid_size)
        print(f'Val   loss {valid_loss} accuracy {valid_acc}')
        print()
        
        history['train_acc'].append(train_acc)
        history['train_loss'].append(train_loss)
        history['val_acc'].append(valid_acc)
        history['val_loss'].append(valid_loss)
        
        if valid_acc > best_accuracy:
            print('saving model')
            torch.save(model.state_dict(), f'best_model_{fold}.bin')
            best_accuracy = valid_acc
        
    print(f'Best Accuracy: {best_accuracy}')
    
    model.load_state_dict(torch.load(f'best_model_{fold}.bin'))
    
    return model, history

            

if __name__ == '__main__':
    #competition data -2020
    data_dir = "../input/jpeg-melanoma-384x384"
    #competition data - 2019
    data_dir2 = "../input/jpeg-isic2019-384x384"
    # device
    device = torch.device("cuda")
    
    # augmenting images


    image_size = 384
    transforms_train = A.Compose([
        A.Transpose(p=0.5),
        A.VerticalFlip(p=0.5),
        A.HorizontalFlip(p=0.5),
        A.RandomBrightness(limit=0.2, p=0.75),
        A.RandomContrast(limit=0.2, p=0.75),
        A.OneOf([
            A.MedianBlur(blur_limit=5),
            A.GaussianBlur(blur_limit=5),
            A.GaussNoise(var_limit=(5.0, 30.0)),
        ], p=0.7),

        A.OneOf([
            A.OpticalDistortion(distort_limit=1.0),
            A.GridDistortion(num_steps=5, distort_limit=1.),
            A.ElasticTransform(alpha=3),
        ], p=0.7),

        A.CLAHE(clip_limit=4.0, p=0.7),
        A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=20, val_shift_limit=10, p=0.5),
        A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=15, border_mode=0, p=0.85),
        A.Resize(image_size, image_size),
        A.Cutout(max_h_size=int(image_size * 0.375), max_w_size=int(image_size * 0.375), num_holes=1, p=0.7),    
        A.Normalize()
    ])

    transforms_val = A.Compose([
        A.Resize(image_size, image_size),
        A.Normalize()
    ])
    
    transforms_test = A.Compose([
        A.Resize(image_size, image_size),
        A.Normalize()
    ])
    # create data
    df_train = pd.read_csv(os.path.join(data_dir, "train.csv"))  #/kaggle/input/siim-isic-melanoma-classification/train.csv

    df_train['is_ext'] = 0
    df_train['filepath'] = df_train['image_name'].apply(lambda x: os.path.join(data_dir, 'train', f'{x}.jpg'))

    # dataset from 2020 data
    df_train['diagnosis'] = df_train['diagnosis'].replace({
        'seborrheic keratosis': 'BKL',
        'lichenoid keratosis': 'BKL',
        'solar lentigo': 'BKL',
        'lentigo NOS': 'BKL',
        'cafe-au-lait macule': 'unknown',
        'atypical melanocytic proliferation': 'unknown',
    })
    
    # dataset from 2019 data
    df_train2 = pd.read_csv(os.path.join(data_dir2, "train.csv"))
    df_train2 = df_train2[df_train2['tfrecord'] >= 0].reset_index(drop=True)
    df_train2['fold'] = df_train2['tfrecord'] % 5
    df_train2['is_ext'] = 1
    df_train2['filepath'] = df_train2['image_name'].apply(lambda x: os.path.join(data_dir2, 'train', f'{x}.jpg'))
    
    df_train2['diagnosis'] = df_train2['diagnosis'].replace({'NV': 'nevus', 'MEL': 'melanoma'})
    
    #concat both 2019 and 2020 data
    df_train = pd.concat([df_train, df_train2]).reset_index(drop=True)
    
    # shuffle data
    df = df_train.sample(frac=1).reset_index(drop=True)
    
    # map each diagnosis to an integer target (9 classes)
    new_target = {d: idx for idx, d in enumerate(sorted(df.diagnosis.unique()))}
    df['target'] = df['diagnosis'].map(new_target)
    mel_idx = new_target['melanoma']
    
    # creating 5 fold cross validation data
    df['kfold'] = -1
    y = df.target.values
    kf = model_selection.StratifiedKFold(n_splits=5, shuffle=True)
    for fold, (train_idx, valid_idx) in enumerate(kf.split(X=df, y=y)):
        df.loc[valid_idx, 'kfold'] = fold
    
    df = df[['filepath','diagnosis', 'target', 'is_ext', 'kfold']]
    
    class_names = list(df['diagnosis'].unique())

    

    
    
    # create model

    def create_model(n_classes):
        model = models.resnet18(pretrained=True)

        n_features = model.fc.in_features
        model.fc = nn.Linear(n_features, n_classes)
        return model.to(device)
        
    base_model = create_model(len(class_names)) # model ready
        
        
        
    # run the model on each of the 5 folds
    for i in range(5):
        base_model, history = train(i, base_model, device, num_epochs=1)


        

predict.py

import cv2
import albumentations as A
import numpy as np 
import torch
import torch.nn.functional as F
import torch.nn as nn

from torchvision import models
from torch.utils.data import DataLoader
from tqdm import tqdm

image_size = 384

device = torch.device('cuda')
transforms_test = A.Compose([
    A.Resize(image_size, image_size),
    A.Normalize()
])

img_path = '../input/jpeg-melanoma-384x384/train/ISIC_0015719.jpg'
image = cv2.imread(img_path)
# cv2 loads images as BGR; convert to RGB
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
res = transforms_test(image = image)
image = res['image'].astype(np.float32)
image = image.transpose(2, 0, 1)  # HWC -> CHW
data = torch.tensor(image).float()
#print(data)

test_loader = DataLoader(data, batch_size = 1, num_workers=0)
print(test_loader)

preds = []
def predict(model, loader):
    # load the trained weights before switching to eval mode
    model.load_state_dict(torch.load('../input/model-bin/model.bin'))
    model = model.to(device)
    model.eval()
    with torch.no_grad():
        for inputs in tqdm(loader):
            inputs = inputs.to(device)
            outputs = model(inputs)
            preds.append(outputs.cpu())

    return np.vstack(preds).ravel()

def create_model(n_classes):
    model = models.resnet18(pretrained=True)

    n_features = model.fc.in_features
    model.fc = nn.Linear(n_features, n_classes)
    return model.to(device)

base_model = create_model(n_classes = 9)

predict(base_model, test_loader)

The data is from the SIIM-ISIC Melanoma Classification competition on Kaggle (https://www.kaggle.com/c/siim-isic-melanoma-classification).

The model produced by the training script is saved and then loaded in the prediction script.

predict.py takes in a single image of size (384, 384), with batch_size = 1 and num_workers = 0. The expected output should be a class index from 0-8, since there are 9 target values.
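
For reference, a minimal sketch of how I expect to get that class index from the 9 outputs (the logits tensor here is just a stand-in for the model output):

import torch

logits = torch.randn(1, 9)          # stands in for the model output for one image
pred = logits.argmax(dim=1).item()  # class index in the range 0-8
print(pred)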

But I am getting this error:

RuntimeError: Expected 4-dimensional input for 4-dimensional weight [64, 3, 7, 7], but got 3-dimensional input of size [1, 384, 384] instead

Any help is much appreciated!!

For a 2D conv you need a 4-dimensional input (batch, channel, height, width). Maybe you can try replacing

outputs = model(inputs)

with

outputs = model(inputs[None, ...])
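
A minimal sketch of the shape requirement, with dummy tensors (only the shapes matter):

import torch
from torchvision import models

model = models.resnet18(pretrained=True)

x = torch.randn(3, 384, 384)  # a single CHW image: 3 dimensions
# model(x) fails: conv2d expects (batch, channel, height, width)
out = model(x[None, ...])     # prepends a batch dim -> (1, 3, 384, 384)
print(out.shape)              # torch.Size([1, 1000])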

Yes! I have seen this solution in another query. But I still don't understand [None, ...]. Could you please elaborate on it? Thanks!

Like I said in the previous post, you need 4 dimensions. You have 3 right now. The

inputs[None, ...]

is basically syntactic sugar for

inputs.unsqueeze(0)

which adds an extra dimension in the 0th position.
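
A quick way to convince yourself the two spellings are equivalent:

import torch

x = torch.randn(3, 384, 384)

a = x[None, ...]    # None in an index inserts a new axis at that position
b = x.unsqueeze(0)  # the same thing as a method call

print(a.shape)            # torch.Size([1, 3, 384, 384])
print(torch.equal(a, b))  # True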

I have implemented this, but now I get this error:

RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[1, 1, 384, 384] to have 3 channels, but got 1 channels instead

How do I change the channel size?

Ah, the model expects 3 channels in the input. Since the model is already trained, you can concatenate the image 3 times along the channel axis by adding

image = np.concatenate([image, image, image], 0)

after

image = image.transpose(2, 0, 1)
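
A minimal sketch of what that replication does for a genuinely single-channel CHW image (the shapes are illustrative):

import numpy as np

gray = np.random.rand(1, 384, 384).astype(np.float32)  # a one-channel CHW image
rgb3 = np.concatenate([gray, gray, gray], 0)           # repeat along the channel axis
print(rgb3.shape)                                      # (3, 384, 384)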

Unfortunately, it ends up with the same error even after concatenation

RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[1, 1, 384, 384] to have 3 channels, but got 1 channels instead

Where did you do this? Training or validation? There are two places…

I did this in the test function in predict.py. The training function didn't seem to have a problem.
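
Looking at the shapes in both error messages, I suspect the DataLoader in predict.py is the real culprit: passing the [3, 384, 384] image tensor straight to DataLoader makes it index the tensor along its first (channel) dimension, so every "sample" is a single [384, 384] plane, batched to [1, 384, 384]; unsqueezing or concatenating afterwards cannot bring the channels back. A minimal sketch of that behaviour, and of batching the image directly instead:

import torch
from torch.utils.data import DataLoader

data = torch.randn(3, 384, 384)  # stands in for the preprocessed image tensor

# DataLoader indexes the tensor itself, so each sample is one channel
# plane of shape (384, 384); batch_size=1 stacks it to (1, 384, 384)
loader = DataLoader(data, batch_size=1)
print(next(iter(loader)).shape)  # torch.Size([1, 384, 384])

# skipping the loader and adding a batch dimension directly gives the 4D input
batch = data.unsqueeze(0)
print(batch.shape)               # torch.Size([1, 3, 384, 384])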