RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[3, 1, 224, 224] to have 3 channels, but got 1 channels instead



from __future__ import division
import argparse
import torch
from torch.utils import model_zoo
from torch.autograd import Variable
from torch.autograd import Variable
from torch.nn import Linear, ReLU, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Module, Softmax, BatchNorm2d, Dropout
from torch.optim import Adam, SGD
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import models
import utils
import os
import pickle
import pandas as pd
# from Lenet import *
# from Utils import *
import scipy.io
import numpy as np
import matplotlib.pyplot as plt

from data_loader import get_train_test_loader, get_office31_dataloader
from sklearn.utils import resample    

import warnings
warnings.filterwarnings("ignore")

import logging
handler=logging.basicConfig(level=logging.INFO)
lgr = logging.getLogger(__name__)

from sklearn.metrics import roc_auc_score, log_loss, roc_auc_score, roc_curve, auc,accuracy_score
from utils import accuracy, Tracker

from torchmetrics.classification import BinaryAccuracy




########################################################################

# Feature CSVs: one file of mobility features per platform (Android / iOS).
fnameand='vectors_Qv_vlen1_updated_location_variance_android.csv'
fnameios='vectors_Qv_vlen1_updated_location_variance_ios.csv'


dfand = pd.read_csv(fnameand, sep=',')
dfios = pd.read_csv(fnameios, sep=',')


# Upsample the Android frame (with replacement) so both domains end up with
# the same number of rows; random_state pins the draw for reproducibility.
dfandupsample = resample(dfand,replace=True,n_samples=len(dfios),random_state=42)


# Source domain: iOS mobility features and labels.
Xs=dfios[["location_variance0","time_spent_moving0","total_distance0","AMS0","unique_locations0","entropy0","normalized_entropy0","time_home0"]]
ys = dfios[['finallabel']]
# changing labels to 1 or 0
# NOTE(review): ys is a slice of dfios, so these .loc assignments may raise
# pandas' SettingWithCopyWarning (silenced by the warnings filter above).
ys.loc[ys["finallabel"] == "improved", "finallabel"] = 0
ys.loc[ys["finallabel"] == "nonImproved", "finallabel"] = 1

ys=np.array(ys).astype("float32")


# Target domain: upsampled Android features and labels, same label encoding.
Xt=dfandupsample[["location_variance0","time_spent_moving0","total_distance0","AMS0","unique_locations0","entropy0","normalized_entropy0","time_home0"]]
yt = dfandupsample[['finallabel']]

yt.loc[yt["finallabel"] == "improved", "finallabel"] = 0
yt.loc[yt["finallabel"] == "nonImproved", "finallabel"] = 1

yt=np.array(yt).astype("float32")


trainX, trainY = Xs, ys
targetX,targetY=Xt,yt


# Per the thread discussion the source features are (209, 8).
print (trainX.shape,trainY.shape,targetX.shape,targetY.shape)

########################################################################################

def XnumpyToTensor(x_data_np):
    """Convert a pandas DataFrame (or anything with ``.values``) of features
    to a float32 CPU torch tensor.

    Parameters
    ----------
    x_data_np : pandas.DataFrame
        Feature table; its ``.values`` array is copied to float32.

    Returns
    -------
    torch.Tensor
        Float32 tensor with the same (n_samples, n_features) shape.
    """
    x_data_np = np.array(x_data_np.values, dtype=np.float32)
    logging.getLogger(__name__).info("Using the CPU; input shape %s", x_data_np.shape)
    # torch.autograd.Variable is deprecated since PyTorch 0.4: a plain Tensor
    # carries autograd state, so wrapping is unnecessary.
    return torch.from_numpy(x_data_np)



def YnumpyToTensor(y_data_np):
    """Convert a label vector to a float32 column tensor of shape (N, 1).

    Parameters
    ----------
    y_data_np : numpy.ndarray
        1-D (or (N, 1)) array of binary labels.

    Returns
    -------
    torch.Tensor
        Float32 tensor of shape (N, 1) — BCE-style losses require a float
        target with an explicit second dimension.
    """
    y_data_np = y_data_np.reshape((y_data_np.shape[0], 1))  # (N,) -> (N, 1)
    logging.getLogger(__name__).info("Using the CPU; target shape %s", y_data_np.shape)
    # torch.autograd.Variable is deprecated; .float() replaces the old
    # .type(torch.FloatTensor) call with the same float32 result.
    return torch.from_numpy(y_data_np).float()











#######################################################################################
# Everything below runs on CPU; CUDA is never enabled in this script.
use_cuda=False
# Source features as a 2-D (n_samples, n_features) float tensor.
X_tensor_train= XnumpyToTensor(trainX) # default order is NBC for a 3d tensor, but we have a 2d tensor
X_shape=X_tensor_train.data.size()



# Global switch for the size-tracing helper below.
DEBUG_ON=False

def debug(x):
    """Print the tensor's size when DEBUG_ON is set; otherwise do nothing."""
    if DEBUG_ON:
        # FIX: the original message had an unbalanced '(' — "(x.size():".
        print('x.size(): ' + str(x.size()))

##########################################################################################



class Net22(nn.Module):
    """AlexNet-style 1-D convolutional classifier.

    Expects input of shape (N, in_channels, L); the sequence length L must be
    large enough to survive the stride-4 stem and three pooling stages
    (roughly L >= 70).

    Fixes over the original:
      * the last pooling layer was ``nn.MaxPool2d`` inside an otherwise 1-D
        stack — replaced with ``nn.MaxPool1d``;
      * ``nn.AdaptiveAvgPool2d((6, 6))`` cannot follow Conv1d features —
        replaced with ``nn.AdaptiveAvgPool1d(6)``, so the classifier input is
        256 * 6 (the original's 256 * 6 * 6 was inconsistent);
      * the final layer hard-coded 2 outputs, ignoring ``num_classes``;
      * ``in_channels`` is now a parameter (default 2, backward compatible).
    """

    def __init__(self, num_classes: int = 2, dropout: float = 0.5,
                 in_channels: int = 2):
        super(Net22, self).__init__()

        self.features = nn.Sequential(
            nn.Conv1d(in_channels, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=3, stride=2),
            nn.Conv1d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=3, stride=2),
            nn.Conv1d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv1d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv1d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # BUG FIX: was nn.MaxPool2d — 2-D pooling on 3-D conv1d output.
            nn.MaxPool1d(kernel_size=3, stride=2),
        )
        # BUG FIX: was AdaptiveAvgPool2d((6, 6)); 1-D features need 1-D pooling.
        self.avgpool = nn.AdaptiveAvgPool1d(6)
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout),
            nn.Linear(256 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            # BUG FIX: was hard-coded to 2, silently ignoring num_classes.
            nn.Linear(4096, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.features(x)      # (N, 256, L')
        x = self.avgpool(x)       # (N, 256, 6)
        x = torch.flatten(x, 1)   # (N, 1536)
        x = self.classifier(x)    # (N, num_classes)
        return x








#  #--------------------------------------------------------------------------------------
# # Dimensions



# Instantiate the (randomly initialised) network and print its layer summary.
net = Net22()

print("model description---")
print(net)





##################################################################################
import time
start_time = time.time()
epochs=20
all_losses = []

# Source-domain (iOS) tensors.
X_tensor_train= XnumpyToTensor(trainX)
Y_tensor_train= YnumpyToTensor(trainY)




# Target-domain (upsampled Android) tensors.
X_tensor_target= XnumpyToTensor(targetX)
Y_tensor_target= YnumpyToTensor(targetY)





#################################################################

def train(model, epoch, param):
    """Run one (partial) training step of the domain-adaptation model.

    Parameters
    ----------
    model : nn.Module
        Network applied to both source and target tensors.
    epoch : int
        Current epoch index (currently unused in the body).
    param : sequence of three floats
        [discriminative_loss_param, domain_loss_param, adver_loss_param];
        unpacked but not yet used — the loss computation looks unfinished.

    Returns
    -------
    list
        The (currently empty) result accumulator.
    """
    discriminative_loss_param = param[0]
    domain_loss_param = param[1]
    adver_loss_param = param[2]

    result = []

    # BUG FIX: the original called the global ``net`` here, ignoring the
    # ``model`` argument; use the parameter so callers control the network.
    # Reads the module-level X_tensor_train / X_tensor_target tensors.
    source_out, target_out = model(X_tensor_train), model(X_tensor_target)

    # NOTE(review): no loss/backward/step yet — the training body appears
    # incomplete; return the accumulator so the caller's ``res`` is usable.
    return result
    

#----------------------------------------------------------------



if __name__=='__main__':
    # Loss-weight hyperparameters for the domain-adaptation objective.
    discriminative_loss_param = 0.01 ##0.03 for InstanceBased method, 0.01 for CenterBased method
    domain_loss_param = 8
    adver_loss_param=0
    param=[discriminative_loss_param, domain_loss_param,adver_loss_param]

    # Per-epoch bookkeeping containers (currently never filled).
    training_statistic = []
    testing_s_statistic = []
    testing_t_statistic = []


    final_res=[]
    tracker = Tracker()
    tuf=[]
    accuracies_source = []
    accuracies_target= []


    for e in range(0,epochs):
        print("epoch===",e)
        total=0
        running_accuracy = 0.0
        correct=0

        # NOTE(review): train() performs only a forward pass and `res` is
        # never used afterwards — the loop body looks unfinished.
        res = train(net, e, param=param)
        
        
        

        
        


















Thanks for your reply. Here is my code with backticks.
This time I am not reshaping the inputs but still getting the same error

“RuntimeError: Given groups=1, weight of size [64, 2, 11], expected input[1, 209, 8] to have 2 channels, but got 209 channels instead”.

I don't know how I should reshape my inputs to get this model to work.

Based on the error message it seems you are still reshaping the input somewhere to 209 channels.
Using your claimed shape of:

print((X_tensor.data.shape)) # torch.Size([108405, 29])

yields another error:

model = Net22() 
x = torch.randn(108405, 29)
out = model(x)
# RuntimeError: Given groups=1, weight of size [64, 2, 11], expected input[1, 108405, 29] to have 2 channels, but got 108405 channels instead

sorry about the confusion.
My actual data shape is -

print((X_tensor.data.shape)) # torch.Size([209, 8])

Hello, I try to use chat gpt to help me fine-tune the pre-trained model of SlowFast by pytorch, But when I start training the model,and run to outputs = model(inputs_list), I get an error “RuntimeError: Given groups=1, weight of size [64, 3, 1, 7, 7], expected input[1, 4, 3, 224, 224] to have 3 channels, but got 4 channels instead”. Hope someone can tell me how to solve it. Or can tell me if there is a complete Finetuning SlowFast model by pytorch, it will be very helpful to me, thank you very much!

Here is my code:

import torch
import torchvision
import torch.optim as optim
import torch.nn as nn
import torchvision.datasets.video_utils
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision.datasets import DatasetFolder
from torchvision.datasets.folder import make_dataset
from torchvision.io import read_video
from torchvision.datasets.video_utils import VideoClips
from torchvision.transforms import Compose, RandomResizedCrop, RandomHorizontalFlip, ColorJitter, ToTensor, Normalize

import cv2
from PIL import Image, ImageTk
import os
import glob
import numpy as np
import argparse, time, logging, os, sys, math, random, shutil
import matplotlib.pyplot as plt

print(torch.version)

print(torch.version.cuda)
print(torch.backends.cudnn.version())

if(torch.cuda.is_available()):
device = torch.device("cuda")
print(device, torch.cuda.get_device_name(0))
else:
device= torch.device("cpu")
print(device)

data_transforms = {
'train': transforms.Compose([
transforms.Resize((224, 224)), # resize the image
transforms.RandomHorizontalFlip(),
transforms.ToTensor(), # convert the PIL image to a tensor
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
]),
'val': transforms.Compose([
transforms.Resize((224, 224)), # resize the image
transforms.RandomHorizontalFlip(),
transforms.ToTensor(), # convert the PIL image to a tensor
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
}

Define batch size and number of workers for data loading

batch_size = 4
num_workers = 0

Load your own dataset using DataLoader

train_data = torchvision.datasets.ImageFolder('SlowFastDataset/dataset/train', transform=data_transforms['train'])
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=num_workers)

Evaluate the model

val_data = torchvision.datasets.ImageFolder('SlowFastDataset/dataset/val', transform=data_transforms['val'])
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False, num_workers=num_workers)

Load Slow Fast from Pytorch hub

torch.hub._validate_not_a_forked_repo=lambda a,b,c: True
model = torch.hub.load('facebookresearch/pytorchvideo', 'slowfast_r50', pretrained=True)
model = model.to(device)

for i in range(6):
model.blocks[i].eval()
model.blocks[6].proj = torch.nn.Linear(in_features=2304, out_features=2, bias=True)

print(model)

Learning rate decay factor

lr_decay = 0.1

Epochs where learning rate decays

lr_decay_epoch = [40, 80, 100]

Define a loss function and an optimizer

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-4)

Train the model

num_epochs = 100
lr_decay_count = 0

train_loss = []
train_acc = []
val_loss = []
val_acc = []

for epoch in range(num_epochs):
model.train()
train_loss = 0.0
train_correct = 0.0
train_total = 0.0
start_time = time.time()

# Learning rate decay
if epoch == lr_decay_epoch[lr_decay_count]:
    trainer.set_learning_rate(trainer.learning_rate*lr_decay)
    lr_decay_count += 1

# Loop over the training data
for inputs, labels in train_loader:
    inputs = inputs.to(device)
    labels = labels.to(device)
    
    print(inputs.shape)
    print(type(inputs))  
    
    inputs = inputs.unsqueeze(1)
    print(inputs.shape)
    
    inputs = inputs.expand(-1, 16, -1, -1, -1)
    
    print(inputs.shape)
    
    inputs = inputs[:, :, :3, :, :]
    
    print(inputs.shape)
    
    inputs_list = torch.split(inputs, 1, dim=1)
    inputs_list = [tensor.squeeze(1) for tensor in inputs_list]
    
    
    
    optimizer.zero_grad()
    outputs = model(inputs_list)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    train_loss += loss.item()
    _, predicted = torch.max(outputs.data, 1)
    train_total += labels.size(0)
    train_correct += (predicted == labels).sum().item()
train_accuracy = 100 * train_correct / train_total
train_loss /= len(train_loader)
    
    # Compute epoch statistics
epoch_time = time.time() - start_time
    
# Save training statistics
train_loss.append(train_loss)
train_acc.append(train_accuracy)

print('[Epoch %d] train accuracy=%.3f train loss=%.3f train time: %.3f' %
    (epoch + 1, train_accuracy, train_loss, epoch_time))

# Evaluate the model on the validation set
model.eval()

val_loss = 0.0
val_correct = 0.0
val_total = 0.0
start_time = time.time()
           

with torch.no_grad():
    for inputs, labels in val_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        val_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        val_total += labels.size(0)
        val_correct += (predicted == labels).sum().item()
val_accuracy = 100 * val_correct / val_total
val_loss /= len(val_loader)

# Compute epoch statistics
epoch_time = time.time() - start_time
        
# Save validation statistics
val_loss.append(val_loss)
val_acc.append(val_accuracy)
        
        
print('[Epoch %d] validation accuracy: %.3f validation loss: %.3f' % (epoch + 1, val_accuracy, val_loss))

I’m unsure which input shapes you are using but this small example should work:

model = torch.hub.load('facebookresearch/pytorchvideo', 'slowfast_r50', pretrained=False)

model.eval()
x = torch.randn(1, 3, 8, 256, 256)
y = torch.randn(1, 3, 32, 256, 256)
out = model([x, y])

Could you compare your inputs to these random ones?

hey there, I have similar issue with my code.
issue:
RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[64, 1, 150, 150] to have 3 channels, but got 1 channels instead

code:

import os
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

!pip install torchvision

os.listdir('drive/MyDrive/Dataset/dataset')

training_dataset_path = 'drive/MyDrive/Dataset/dataset/train'
test_dataset_path = 'drive/MyDrive/Dataset/dataset/val'

!pip install numpy

import numpy as np

# FIX (per the accepted answer below): resnet18's first conv expects 3
# channels, but these .npy samples are single-channel grayscale, which
# raised "expected input[64, 1, 150, 150] to have 3 channels".
# Grayscale(num_output_channels=3) replicates the channel — assumes all
# samples really are grayscale; TODO confirm against the dataset.
train_transforms = transforms.Compose([
    transforms.Resize((150,150)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.Grayscale(num_output_channels=3),
    # since data is already normalized so need of it our case.
    ])
test_transforms = transforms.Compose([
    transforms.Resize((150,150)),
    transforms.Grayscale(num_output_channels=3),
    # since data is already normalized so need of it our case.
    ])

def npy_loader(path):
    """Load the .npy file at *path* and return its contents as a torch tensor."""
    arr = np.load(path)
    return torch.from_numpy(arr)


# Train/test DatasetFolders over directories of .npy samples.
train_dataset = torchvision.datasets.DatasetFolder(
    root = training_dataset_path,
    loader = npy_loader,
    extensions=['.npy'],
    transform = train_transforms
)

# BUG FIX: the test set was built from ``training_dataset_path`` (copy-paste
# error), so "test" accuracy was measured on the training data.
test_dataset = torchvision.datasets.DatasetFolder(
    root = test_dataset_path,
    loader = npy_loader,
    extensions=['.npy'],
    transform = test_transforms
)

  # Define the train_dataset here
def show_transformed_images(dataset):
    """Display one shuffled batch of transformed samples from *dataset* as a grid.

    Parameters
    ----------
    dataset : torch.utils.data.Dataset
        Dataset to sample the batch from.
    """
    # BUG FIX: the original iterated the global ``train_dataset`` and
    # silently ignored the ``dataset`` argument.
    loader = torch.utils.data.DataLoader(dataset, batch_size=64, shuffle=True)
    batch = next(iter(loader))
    images, labels = batch

    grid = torchvision.utils.make_grid(images, nrow=3)
    plt.figure(figsize=(11, 11))
    # make_grid returns (C, H, W); transpose for imshow's (W, H, C) layout.
    plt.imshow(np.transpose(grid, (2, 1, 0)))
    print('labels:', labels)

show_transformed_images(train_dataset)

# DataLoaders: shuffle only the training split.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)

def set_device():
    """Return the CUDA device when one is available, otherwise the CPU."""
    name = "cuda:0" if torch.cuda.is_available() else "cpu"
    return torch.device(name)

def train_nn(model, train_loader, test_loader, criterion, optimizer, n_epochs):
    """Train *model* for *n_epochs*, evaluating on the test set after each epoch.

    Parameters
    ----------
    model : nn.Module
    train_loader, test_loader : DataLoader
    criterion : loss callable taking (outputs, labels)
    optimizer : torch.optim optimizer over model's parameters
    n_epochs : int

    Returns
    -------
    nn.Module
        The trained model.
    """
    device = set_device()

    for epoch in range(n_epochs):
        print("Epoch number %d" %(epoch +1))
        model.train()
        running_loss = 0.0
        running_correct = 0.0
        total = 0

        # BUG FIX: this batch loop, the per-epoch statistics, and the
        # evaluation call were dedented out of the epoch loop, so training
        # ran exactly once — after all epoch headers had already printed.
        for data in train_loader:
            images, labels = data
            images = images.to(device)
            labels = labels.to(device)
            total += labels.size(0)

            optimizer.zero_grad()

            outputs = model(images)

            _, predicted = torch.max(outputs.data, 1)

            loss = criterion(outputs, labels)

            loss.backward()

            optimizer.step()

            running_loss += loss.item()
            running_correct += (labels == predicted).sum().item()

        epoch_loss = running_loss / len(train_loader)
        epoch_acc = 100.0 * running_correct / total

        print("   - Training dataset. Got %d out of %d images correctly (%.3f%%). Epoch loss: %.3f"
             % (running_correct, total, epoch_acc, epoch_loss))

        evaluate_model_on_test_set(model, test_loader)

    print("Finished")
    return model

def evaluate_model_on_test_set(model, test_loader):
    """Measure and print the model's accuracy over *test_loader*.

    Runs in eval mode with gradients disabled; prints the hit count and
    percentage in the same format as the training summary.
    """
    model.eval()
    predicted_correctly_on_epoch = 0
    total = 0
    device = set_device()

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            total += labels.size(0)

            scores = model(images)
            _, guesses = torch.max(scores.data, 1)
            predicted_correctly_on_epoch += (guesses == labels).sum().item()

    epoch_acc = 100.0 * predicted_correctly_on_epoch / total
    print("   - Testing dataset. Got %d out of %d images correctly (%.3f%%)"
         % (predicted_correctly_on_epoch, total, epoch_acc))


import torchvision.models as models
import torch.nn as nn
import torch.optim as optim

# resnet18 from scratch with a fresh 10-way classification head.
resnet18_model = models.resnet18(pretrained = False)
num_ftrs = resnet18_model.fc.in_features
number_of_classes = 10
resnet18_model.fc = nn.Linear(num_ftrs, number_of_classes)
device = set_device()
resnet18_model = resnet18_model.to(device)
loss_fn = nn.CrossEntropyLoss()

# BUG FIX: lr=1, momentum=1, weight_decay=1 cannot converge — momentum=1
# never decays the velocity and weight_decay=1 collapses the weights each
# step. Standard resnet-from-scratch defaults instead.
optimizer = optim.SGD(resnet18_model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)

train_nn(resnet18_model, train_loader, test_loader, loss_fn, optimizer, 150)

my dataset is in .npy format and I have to do image classification

The error is raised since you are passing grayscale inputs to a model expecting RGB inputs with 3 channels. You could transform your samples via torchvision.transforms.Grayscale using num_output_channels=3 assuming all your samples are already in the grayscale format.