Expected 4-dimensional input for 4-dimensional weight [64, 3, 3, 3], but got 3-dimensional input of size [3, 112, 112] instead

Hi, when I try to run my training code this error pops up and I'm not sure how to fix it. This is my code:

##########################################################################################################
#try to train
if __name__ == '__main__':

    args = edict({
        'operation': 'train',
        'feature_file': None,
        'result_sample_path': None,
        'gpu': 'GPU',
        'path_image_query': None,
        'query_label': 'Query label',
        'dataset': None,
        'specific_dataset_folder_name': 'lfw',
        'img_extension': 'jpg',
        'preprocessing_method': 'sphereface',
        'model_name': 'mobiface',
        'batch_size': 3,
        'image_query': '/content/drive/My Drive/recfaces13/recfaces/datasets/LFW',
        'train': True,
        'device': 'cuda'
    })
    print(args)

    # selecting the size of the crop based on the network
    if args.model_name == 'mobilefacenet' or args.model_name == 'sphereface':
        crop_size = (96, 112)
    elif args.model_name == 'mobiface' or args.model_name == 'shufflefacenet':
        crop_size = (112, 112)
    elif args.model_name == 'openface':
        crop_size = (96, 96)
    elif args.model_name == 'facenet':
        crop_size = (160, 160)
    else:
        raise NotImplementedError("Model " + args.model_name + " not implemented")

    if args.dataset is not None:
        # process whole dataset
        assert args.specific_dataset_folder_name is not None, 'To process a dataset, ' \
                                                              'the flag --specific_dataset_folder_name is required.'
        process_dataset(args.operation, args.model_name, args.batch_size,
                        args.dataset, args.specific_dataset_folder_name,
                        args.img_extension, args.preprocessing_method, crop_size,
                        args.result_sample_path, args.feature_file)
    elif args.operation == 'train':
      net = load_net('mobilefacenet', 'gpu')
      net = net.cuda()
      model_name = args.model_name

      dataset = LFW(args.image_query, args.specific_dataset_folder_name, args.img_extension, args.preprocessing_method, crop_size)
      dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=2, drop_last=False)

      
      features = None

      if args.feature_file is not None and os.path.isfile(args.feature_file):
          features = scipy.io.loadmat(args.feature_file)
      epoch = 2
      criterion = nn.CrossEntropyLoss()
      optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
      train_loss = list()

      for i, data in enumerate(dataloader):

        inpss, labs = data
        for inps in inpss:
          inps, labs = inps.cuda(args['device']), labs.cuda(args['device'])
          inps.squeeze_(0)
          labs.squeeze_(0)
          inps = Variable(inps).cuda(args['device'])
          labs = Variable(labs).cuda(args['device'])
          optimizer.zero_grad()
          outs = net(inps)
          soft_outs = F.softmax(outs, dim=1)
          prds = soft_outs.data.max(1)[1]
          loss = criterion(outs, labs)
          loss.backward()
          optimizer.step()
          prds = prds.squeeze_(1).squeeze_(0).cpu().numpy()
          inps_np = inps.detach().squeeze(0).cpu().numpy()
          labs_np = labs.detach().squeeze(0).cpu().numpy()
          train_loss.append(loss.data.item())
          print('[epoch %d], [iter %d / %d], [train loss %.5f]' % (epoch, i + 1, len(dataloader), np.asarray(train_loss).mean()))

And this is my dataloader:

class LFW(object):
    def __init__(self, root, specific_folder, img_extension, preprocessing_method=None, crop_size=(96, 112)):
        """
        Dataloader of the LFW dataset.

        root: path to the dataset to be used.
        specific_folder: specific folder inside the same dataset.
        img_extension: extension of the dataset images.
        preprocessing_method: string with the name of the preprocessing method.
        crop_size: retrieval network specific crop size.
        """

        self.preprocessing_method = preprocessing_method
        self.crop_size = crop_size
        self.imgl_list = []
        self.classes = []
        self.people = []
        self.model_align = None

        # read the file with the names and the number of images of each person in the dataset
        with open(os.path.join(root, 'people.txt')) as f:
            people = f.read().splitlines()[1:]

        # keep only the people that have at least 20 images
        for p in people:
            p = p.split('\t')
            if len(p) > 1:
                if int(p[1]) >= 20:
                    for num_img in range(1, int(p[1]) + 1):
                        self.imgl_list.append(os.path.join(root, specific_folder, p[0], p[0] + '_' +
                                                           '{:04}'.format(num_img) + '.' + img_extension))
                        self.classes.append(p[0])
                        self.people.append(p[0])

        le = preprocessing.LabelEncoder()
        self.classes = le.fit_transform(self.classes)

        print(len(self.imgl_list), len(self.classes), len(self.people))

    def __getitem__(self, index):
        imgl = imageio.imread(self.imgl_list[index])
        cl = self.classes[index]

        # if image is grayscale, transform into rgb by repeating the image 3 times
        if len(imgl.shape) == 2:
            imgl = np.stack([imgl] * 3, 2)

        imgl, bb = preprocess(imgl, self.preprocessing_method, crop_size=self.crop_size,
                              is_processing_dataset=True, return_only_largest_bb=True, execute_default=True)

        # pair the image with its horizontal flip
        imglist = [imgl, imgl[:, ::-1, :]]

        # normalization
        for i in range(len(imglist)):
            imglist[i] = (imglist[i] - 127.5) / 128.0
            imglist[i] = imglist[i].transpose(2, 0, 1)
        imgs = [torch.from_numpy(i).float() for i in imglist]

        return imgs, cl, imgl, bb, self.imgl_list[index], self.people[index]

    def __len__(self):
        return len(self.imgl_list)

There must be a batch dimension as well, so your input should have size [1, 3, 112, 112]. For this you can change net(inps) to net(inps[None, ...]).

Hi, can you explain what you mean by that?

How do I change the size? I really have no idea, can you help?

Can you change
outs = net(inps)
to

outs = net(inps[None, ...])
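
The [None, ...] indexing just adds a new batch dimension of size 1 at the front (it is the same as inps.unsqueeze(0)). A minimal sketch with a random tensor, just to show the shapes (the sizes are made up to match your error message):

import torch

x = torch.randn(3, 112, 112)    # one image: (channels, height, width)
print(x.shape)                  # torch.Size([3, 112, 112])
print(x[None, ...].shape)       # torch.Size([1, 3, 112, 112])
print(x.unsqueeze(0).shape)     # same result: torch.Size([1, 3, 112, 112])

The convolution layer is expecting a batch of images, so it needs that leading dimension even when the batch contains only one image.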

Hi, thanks. I changed it, and now the error becomes:
Given groups=1, weight of size [64, 3, 3, 3], expected input[1, 112, 112, 3] to have 3 channels, but got 112 channels instead

Do you know how I would be able to fix this? Sorry, I'm just not very good with Python.

So I just wonder, what does it mean by 4-dimensional?

Hey, can you send all your code in a file, maybe? That is, if it is not confidential.

Yes, that will be fine. I work on Google Colab.

This is my link to the file:
https://colab.research.google.com/drive/1kw_LGQIPsosuxnlZuLy585LnhYp3idEO?usp=sharing

And this is the whole folder:
https://drive.google.com/drive/folders/1EhNbxNx707irJUGH_3DgS2kzFUMrBUmc?usp=sharing

I'm doing my master's and this is my dissertation project. I can't move on at all and start writing it; errors keep popping up and I have no idea what went wrong.

I had to make a lot of changes to get it to start training…

      for i, data in enumerate(dataloader):
        
        inps, labs = data
        inps, labs = inps.cuda(args['device']), labs.cuda(args['device'])

        inps = Variable(inps).cuda(args['device'])
        labs = Variable(labs).cuda(args['device'])
        optimizer.zero_grad()
        outs = net(inps.permute(0, 3, 1, 2).float())
        soft_outs = F.softmax(outs, dim=1)
        prds = soft_outs.data.max(1)[1]
        loss = criterion(outs, labs)
        loss.backward()
        optimizer.step()
        prds = prds.cpu().numpy()
        inps_np = inps.detach().cpu().numpy()
        labs_np = labs.detach().cpu().numpy()
        train_loss.append(loss.data.item())
        print('[epoch %d], [iter %d / %d], [train loss %.5f]' % (epoch, i + 1, len(dataloader), np.asarray(train_loss).mean()))

So am I correct that I should put this into the training part?

Thank you so much, how can I repay you? You really helped me so much. Just wondering, is my dataloader part OK?

Just wondering, can you explain to me how you fixed the 112 channels error?

Ah… the 112 are not channels but the height (and width) of the image. There are only 3 channels in the image (red, green and blue). PyTorch expects channels to be the 1st dimension (the 0th dimension is the batch dimension), the 2nd dimension to be the height and the 3rd to be the width… so we need to permute the image to get channels first…
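
A toy example with a random tensor, just to show what the permute in my snippet does:

import torch

# one image stored channels-last: (batch, height, width, channels)
x = torch.randn(1, 112, 112, 3)
print(x.shape)                      # torch.Size([1, 112, 112, 3])

# move channels to dimension 1 -> (batch, channels, height, width)
print(x.permute(0, 3, 1, 2).shape)  # torch.Size([1, 3, 112, 112])

Without the permute, the convolution reads dimension 1 (which holds the height, 112) as the channel count, which is exactly the error you saw.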

Am I correct that this is the line of code that handles the RGB ordering?
outs = net(inps.permute(0, 3, 1, 2).float())

Just a quick question: to test this model, can you give some suggestions on how I would be able to save the result so I can use it in the testing stage? Right now I'm not sure how I would use it for testing.

You can save a model in PyTorch using the torch.save function like this:

torch.save(net.state_dict(), 'my_model.pth')
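
Then in your testing stage you rebuild the network and load the weights back. A rough sketch, assuming the same load_net helper from your training code:

net = load_net('mobilefacenet', 'gpu')
net = net.cuda()
net.load_state_dict(torch.load('my_model.pth'))
net.eval()  # switch to evaluation mode before testing

The state_dict stores only the weights, not the architecture, which is why you construct the model first and then load the weights into it.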

Just a quick question: when I test my model, do I actually need to save it, or is that not necessary? I'm pretty new to this idea, so I'm not quite sure whether I really need to save it or not.