PyTorch Model - Run prediction on OPENCV live feed

Hello!
I used this code from kaggle to train the garbage classification dataset.

I added this line on the end of the notebook to save the model:

torch.save(model.state_dict(), "q2.pt")

But, how many times I tried, I can’t seem to make it run on openCV to check the garbage that’s on the camera.

ERROR:

 return F.conv2d(input, weight, bias, self.stride,
RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[1, 480, 640, 3] to have 3 channels, but got 480 channels instead

Here is the code:

import os
import torch
import torchvision
from torch.utils.data import random_split
import torchvision.models as models
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt

import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'

data_dir  = 'dataset'
classes = os.listdir(data_dir)
print(classes)

from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms

transformations = transforms.Compose([transforms.Resize((256, 256)), transforms.ToTensor()])
dataset = ImageFolder(data_dir, transform = transformations)
train_ds, val_ds, test_ds = random_split(dataset, [1593, 176, 758])



def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))

class ImageClassificationBase(nn.Module):
    def training_step(self, batch):
        images, labels = batch 
        out = self(images)                  # Generate predictions
        loss = F.cross_entropy(out, labels) # Calculate loss
        return loss
    
    def validation_step(self, batch):
        images, labels = batch 
        out = self(images)                    # Generate predictions
        loss = F.cross_entropy(out, labels)   # Calculate loss
        acc = accuracy(out, labels)           # Calculate accuracy
        return {'val_loss': loss.detach(), 'val_acc': acc}
        
    def validation_epoch_end(self, outputs):
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()   # Combine losses
        batch_accs = [x['val_acc'] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()      # Combine accuracies
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}
    
    def epoch_end(self, epoch, result):
        print("Epoch {}: train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
            epoch+1, result['train_loss'], result['val_loss'], result['val_acc']))
            
class ResNet(ImageClassificationBase):
    def __init__(self):
        super().__init__()
        self.network = models.resnet50(pretrained=True)
        num_ftrs = self.network.fc.in_features
        self.network.fc = nn.Linear(num_ftrs, 6)
    
    def forward(self, xb):
        return torch.sigmoid(self.network(xb))

model = ResNet()

FILE = "q.pt"
model.load_state_dict(torch.load(FILE)) # it takes the loaded dictionary, not the path file itself
model.eval()

def get_default_device():
    """Pick GPU if available, else CPU"""
    # if torch.cuda.is_available():
    #     return torch.device('cuda')
    # else:
    #     return torch.device('cpu')
    return torch.device('cpu')

    
def to_device(data, device):
    """Move tensor(s) to chosen device"""
    if isinstance(data, (list,tuple)):
        return [to_device(x, device) for x in data]
    return data.to(device, non_blocking=True)

class DeviceDataLoader():
    """Wrap a dataloader to move data to a device"""
    def __init__(self, dl, device):
        self.dl = dl
        self.device = device
        
    def __iter__(self):
        """Yield a batch of data after moving it to device"""
        for b in self.dl: 
            yield to_device(b, self.device)

    def __len__(self):
        """Number of batches"""
        return len(self.dl)

device = get_default_device()
print(device)


def predict_image(img, model):
    # xb = to_device(img.unsqueeze(0), device)
    xb = to_device(img, device)
    yb = model(xb)
    prob, preds  = torch.max(yb, dim=1)
    return dataset.classes[preds[0].item()]


img, label = test_ds[17]
print(img.shape)
# plt.imshow(img.permute(1, 2, 0))
# print('Label:', dataset.classes[label], ', Predicted:', predict_image(img, model))

import cv2
import numpy as np
from PIL import Image
from keras import models

labels = ['cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash']
video = cv2.VideoCapture(1)

while True:
    _, frame = video.read()
    
    cv2.imshow("Prediction", frame)
    key = cv2.waitKey(1)
    if key == ord('q'):
        break
    elif cv2.waitKey(33) == ord('a'):
        cv2.imwrite('check/_check.png',frame)

        imm = cv2.imread('check/_check.png')
        imm = torch.Tensor(imm)

        q = predict_image(imm, model)
        print(q)


video.release()
cv2.destroyAllWindows()

Any inputs are appreciated. thank you!

As you can see in the error message the memory layout of your input tensor is wrong as it’s in the “channels-last” layout.
.permute the tensor to channels-first and it should work.
Assuming your input is 4-dimensional, this should work:

x = x.permute(0, 3, 1, 2).contiguous()