Using dataloader with single image tensor

Jbrec · May 6, 2021, 8:35pm

Hi there, I’m having an issue with my CNN returning a tensor instead of an image label. I take a single frame from my video and run face_recognition on it, but when I pass the result to my CNN I get a tensor back when I should be getting a label. Any help would be appreciated.

#This library taken from https://face-recognition.readthedocs.io/en/latest/face_recognition.html
import face_recognition
import cv2
from PIL import Image

def emotion_recognition2(image):
  """Crop the largest detected face out of a frame.

  Faces are located with ``face_recognition.face_locations`` and the one
  whose bounding box covers the largest area is cut out of ``image``.

  Args:
    image: Frame to search. It is sliced as ``image[rows, cols]`` and the
      crop is handed to ``Image.fromarray``, so it is presumably an
      H x W x C uint8 numpy array (TODO confirm against the caller).

  Returns:
    A PIL image containing only the largest face, or ``None`` when no face
    is detected.
  """
  # Find all the faces with the HOG model; each location is a
  # (top, right, bottom, left) tuple.
  face_locations = face_recognition.face_locations(image)
  if not face_locations:
    return None

  def box_area(location):
    top, right, bottom, left = location
    return abs((right - left) * (top - bottom))

  # Only one face is returned, so only the winning box is cropped.
  # On ties max() keeps the first box found.
  top, right, bottom, left = max(face_locations, key=box_area)
  return Image.fromarray(image[top:bottom, left:right])

# Take frame 499 of the clip. `video` is defined elsewhere and is indexed here
# as a 4-D array (presumably frames x height x width x channels; TODO confirm).
test = video[499,:,:,:]

# Crop the largest face in that frame; the result is None when no face is found.
face = emotion_recognition2(test)

def emotion_check(image):
  """Turn an image into a normalised batch of one for the network.

  The image is converted to a tensor, resized to 100x100 and each of its
  three channels is normalised with mean 0.5 and std 0.5. The result is
  wrapped in a gradient-tracking ``Variable`` and given a leading batch
  dimension. No model is run here; the return value is the input tensor.
  """
  steps = [
      transforms.ToTensor(),
      transforms.Resize((100, 100)),
      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
  ]
  preprocess = transforms.Compose(steps)
  tensor = Variable(preprocess(image), requires_grad=True)
  return tensor.unsqueeze(0)

def predict_expression(image):
  """Run ``model_resnet34`` on one image and return the winning class index.

  The image is preprocessed with ``emotion_check``, moved to the GPU and
  passed through the model. The return value is the index tensor produced
  by ``torch.max`` over dimension 1, not a class name.
  """
  batch = emotion_check(image).cuda()
  scores = model_resnet34(batch)
  return torch.max(scores, 1)[1]

# NOTE: emotion_check only preprocesses the image, so this call evaluates to the
# normalised input tensor. predict_expression(face) is the function that runs
# the model and returns a prediction.
emotion_check(face)

This returns:

tensor([[[[-0.5398, -0.6200, -0.6836, …, -0.8040, -0.7873, -0.7488], [-0.5480, -0.6206, -0.7035, …, -0.7887, -0.7798, -0.7190], [-0.5321, -0.6105, -0.6945, …, -0.7523, -0.7386, -0.6872], …, [-0.3604, -0.3647, -0.3716, …, -0.8795, -0.8835, -0.8973], [-0.3755, -0.3552, -0.3524, …, -0.8843, -0.8929, -0.9059], [-0.3977, -0.3449, -0.3247, …, -0.8843, -0.8929, -0.9059]], [[-0.7023, -0.7768, -0.8169, …, -0.8710, -0.8582, -0.8380], [-0.7106, -0.7775, -0.8369, …, -0.8672, -0.8651, -0.8367], [-0.6947, -0.7674, -0.8278, …, -0.8435, -0.8382, -0.8127], …, [-0.6051, -0.6029, -0.6029, …, -0.1982, -0.1892, -0.1835], [-0.5984, -0.5782, -0.5610, …, -0.2029, -0.1986, -0.1922], [-0.5624, -0.5096, -0.4816, …, -0.2029, -0.1986, -0.1922]], [[-0.7843, -0.8474, -0.8953, …, -0.9637, -0.9473, -0.9300], [-0.7925, -0.8481, -0.9153, …, -0.9613, -0.9578, -0.9230], [-0.7766, -0.8380, -0.9062, …, -0.9464, -0.9337, -0.9147], …, [-0.7366, -0.7324, -0.7137, …, 0.0499, 0.0604, 0.0596], [-0.7126, -0.6903, -0.6575, …, 0.0451, 0.0510, 0.0510], [-0.6600, -0.5959, -0.5522, …, 0.0451, 0.0510, 0.0510]]]], grad_fn=)

Jbrec · May 7, 2021, 4:05pm

Solved!

def emotion_check(image):
  """Preprocess one image into a single-item batch on the GPU.

  The image is converted to a float tensor, resized to 100x100 and each of
  its three channels is normalised with mean 0.5 and std 0.5. A leading
  batch dimension is then added and the tensor is moved to CUDA.

  Args:
    image: Image to preprocess, in a form ``transforms.ToTensor`` accepts
      (a PIL image or a numpy array).

  Returns:
    A gradient-tracking CUDA tensor with a leading batch dimension of 1.
  """
  # No RandomHorizontalFlip here: it is a training-time augmentation, and at
  # inference it would mirror the input at random from one call to the next.
  preprocess = transforms.Compose([
      transforms.ToTensor(),
      transforms.Resize((100, 100)),
      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
  ])
  batch = preprocess(image).float()
  # Tensors carry autograd state themselves; the Variable wrapper is deprecated.
  batch.requires_grad_(True)
  return batch.unsqueeze(0).cuda()

def expression(image):
  """Classify one image and return its class name.

  The image is preprocessed by ``emotion_check`` and passed through
  ``model_resnet34``. The index of the highest score along dimension 1 is
  then looked up in ``class_names``.
  """
  batch = emotion_check(image)
  scores = model_resnet34(batch)
  best = torch.max(scores, 1)[1]
  return class_names[best]