Hello everyone,
I’m trying to run classification on the CIFAR10 dataset using a custom CNN which looks like this:
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv = nn.Sequential(nn.Conv2d(3, 16, kernel_size=3, stride=2),
nn.BatchNorm2d(16), nn.ReLU(inplace=True),
nn.Conv2d(16, 32, kernel_size=3, stride=2),
nn.BatchNorm2d(32), nn.ReLU(inplace=True),
nn.Conv2d(32, 64, kernel_size=3, stride=2),
nn.BatchNorm2d(64), nn.ReLU(inplace=True),
nn.Conv2d(64, 10, kernel_size=3),
nn.BatchNorm2d(10), nn.Flatten())
def forward(self, x):
x = self.conv(x)
return x
The training and test goes well, I’m getting around 73% accuracy. Then I save the model using torch.save(model.state_dict(), save_path)
.
From another script I load the model like this:
def load_model(path):
model = Net()
model.load_state_dict(torch.load(path))
print(model)
model.eval()
return model
Thing is, I want to do detection on CIFAR10 but directly from images without using DataLoader or Dataset classes. I saved images from the DataLoader into class_i.png
files and then load them like this:
def load_imgs(path):
imgs = []
labels = []
img_files = os.listdir(path)
for i in range(len(img_files)):
label = re.search(r"\w*(?=_)", img_files[i]).group(0)
img_path = os.path.join(path, img_files[i])
img = cv2.imread(img_path, cv2.IMREAD_COLOR)
img = np.asarray(img)
imgs.append(img)
labels.append(label)
return imgs, labels
Finally, I’m running classification on these images by firstly converting them to tensors:
def classification(model, imgs):
if torch.cuda.is_available():
print(f"Using CUDA device {torch.cuda.get_device_name(0)}")
device = torch.device("cuda:0")
else:
print("No CUDA device found, using CPU")
device = torch.device("cpu")
to_tensor = torchvision.transforms.ToTensor()
tensor_imgs = [to_tensor(img).float() for img in imgs]
pred = []
with torch.no_grad():
for img in tensor_imgs:
img = img.to(device)
model = model.to(device)
output = model(img[None, ...])
pred.append(output.argmax(dim=1, keepdim=True).cpu().squeeze().item())
return pred
But when comparing predicted classes with the original labels, I’m getting less than 25% of accuracy. My guess is that the problem is somewhere around the way I pass images for detection. Unfortunately, I am strictly limited to the OpenCV library for loading the images.
What would be the correct way to run classification on imported images without using data loaders?