Hello, I use a pretrained ResNet-101 for feature extraction at the last convolutional layer [M×N×2048], then apply average pooling and normalization, and use cosine similarity for retrieval, but the retrieval accuracy is extremely low compared to Keras. I also tried VGG16, and the same thing happens.
My code does not resize the images, because resizing might lower the retrieval performance.
Compared to other platforms, PyTorch is very easy to use, and I love it.
This has confused me for days. Please help me; I am a beginner with PyTorch. Thank you very much!
God bless you!
import torch
import torch.nn as nn
from torchvision import models
from torchvision import transforms
from sklearn.preprocessing import normalize
from scipy.misc import imread
import numpy as np
# Build a frozen ResNet-101 feature extractor on GPU 0.
torch.cuda.set_device(0)

# Load the ImageNet-pretrained network, then drop its last two children
# (in torchvision's ResNet these are the global average pool and the fc
# classifier) so the model outputs the final convolutional feature map.
resnet101 = models.resnet101(pretrained=True)
# Keep the layer list under its own name: rebinding `models` here would
# shadow the imported torchvision `models` module for the rest of the file.
backbone_layers = list(resnet101.children())[:-2]
resnet101 = nn.Sequential(*backbone_layers)
resnet101 = resnet101.cuda()

# The network is only used for inference: freeze every weight and switch to
# eval mode so BatchNorm uses its stored running statistics.
for p in resnet101.parameters():
    p.requires_grad = False
resnet101.eval()
if __name__ == '__main__':
    # Extract one L2-normalised, average-pooled descriptor per query image.
    #
    # Results are collected in `ave_Result`, one (1, L) float64 row vector
    # per image, in the same order as `All_Query`.

    # Standard ImageNet channel statistics, applied after ToTensor() has
    # scaled the image to [0, 1] and moved channels first.
    input_normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                           std=[0.229, 0.224, 0.225])
    transform = transforms.Compose([transforms.ToTensor(), input_normalize])

    L = 2048  # channel count of the last ResNet-101 convolutional stage
    ave_Result = []

    # All_Query is the list of image file names (defined elsewhere).
    for kk, QFile in enumerate(All_Query):
        print(kk)

        # Load at native resolution (no resize) and add a batch dimension.
        Qimg = imread('Image Folder Address' + QFile, mode='RGB')
        Qimg = transform(Qimg).cuda()
        Qimg = torch.unsqueeze(Qimg, 0)

        # The model already lives on the GPU, so its output does too; only
        # the transfer back to host memory is needed.
        outputQ = resnet101(Qimg)
        feature_map = outputQ.cpu().numpy()[0]  # (L, M, N)

        # Global average pooling: mean over all spatial positions, per channel.
        per_channel = np.float64(feature_map).reshape(L, -1)
        ave_DPA = per_channel.mean(axis=1).reshape(1, L)

        # L2-normalise the descriptor as a whole (one sample per row).
        # axis=0 would normalise each single-element column independently,
        # turning every non-zero entry into +/-1 and destroying the feature.
        ave_DPA = normalize(ave_DPA, axis=1)
        ave_Result.append(ave_DPA)