I have some problems when i try to run inference.py. I tried to fix this error following similar topics but it’s still get this error. This is my code
from lib import *
from l2_norm import L2Norm
from default_box import DefBox
import torch.nn.functional as F
def create_vgg():
layers = []
in_channels = 3
cfgs = [64, 64, 'M', 128, 128, 'M',
256, 256, 256, 'MC', 512, 512, 512, 'M',
512, 512, 512]
for cfg in cfgs:
if cfg == 'M': # floor
layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
elif cfg == 'MC': # ceiling
layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)]
else:
conv2d = nn.Conv2d(in_channels, cfg, kernel_size=3, padding=1)
layers += [conv2d, nn.ReLU(inplace=True)]
in_channels = cfg
pool5 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6)
conv7 = nn.Conv2d(1024, 1024, kernel_size=1)
layers += [pool5, conv6, nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)]
return nn.ModuleList(layers)
def create_extras():
layers = []
in_channels = 1024
cfgs = [256, 512, 128, 256, 128, 256, 128, 256]
layers += [nn.Conv2d(in_channels, cfgs[0], kernel_size=1)]
layers += [nn.Conv2d(cfgs[0], cfgs[1], kernel_size=3, stride=2, padding=1)]
layers += [nn.Conv2d(cfgs[1], cfgs[2], kernel_size=1)]
layers += [nn.Conv2d(cfgs[2], cfgs[3], kernel_size=3, stride=2, padding=1)]
layers += [nn.Conv2d(cfgs[3], cfgs[4], kernel_size=1)]
layers += [nn.Conv2d(cfgs[4], cfgs[5], kernel_size=3)]
layers += [nn.Conv2d(cfgs[5], cfgs[6], kernel_size=1)]
layers += [nn.Conv2d(cfgs[6], cfgs[7], kernel_size=3)]
return nn.ModuleList(layers)
def create_loc_conf(num_classes=1,bbox_aspect_num=[4, 6, 6, 6, 4, 4]):
loc_layers = []
conf_layers = []
# source1
# loc
loc_layers += [nn.Conv2d(512, bbox_aspect_num[0] * 4, kernel_size = 3, padding=1)]
# conf
conf_layers += [nn.Conv2d(512, bbox_aspect_num[0] * num_classes, kernel_size=3, padding=1)]
# source2
# loc
loc_layers += [nn.Conv2d(1024, bbox_aspect_num[1] * 4, kernel_size=3, padding=1)]
# conf
conf_layers += [nn.Conv2d(1024, bbox_aspect_num[1] * num_classes, kernel_size=3, padding=1)]
# source3
# loc
loc_layers += [nn.Conv2d(512, bbox_aspect_num[2] * 4, kernel_size=3, padding=1)]
# conf
conf_layers += [nn.Conv2d(512, bbox_aspect_num[2] * num_classes, kernel_size=3, padding=1)]
# source4
# loc
loc_layers += [nn.Conv2d(256, bbox_aspect_num[3] * 4, kernel_size=3, padding=1)]
# conf
conf_layers += [nn.Conv2d(256, bbox_aspect_num[3] * num_classes, kernel_size=3, padding=1)]
# source5
# loc
loc_layers += [nn.Conv2d(256, bbox_aspect_num[4] * 4, kernel_size=3, padding=1)]
# conf
conf_layers += [nn.Conv2d(256, bbox_aspect_num[4] * num_classes, kernel_size=3, padding=1)]
# source6
# loc
loc_layers += [nn.Conv2d(256, bbox_aspect_num[5] * 4, kernel_size=3, padding=1)]
# conf
conf_layers += [nn.Conv2d(256, bbox_aspect_num[5] * num_classes, kernel_size=3, padding=1)]
return nn.ModuleList(loc_layers), nn.ModuleList(conf_layers)
cfg = {
"num_classes" : 2, #we only have 1 class: fire
"input_size" : 300, #SSD 300
"bbox_aspect_num" : [4, 6, 6, 6, 4, 4], # ty le cho source 1 -> 6
"feature_maps" : [38, 19, 10, 5, 3, 1],
"steps" : [8, 16, 32, 64, 100, 300], # Size of default box
"min_size" : [30, 60, 111, 162, 213, 264],
"max_size" : [60, 111, 162, 213, 264, 315],
"aspect_ratios" : [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
}
class SSD(nn.Module):
def __init__(self, phase, cfg):
super(SSD, self).__init__()
self.phase = phase
self.num_classes = cfg['num_classes']
# Create main modules
self.vgg = create_vgg()
self.extras = create_extras()
self.loc, self.conf = create_loc_conf(cfg['num_classes'], cfg['bbox_aspect_num'])
self.L2Norm = L2Norm()
# Create default box
dbox = DefBox(cfg)
self.dbox_list = dbox.create_defbox()
if phase == "inference":
self.detect = Detect()
def forward(self, x):
sources = list()
loc = list()
conf = list()
for k in range(23):
x = self.vgg[k](x)
# source 1
source1 = self.L2Norm(x)
sources.append(source1)
for k in range(23, len(self.vgg)):
x = self.vgg[k](x)
# source 2
sources.append(x)
# source 3-6
for k, v in enumerate(self.extras):
x = F.relu(v(x), inplace = True)
if k % 2 == 1:
sources.append(x)
for (x, l, c) in zip(sources, self.loc, self.conf):
# Data có dạng (batch_size, 4*aspect_ratio_num, featuremap_height, featuremap_width)
# aspect_ratio_num = 4, 6, ...
# -> (batch_size, featuremap_height, featuremap_width, 4*aspect_ratio_num)
loc.append(l(x).permute(0, 2, 3, 1).contiguous())
conf.append(c(x).permute(0, 2, 3, 1).contiguous())
loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1) #(batch_size, 34928)
conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1) # (btach_size, 8732)
loc = loc.view(loc.size(0), -1, 4) # (batch_size, 8732, 4)
conf = conf.view(conf.size(0), -1, self.num_classes) # (batch_size, 8732, 2)
output = (loc, conf, self.dbox_list)
if self.phase == "inference":
return self.detect(output[0], output[1], output[2])
else:
return output
def decode(loc, defbox_list):
'''
loc: [8732, 4] (delta_x, delta_y, delta_w, delta_h)
defbox_list: [8732, 4] (cx_d, cy_d, w_d, h_d)
returns: boxes[xmin, ymin, xmax, ymax]
'''
boxes = torch.cat((defbox_list[:, :2] + loc[:, :2]*defbox_list[:, 2:]),
defbox_list[:, 2:] * torch.exp(loc[:, 2:] * 0.2), dim = 1)
boxes[:, :2] -= boxes[:, 2:] / 2 # calculate x_min, y_min
boxes[:, 2:] += boxes[:, :2] / 2 # calculate x_max, y_max
return boxes
def nms(boxes, scores, overlap = 0.45, top_k = 200):
"""
boxes: [num_box, 4] # có 8732 num_box
scores: [num_box]
"""
count = 0
keep = scores.new(scores.size(0)).zero_().long()
# boxes:
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2]
y2 = boxes[:, 3]
# boxes area
area = torch.mul(x2 - x1, y2 - y1)
tmp_x1 = boxes.new()
tmp_y1 = boxes.new()
tmp_x2 = boxes.new()
tmp_y2 = boxes.new()
tmp_w = boxes.new()
tmp_h = boxes.new()
value, idx = scores.sort(0)
idx = idx[-top_k:] # lấy 200 cái bdbox có độ tự tin cao nhất
while idx.numel() > 0:
i = idx[-1] # id của box có độ tự tin cao nhất
keep[count] = i
count += 1
if id.size(0) == 1:
break
idx = idx[:, -1] # id của các boxes ngoại trừ box có độ tự tin cao nhất
#information boxes
torch.index_select(x1, 0, idx, out = tmp_x1) # lấy ra những thằng có gt trong khoảng idx trong x1
torch.index_select(y1, 0, idx, out = tmp_y1)
torch.index_select(x2, 0, idx, out = tmp_x2)
torch.index_select(y2, 0, idx, out = tmp_y2)
tmp_x1 = torch.clamp(tmp_x1, min = x1[i]) # = x1[i] nếu tmp_x1 < x[i]
tmp_y1 = torch.clamp(tmp_y1, min = y1[i])
tmp_x2 = torch.clamp(tmp_x2, max = x2[i])
tmp_y2 = torch.clamp(tmp_y2, max = y2[i]) # = y2[i] nếu tmp_y2 > y2[i]
# chuyển về tensor có size sao cho index giảm đi 1
tmp_w.resize_as_(tmp_x2)
tmp_h.resize_as_(tmp_y2)
tmp_w = tmp_x2 - tmp_x1
tmp_h = tmp_y2 - tmp_y1
tmp_w = torch.clamp(tmp_w, min = 0.0) # đưa phần tử < 0 về 0
tmp_h = torch.clamp(tmp_h, min = 0.0)
# dien tich overlap
inter = tmp_w * tmp_h # dien tich phan trung nhau
others_area = torch.index_select(area, 0, idx) # dien tich cua cac bbox con lai
union = area[i] + others_area - inter
iou = inter / union
idx = idx[iou.le(overlap)] # giu cac box co idx nho hon 0.45
return keep, count
class Detect(Function):
def __init__(self, conf_thresh = 0.01, top_k = 200, nms_thresh = 0.45):
self.softmax = nn.Softmax(dim = -1)
self.conf_thresh = conf_thresh
self.top_k = top_k
self.nms_thresh = nms_thresh
@staticmethod
def forward(self, loc_data, conf_data, dbox_list):
num_batch = loc_data.size(0)
num_dbox = loc_data.size(1) # tra ve 8732 dbox
num_class = conf_data.size(2) # tra ve so class la 1
conf_data = self.softmax(conf_data) # tinh xac suat, dang co dinh dang (btach_num, 8732, 1)
conf_preds = conf_data.transpose(2,1) # thanh (batch_num, num_class, num_dbox)
output = torch.zeros(num_batch, num_class, self.top_k, 5)
#xu li tung anh trong 1 batch
for i in range(num_batch):
# tinh bbox tu offset information va default box
decode_boxes = decode(loc_data[i], dbox_list)
# copy conf score cua anh thu i
conf_scores = conf_preds[i].clone()
for cl in range(1, num_class):
c_mask = conf_preds[cl].gt(self.conf_thresh) # chi lay nhung conf > 0.01
scores = conf_preds[cl][c_mask] # CHỖ NÀY CẦN XEM LẠI
if scores.nelement() == 0: # numel
continue
l_mask = c_mask.unsqueeze(1).expand_as(decode_boxes) # để đưa chiều về giống chiều của decode_box
boxes = decode_boxes[l_mask].view(-1, 4)
ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
output[i, cl, :count] = torch.cat((scores[ids[:count]].unsqueeze(1), boxes[ids[:count]]), 1)
return output
if __name__ == "__main__":
# vgg = create_vgg()
# print(vgg)
extras = create_extras()
print(extras)
loc, conf = create_loc_conf()
print(loc)
print(conf)
ssd = SSD(phase="train", cfg=cfg)
print(ssd)
and here’re inference.py
from lib import *
from model import SSD
from transform import DataTransform
classes = ["fire"]
cfg = {
"num_classes": 2, #VOC data include 20 class + 1 background class
"input_size": 300, #SSD300
"bbox_aspect_num": [4, 6, 6, 6, 4, 4], # Tỷ lệ khung hình cho source1->source6`
"feature_maps": [38, 19, 10, 5, 3, 1],
"steps": [8, 16, 32, 64, 100, 300], # Size of default box
"min_size": [30, 60, 111, 162, 213, 264], # Size of default box
"max_size": [60, 111, 162, 213, 264, 315], # Size of default box
"aspect_ratios": [[2], [2,3], [2,3], [2,3], [2], [2]]
}
net = SSD(phase="inference", cfg=cfg)
net_weights = torch.load("./weights/ssd300_20.pth", map_location={"cuda:0":"cpu"})
net.load_state_dict(net_weights)
def predict(img_file_path):
img = cv2.imread(img_file_path)
color_mean = (8, 17, 32)
input_size = 300
transform = DataTransform(input_size, color_mean)
phase = "val"
img_tranformed, boxes, labels = transform(img, phase, "", "")
img_tensor = torch.from_numpy(img_tranformed[:,:,(2,1,0)]).permute(2,0,1)
net.eval()
inputs = img_tensor.unsqueeze(0) #(1, 3, 300, 300)
output = net(inputs)
plt.figure(figsize=(10, 10))
colors = [(255,0,0), (0,255,0), (0,0,255)]
font = cv2.FONT_HERSHEY_SIMPLEX
detections = output.data #(1, 21, 200, 5) 5: score, cx, cy, w, h
scale = torch.Tensor(img.shape[1::-1]).repeat(2)
for i in range(detections.size(1)):
j = 0
while detections[0, i, j, 0] >= 0.6:
score = detections[0, i, j, 0]
pt = (detections[0, i, j, 1:]*scale).cpu().numpy()
cv2.rectangle(img,
(int(pt[0]), int(pt[1])),
(int(pt[2]), int(pt[3])),
colors[i%3], 2
)
display_text = "%s: %.2f"%(classes[i-1], score)
cv2.putText(img, display_text, (int(pt[0]), int(pt[1])),
font, 0.5, (255, 255, 255), 1, cv2.LINE_AA)
j += 1
cv2.imshow("Result", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
if __name__ == "__main__":
img_file_path = "/home/huynth/ImageProcessing/result/Median.jpg"
predict(img_file_path)
Please help me T.T Thank you.