This is the definition of RFCN; the RPN and VGG16 modules used inside it are also nn.Module subclasses.
class RFCN(nn.Module):
    """R-FCN style detector: an RPN followed by position-sensitive RoI pooling.

    The RPN produces a feature map and ROIs. Two 1x1 convolutions turn the
    features into position-sensitive score / box maps, which are PSRoI-pooled
    per ROI and then averaged over the 7x7 bins to give class scores and box
    regression outputs.
    """

    n_classes = 21
    classes = np.asarray(['__background__',
                          'aeroplane', 'bicycle', 'bird', 'boat',
                          'bottle', 'bus', 'car', 'cat', 'chair',
                          'cow', 'diningtable', 'dog', 'horse',
                          'motorbike', 'person', 'pottedplant',
                          'sheep', 'sofa', 'train', 'tvmonitor'])
    # Subtracted from every input image (images are BGR, see get_image_blob).
    PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])
    # Target length of the shorter image side, one entry per pyramid level.
    SCALES = (600,)
    # Upper bound for the longer image side after rescaling.
    MAX_SIZE = 1000

    def __init__(self, classes=None, debug=False):
        """Build the network.

        Args:
            classes: optional sequence of class names (background first).
                When given it replaces the default class list and sets
                ``n_classes`` to its length.
            debug: when true, ``build_loss`` also records simple accuracy
                counters (``tp``, ``tf``, ``fg_cnt``, ``bg_cnt``) on ``self``.
        """
        super(RFCN, self).__init__()

        if classes is not None:
            # interpret_faster_rcnn indexes self.classes with an index array,
            # which requires an ndarray rather than a plain list.
            self.classes = np.asarray(classes)
            self.n_classes = len(classes)

        self.rpn = RPN()

        # PSRoIPool(7, 7, 1.0 / 16, 7, 15) was used for testing.
        # The last PSRoIPool argument is assumed to be the pooled output
        # dimension and the 4th the group size -- TODO confirm against
        # PSRoIPool's definition.
        self.psroi_pool_cls = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        self.psroi_pool_loc = PSRoIPool(7, 7, 1.0 / 16, 7, 8)
        self.new_conv = Conv2d(512, 1024, 1, same_padding=False)

        # Each position-sensitive map needs 7*7*output_dim channels for the
        # pool that consumes it: the score map feeds psroi_pool_cls
        # (n_classes outputs) and the box map feeds psroi_pool_loc (8 outputs).
        # The original code had these two channel counts swapped.
        self.rfcn_score = Conv2d(1024, 7 * 7 * self.n_classes, 1, 1, bn=False)
        self.rfcn_bbox = Conv2d(1024, 7 * 7 * 8, 1, 1, bn=False)

        # Averaging over the 7x7 bins yields one value per ROI and channel.
        self.bbox_pred = nn.AvgPool2d((7, 7), stride=(7, 7))
        self.cls_score = nn.AvgPool2d((7, 7), stride=(7, 7))

        # Losses; populated by forward() in training mode.
        self.cross_entropy = None
        self.loss_box = None

        # for log
        self.debug = debug

    @property
    def loss(self):
        """Total head loss: cross-entropy plus 10x the box regression loss.

        Both terms are ``None`` until ``forward`` has run in training mode,
        so reading this property before that raises ``TypeError``.
        """
        return self.cross_entropy + self.loss_box * 10

    def forward(self, im_data, im_info, gt_boxes=None, gt_ishard=None, dontcare_areas=None):
        """Run the RPN and the R-FCN head.

        In training mode the ROIs are replaced by the sampled ROIs returned by
        ``proposal_target_layer`` and the losses are stored on ``self``.

        Returns:
            (cls_prob, bbox_pred, rois): softmax class probabilities, box
            regression outputs, and the ROIs they refer to.
        """
        features, rois = self.rpn(im_data, im_info, gt_boxes, gt_ishard, dontcare_areas)

        roi_data = None
        if self.training:
            roi_data = self.proposal_target_layer(
                rois, gt_boxes, gt_ishard, dontcare_areas, self.n_classes)
            rois = roi_data[0]

        # Position-sensitive maps.
        conv_new1 = self.new_conv(features)
        r_score_map = self.rfcn_score(conv_new1)
        r_bbox_map = self.rfcn_bbox(conv_new1)

        # PSRoI pooling.
        psroi_pooled_cls = self.psroi_pool_cls(r_score_map, rois)
        psroi_pooled_loc = self.psroi_pool_loc(r_bbox_map, rois)

        # Flatten the pooled output to 2-D, one row per ROI. A bare squeeze()
        # would also drop the ROI axis when there is exactly one ROI.
        bbox_pred = self.bbox_pred(psroi_pooled_loc)
        bbox_pred = bbox_pred.view(bbox_pred.size(0), -1)
        cls_score = self.cls_score(psroi_pooled_cls)
        cls_score = cls_score.view(cls_score.size(0), -1)
        # cls_score is always 2-D here, so the legacy implicit softmax
        # dimension is the class axis.
        cls_prob = F.softmax(cls_score)

        if self.training:
            self.cross_entropy, self.loss_box = self.build_loss(cls_score, bbox_pred, roi_data)

        return cls_prob, bbox_pred, rois

    def build_loss(self, cls_score, bbox_pred, roi_data):
        """Compute the classification and box regression losses.

        Args:
            cls_score: per-ROI class scores (rows are ROIs).
            bbox_pred: per-ROI box regression outputs.
            roi_data: the 5-tuple returned by ``proposal_target_layer``
                ``(rois, labels, bbox_targets, bbox_inside_weights,
                bbox_outside_weights)``.

        Returns:
            (cross_entropy, loss_box)
        """
        # classification loss
        label = roi_data[1].contiguous().view(-1)
        # Plain ints, so the slicing and arithmetic below do not depend on
        # whether torch.sum returns a number or a 0-dim tensor.
        fg_cnt = int(torch.sum(label.data.ne(0)))
        bg_cnt = label.data.numel() - fg_cnt

        # for log
        if self.debug:
            # The slicing presumably relies on foreground ROIs coming first
            # in the batch -- verify against proposal_target_layer_py.
            maxv, predict = cls_score.data.max(1)
            self.tp = torch.sum(predict[:fg_cnt].eq(label.data[:fg_cnt])) if fg_cnt > 0 else 0
            self.tf = torch.sum(predict[fg_cnt:].eq(label.data[fg_cnt:]))
            self.fg_cnt = fg_cnt
            self.bg_cnt = bg_cnt

        # Allocate the class weights on the same device (and with the same
        # type) as the scores, instead of a bare .cuda() on the default GPU.
        ce_weights = cls_score.data.new(cls_score.size(1)).fill_(1)
        if bg_cnt > 0:
            # Down-weight background by the foreground/background ratio.
            ce_weights[0] = float(fg_cnt) / bg_cnt
        # With no background ROIs the background weight is never used, so it
        # is left at 1 rather than dividing by zero.

        cross_entropy = F.cross_entropy(cls_score, label, weight=ce_weights)

        # bounding box regression L1 loss
        # NOTE(review): bbox_pred comes from an 8-channel pooling while the
        # targets are documented as K x 4 wide -- confirm the shapes agree.
        bbox_targets, bbox_inside_weights, _ = roi_data[2:]
        bbox_targets = torch.mul(bbox_targets, bbox_inside_weights)
        bbox_pred = torch.mul(bbox_pred, bbox_inside_weights)

        # Summed smooth-L1, normalised by the number of foreground ROIs
        # (the epsilon avoids division by zero).
        loss_box = F.smooth_l1_loss(bbox_pred, bbox_targets, size_average=False) / (fg_cnt + 1e-4)

        return cross_entropy, loss_box

    @staticmethod
    def proposal_target_layer(rpn_rois, gt_boxes, gt_ishard, dontcare_areas, num_classes):
        """Sample ROIs and build their training targets.

        Delegates to ``proposal_target_layer_py`` on numpy data and converts
        the results back to GPU variables on the device of ``rpn_rois``.

        ----------
        rpn_rois:  (1 x H x W x A, 5) [0, x1, y1, x2, y2]
        gt_boxes: (G, 5) [x1 ,y1 ,x2, y2, class] int
        # gt_ishard: (G, 1) {0 | 1} 1 indicates hard
        dontcare_areas: (D, 4) [ x1, y1, x2, y2]
        num_classes
        ----------
        Returns
        ----------
        rois: (1 x H x W x A, 5) [0, x1, y1, x2, y2]
        labels: (1 x H x W x A, 1) {0,1,...,_num_classes-1}
        bbox_targets: (1 x H x W x A, K x4) [dx1, dy1, dx2, dy2]
        bbox_inside_weights: (1 x H x W x A, Kx4) 0, 1 masks for the computing loss
        bbox_outside_weights: (1 x H x W x A, Kx4) 0, 1 masks for the computing loss
        """
        # Remember where the ROIs live so the targets end up on the same GPU.
        device_id = rpn_rois.get_device() if rpn_rois.is_cuda else None

        rpn_rois = rpn_rois.data.cpu().numpy()
        rois, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights = \
            proposal_target_layer_py(rpn_rois, gt_boxes, gt_ishard, dontcare_areas, num_classes)

        def _to_gpu(array, **kwargs):
            # Convert to a CUDA variable, then pin it to the ROI device.
            var = network.np_to_variable(array, is_cuda=True, **kwargs)
            return var if device_id is None else var.cuda(device_id)

        rois = _to_gpu(rois)
        labels = _to_gpu(labels, dtype=torch.LongTensor)
        bbox_targets = _to_gpu(bbox_targets)
        bbox_inside_weights = _to_gpu(bbox_inside_weights)
        bbox_outside_weights = _to_gpu(bbox_outside_weights)

        return rois, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights

    def interpret_faster_rcnn(self, cls_prob, bbox_pred, rois, im_info, im_shape, nms=True, clip=True, min_score=0.0):
        """Turn raw network outputs into final detections.

        Keeps ROIs whose best class is not background (index 0) and whose
        score is at least ``min_score``, applies the box deltas, optionally
        clips to ``im_shape`` and optionally runs NMS (threshold 0.3).

        Returns:
            (pred_boxes, scores, class_names)
        """
        # find class
        scores, inds = cls_prob.data.max(1)
        scores, inds = scores.cpu().numpy(), inds.cpu().numpy()

        keep = np.where((inds > 0) & (scores >= min_score))
        scores, inds = scores[keep], inds[keep]

        # Apply bounding-box regression deltas
        keep = keep[0]
        box_deltas = bbox_pred.data.cpu().numpy()[keep]
        # NOTE(review): this picks class-specific deltas at columns
        # [4*c, 4*c + 4), but the regression branch is pooled to 8 outputs --
        # confirm the indexing matches the regression layout.
        # `float` replaces the alias np.float, which newer NumPy removed.
        box_deltas = np.asarray([
            deltas[cls * 4: cls * 4 + 4] for deltas, cls in zip(box_deltas, inds)
        ], dtype=float)

        # Divide by the image scale (im_info[0][2]) to map the ROIs back to
        # the original image coordinates.
        boxes = rois.data.cpu().numpy()[keep, 1:5] / im_info[0][2]
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        if clip:
            pred_boxes = clip_boxes(pred_boxes, im_shape)

        # nms
        if nms and pred_boxes.shape[0] > 0:
            pred_boxes, scores, inds = nms_detections(pred_boxes, scores, 0.3, inds=inds)

        return pred_boxes, scores, self.classes[inds]

    def detect(self, image, thr=0.3):
        """Detect objects in a single image.

        Args:
            image: a colour image (BGR ndarray, see ``get_image_blob``).
            thr: minimum class score for a detection to be kept.

        Returns:
            (pred_boxes, scores, class_names)
        """
        im_data, im_scales = self.get_image_blob(image)
        im_info = np.array(
            [[im_data.shape[1], im_data.shape[2], im_scales[0]]],
            dtype=np.float32)

        cls_prob, bbox_pred, rois = self(im_data, im_info)
        pred_boxes, scores, classes = \
            self.interpret_faster_rcnn(cls_prob, bbox_pred, rois, im_info, image.shape, min_score=thr)
        return pred_boxes, scores, classes

    def get_image_blob_noscale(self, im):
        """Convert an image into a network input without rescaling it.

        Returns:
            (blob, im_scale_factors) where the only scale factor is 1.0.
        """
        im_orig = im.astype(np.float32, copy=True)
        im_orig -= self.PIXEL_MEANS

        # Use the mean-subtracted copy; the original passed the raw image on
        # and silently discarded the normalisation above.
        processed_ims = [im_orig]
        im_scale_factors = [1.0]

        blob = im_list_to_blob(processed_ims)

        return blob, np.array(im_scale_factors)

    def get_image_blob(self, im):
        """Converts an image into a network input.

        Arguments:
            im (ndarray): a color image in BGR order
        Returns:
            blob (ndarray): a data blob holding an image pyramid
            im_scale_factors (list): list of image scales (relative to im) used
                in the image pyramid
        """
        im_orig = im.astype(np.float32, copy=True)
        im_orig -= self.PIXEL_MEANS

        im_shape = im_orig.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])

        processed_ims = []
        im_scale_factors = []

        for target_size in self.SCALES:
            im_scale = float(target_size) / float(im_size_min)
            # Prevent the biggest axis from being more than MAX_SIZE
            if np.round(im_scale * im_size_max) > self.MAX_SIZE:
                im_scale = float(self.MAX_SIZE) / float(im_size_max)
            resized = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                                 interpolation=cv2.INTER_LINEAR)
            im_scale_factors.append(im_scale)
            processed_ims.append(resized)

        # Create a blob to hold the input images
        blob = im_list_to_blob(processed_ims)

        return blob, np.array(im_scale_factors)

    def load_from_npz(self, params):
        """Copy weights from an npz-style mapping into this model.

        The RPN loads its own weights first; then each entry of ``pairs`` maps
        a state-dict prefix of this model to a name prefix in ``params``.
        """
        self.rpn.load_from_npz(params)

        # NOTE(review): these keys ('fc6.fc', 'fc7.fc', 'score_fc.fc',
        # 'bbox_fc.fc') do not correspond to any layer created in __init__,
        # so the lookups below look like they will raise KeyError -- the
        # mapping appears to be inherited from a Faster R-CNN head; confirm.
        pairs = {'fc6.fc': 'fc6', 'fc7.fc': 'fc7', 'score_fc.fc': 'cls_score', 'bbox_fc.fc': 'bbox_pred'}
        own_dict = self.state_dict()
        for k, v in pairs.items():
            key = '{}.weight'.format(k)
            # Stored weights are transposed relative to this model's layout.
            param = torch.from_numpy(params['{}/weights:0'.format(v)]).permute(1, 0)
            own_dict[key].copy_(param)

            key = '{}.bias'.format(k)
            param = torch.from_numpy(params['{}/biases:0'.format(v)])
            own_dict[key].copy_(param)
When I run train.py, an error occurs in the call net(im_data, im_info, gt_boxes, gt_ishard, dontcare_areas)
(net is an RFCN instance):
RuntimeError: arguments are located on different GPUs
The error appeared after I added just one line: net = torch.nn.DataParallel(model, device_ids=[0, 1])
Why does this error happen, and how should I train on multiple GPUs? Are there any clues?
Thanks a lot!