The dataset:
class carData(Dataset):
def __init__(self,image_dir):
self.df=pd.read_csv('Images/data/train_solution_bounding_boxes (1).csv')
self.dir=image_dir
self.image_ids=self.df['image'].unique()
def __getitem__(self,index):
image_id=self.image_ids[index]
bboxes=self.df[self.df['image']==image_id]
img_path=os.path.join(self.dir,image_id)
image=cv.imread(img_path,cv.IMREAD_COLOR)
image=cv.cvtColor(image,cv.COLOR_BGR2RGB).astype(np.float32)
image /=255.0
boxes=bboxes[['xmin','ymin','xmax','ymax']].values
area= (boxes[:,3] -boxes[:,1]) * (boxes[:,2] - boxes[:,0])
boxes=torch.as_tensor(boxes,dtype=torch.float32)
area=torch.as_tensor(area,dtype=torch.float32)
labels=torch.ones((bboxes.shape[0],),dtype=torch.int64)
iscrowd=torch.zeros((bboxes.shape[0],),dtype=torch.int64)
target={}
target['boxes']=boxes
target['labels']=labels
target['image_id'] = torch.tensor([index])
target['area']=area
target['iscrow']=iscrowd
image=torchvision.transforms.ToTensor()(image)
image=image.permute(1,2,0)
return image,target
def __len__(self):
return self.image_ids.shape[0]
Dataloader:
def collate_fn(batch):
return tuple(zip(*batch))
train_data_loader = DataLoader(
dataset,
batch_size=6,
shuffle=False,
collate_fn=collate_fn
)
Model:
model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn(pretrained=True)
num_classes = 2 # 1 class (car) + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace pre-trained head with new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
Hyperparameters:
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
step_size=3,
gamma=0.1)
train:
def train(model,optim,dataloader,path,num_of_epochs):
itr=1
try:
os.mkdir(path)
except: path=path
for epoch in range(num_of_epochs):
for images,targets in dataloader:
images=list(image.to(device) for image in images)
targets=[{k:v.to(device) for k,v in t.items()} for t in targets]
loss_dict=model(images,targets)
losses=sum(loss for loss in loss_dict.values())
losses_value=losses.item()
optim.zero_grad()
losses.backward()
optim.step()
if itr %20 == 0:
print(f"iteration {itr} , Loss : {losses_value}")
print(f"epoch: {epoch} , Loss: {losses_value}")
torch.save(model.state_dict(),f"{path}/model.pt")
Error :
RuntimeError Traceback (most recent call last)
<ipython-input-174-8a2434479479> in <module>()
----> 1 train(model,optimizer,train_data_loader,'models',2)
5 frames
<ipython-input-173-fb5393e1415a> in train(model, optim, dataloader, path, num_of_epochs)
10 targets=[{k:v.to(device) for k,v in t.items()} for t in targets]
11
---> 12 loss_dict=model(images,targets)
13 losses=sum(loss for loss in loss_dict.values())
14 losses_value=losses.item()
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.7/dist-packages/torchvision/models/detection/generalized_rcnn.py in forward(self, images, targets)
75 original_image_sizes.append((val[0], val[1]))
76
---> 77 images, targets = self.transform(images, targets)
78
79 # Check for degenerate boxes
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.7/dist-packages/torchvision/models/detection/transform.py in forward(self, images, targets)
109 raise ValueError("images is expected to be a list of 3d tensors "
110 "of shape [C, H, W], got {}".format(image.shape))
--> 111 image = self.normalize(image)
112 image, target_index = self.resize(image, target_index)
113 images[i] = image
/usr/local/lib/python3.7/dist-packages/torchvision/models/detection/transform.py in normalize(self, image)
134 mean = torch.as_tensor(self.image_mean, dtype=dtype, device=device)
135 std = torch.as_tensor(self.image_std, dtype=dtype, device=device)
--> 136 return (image - mean[:, None, None]) / std[:, None, None]
137
138 def torch_choice(self, k: List[int]) -> int:
RuntimeError: The size of tensor a (380) must match the size of tensor b (3) at non-singleton dimension 0
Sitting whole day. can’t find any solution.