I try to use rcnn to train my model,. I follow the guidence to create my dataset and put it into the model, but it seems that I got something wrong in my model and I don’t know why. Who can tell me what is wrong since I have been frustrated by this part.
Here is my dataloader
import os
import numpy as np
import torch
from PIL import Image
import xml.dom.minidom as xmldom
from torchvision import transforms as T
def parse_xml(fn):
xml_file = xmldom.parse(fn)
eles = xml_file.documentElement
print(eles.tagName)
xmin = eles.getElementsByTagName("xmin")[0].firstChild.data
xmax = eles.getElementsByTagName("xmax")[0].firstChild.data
ymin = eles.getElementsByTagName("ymin")[0].firstChild.data
ymax = eles.getElementsByTagName("ymax")[0].firstChild.data
return xmin, xmax, ymin, ymax
class MaskDataset(object):
def __init__(self, pic_root, mask_root ,transforms):
self.pic_root = pic_root
self.mask_root = mask_root
self.transforms = transforms
self.imgs = list(sorted(os.listdir(os.path.join(pic_root, "image_mask/"))))
self.loc = list(sorted(os.listdir(os.path.join(mask_root, "location/"))))
def __getitem__(self, idx):
# load images ad masks
img_path = os.path.join(self.pic_root, "image_mask", self.imgs[idx])
loc_path = os.path.join(self.mask_root, "location", self.loc[idx])
img = Image.open(img_path).convert("RGB")
# note that we haven't converted the mask to RGB,
# because each color corresponds to a different instance
# with 0 being background
xmin,xmax,ymin,ymax= parse_xml(loc_path)
xmin = int(xmin)
xmax = int(xmax)
ymin = int(ymin)
ymax = int(ymax)
boxes = []
boxes.append([xmin, ymin, xmax, ymax])
boxes = torch.as_tensor(boxes, dtype=torch.float32)
labels = torch.ones(1,dtype=torch.int64)
target = {}
target["boxes"] = boxes
target["labels"] = labels
target["image_id"] = torch.tensor([idx])
if self.transforms is not None:
img = self.transforms(img)
return img, target
def __len__(self):
return len(self.imgs)
path = 'C:/Users/msi/Desktop/Final/image/'
transforms=T.Compose([
T.Resize((256,256),Image.BICUBIC),
T.ToTensor(),
T.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])
dataset=MaskDataset(path,path,transforms)
image,target=dataset.__getitem__(0)
for key in target:
print(key)
print(target[key])
data_loader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=True,num_workers=0)
Here is my model
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
# load a model pre-trained pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
# replace the classifier with a new one, that has
# num_classes which is user-defined
num_classes = 2 # 1 class (person) + background
# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
momentum=0.9, weight_decay=0.0005)
# and a learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
step_size=3,
gamma=0.1)
# let's train it for 10 epochs
num_epochs = 10
And every time I try to train my model, it shows the error like this
from tqdm import tqdm
model.train()
for epoch in range(num_epochs):
for image,target in tqdm(data_loader):
image = image.to(device)
for key in target:
target[key].to(device)
output = model(image,target)
>TypeError Traceback (most recent call last)
<ipython-input-285-a118b838b79c> in <module>
6 for key in target:
7 target[key].to(device)
----> 8 output = model(image,target)
E:\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
720 result = self._slow_forward(*input, **kwargs)
721 else:
--> 722 result = self.forward(*input, **kwargs)
723 for hook in itertools.chain(
724 _global_forward_hooks.values(),
E:\Anaconda3\lib\site-packages\torchvision\models\detection\generalized_rcnn.py in forward(self, images, targets)
61 assert targets is not None
62 for target in targets:
---> 63 boxes = target["boxes"]
64 if isinstance(boxes, torch.Tensor):
65 if len(boxes.shape) != 2 or boxes.shape[-1] != 4:
TypeError: string indices must be integers