I’m trying to make object detection using Pytorch. I can’t solve the error in the title. The data file uses a csv file.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image
from pandas import Series, DataFrame
from tqdm import tqdm
import io
import glob
import os
import time
import torch
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as opt
import torch.backends.cudnn as cudnn
import torch.nn.init as init
import torch.utils.data as data
import torchvision.datasets as datasets
from torch import utils
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms as T
from torchvision.models import detection
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
num_epoch = 100
num_classes = 6
batch_size = 5
info = 'info.json'
train = 'annotations/'
class PennFudanDataset(Dataset):
def __init__(self, root, transform=None):
self.root = root
self.transform = transform
self.imgs = list(sorted(os.listdir('images/')))
self.my_data = list(sorted(pd.read_csv(os.path.join(root, 'annotations.csv'))))
def __getitem__(self, index):
my_data = pd.read_csv(os.path.join(self.root, 'annotations.csv'))
img_path = os.path.join('images/', self.imgs[index])
img = Image.open(img_path)
mask = np.array(my_data)
obj_ids = mask[1:]
masks = mask == obj_ids[:, None, None]
for i in range(num_classes):
pos = np.where(masks[i])
xmin = np.min(pos[1])
xmax = np.max(pos[1])
ymin = np.min(pos[0])
ymax = np.max(pos[0])
boxes = []
boxes.append([xmin, ymin, xmax, ymax])
boxes = torch.as_tensor(boxes, dtype=torch.float32)
labels = torch.ones((num_classes,), dtype=torch.int64)
target = {}
target["boxes"] = boxes
target["labels"] = labels
target["image_id"] = torch.tensor([index])
if self.transform is not None:
img = self.transform(img)
return img, target
def __len__(self):
return len(self.my_data)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
transforms = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = opt.Adam(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
model = model.to(device)
train_loader = PennFudanDataset(train, transform=transforms)
train_loader = DataLoader(train_loader, batch_size=1)
model.train()
for i, (img, target) in enumerate(train_loader):
img = img.to(device)
target = target
outputs = model(img, target)
loss = criterion(outputs) # 損失を計算
loss.backward() # 逆伝播で勾配を計算
losses.append(loss.item())
optimizer.step() # 最適化\
for epoch in range(num_epoch):
print('Epoch {}/{}'.format(epoch, num_epoch))
print('-' * 10)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/var/folders/yc/zkgdxm9j5yv1_zlyqb_djtjc0000gn/T/ipykernel_2905/2861585909.py in <module>
5 target = target
6
----> 7 outputs = model(img, target)
8 loss = criterion(outputs) # 損失を計算
9 loss.backward() # 逆伝播で勾配を計算
/opt/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
/opt/anaconda3/lib/python3.8/site-packages/torchvision/models/detection/generalized_rcnn.py in forward(self, images, targets)
62 assert targets is not None
63 for target in targets:
---> 64 boxes = target["boxes"]
65 if isinstance(boxes, torch.Tensor):
66 if len(boxes.shape) != 2 or boxes.shape[-1] != 4:
TypeError: string indices must be integers