Why 'TypeError: 'Column' object is not callable' error?

import pandas as pd
import numpy as np
import cv2
import os
import re
import glob
from PIL import Image
from natsort import natsorted
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision.models.detection.ssd import SSDClassificationHead, SSD, SSDScoringHead
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.models.detection._utils import retrieve_out_channels
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SequentialSampler
from matplotlib import pyplot as plt
from multiprocessing import Process, Pool
from pyspark import SparkContext, RDD, Row
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, FloatType, DateType
from pyspark.sql.functions import isnan, desc, asc, lit, when, col, year, month, dayofmonth, when
from pyspark.ml.feature import StringIndexer, VectorAssembler
from pyspark.ml.classification import RandomForestClassifier
from pyspark.ml.evaluation import BinaryClassificationEvaluator, ClusteringEvaluator
from pyspark.ml.clustering import KMeans
from petastorm.spark import SparkDatasetConverter, make_spark_converter
import pyspark.sql.functions as F

info = 'info.json'
spark = SparkSession.builder.appName("Pytorch").getOrCreate()
default_index = StringIndexer(inputCol="default", outputCol="default_index")
image_paths = glob.glob('test_data/test/' + "/*.jpg")
path_to_annotations = f'train/'
classes = pd.read_csv(f'{path_to_annotations}metadata/classes.csv')
csv = f'{path_to_annotations}labels/detections.csv'
#train_df = spark.read.option("multiLine", "true").option("encoding", "SJIS").csv(csv, header=True, sep=',', inferSchema=False)
train_df = spark.read.option('inferSchema', 'True').option('header', 'True').csv(csv)
train_df = train_df[['ImageID', 'LabelName', 'XMin', 'XMax', 'YMin', 'YMax']]
predictions = []
num_epoch = 100
num_classes = len(classes)
class WheatDataset(Dataset):
    def __init__(self, dataframe, image_dir, transforms=None):

        self.image_ids = dataframe['ImageID'].unique()
        self.df = dataframe
        self.image_dir =  f'{image_dir}data/'
        self.transforms = transforms

    def __getitem__(self, index: int):

        ImageID = self.image_ids[index]
        records = self.df[self.df['ImageID'] == ImageID]    
        image = cv2.imread(f'{self.image_dir}{ImageID}.jpg', cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        boxes = records[['XMin', 'YMin', 'XMax', 'YMax']].values
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]
        
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        area = torch.as_tensor(area, dtype=torch.float32)
        labels = torch.ones((records.shape[0],), dtype=torch.int64)
        iscrowd = torch.zeros((records.shape[0],), dtype=torch.int64)
        
        target = {}
        target['boxes'] = boxes
        target['labels'] = labels
        target['ImageID'] = torch.tensor([index])
        target['area'] = area
        target['iscrowd'] = iscrowd

        if self.transforms:
            sample = {
                'image': image,
                'bboxes': target['boxes'],
                'labels': labels
            }
            sample = self.transforms(**sample)
            image = sample['image']
            
            target['boxes'] = torch.stack(tuple(map(torch.tensor, zip(*sample['bboxes'])))).permute(1, 0)

        return image, target, ImageID

    def __len__(self) -> int:
        return self.image_ids.shape[0]
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')                                         
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = None
loss_hist = Averager()
itr = 1
delta = 1e-7

# Albumentations
def get_train_transform():
    return A.Compose([
        A.Flip(0.5),
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})
def collate_fn(batch):
    return tuple(zip(*batch))

train_loader = WheatDataset(train_df, path_to_annotations, get_train_transform())
train_loader = DataLoader(train_loader, batch_size=1, collate_fn=collate_fn)

TypeError Traceback (most recent call last)
/Users/yutanagao/Desktop/Object/Pytorch.ipynb Cell 5’ in <cell line: 4>()
1 def collate_fn(batch):
2 return tuple(zip(*batch))
----> 4 train_loader = WheatDataset(train_df, path_to_annotations, get_train_transform())
5 train_loader = DataLoader(train_loader, batch_size=1, collate_fn=collate_fn)

/Users/yutanagao/Desktop/Object/Pytorch.ipynb Cell 2’ in WheatDataset.init(self, dataframe, image_dir, transforms)
2 def init(self, dataframe, image_dir, transforms=None):
----> 4 self.image_ids = dataframe[‘ImageID’].unique()
5 self.df = dataframe
6 self.image_dir = f’{image_dir}data/’

TypeError: ‘Column’ object is not callable

The issue is raised in:

self.image_ids = dataframe[‘ImageID’].unique()

which I guess is raised by spark. Make sure dataframe is an actual pandas.DataFrame which you can index.

1 Like