Why does my binary classification of field parcel images not work?

Hi,

So I have 3,000 images of parcels, split into 80% for training and 10% each for validation and testing.

I am using densenet121 with pretrained weights.
I currently do not get any worthwhile results while trying momentums — 0.7, 0.8, 0.9, 1.0 — and learning rates — 0.01, 0.001, 0.0001, 0.02, 0.002, 0.0002, 0.03, 0.003, 0.0003, 0.04, 0.004, 0.0004.

Do I just need more images, or am I doing something fundamentally wrong?

Data examples.
Positive classes:
image
image
image

Negative classes:
image
image
image

Results examples:
image

Model

# DenseNet-121 backbone with ImageNet weights; freeze every backbone
# parameter so only the replacement 2-logit classifier head is trained.
model = models.densenet121(weights=models.DenseNet121_Weights.DEFAULT)
for frozen_param in model.parameters():
    frozen_param.requires_grad = False

num_ftrs = model.classifier.in_features
model.classifier = nn.Linear(num_ftrs, 2)
# Alternative deeper head (currently unused):
# model.classifier = nn.Sequential(nn.Linear(num_ftrs, 512), nn.ReLU(), nn.Dropout(0.5), nn.Linear(512, 2))
parameters_to_train = model.classifier.parameters()
model = model.to(device)

# Two-class cross-entropy over the head's logits; SGD updates only the head.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(parameters_to_train, lr=l, momentum=m)
model.train()

Using dataset and dataloader like this:

# Augmentation: with overall probability 0.3 apply the flip pair (each flip
# then fires independently with p=0.5), after the Mode filter.
# NOTE(review): Mode(kernel_size=5) is defined elsewhere — presumably a
# mode/majority smoothing filter; confirm its definition.
hflipper = transforms.RandomHorizontalFlip(p=0.5)
vflipper = transforms.RandomVerticalFlip(p=0.5)

random_transf = transforms.RandomApply(nn.ModuleList([hflipper, vflipper]), p=0.3)
transf = transforms.Compose([Mode(kernel_size=5), random_transf])

# NOTE(review): the validation split uses the SAME random augmentations as
# training; evaluation is normally done on deterministic inputs — confirm.
# NOTE(review): no transforms.Normalize(mean, std) matching the pretrained
# DenseNet weights is applied anywhere visible in this snippet.
train_ds = ParcelDataset(CSV_PATH, IMAGE_PATH, "train", "good", transforms=transf)
val_ds = ParcelDataset(CSV_PATH, IMAGE_PATH, "valid", "good", transforms=transf)

# batch_size=1 — presumably because parcel images vary in size and are
# resized per-sample in the training loop; confirm.
train_dl = DataLoader(train_ds, batch_size=1, shuffle=True, num_workers=0)
val_dl = DataLoader(val_ds, batch_size=1, shuffle=True, num_workers=0)

ParcelDataset:

# Parcel type code -> label string; 3 maps to None (presumably "no type" —
# confirm against the CSV's "type" column semantics).
code_dict = {0: "KVŽ", 1: "RAŽ", 2: "KVV", 3: None}


class ParcelDataset(Dataset):
    """Dataset of parcel images cut out of a large raster.

    Each CSV row describes one parcel (keyed by ``map_id``); the parcel's
    image is cut with ``cut_parcel`` and cached on disk as
    ``{OUTPUT_PATH}/shapes/res_{map_id}.tif``.
    """

    def __init__(
        self, csv, image_path, mode, rez_col, transforms=None, only_these=None
    ):
        # csv: path to the CSV listing parcels (also rewritten by
        #   __cleancsv__ to persist discovered image shapes).
        # image_path: source imagery path.
        # mode: split selector matched against the CSV "mode" column.
        # rez_col: CSV column holding the training label.
        # transforms: optional callable applied to the sample dict.
        # only_these: optional collection of map_ids to restrict to.
        #
        # NOTE(review): data_size is assigned but never read anywhere in
        # this class as shown — looks like leftover debug state.
        if mode == "train":
            self.data_size = 109
        else:
            self.data_size = int(round(10 * 0.2, 0))
        self.transforms = transforms
        self.only_these = only_these
        self.rez_col = rez_col
        # Filter the CSV to this split and verify parcels exist on disk /
        # in the contour layer.
        self.csv = self.__cleancsv__(csv, mode)
        self.image = image_path
        self.labels = self.csv[self.rez_col].tolist()
        self.class_to_idx = self.__findclasses__()

    def __bigger_than__(self, siz):
        """Drop rows whose recorded image is smaller than ``siz`` in either
        dimension; log the dropped indices to do_not_resuscitate.txt.

        NOTE(review): dunder names are conventionally reserved for Python's
        protocol methods; this is an ordinary helper. It is also not called
        from any code visible here.
        """
        to_del = []
        for i, row in self.csv.iterrows():
            x, y = row["x"], row["y"]

            # Rows without a measured shape have NaN x/y and are kept.
            if isinstance(x, float) and not math.isnan(x):
                x, y = int(x), int(y)
                if int(x) < siz or int(y) < siz:
                    to_del.append(i)
        self.csv = self.csv.drop(to_del)
        # NOTE(review): opened with "w", so each call overwrites the file
        # that __cleancsv__ later reads — confirm this ordering is intended.
        with open(f"{OUTPUT_PATH}/do_not_resuscitate.txt", "w") as f:
            for td in to_del:
                f.write(f"{td}\n")
        return self.csv

    def __findclasses__(self):
        """Map each distinct label to a stable integer index (sorted order)."""
        classes = sorted(set(self.labels))
        class_to_idx = {cls: idx for idx, cls in enumerate(classes)}
        return class_to_idx

    def __len__(self):
        # Number of rows remaining after __cleancsv__ filtering.
        return self.csv.shape[0]

    def __getitem__(self, idx):
        """Return a sample dict {"image", "rez", "path"} for row ``idx``.

        Loads the cached .tif when present, otherwise cuts the parcel on
        the fly. Pixel values are scaled to [0, 1].
        """
        try:
            parcel_id = self.csv.iloc[idx].map_id
            rez = self.csv.iloc[idx][self.rez_col]

            p = f"{OUTPUT_PATH}/shapes/res_{parcel_id}.tif"

            if os.path.exists(p):
                image = get_3d_array(p)
            else:
                try:
                    # NOTE(review): called with 3 args here but with 4 args
                    # (incl. parcel_type) in __cleancsv__ — confirm which
                    # signature cut_parcel actually has.
                    output = cut_parcel(parcel_id, IMAGE_PATH, CONTURES)
                    image = get_3d_array(output)
                except:
                    # NOTE(review): bare except swallows the error and leaves
                    # `image` unbound, so the division below raises NameError
                    # — consider re-raising or skipping the sample instead.
                    print(p)

            image = image / 255
            # Rebase the cached path under /home/ (drops the first two
            # path components).
            p = "/home/" + "/".join(p.split("/")[2:])
            sample = {"image": image, "rez": rez, "path": p}

            if self.transforms != None:
                # Build the cached augmented filename alongside the
                # original: res_<id>.tif -> res_moded_<id>.tif.
                moded = "/".join(sample["path"].split("/")[:-1]) + "/"
                end = sample["path"].split("/")[-1].split("_")
                end = end[0] + "_moded_" + end[1]
                p = moded + end

                if not os.path.exists(p):
                    image = self.transforms(sample)
                    # HWC -> CHW for PyTorch.
                    image = np.array(image)
                    image = image.transpose((2, 0, 1))
                else:
                    image = get_3d_array(p)
            # NOTE(review): with transforms=None the image is returned as
            # loaded, but in CHW after fresh augmentation — confirm
            # downstream code tolerates this asymmetry.
            return {"image": image, "rez": rez, "path": p}

        except Exception as e:
            print(f"Error loading file at index {idx}: {p}")
            print(f"Error details: {e}")
            raise

    def __cleancsv__(self, csv, mode):
        """Load the CSV, keep rows for this split, verify each parcel exists
        in the contour layer, cut any missing images, and persist the image
        shapes back into the CSV file on disk.
        """
        full_df = pd.read_csv(csv)
        df = full_df.loc[full_df["mode"] == mode]
        if self.only_these is not None:
            df["map_id"] = df["map_id"].astype(int)
            # print(type(self.only_these[0]), df['map_id'].dtypes)
            df = df[df["map_id"].isin(self.only_these)]
        if self.rez_col == "type":
            df = df.loc[(df["type"] == 0) | (df["type"] == 1) | (df["type"] == 2)]

        df = df.reset_index(
            level=None, inplace=False, col_level=0, col_fill="", drop=True
        )
        # print(f'df length: {len(df)}')
        to_del = []
        all_shapes = {}

        data = ogr.Open(CONTURES)
        layer = data.GetLayer()

        # Keep only parcels whose id appears in the contour shapefile.
        all_codes = [l.GetField("glk_dkl_id") for l in layer]
        part_codes = df["map_id"].tolist()
        new_list = list(set(all_codes) & set(part_codes))
        doesnt_exist = list(set(part_codes) - set(new_list))
        print(f"neegzistuoja: {doesnt_exist}")

        old_df = len(df)
        df = df[df["map_id"].isin(new_list)]
        # print(f'Of {old_df} rows, {len(df)} {mode} df rows remain.')

        # Indices previously flagged as unusable by __bigger_than__.
        # NOTE(review): dnr_codes is read but never used below — confirm.
        with open(f"{OUTPUT_PATH}/do_not_resuscitate.txt", "r") as f:
            dnr_codes = f.readlines()
        dnr_codes = [int(dc[:-1]) for dc in dnr_codes]

        for i, row in df.iterrows():
            parcel_id, parcel_type = row["map_id"], row["type"]

            p = f"{OUTPUT_PATH}/shapes/res_{parcel_id}.tif"
            if not os.path.exists(p):
                # try:
                res = cut_parcel(parcel_id, parcel_type, IMAGE_PATH, CONTURES)
                if res is None:
                    print(f"{parcel_id}: {res}")
                # Flag all-zero rasters (empty cuts) for deletion.
                img = gdal.Open(res)
                np_array = np.array(img.GetRasterBand(1).ReadAsArray())
                if int(np.amax(np_array)) == 0:
                    # TODO: actually drop the rows collected in to_del
                    to_del.append(i)
                all_shapes[parcel_id] = np_array.shape

        cols = df.columns.values.tolist()
        if "x" not in cols:
            df["x"], df["y"] = ["NaN"] * len(df), ["NaN"] * len(df)

        # Persist the discovered image shapes into both the split frame and
        # the full CSV on disk.
        for k, i in all_shapes.items():
            idx = df.index[df["map_id"] == k].tolist()[0]
            full_idx = full_df.index[full_df["map_id"] == k].tolist()[0]
            x, y = i
            df.at[idx, "x"], df.at[idx, "y"] = x, y
            full_df.at[full_idx, "x"], full_df.at[full_idx, "y"] = x, y
        full_df.to_csv(csv, index=False)

        return df

Training:

# Head-only fine-tuning loop: per-sample resize (batch_size=1), forward,
# cross-entropy loss, SGD step; tracks the epoch's mean loss.
for epoch in range(num_epochs):
    running_loss = 0.0
    for batch in train_dl:
        inputs = batch["image"].to(device)
        labels = batch["rez"].to(device)
        path = batch["path"]

        # Upscale any dimension at or below the target size n, keeping the
        # aspect ratio (height first, then width on the possibly-resized tensor).
        old_h, old_w = inputs.shape[2], inputs.shape[3]
        if old_h <= n:
            inputs = resize_h_with_ratio(inputs, n)
        if inputs.shape[3] <= n:
            inputs = resize_w_with_ratio(inputs, n)
        inputs = inputs.float()

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight by batch size so the epoch average is per-sample.
        running_loss += loss.item() * inputs.size(0)

    epoch_loss = running_loss / len(train_dl.dataset)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")

torch.save(model.state_dict(), f"{OUTPUT_PATH}/densenet_fine.pth")

Validation:

# Rebuild the architecture, load the fine-tuned weights, and evaluate on the
# validation loader, accumulating accuracy plus per-sample truths/predictions.
model = models.densenet121(weights=models.DenseNet121_Weights.DEFAULT)
num_ftrs = model.classifier.in_features
model.classifier = nn.Linear(num_ftrs, 2)
model.load_state_dict(torch.load(f"{OUTPUT_PATH}/densenet_fine.pth"))
model = model.to(device)
model.eval()

total_prediction = 0
correct_prediction = 0

truths = []
pred = []

for batch in val_dl:
    inputs = batch["image"].to(device)
    labels = batch["rez"].to(device)
    path = batch["path"]

    # Same aspect-ratio-preserving upscale as in training.
    old_h, old_w = inputs.shape[2], inputs.shape[3]
    if old_h <= n:
        inputs = resize_h_with_ratio(inputs, n)
    if inputs.shape[3] <= n:
        inputs = resize_w_with_ratio(inputs, n)
    inputs = inputs.float()

    with torch.no_grad():
        outputs = model(inputs)
        predicted = torch.argmax(outputs, dim=1)
        truths.append(labels)
        pred.append(predicted)
        total_prediction += labels.size(0)
        correct_prediction += (predicted == labels).sum().item()

accuracy = correct_prediction / total_prediction

print(accuracy)
print(total_prediction, correct_prediction)

# .item() assumes batch_size=1 (one-element tensors per batch).
truths = [t.item() for t in truths]
pred = [p.item() for p in pred]

tp, tn, fp, fn, f1 = get_confusion_matrix_and_f1(truths, pred)

get_confusion_matrix_and_f1:

def get_confusion_matrix_and_f1(truths, pred):
    """Compute binary confusion-matrix counts and the F1 score.

    Labels are assumed to be 0 (negative) and 1 (positive).

    Args:
        truths: sequence of ground-truth labels (0 or 1).
        pred: sequence of predicted labels (0 or 1), same length as truths.

    Returns:
        Tuple ``(tp, tn, fp, fn, f1)``. ``f1`` is 0.0 when
        ``tp + fp + fn == 0`` (no positives anywhere), instead of raising
        ZeroDivisionError as the original did.
    """
    tp = sum(1 for t, p in zip(truths, pred) if p == 1 and t == 1)
    tn = sum(1 for t, p in zip(truths, pred) if p == 0 and t == 0)
    fp = sum(1 for t, p in zip(truths, pred) if p == 1 and t == 0)
    fn = sum(1 for t, p in zip(truths, pred) if p == 0 and t == 1)
    # F1 = tp / (tp + (fp + fn) / 2); guard the all-negative edge case.
    denom = tp + 0.5 * (fp + fn)
    f1 = tp / denom if denom else 0.0

    return tp, tn, fp, fn, f1

Thank you!!

1 Like

Could you try to run it without augmentations?
Also, I couldn't find the code for normalizing the data for the pretrained model.
I think you should have something like:

transform.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

in your code.

Sorry for the very late reply. Due to the time it takes to augment all the photos, I had done this step separately, and I have tried running the model on the original photos as well as the augmented ones — the results are similar.