One hot encode label for multi-label classification

Chamroukhi · May 18, 2021, 11:29am

Dear all,
I'm trying to prepare a dataset for multi-label classification with PyTorch. Is there an example that uses a PyTorch DataLoader for multi-label classification?
thanks

pascal_notsawo · May 18, 2021, 11:48am

Is it for the classification of text, images, …?

Chamroukhi · May 18, 2021, 11:50am

video classification

pascal_notsawo · May 18, 2021, 12:51pm

In this case you can proceed as follows (I am just making an illustration)

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MultiLabelBinarizer

# Seed torch's global RNG so the random tensors drawn below are reproducible.
torch.manual_seed(0) 

class VideoDataset(Dataset):
    """Toy multi-label video dataset yielding ``(features, multi_hot_label)`` pairs.

    The file names and label sets are hard-coded placeholders; ``data_folder``
    is accepted but never read. Label sets are converted to multi-hot rows
    with ``MultiLabelBinarizer`` and stored as ``torch.long`` tensors.

    Args:
        data_folder: Location of the data (currently unused).
        in_memory: If true, every video is converted to a tensor once in the
            constructor (costs memory); otherwise the conversion is redone on
            every ``__getitem__`` call (costs time).
    """

    def __init__(self, data_folder, in_memory: bool):
        super().__init__()

        # Placeholder listing; a real implementation would presumably read
        # the file paths ("ptf") and labels ("l") from data_folder.
        video_paths = ["ptf1.mp4", "ptf2.mp4", "ptf3.mp4", "ptf4.mp4", "ptf5.mp4"]
        label_sets = [
            ["l1", "l2"],
            ["l2"],
            ["l1", "l2", "l3"],
            ["l2", "l3"],
            ["l2", "l3"],
        ]

        # One row per video, one column per distinct label:
        # [[1, 1, 0], [0, 1, 0], [1, 1, 1], [0, 1, 1], [0, 1, 1]]
        multi_hot = MultiLabelBinarizer().fit_transform(label_sets)

        self.in_memory = in_memory
        eager = self.in_memory
        samples = []
        for path, row in zip(video_paths, multi_hot):
            # Eager mode stores the converted tensor, lazy mode stores the path.
            source = self.video_to_tensor(path) if eager else path
            samples.append((source, torch.tensor(row, dtype=torch.long)))
        self.data = samples

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        entry = self.data[index]
        if self.in_memory:
            # Already converted in the constructor.
            return entry
        # Lazy mode: entry[0] is still a file path, convert it now.
        return self.video_to_tensor(entry[0]), entry[1]

    def __len__(self):
        """Return the number of stored samples."""
        return len(self.data)

    def video_to_tensor(self, file):
        """Stand-in for real video decoding / feature extraction.

        Ignores ``file`` and returns a length-5 tensor filled with values
        drawn uniformly from [-10, 10).
        """
        features = torch.empty(5)
        features.uniform_(-10, 10)
        return features

# Build the dataset eagerly: in_memory=True converts every video up front.
dataset = VideoDataset(in_memory=True, data_folder="my date folder path")
len(dataset)  # 5

# Mini-batches of two samples, reshuffled on each pass, default collation.
shuffle = True
batch_size = 2
dataloader = DataLoader(
    dataset,
    collate_fn=None,
    shuffle=shuffle,
    batch_size=batch_size,
)

Here is how the classification can be done

class Model(nn.Module):
    """Single linear layer mapping a feature vector to one raw score per label.

    Args:
        input_dim: Size of each input feature vector.
        output_dim: Number of output scores (3 labels by default).
    """

    def __init__(self, input_dim=5, output_dim=3):
        super().__init__()
        # No activation is applied: forward returns the plain affine output.
        self.linear_layer = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        scores = self.linear_layer(x)
        return scores

# One pass over the data: a linear model trained with Adam.
model = Model()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
# Multi-label: each label gets its own sigmoid + binary cross-entropy term.
criterion = nn.BCEWithLogitsLoss()

for x, y in dataloader:
    optimizer.zero_grad()
    y_pred = model(x)  # raw scores from the linear layer, no sigmoid applied
    # BCEWithLogitsLoss takes (input, target): the predictions come first and
    # the targets must be floating point, so the integer multi-hot labels
    # are cast to float here.
    loss = criterion(y_pred, y.float())
    loss.backward()
    optimizer.step()