Multi Label Classification in pytorch

ok, @AjayTalati, you can try license number plate dataset.
there are six or seven digits/letters in plate, if choose 7,
every character has 36 possible classes(A-Z,0-9)
so every number plate has 7*36 = 252 labels as targets. A value of 1 at index i*36+k (0<=i<num_character, 0<=k<=35) marks one character: i indicates the position and k indicates the value of the character.
for example, if target[49]=1, then 49 = 1*36+13, so the 2nd character is ‘N’ (k counts from ‘A’ = 0, as in the code below)

i’m also learning pytorch, and take it as an exercise,

the input is BCHW, using multilabelmarginloss()

1 Like

Hey @dablyo ,

thanks a lot, great idea, :smile: ! And thank you for the example too!

Have you got a link to the data set you are using please? I’d like to work on it too as an exercise :slight_smile:,

I’ve tried a few experiments with using multilabelmarginloss(), but I couldn’t get it to work ?

I could still train multiple, multi-class, classifiers on the same dataset. So for example I could train, 7 classifiers, one for each digit/letter in a number plate, but that’s not too smart because there’s some structure in the sequence of digits/letters, at least for UK number plates :smile: .

Kind regards,

Ajay

1 Like

In http://matthewearl.github.io/2016/05/06/cnn-anpr/, the author generates the dataset from background pictures and a number plate font file; you can learn from it.

I’ve met a problem:
while training, from the 2nd minibatch on, the output of multilabelmarginloss() is zero, and I can’t find out the reason.

the input image size is 224*224, the target vector width is 252 (7*36),
‘X’ versus 000000000000000000000001000000000000
’A’ versus 100000000000000000000000000000000000
the source code is as follows:

import …

# Character set used on plates. Letters come first, so within each
# per-character segment 'A' is class 0 and '0' is class 26.
DIGITS = "0123456789"
LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
CHARS = LETTERS + DIGITS
# Number of characters on one plate.
NPLEN = 7
# Width of the flattened target vector: one len(CHARS)-wide segment per
# character position (7 * 36 = 252). Derived so it cannot drift out of sync.
NUM_CLASSES = NPLEN * len(CHARS)

class anprmodel(nn.Module):
    """CNN that scores every (position, character) slot of a number plate.

    Three blocks of 5x5 convolution (padding 2) followed by 2x2 max pooling
    halve the spatial size each time (224 -> 112 -> 56 -> 28). Two fully
    connected layers then map the flattened 28*28*128 features to
    NUM_CLASSES outputs. No activation is applied to the final layer, so
    the result is raw scores.
    """

    def __init__(self):
        super(anprmodel, self).__init__()
        self.num_classes = NUM_CLASSES
        self.conv1 = nn.Conv2d(1, 48, kernel_size=5, padding=2)
        self.pool1 = nn.MaxPool2d(kernel_size=(2, 2), stride=2)
        self.conv2 = nn.Conv2d(48, 64, kernel_size=5, padding=2)
        self.pool2 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv3 = nn.Conv2d(64, 128, kernel_size=5, padding=2)
        self.pool3 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.fc1 = nn.Linear(28 * 28 * 128, 2048)
        self.fc2 = nn.Linear(2048, NUM_CLASSES)

    def forward(self, x):
        """Return the NUM_CLASSES scores for each image in the batch.

        Args:
            x: single-channel image batch; the layer sizes assume
               224x224 inputs (batch, 1, 224, 224).

        Returns:
            Tensor with one row of NUM_CLASSES raw scores per image.
        """
        x = F.relu(self.pool1(self.conv1(x)))  # input: 224*224
        x = F.relu(self.pool2(self.conv2(x)))  # input: 112*112
        x = F.relu(self.pool3(self.conv3(x)))  # input: 56*56
        x = x.view(-1, 28 * 28 * 128)          # 28*28 feature maps, flattened
        x = F.relu(self.fc1(x))
        x = self.fc2(x)                        # output: NUM_CLASSES (252)
        return x

class NPSET(torch_utils_data.Dataset):
    """Dataset of grayscale plate images whose label is encoded in the file name.

    File names follow ``xxxxxxxx_yyyyyyy_z.png``; the second
    underscore-separated field is the plate text. Every image is read as
    grayscale, resized to 224x224 and kept in memory.
    """

    # Default image root; replaced by the ``root`` argument in __init__.
    picroot = 'np'

    def __init__(self, root, data_transform=None):
        """Load every readable image found under *root*.

        Args:
            root: directory that is walked recursively for image files.
            data_transform: optional callable applied to each image in
                __getitem__.

        Raises:
            RuntimeError: if *root* does not exist.
        """
        self.picroot = root
        self.data_transform = data_transform
        if not os.path.exists(self.picroot):
            raise RuntimeError('{} doesnot exists'.format(self.picroot))
        # Accumulate across all walked directories; resetting these inside
        # the walk would keep only the last directory visited.
        imgs = []
        labels = []
        for dirpath, _dirnames, filenames in os.walk(self.picroot):
            for filename in filenames:
                fields = filename.split('_')  # filename style: xxxxxxxx_yyyyyyy_z.png
                if len(fields) < 2:
                    # Not a labelled plate image; skip instead of crashing.
                    continue
                # Join with the directory being walked so files in
                # sub-directories resolve to their real location.
                picfilename = os.path.join(dirpath, filename)
                im = cv2.imread(picfilename, cv2.IMREAD_GRAYSCALE)
                if im is None:
                    # cv2.imread signals an unreadable file by returning None.
                    continue
                imgs.append(cv2.resize(im, (224, 224)))
                labels.append(fields[1])
        self.dataset = imgs
        self.labels = labels
        self.len = len(imgs)

    def code_to_vec(self, p, code):
        """Encode plate text as a flat one-hot-per-character vector.

        Args:
            p: unused; kept so existing callers keep working.
            code: plate text; every character must appear in CHARS.

        Returns:
            1-D numpy array of length len(code) * len(CHARS) holding a
            1.0 at position i * len(CHARS) + CHARS.index(code[i]) for
            every character i.
        """
        vec = np.zeros((len(code), len(CHARS)))
        for pos, char in enumerate(code):
            vec[pos, CHARS.index(char)] = 1.0
        return vec.flatten()

    def __getitem__(self, index):
        """Return (image, label vector) for the sample at *index*."""
        label, img = self.labels[index], self.dataset[index]
        if self.data_transform is not None:
            img = self.data_transform(img)
        labelarray = self.code_to_vec(1, label)
        return (img, labelarray)

    def __len__(self):
        """Return the number of loaded images."""
        return self.len

def accuracy(output,target): #Tensor:Tensor #size: batchsize252
batchsize=output.size(0)
assert(batchsize==target.size(0))
p=torch.chunk(output,7,1) #p[0]–p[6], batchsize
36
t=torch.chunk(target,7,1)

a=np.ones((batchsize,1),np.dtype('i8'))*7      #7,7,7,7,7.....7   num is batchsize
ts=torch.from_numpy(a)   #LongTensor, tmp, and will be cut
ps=torch.from_numpy(a)

for i in range(0,NPLEN):   # the index of max value in every segment
    _,pred=torch.max(p[i],1)
    ps=torch.cat((ps,pred),1)
    _,pred=torch.max(t[i],1)
    ts=torch.cat((ts,pred),1)
sub=torch.LongTensor([1,2,3,4,5,6,7])    
ts=torch.index_select(ts,1,sub)   #LongTensor
ps=torch.index_select(ps,1,sub) #LongTensor
tspseq=torch.eq(ts,ps)      #ByteTensor
tspseqsum=torch.sum(tspseq,1)   #ByteTensor ,it will be 7 if right
a=np.ones((batchsize,1),np.uint8)*7   #byte ndarray
result=torch.eq(tspseqsum,torch.from_numpy(a))
return batchsize,torch.sum(result)   #batchsize  number of right

class recMeter(object):
    """Track the running accuracy and remember the best value seen.

    Attributes:
        right: number of correct samples since the last reset().
        sum: number of samples seen since the last reset().
        current: accuracy computed by the last updateaccurate() call.
        best: highest ``current`` observed so far (survives reset()).
        is_best: True when the last updateaccurate() set a new best.
    """

    def __init__(self):
        self.is_best = False
        self.best = 0
        self.current = 0
        self.reset()

    def reset(self):
        """Clear the per-epoch counters; ``best`` is kept."""
        self.right = 0
        self.sum = 0
        self.current = 0

    def updatecnt(self, n, r):
        """Add a batch of *n* samples of which *r* were right."""
        self.right += r
        self.sum += n

    def updateaccurate(self):
        """Recompute ``current`` and update ``best`` / ``is_best``."""
        # float() keeps true division on Python 2; with no samples the
        # accuracy is reported as 0.0 instead of dividing by zero.
        self.current = float(self.right) / self.sum if self.sum else 0.0
        self.is_best = self.current > self.best
        if self.is_best:
            self.best = self.current

if __name__ == "__main__":
    model = anprmodel()
    model.cuda()
    cudnn.benchmark = True
    batch_size = 10
    # NOTE(review): ToTensor normally rescales 8-bit pixels to [0, 1], while
    # these statistics look like they were measured on raw 0-255 pixels (and
    # 3893.6 looks like a variance rather than a standard deviation). If so,
    # every input collapses to nearly the same value - confirm.
    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((107.897212036,), (3893.57887653,)),
    ])
    npset = NPSET(root='/home/wang/git/nppic/nproot/plate', data_transform=data_transform)
    nploader = torch.utils.data.DataLoader(npset, batch_size=batch_size, shuffle=True, num_workers=1)  # train
    npvalset = NPSET(root='/home/wang/git/nppic/npval/plate', data_transform=data_transform)
    npvalloader = torch.utils.data.DataLoader(npvalset, batch_size=batch_size, shuffle=False, num_workers=1)  # validate
    # NOTE(review): MultiLabelMarginLoss expects each target row to list
    # class *indices*, terminated by -1. NPSET yields a 0/1 vector, which is
    # therefore read as "classes 0 and 1" for every sample. That is the
    # likely cause of the loss dropping to zero after the first mini-batch;
    # MultiLabelSoftMarginLoss or BCE-style losses take 0/1 float targets.
    criterion = nn.MultiLabelMarginLoss()
    optimizer = torch.optim.SGD(model.parameters(), 0.1, momentum=0.9)

    meter = recMeter()
    for epoch in range(0, 1):
        # Learning rate decayed by 10 every 30 epochs. It is computed but
        # not applied while the two lines below stay commented out.
        lr = 0.1 * (0.1 ** (epoch // 30))
        #for param_group in optimizer.param_groups:
        #    param_group['lr']=lr
        # train
        model.train()
        for i, data in enumerate(nploader):
            inputs, targets = data                # inputs size: batchsize*224*224
            inputs = torch.unsqueeze(inputs, 1)   # inputs size: batchsize*1*224*224
            # .long() replaces the round trip through numpy with np.long,
            # an alias that recent NumPy releases no longer provide.
            targets = targets.long()
            targets = targets.cuda()
            inputs = inputs.cuda()
            input_var = torch.autograd.Variable(inputs)
            target_var = torch.autograd.Variable(targets)

            optimizer.zero_grad()
            output_var = model(input_var)
            # process loss
            character_loss = criterion(output_var, target_var)

            # compute gradient and do SGD step
            character_loss.backward()
            optimizer.step()

I’ve execute train loop in python console by this way:

npiter=iter(nploader)

then

# One training step, run by hand on the next mini-batch.
(inputs, targets) = next(npiter)
inputs = torch.unsqueeze(inputs, 1)
# .long() replaces the numpy round trip that relied on np.long.
targets = targets.long()
targets = targets.cuda()
inputs = inputs.cuda()
input_var = torch.autograd.Variable(inputs)
target_var = torch.autograd.Variable(targets)
optimizer.zero_grad()
output_var = model(input_var)
character_loss = criterion(output_var, target_var)
character_loss.backward()
optimizer.step()
# NOTE(review): .data[0] matches the PyTorch release used in this thread;
# newer releases return a 0-dim loss and need character_loss.item() instead.
print('Loss: {:.6f}'.format(character_loss.data[0]))

From the 2nd mini batch on, the loss becomes 0.
I’m stuck here.

I got a similar result: I got zero loss from the second epoch. Did you find the reason?

1 Like

Hi there,

I can’t get multi-label classification working either, but @bartolsthoorn and @mratsim have found possible ways to do it here

hope that helps?

Aj

@AjayTalati OK I wrote a simple example here: https://gist.github.com/bartolsthoorn/36c813a4becec1b260392f5353c8b7cc

For accuracy it is important to note that you can pass the output first through nn.Sigmoid and everything > 0.5 is true (look at the Sigmoid function: https://en.wikipedia.org/wiki/Sigmoid_function

3 Likes

Hey @bartolsthoorn,

that’s really helpful, thank you very much :smile:, really nice example :smile:

Kind regards,

Ajay

@AjayTalati

Either after your last fc you do a sigmoid and then you use BCELoss or F.binary_cross_entropy as your criterion/loss function

Or you directly use MultiLabelSoftMarginLoss as your loss function (it comes with sigmoid inside)

Now once you have your prediction, you need to threshold. 0.5 is the default naive way but it’s probably not optimal. In any case, once you get there, great !

Next part is technical optimization, you can do Multilabel classification without

Regarding the threshold, you might want to optimize either a common threshold for all your outputs (it can be 0.2, 0.5, 0.123456, who knows) or optimize a threshold per label class, especially if your classes are unbalanced.
You will need a solid validation set and a MultiLabel evaluation metrics (Hamming Loss, F1-score, Fbeta score).

An example code for the first strategy is here on Kaggle.

For the second strategy, I’m deep into various papers myself so I can’t help yet.
One thing to keep in mind is your “best threshold” will probably overfit the validation set, so use regularization, cross-validation or other anti-overfitting strategy.

8 Likes

Does anyone understand how MultiMarginLoss is calculated exactly? I’m not sure I understand completely.

loss(x, y) = sum_ij(max(0, 1 - (x[y[j]] - x[i]))) / x.size(0)
where i == 0 to x.size(0), j == 0 to y.size(0), y[j] != 0, and i != y[j] for all i and j.

The docs say y is a set of indices. If y[j] != 0 is enforced, how do you check the loss for class 0? Also if x belongs to two or more classes, how does max(0, 1 - (x[y[j]] - x[i])) contribute to the loss when both y[j] and x[i] are classes that x belongs to?

I also don’t know how to find the source code for MultiMarginLoss the docs link isn’t very informative.

1 Like

Had the same issue with the loss documentation. My guess is that the current code is probably this one https://github.com/pytorch/pytorch/blob/master/torch/legacy/nn/MultiLabelMarginCriterion.py and that what is in the docs is a stub from before it was converted to the new APIs

2 Likes

Hi Everyone,

I’m trying to Finetune the pre-trained convnets (e.g., resnet50) for a data set, which have 3 categories. In fact, I want to extend the introduced code of ‘Transfer Learning tutorial’ (Transfer Learning tutorial) for a new data set which have 3 categories. In addition, in my data set each image has just one label (i.e., each train/val/test image has just one label). Could you help me please to do that?
I have changed the above-mentioned code as follows:

  1. I have changed the parameters of nn.Linear as follow:

num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(num_ftrs, 3) # 3 means we have 3 class labels

  1. I have changed the Loss function:
    criterion = nn.NLLLoss()

  2. I have changed the ‘train_model’ method as follow:


# LogSoftmax turns the raw network outputs into log-probabilities, which is
# the input NLLLoss expects (together they match what CrossEntropyLoss
# computes on the raw outputs).
m = nn.LogSoftmax()
outputs = model(inputs)
# Predicted class = index of the largest raw output along dimension 1.
_, preds = torch.max(outputs.data, 1)
loss = criterion(m(outputs), labels)

However, my obtained result isn’t good at all. As a result, my precise questions are as follows:

  1. In these cases which Loss function must be used?
  2. Are those changes for training the model and compute the loss correct?

@ahkarami I think you should create a separate topic for your issue which is very different from the original post. You are doing Multiclass classification instead of multilabel.

Your loss function is correct btw.

3 Likes

Thank you very much for your help. I agree with you there. As a result, I will create a new topic (Multiclass Classification in PyTorch).

@AjayTalati
@mratsim

I’ve used MultiLabelSoftMarginLoss and Adam optimizer,the loss looked well.
the SGD optimizer worked properly also, and same as last fc along with sigmoid,then BCELoss.

the MultiLabelMarginLoss doesn’t work, loss become 0 in 2nd minibatch.

the last loss is 0.08…, cann’t become smaller.
Train Epoch: 29 (19%)Loss: 0.081794
Train Epoch: 29 (39%)Loss: 0.080127
Train Epoch: 29 (59%)Loss: 0.083426
Train Epoch: 29 (79%)Loss: 0.086233
Train Epoch: 29 (99%)Loss: 0.082037

but for 252(36*7) destinations, for different image, for example 10 images from test set, the model gave a same prediction,just like:
0.0006 0.0006 0.0006 … 0.0891 0.0742 0.1139
0.0006 0.0006 0.0006 … 0.0891 0.0742 0.1139
0.0006 0.0006 0.0006 … 0.0891 0.0742 0.1139
… ⋱ …
0.0006 0.0006 0.0006 … 0.0891 0.0742 0.1139
0.0006 0.0006 0.0006 … 0.0891 0.0742 0.1139
0.0006 0.0006 0.0006 … 0.0891 0.0742 0.1139
[torch.FloatTensor of size 10x252]

I’m whole confused

1 Like

Hi,

I used BCELoss. It’s the standard for multi label classification in many ways. Give it a shot.

5 Likes

Hi Soumith,

I see that a bunch of people feel multi label classification is important and don’t have the details figured out. I can build an example based off of the code I wrote for my research. Is the standard way to fork the git repo and request merge?

Best,
Spandan

4 Likes

Hi Spandan,

that would be a great thing to help the community :wink: Good working examples are always warmly appreciated.

Best,
Ajay

1 Like

Dear @mratsim
I have an extremely large-scale multi-label data set (with about 12M images and 11K labels). Would you please kindly, guide me what is the best way to represent each sample with its corresponding labels? (with the best Multi-GPU utilization and data loading efficiency)
Thank you

1 Like

Hey @ahkarami, I’m sorry I never processed data on such scale (yet :wink: ) and without playing with the data and your IT architecture I would have trouble to help you there.

Here is how I would go:

  • Get as much RAM as you can, get SSDs as well.
  • Load the data on the fly with multiple workers so that the CPU can feed your data as fast as the GPUs process it.
  • Have a look into PyTorch Distributed: http://pytorch.org/tutorials/beginner/former_torchies/parallelism_tutorial.html
  • If data storage or storage of numpy array is an issue after preprocessing, look into bcolz for in-memory or on-disk compressed numpy compatible arrays. I wrote an article on that here but I only had 160 GB of images to process.

For the multi-GPU side, you will probably have to summon one of PyTorch core devs.

3 Likes

One way to do this is to not load everything into the dataloader, and just write one which assembles the labels on the fly. Say your GPU would handle something like 150 images in one go. So it needs 150 vectors of length 11K in one go, as each image’s label can be binarized [1,0,0,0,1…] (1 if the image has that label and 0 if it doesn’t.)

First, create a dictionary mapping image names to their labels and store it using python pickle. Let’s call this pickle file ‘image_name_to_label_vector.pckl’.
Now, you can create a new data loader like this. All I’ve changed from the original data loader is the get_item function, where I’m loading the labels on the fly from this dictionary. Simple!

IN YOUR PYTORCH FILE, add the new data loader -

from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
from ImageFolder_new import ImageFolder_spandan

DATA LOADER (save as ImageFolder_new.py) -

import torch.utils.data as data
import pickle
import numpy as np
from PIL import Image
import os
import os.path
import torch

# File name suffixes accepted as images by is_image_file(). Matching is
# case-sensitive, so both lower- and upper-case spellings are listed.
IMG_EXTENSIONS = [
    '.jpg', '.JPG', '.jpeg', '.JPEG',
    '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP',
]

# Mapping from image file name to its label vector, loaded once at import.
# The with-block closes the file even if unpickling fails.
# NOTE(review): pickle.load can execute arbitrary code; only load a pickle
# file that you created yourself.
with open('image_name_to_label_vector.pckl', 'rb') as f:
    image_name_to_label_vector = pickle.load(f)

def is_image_file(filename):
    """Return True when *filename* ends with one of the IMG_EXTENSIONS suffixes."""
    # str.endswith accepts a tuple of suffixes and checks them in one call.
    return filename.endswith(tuple(IMG_EXTENSIONS))


def find_classes(dir):
    """List the class folders directly under *dir*.

    Args:
        dir: directory whose immediate sub-directories are the classes.

    Returns:
        Tuple ``(classes, class_to_idx)``: the sorted sub-directory names
        and a dict mapping each name to its position in that list.
    """
    classes = sorted(
        entry for entry in os.listdir(dir)
        if os.path.isdir(os.path.join(dir, entry))
    )
    class_to_idx = {name: idx for idx, name in enumerate(classes)}
    return classes, class_to_idx


def make_dataset(dir, class_to_idx):
    """Collect every image file below the class folders of *dir*.

    Args:
        dir: root directory (``~`` is expanded); each immediate
            sub-directory is treated as one class.
        class_to_idx: dict mapping a class folder name to its index.

    Returns:
        List of ``(path, class_index)`` tuples. Class folders, walked
        directories and file names are each visited in sorted order, so
        the result is deterministic.

    Raises:
        KeyError: if an image is found in a folder missing from
            *class_to_idx*.
    """
    images = []
    dir = os.path.expanduser(dir)
    for target in sorted(os.listdir(dir)):
        class_dir = os.path.join(dir, target)
        if not os.path.isdir(class_dir):
            # Loose files directly under the root belong to no class.
            continue

        for root, _, fnames in sorted(os.walk(class_dir)):
            images.extend(
                (os.path.join(root, fname), class_to_idx[target])
                for fname in sorted(fnames)
                if is_image_file(fname)
            )

    return images


def pil_loader(path):
    """Load the image at *path* with PIL and return it converted to RGB."""
    # The file is opened explicitly so it is closed deterministically, which
    # avoids a ResourceWarning (https://github.com/python-pillow/Pillow/issues/835).
    with open(path, 'rb') as f, Image.open(f) as img:
        return img.convert('RGB')


def accimage_loader(path):
    """Load the image at *path* with accimage, falling back to PIL on IOError."""
    import accimage
    try:
        image = accimage.Image(path)
    except IOError:
        # Potentially a decoding problem; let PIL try the same file.
        image = pil_loader(path)
    return image


def default_loader(path):
    """Load the image at *path* with the loader matching torchvision's image backend."""
    from torchvision import get_image_backend
    # accimage is used only when it is the selected backend; PIL otherwise.
    loader = accimage_loader if get_image_backend() == 'accimage' else pil_loader
    return loader(path)


class ImageFolder_spandan(data.Dataset):
    """A generic data loader where the images are arranged in this way: ::
        root/dog/xxx.png
        root/dog/xxy.png
        root/dog/xxz.png
        root/cat/123.png
        root/cat/nsdf3.png
        root/cat/asd932_.png
    The folder layout is only used to find the images. The label returned
    for an image is looked up by file name in the module-level
    ``image_name_to_label_vector`` dictionary.
    Args:
        root (string): Root directory path.
        transform (callable, optional): A function/transform that  takes in an PIL image
            and returns a transformed version. E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes in the
            label and transforms it.
        loader (callable, optional): A function to load an image given its path.
     Attributes:
        classes (list): List of the class names.
        class_to_idx (dict): Dict with items (class_name, class_index).
        imgs (list): List of (image path, class_index) tuples
    """

    def __init__(self, root, transform=None, target_transform=None,
                 loader=default_loader):
        classes, class_to_idx = find_classes(root)
        imgs = make_dataset(root, class_to_idx)
        if len(imgs) == 0:
            raise(RuntimeError("Found 0 images in subfolders of: " + root + "\n"
                               "Supported image extensions are: " + ",".join(IMG_EXTENSIONS)))

        self.root = root
        self.imgs = imgs
        self.classes = classes
        self.class_to_idx = class_to_idx
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader

    def __getitem__(self, index):
        """
        Args:
            index (int): Index
        Returns:
            tuple: (image, label) where label is the entry stored in
            ``image_name_to_label_vector`` for the image's file name.
        Raises:
            KeyError: if the file name has no entry in the dictionary.
        """
        # The folder's class index is not part of the returned sample.
        path, _ = self.imgs[index]
        img = self.loader(path)
        if self.transform is not None:
            img = self.transform(img)
        # basename() handles whichever separator os.path.join used when the
        # path was built, unlike splitting on '/'.
        name = os.path.basename(path)
        label = image_name_to_label_vector[name]
        # Transform the label that is actually returned; transforming the
        # unused class index would have no effect on the sample.
        if self.target_transform is not None:
            label = self.target_transform(label)
        return img, label

    def __len__(self):
        return len(self.imgs)

That handles your data loading without anything too fancy. If you have the resources to parallelise this, feel free to use DataParallel!

1 Like