Greetings,
Im currently working with this paper: Compare the performance of the models in art classification
It uses the following dataset: Multitask Painting Categorization by Deep Multibranch Neural Network | Imaging and Vision Laboratory
I got 3 task, with 100k samples and those have 1508 artists, 125 styles and 41 genres as classes.
My Program is as followed for Resnet multitask implementation:
mean = np.array([0.485, 0.456, 0.406])
std = np.array( [0.229, 0.224, 0.225])
# Device configuration
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
data_transforms = {
'train': transforms.Compose([
#transforms.ToPILImage(),
transforms.Resize((256,256)),
transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize(mean, std)
]),
'test': transforms.Compose([
transforms.ToPILImage(),
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize(mean, std)
]),
'val':
transforms.Compose([
transforms.ToPILImage(),
transforms.Resize((224, 224)),
#transforms.RandomResizedCrop(224),
transforms.ToTensor(),
transforms.Normalize(mean, std)
]),
}
class Resnet18_multiTaskNet(nn.Module):
def __init__(self, pretrained=True, frozen_feature_layers = False):
super().__init__()
self.resnet18 = models.resnet18(pretrained=pretrained)
self.features = nn.Sequential(*list(self.resnet18.children())[:-1]).cuda()
self.is_frozen = frozen_feature_layers
if frozen_feature_layers:
self.freeze_feature_layers()
# now lets add our new layers
in_features = self.resnet18.fc.in_features
#self.fc0 = nn.Linear(in_features, 512)
#self.bn_pu = nn.BatchNorm1d(in_features, eps = 1e-5)
# our five new heads for 5 tasks we have at hand!
self.fc_artist = nn.Linear(in_features, class_length ['artist'])
self.fc_style = nn.Linear(in_features, class_length ['style'])
self.fc_genre = nn.Linear(in_features, class_length ['genre'])
torch.nn.init.normal_(self.fc_artist .weight, mean=0.0, std=0.01)
torch.nn.init.normal_(self.fc_style.weight, mean=0.0, std=0.01)
torch.nn.init.normal_(self.fc_genre.weight, mean=0.0, std=0.01)
def forward(self, input_imgs):
output = self.features(input_imgs)
#output = output.view(input_imgs.size(0), -1)
#output = self.bn_pu(F.relu(self.fc0(output)))
# since color is multi label we should use sigmoid
# but since we want a numerical stable one, we use
# nn.BCEWithLogitsloss, as a loss which itself applies sigmoid
# and thus accepts logits. so we wont use sigmoid here for that matter
# its much stabler than sigmoid+BCE
print(output.shape)
output = torch.flatten(output, 1)
print(output.shape)
prd_arist = self.fc_artist(output)
prd_style = self.fc_style(output)
prd_genre = self.fc_genre(output)
return prd_arist, prd_style, prd_genre
def _set_freeze_(self, status):
for n,p in self.features.named_parameters():
p.requires_grad = status
# for m in self.features.children():
# for p in m.parameters():
# p.requires_grad=status
def freeze_feature_layers(self):
self._set_freeze_(False)
def unfreeze_feature_layers(self):
self._set_freeze_(True)
class customdataset(Dataset):
def __init__(self, csv_file, root_dir, transform, n =None):
if n == None:
df = pd.read_csv(csv_file)
else:
df = pd.read_csv(csv_file, nrows=n)
unsorted_labels = {x: df[x].unique() for x in ['artist','style','genre']}
self.labels = {x: np.sort(unsorted_labels[x]) for x in ['artist','style','genre']}
self.annotations = df
self.root_dir = root_dir
self.transform = transform
def __len__(self):
return len(self.annotations)
def __getitem__(self, index):
img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
image = Image.open(img_path).convert('RGB')
image_artist = self.annotations.iloc[index,1]
image_style = self.annotations.iloc[index,2]
image_genre = self.annotations.iloc[index,3]
#test = le.transform(list(le.classes_))
#le fit anpassen an die unique df col
le = preprocessing.LabelEncoder()
le.fit(self.labels['style'])
image_style = le.transform([image_style])
le.fit(self.labels['artist'])
image_artist = le.transform([image_artist])
le.fit(self.labels['genre'])
image_genre = le.transform([image_genre])
if self.transform:
#image = np.array(image)
#size = image.shape
#print(size)
#if(size[2] == 1):
#image = gray2rgb(image)
image = self.transform(image)
return(image,image_artist,image_style,image_genre)
def train_model(model, criterion, optimizer, scheduler, num_epochs):
since = time.time()
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
fields = ['artist', 'style', 'genre']
#accuracies = [0.0]*len(fields)
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('\nLearning rate at this epoch is: %0.9f' % scheduler.get_lr()[0])
print('\nLearning rate at this epoch is: %.9f ', optimizer.param_groups[0]['lr'], '\n')
print('-' * 10)
lrs = []
# Each epoch has a training and validation phase
for phase in ['train', 'val']:
if phase == 'train':
model.train() # Set model to training mode
else:
model.eval() # Set model to evaluate mode
running_loss = 0.0
running_loss2 = 0.0
running_loss3 = 0.0
running_loss4 = 0.0
running_loss5 = 0.0
running_corrects_artist = 0
running_corrects_style = 0
running_corrects_genre = 0
# Iterate over data.
i = 0
for inputs,image_artist,image_style,image_genre in dataloaders[phase]:
i = i + 1
image_style
inputs = inputs.to(device)
image_style = image_style.to(device)
image_artist= image_artist.to(device)
image_genre = image_genre.to(device)
# forward
# track history if only in train
with torch.set_grad_enabled(phase == 'train'):
outputs = model(inputs)
#style
_, preds_artist = torch.max(outputs[0], 1)
image_artist= torch.squeeze(image_artist).long()
loss_artist = criterion[0](outputs[0], image_artist)
f1_metric['artist'](preds_artist,image_artist.data)
#style
_, preds_style = torch.max(outputs[1], 1)
image_style= torch.squeeze(image_style).long()
loss_style = criterion[1](outputs[1], image_style)
#genre
_, preds_genre = torch.max(outputs[2], 1)
image_genre= torch.squeeze(image_genre).long()
loss_genre = criterion[2](outputs[2], image_genre)
loss_final = loss_artist + loss_style + loss_genre
# backward + optimize only if in training phase
if phase == 'train':
# zero the parameter gradients
optimizer.zero_grad()
loss_final.backward()
optimizer.step()
# statistics
running_loss += loss_final.item() * inputs.size(0)
running_loss2 += loss_final.item() * inputs.size(0)
running_loss3 += loss_artist * inputs.size(0)
running_loss4 += loss_style * inputs.size(0)
running_loss5 += loss_genre * inputs.size(0)
#Accuarcy corrects
running_corrects_artist += torch.mean((preds_artist.view(*image_artist.shape) == image_artist).float())
running_corrects_style += torch.mean((preds_style.view(*image_style.shape) == image_style).float())
running_corrects_genre += torch.mean((preds_genre.view(*image_genre.shape) == image_genre).float())
running_corrects_artis_2 = torch.mean((preds_artist.view(*image_artist.shape) == image_artist).float())
print(f'\n running_corrects_artis_2: {running_corrects_artis_2}',)
running_corrects_style_2 = torch.mean((preds_style.view(*image_style.shape) == image_style).float())
print(f' running_corrects_style_2: {running_corrects_style_2}',)
running_corrects_genre_2 = torch.mean((preds_genre.view(*image_genre.shape) == image_genre).float())
print(f' running_corrects_genre_2: {running_corrects_genre_2} \n',)
#F1-Score
current_f1_artist = f1_metric['artist'](preds_artist,image_artist.data)
current_f1_style = f1_metric['style'](preds_style,image_style.data)
current_f1_genre = f1_metric['genre'](preds_genre,image_genre.data)
#Plot
lrs.append(torch.mean((preds_artist.view(*image_artist.shape) == image_artist).float()).cpu())
if(i) % 10 == 0:
plt.plot(lrs)
plt.savefig('foo.png')
if (i+1) % 1 == 0:
print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss_final.item():.4f} ,Running Loss: {running_loss / dataset_sizes[phase]}, Running Loss for Batch: {running_loss2/ 80} ')
#print(f'stylepred: {preds_style} , styleimage: {image_style.data}')
print(f'running_corrects_artist: {running_corrects_artist} , running_corrects_style: {running_corrects_style}, running_corrects_genre: {running_corrects_genre}')
print(f'loss_final.item(): {loss_final.item()} ')
print(f'loss_artist: {loss_artist.item()} , loss_style: {loss_style.item()}, loss_gemre: {loss_genre.item()} ')
print(f'loss_artist * inputs.size(0): {loss_artist.item() * inputs.size(0)} , loss_style: {loss_style.item() * inputs.size(0)}, loss_gemre: {loss_genre.item() * inputs.size(0)} ')
running_loss2 = 0.0
if phase == 'train':
scheduler.step()
#Losses
epoch_loss = running_loss / dataset_sizes[phase]
epoch_loss_artist = running_loss3 / dataset_sizes[phase]
epoch_loss_style = running_loss4 / dataset_sizes[phase]
epoch_loss_genre = running_loss5 / dataset_sizes[phase]
#Accuracys
epoch_acc_artist = running_corrects_artist.double() / dataset_sizes[phase]
epoch_acc_style = running_corrects_style.double() / dataset_sizes[phase]
epoch_acc_genre = running_corrects_genre.double() / dataset_sizes[phase]
print()
print('{} artist_Acc: {:.4f} '
'style_acc: {:.4f} '
'genre_acc: {:.4f} '.format(
phase, epoch_acc_artist ,epoch_acc_style,epoch_acc_genre))
print('{} total loss: {:.4f} artist loss: {:.4f} style loss: {:.4f} genre loss: {:.4f}'.format(phase,epoch_loss,epoch_loss_artist ,
epoch_loss_style,epoch_loss_genre,
))
print()
# deep copy the model
if phase == 'val' and epoch_acc_genre > best_acc:
best_acc = epoch_acc_genre
best_model_wts = copy.deepcopy(model.state_dict())
print()
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
time_elapsed // 60, time_elapsed % 60))
print('Best val Acc: {:4f}'.format(best_acc))
print('F1-Score:')
print(f'Artist: {current_f1_artist}')
print(f'Style: {current_f1_style}')
print(f'Genre: {current_f1_genre}')
# load best model weights
model.load_state_dict(best_model_wts)
return model
I currently train everything on 3 Machines, the VGG-Model gets only 30% and transformer too. So i must make big mistakes. I have no idea where, because in the journal they hit about 60-80% accuracy with the same Data. The models start in each category with around 0% and they need several epochs go get better for example they need 20 epochs to make it from 0 to 8% or something.
They used a 0.2 learning rate from what i could gather from the studies and they implement also the coisinelearningscheduler. with SGD. They used 160 epochs and i need about 1 week to make 160 epochs currently.
I think my class for the model is maybe the problem