I am using the following code to fit a 3D human body model to 2D images; however, it is very slow: in a little over 2 hours it fitted only 100 images. Since the code does not natively support a batch size, is there some other way to force it to process a larger batch at once?
$ cat smplifyx/main.py
# -*- coding: utf-8 -*-
# Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
# holder of all proprietary rights on this computer program.
# You can only use this computer program if you have closed
# a license agreement with MPG or you get the right to use the computer
# program from someone who is authorized to grant you that right.
# Any use of the computer program without a valid license is prohibited and
# liable to prosecution.
#
# Copyright©2019 Max-Planck-Gesellschaft zur Förderung
# der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
# for Intelligent Systems and the Max Planck Institute for Biological
# Cybernetics. All rights reserved.
#
# Contact: ps-license@tuebingen.mpg.de
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import sys
import os
import os.path as osp
import time
import yaml
import torch
import smplx
from utils import JointMapper
from cmd_parser import parse_config
from data_parser import create_dataset
from fit_single_frame import fit_single_frame
from camera import create_camera
from prior import create_prior
# Turn off PyTorch's cuDNN backend for everything run by this script.
# NOTE(review): the reason is not stated in this file -- confirm it is still
# required before removing or changing it.
torch.backends.cudnn.enabled = False
def _ensure_dir(path):
    """Create directory ``path`` (with missing parents) if absent; return it."""
    if not osp.exists(path):
        os.makedirs(path)
    return path


def _format_elapsed(seconds):
    """Format a duration as ``HH hours, MM minutes, SS seconds``.

    Unlike ``time.strftime('%H ...', time.gmtime(seconds))`` the hour field
    is not wrapped at 24, so runs longer than one day are reported correctly.
    """
    hours, remainder = divmod(int(seconds), 3600)
    minutes, secs = divmod(remainder, 60)
    return '{:02d} hours, {:02d} minutes, {:02d} seconds'.format(
        hours, minutes, secs)


def main(**args):
    """Fit a body model to every person of every item in the dataset.

    The keyword arguments are the parsed configuration. Entries consumed
    here are ``output_folder`` (required), ``result_folder``,
    ``mesh_folder``, ``img_folder``, ``gender``, ``gender_lbl_type``,
    ``max_persons``, ``float_dtype``, ``use_cuda``, ``model_folder``,
    ``model_type``, ``use_vposer``, ``focal_length``, ``use_hands``,
    ``use_face``, ``num_pca_comps`` and the ``*_prior_type`` options. The
    remaining entries are forwarded unchanged to ``create_dataset``,
    ``smplx.create``, ``create_camera``, ``create_prior`` and
    ``fit_single_frame``.

    Side effects: creates the output directory tree, writes ``conf.yaml``
    into ``output_folder`` and prints progress and the total elapsed time.

    Raises:
        SystemExit: if ``float_dtype`` is neither ``'float32'`` nor
            ``'float64'``, or CUDA was requested but is not available.
        ValueError: if no body model exists for a person's gender label.
    """
    output_folder = args.pop('output_folder')
    output_folder = _ensure_dir(osp.expandvars(output_folder))

    # Store the arguments for the current experiment
    conf_fn = osp.join(output_folder, 'conf.yaml')
    with open(conf_fn, 'w') as conf_file:
        yaml.dump(args, conf_file)

    result_folder = args.pop('result_folder', 'results')
    result_folder = _ensure_dir(osp.join(output_folder, result_folder))

    mesh_folder = args.pop('mesh_folder', 'meshes')
    mesh_folder = _ensure_dir(osp.join(output_folder, mesh_folder))

    out_img_folder = _ensure_dir(osp.join(output_folder, 'images'))

    # Resolve the floating point type once. The previous code parsed it
    # twice and the first pass mapped 'float32' to torch.float64.
    float_dtype = args.get('float_dtype', 'float32')
    if float_dtype == 'float64':
        dtype = torch.float64
    elif float_dtype == 'float32':
        dtype = torch.float32
    else:
        print('Unknown float type {}, exiting!'.format(float_dtype))
        sys.exit(-1)

    use_cuda = args.get('use_cuda', True)
    if use_cuda and not torch.cuda.is_available():
        print('CUDA is not available, exiting!')
        sys.exit(-1)

    img_folder = args.pop('img_folder', 'images')
    dataset_obj = create_dataset(img_folder=img_folder, **args)

    start = time.time()

    # These options are popped only after the dataset has been created, so
    # create_dataset still receives them.
    input_gender = args.pop('gender', 'neutral')
    gender_lbl_type = args.pop('gender_lbl_type', 'none')
    max_persons = args.pop('max_persons', -1)

    joint_mapper = JointMapper(dataset_obj.get_model2data())

    model_params = dict(model_path=args.get('model_folder'),
                        joint_mapper=joint_mapper,
                        create_global_orient=True,
                        create_body_pose=not args.get('use_vposer'),
                        create_betas=True,
                        create_left_hand_pose=True,
                        create_right_hand_pose=True,
                        create_expression=True,
                        create_jaw_pose=True,
                        create_leye_pose=True,
                        create_reye_pose=True,
                        create_transl=False,
                        dtype=dtype,
                        **args)

    male_model = smplx.create(gender='male', **model_params)
    # SMPL-H has no gender-neutral model
    neutral_model = None
    if args.get('model_type') != 'smplh':
        neutral_model = smplx.create(gender='neutral', **model_params)
    female_model = smplx.create(gender='female', **model_params)

    # Create the camera object
    focal_length = args.get('focal_length')
    camera = create_camera(focal_length_x=focal_length,
                           focal_length_y=focal_length,
                           dtype=dtype,
                           **args)

    # No gradient is computed for the camera rotation.
    if hasattr(camera, 'rotation'):
        camera.rotation.requires_grad = False

    use_hands = args.get('use_hands', True)
    use_face = args.get('use_face', True)

    body_pose_prior = create_prior(
        prior_type=args.get('body_prior_type'),
        dtype=dtype,
        **args)

    jaw_prior, expr_prior = None, None
    if use_face:
        jaw_prior = create_prior(
            prior_type=args.get('jaw_prior_type'),
            dtype=dtype,
            **args)
        expr_prior = create_prior(
            prior_type=args.get('expr_prior_type', 'l2'),
            dtype=dtype, **args)

    left_hand_prior, right_hand_prior = None, None
    if use_hands:
        # The hand priors receive num_pca_comps as their num_gaussians.
        lhand_args = args.copy()
        lhand_args['num_gaussians'] = args.get('num_pca_comps')
        left_hand_prior = create_prior(
            prior_type=args.get('left_hand_prior_type'),
            dtype=dtype,
            use_left_hand=True,
            **lhand_args)

        rhand_args = args.copy()
        rhand_args['num_gaussians'] = args.get('num_pca_comps')
        right_hand_prior = create_prior(
            prior_type=args.get('right_hand_prior_type'),
            dtype=dtype,
            use_right_hand=True,
            **rhand_args)

    shape_prior = create_prior(
        prior_type=args.get('shape_prior_type', 'l2'),
        dtype=dtype, **args)

    angle_prior = create_prior(prior_type='angle', dtype=dtype)

    if use_cuda and torch.cuda.is_available():
        device = torch.device('cuda')

        camera = camera.to(device=device)
        female_model = female_model.to(device=device)
        male_model = male_model.to(device=device)
        if neutral_model is not None:
            neutral_model = neutral_model.to(device=device)
        body_pose_prior = body_pose_prior.to(device=device)
        angle_prior = angle_prior.to(device=device)
        shape_prior = shape_prior.to(device=device)
        if use_face:
            expr_prior = expr_prior.to(device=device)
            jaw_prior = jaw_prior.to(device=device)
        if use_hands:
            left_hand_prior = left_hand_prior.to(device=device)
            right_hand_prior = right_hand_prior.to(device=device)
    else:
        device = torch.device('cpu')

    # A weight for every joint of the model
    joint_weights = dataset_obj.get_joint_weights().to(device=device,
                                                       dtype=dtype)
    # Add a fake batch dimension for broadcasting
    joint_weights.unsqueeze_(dim=0)

    for data in dataset_obj:
        img = data['img']
        fn = data['fn']
        keypoints = data['keypoints']
        print('Processing: {}'.format(data['img_path']))

        curr_result_folder = _ensure_dir(osp.join(result_folder, fn))
        curr_mesh_folder = _ensure_dir(osp.join(mesh_folder, fn))

        # A positive max_persons caps how many people are fitted per item;
        # zero or a negative value means no limit.
        num_persons = keypoints.shape[0]
        if max_persons > 0:
            num_persons = min(num_persons, max_persons)

        for person_id in range(num_persons):
            curr_result_fn = osp.join(curr_result_folder,
                                      '{:03d}.pkl'.format(person_id))
            curr_mesh_fn = osp.join(curr_mesh_folder,
                                    '{:03d}.obj'.format(person_id))

            curr_img_folder = _ensure_dir(
                osp.join(out_img_folder, fn, '{:03d}'.format(person_id)))

            # Start from the configured gender so that a missing label can
            # never leave `gender` unset or carried over from the previous
            # person.
            gender = input_gender
            if gender_lbl_type == 'pd' and 'gender_pd' in data:
                gender = data['gender_pd'][person_id]
            elif gender_lbl_type == 'gt' and 'gender_gt' in data:
                gender = data['gender_gt'][person_id]

            if gender == 'neutral' and neutral_model is not None:
                body_model = neutral_model
            elif gender == 'female':
                body_model = female_model
            elif gender == 'male':
                body_model = male_model
            else:
                # Fail loudly instead of silently reusing the model chosen
                # for the previous person.
                raise ValueError(
                    'No body model available for gender {!r} '
                    '(model_type={!r})'.format(gender,
                                               args.get('model_type')))

            out_img_fn = osp.join(curr_img_folder, 'output.png')

            fit_single_frame(img, keypoints[[person_id]],
                             body_model=body_model,
                             camera=camera,
                             joint_weights=joint_weights,
                             dtype=dtype,
                             output_folder=output_folder,
                             result_folder=curr_result_folder,
                             out_img_fn=out_img_fn,
                             result_fn=curr_result_fn,
                             mesh_fn=curr_mesh_fn,
                             shape_prior=shape_prior,
                             expr_prior=expr_prior,
                             body_pose_prior=body_pose_prior,
                             left_hand_prior=left_hand_prior,
                             right_hand_prior=right_hand_prior,
                             jaw_prior=jaw_prior,
                             angle_prior=angle_prior,
                             **args)

    elapsed = time.time() - start
    print('Processing the data took: {}'.format(_format_elapsed(elapsed)))
if __name__ == "__main__":
    # Script entry point: parse the configuration and hand every entry to
    # main() as a keyword argument.
    parsed_config = parse_config()
    main(**parsed_config)
I am running it with the following command:
$ export CUDA_VISIBLE_DEVICES=0
$ python smplifyx/main.py --config cfg_files/fit_smplx.yaml --data_folder ../../data/smplify-x/djrn_train_data/ --output_folder ../../data/smplify-x/djrn_train_results --visualize="False" --model_folder ../../data/smplify-x/models_smplx_v1_1/models/smplx/SMPLX_NEUTRAL.npz --vposer_ckpt ../../data/smplify-x/vposer_v1_0 --part_segm_fn ../../data/smplify-x/smplx_parts_segm.pkl
related issues:
On the local machine with a 4 GB GPU:
$ nvidia-smi
Mon Jan 11 18:53:10 2021
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 455.45.01 Driver Version: 455.45.01 CUDA Version: 11.1 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 GeForce GTX 165... Off | 00000000:01:00.0 Off | N/A |
| N/A 64C P0 32W / N/A | 2930MiB / 3911MiB | 53% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| 0 N/A N/A 1174 G /usr/lib/xorg/Xorg 133MiB |
| 0 N/A N/A 2346 G /usr/lib/xorg/Xorg 776MiB |
| 0 N/A N/A 2530 G /usr/bin/gnome-shell 120MiB |
| 0 N/A N/A 2900 G ...gAAAAAAAAA --shared-files 165MiB |
| 0 N/A N/A 19237 G /opt/zoom/zoom 64MiB |
| 0 N/A N/A 87955 G /usr/lib/firefox/firefox 1MiB |
| 0 N/A N/A 88568 G /usr/lib/firefox/firefox 1MiB |
| 0 N/A N/A 88739 G /usr/lib/firefox/firefox 1MiB |
| 0 N/A N/A 89874 G /usr/lib/firefox/firefox 1MiB |
| 0 N/A N/A 91555 G /usr/lib/firefox/firefox 1MiB |
| 0 N/A N/A 98516 G /usr/lib/firefox/firefox 1MiB |
| 0 N/A N/A 98582 G /usr/lib/firefox/firefox 1MiB |
| 0 N/A N/A 121388 C python 1635MiB |
+-----------------------------------------------------------------------------+
974/31772MB(smplifyx)
On the server with 12 GB GPUs:
$ nvidia-smi
Mon Jan 11 15:24:13 2021
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.36.06 Driver Version: 450.36.06 CUDA Version: 11.0 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 GeForce RTX 208... On | 00000000:1D:00.0 Off | N/A |
| 33% 42C P2 63W / 250W | 1768MiB / 11019MiB | 12% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
| 1 GeForce RTX 208... On | 00000000:1E:00.0 Off | N/A |
| 29% 40C P2 61W / 250W | 1768MiB / 11019MiB | 13% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
| 2 GeForce RTX 208... On | 00000000:1F:00.0 Off | N/A |
| 27% 26C P8 3W / 250W | 1MiB / 11019MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
| 3 GeForce RTX 208... On | 00000000:20:00.0 Off | N/A |
| 27% 24C P8 2W / 250W | 1MiB / 11019MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
| 4 GeForce RTX 208... On | 00000000:21:00.0 Off | N/A |
| 27% 23C P8 6W / 250W | 1MiB / 11019MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
| 5 GeForce RTX 208... On | 00000000:22:00.0 Off | N/A |
| 27% 25C P8 19W / 250W | 1MiB / 11019MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
| 6 GeForce RTX 208... On | 00000000:23:00.0 Off | N/A |
| 27% 26C P8 1W / 250W | 1MiB / 11019MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
| 7 GeForce RTX 208... On | 00000000:24:00.0 Off | N/A |
| 27% 23C P8 16W / 250W | 1MiB / 11019MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| 0 N/A N/A 13231 C python 1765MiB |
| 1 N/A N/A 14300 C python 1765MiB |
+-----------------------------------------------------------------------------+
Some extra info:
$ python
Python 3.8.5 (default, Jul 28 2020, 12:59:40)
[GCC 9.3.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import torch
>>> torch.__version__
'1.7.1'
Server has Ubuntu 18.04 and local machine has Ubuntu 20.04:
$ lsb_release -a
LSB Version: core-11.1.0ubuntu2-noarch:security-11.1.0ubuntu2-noarch
Distributor ID: Ubuntu
Description: Ubuntu 20.04.1 LTS
Release: 20.04
Codename: focal