Algorithms




InterNet (ECCV’2020)


Internet + Internet on Interhand3d

InterNet (ECCV'2020)
@inproceedings{Moon_2020_ECCV_InterHand2.6M,
  author    = {Moon, Gyeongsik and Yu, Shoou-I and Wen, He and Shiratori, Takaaki and Lee, Kyoung Mu},
  title     = {{InterHand2.6M}: A Dataset and Baseline for {3D} Interacting Hand Pose Estimation from a Single {RGB} Image},
  booktitle = {European Conference on Computer Vision (ECCV)},
  year      = {2020},
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016},
}
InterHand2.6M (ECCV'2020)
@inproceedings{Moon_2020_ECCV_InterHand2.6M,
  author    = {Moon, Gyeongsik and Yu, Shoou-I and Wen, He and Shiratori, Takaaki and Lee, Kyoung Mu},
  title     = {{InterHand2.6M}: A Dataset and Baseline for {3D} Interacting Hand Pose Estimation from a Single {RGB} Image},
  booktitle = {European Conference on Computer Vision (ECCV)},
  year      = {2020},
}

Results on InterHand2.6M val & test set

Train Set Set Arch Input Size MPJPE-single MPJPE-interacting MPJPE-all MRRPE APh ckpt log
All test(H+M) InterNet_resnet_50 256x256 9.47 13.40 11.59 29.28 0.99 ckpt log
All val(M) InterNet_resnet_50 256x256 11.22 15.23 13.16 31.73 0.98 ckpt log



SimpleBaseline3D (ICCV’2017)


Pose Lift + Simplebaseline3d on H36m

SimpleBaseline3D (ICCV'2017)
@inproceedings{martinez_2017_3dbaseline,
  author    = {Martinez, Julieta and Hossain, Rayat and Romero, Javier and Little, James J.},
  title     = {A Simple Yet Effective Baseline for {3D} Human Pose Estimation},
  booktitle = {Proceedings of the IEEE International Conference on Computer Vision (ICCV)},
  year      = {2017},
}
Human3.6M (TPAMI'2014)
@article{h36m_pami,
  author    = {Ionescu, Catalin and Papava, Dragos and Olaru, Vlad and Sminchisescu, Cristian},
  title     = {{Human3.6M}: Large Scale Datasets and Predictive Methods for {3D} Human Sensing in Natural Environments},
  journal   = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  publisher = {IEEE Computer Society},
  volume    = {36},
  number    = {7},
  pages     = {1325--1339},
  month     = jul,
  year      = {2014},
}

Results on Human3.6M dataset with ground truth 2D detections

Arch MPJPE P-MPJPE ckpt log
simple_baseline_3d_tcn1 43.4 34.3 ckpt log

1 Differing from the original paper, we didn’t apply the max-norm constraint, because we found that omitting it led to better convergence and performance.


Pose Lift + Simplebaseline3d on Mpi_inf_3dhp

SimpleBaseline3D (ICCV'2017)
@inproceedings{martinez_2017_3dbaseline,
  author    = {Martinez, Julieta and Hossain, Rayat and Romero, Javier and Little, James J.},
  title     = {A Simple Yet Effective Baseline for {3D} Human Pose Estimation},
  booktitle = {Proceedings of the IEEE International Conference on Computer Vision (ICCV)},
  year      = {2017},
}
MPI-INF-3DHP (3DV'2017)
@inproceedings{mono-3dhp2017,
  author       = {Mehta, Dushyant and Rhodin, Helge and Casas, Dan and Fua, Pascal and Sotnychenko, Oleksandr and Xu, Weipeng and Theobalt, Christian},
  title        = {Monocular {3D} Human Pose Estimation In The Wild Using Improved {CNN} Supervision},
  booktitle    = {3D Vision (3DV), 2017 Fifth International Conference on},
  year         = {2017},
  organization = {IEEE},
  doi          = {10.1109/3dv.2017.00064},
  url          = {http://gvv.mpi-inf.mpg.de/3dhp_dataset},
}

Results on MPI-INF-3DHP dataset with ground truth 2D detections

Arch MPJPE P-MPJPE 3DPCK 3DAUC ckpt log
simple_baseline_3d_tcn1 84.3 53.2 85.0 52.0 ckpt log

1 Differing from the original paper, we didn’t apply the max-norm constraint, because we found that omitting it led to better convergence and performance.




Associative Embedding (NIPS’2017)


Associative Embedding + Higherhrnet on Aic

Associative Embedding (NIPS'2017)
@inproceedings{newell2017associative,
  author    = {Newell, Alejandro and Huang, Zhiao and Deng, Jia},
  title     = {Associative embedding: End-to-end learning for joint detection and grouping},
  booktitle = {Advances in neural information processing systems},
  pages     = {2277--2287},
  year      = {2017},
}
HigherHRNet (CVPR'2020)
@inproceedings{cheng2020higherhrnet,
  author    = {Cheng, Bowen and Xiao, Bin and Wang, Jingdong and Shi, Honghui and Huang, Thomas S and Zhang, Lei},
  title     = {{HigherHRNet}: Scale-Aware Representation Learning for Bottom-Up Human Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {5386--5395},
  year      = {2020},
}
AI Challenger (ArXiv'2017)
@article{wu2017ai,
  author  = {Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
  title   = {{AI} {Challenger}: A Large-Scale Dataset for Going Deeper in Image Understanding},
  journal = {arXiv preprint arXiv:1711.06475},
  year    = {2017},
}

Results on AIC validation set without multi-scale test

Arch Input Size AP AP50 AP75 AR AR50 ckpt log
HigherHRNet-w32 512x512 0.315 0.710 0.243 0.379 0.757 ckpt log

Results on AIC validation set with multi-scale test. 3 default scales ([2, 1, 0.5]) are used

Arch Input Size AP AP50 AP75 AR AR50 ckpt log
HigherHRNet-w32 512x512 0.323 0.718 0.254 0.379 0.758 ckpt log

Associative Embedding + Hrnet on Aic

Associative Embedding (NIPS'2017)
@inproceedings{newell2017associative,
  author    = {Newell, Alejandro and Huang, Zhiao and Deng, Jia},
  title     = {Associative embedding: End-to-end learning for joint detection and grouping},
  booktitle = {Advances in neural information processing systems},
  pages     = {2277--2287},
  year      = {2017},
}
HRNet (CVPR'2019)
@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {5693--5703},
  year      = {2019},
}
AI Challenger (ArXiv'2017)
@article{wu2017ai,
  author  = {Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
  title   = {{AI} {Challenger}: A Large-Scale Dataset for Going Deeper in Image Understanding},
  journal = {arXiv preprint arXiv:1711.06475},
  year    = {2017},
}

Results on AIC validation set without multi-scale test

Arch Input Size AP AP50 AP75 AR AR50 ckpt log
HRNet-w32 512x512 0.303 0.697 0.225 0.373 0.755 ckpt log

Results on AIC validation set with multi-scale test. 3 default scales ([2, 1, 0.5]) are used

Arch Input Size AP AP50 AP75 AR AR50 ckpt log
HRNet-w32 512x512 0.318 0.717 0.246 0.379 0.764 ckpt log

Associative Embedding + Higherhrnet on Coco

Associative Embedding (NIPS'2017)
@inproceedings{newell2017associative,
  author    = {Newell, Alejandro and Huang, Zhiao and Deng, Jia},
  title     = {Associative embedding: End-to-end learning for joint detection and grouping},
  booktitle = {Advances in neural information processing systems},
  pages     = {2277--2287},
  year      = {2017},
}
HigherHRNet (CVPR'2020)
@inproceedings{cheng2020higherhrnet,
  author    = {Cheng, Bowen and Xiao, Bin and Wang, Jingdong and Shi, Honghui and Huang, Thomas S and Zhang, Lei},
  title     = {{HigherHRNet}: Scale-Aware Representation Learning for Bottom-Up Human Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {5386--5395},
  year      = {2020},
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {{Microsoft} {COCO}: Common Objects in Context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer},
}

Results on COCO val2017 without multi-scale test

Arch Input Size AP AP50 AP75 AR AR50 ckpt log
HigherHRNet-w32 512x512 0.677 0.870 0.738 0.723 0.890 ckpt log
HigherHRNet-w32 640x640 0.686 0.871 0.747 0.733 0.898 ckpt log
HigherHRNet-w48 512x512 0.686 0.873 0.741 0.731 0.892 ckpt log

Results on COCO val2017 with multi-scale test. 3 default scales ([2, 1, 0.5]) are used

Arch Input Size AP AP50 AP75 AR AR50 ckpt log
HigherHRNet-w32 512x512 0.706 0.881 0.771 0.747 0.901 ckpt log
HigherHRNet-w32 640x640 0.706 0.880 0.770 0.749 0.902 ckpt log
HigherHRNet-w48 512x512 0.716 0.884 0.775 0.755 0.901 ckpt log

Associative Embedding + Hrnet + Udp on Coco

Associative Embedding (NIPS'2017)
@inproceedings{newell2017associative,
  author    = {Newell, Alejandro and Huang, Zhiao and Deng, Jia},
  title     = {Associative embedding: End-to-end learning for joint detection and grouping},
  booktitle = {Advances in neural information processing systems},
  pages     = {2277--2287},
  year      = {2017},
}
HRNet (CVPR'2019)
@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {5693--5703},
  year      = {2019},
}
UDP (CVPR'2020)
@inproceedings{Huang_2020_CVPR,
  author    = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
  title     = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
  booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2020},
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {{Microsoft} {COCO}: Common Objects in Context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer},
}

Results on COCO val2017 without multi-scale test

Arch Input Size AP AP50 AP75 AR AR50 ckpt log
HRNet-w32_udp 512x512 0.671 0.863 0.729 0.717 0.889 ckpt log
HRNet-w48_udp 512x512 0.681 0.872 0.741 0.725 0.892 ckpt log

Associative Embedding + Resnet on Coco

Associative Embedding (NIPS'2017)
@inproceedings{newell2017associative,
  author    = {Newell, Alejandro and Huang, Zhiao and Deng, Jia},
  title     = {Associative embedding: End-to-end learning for joint detection and grouping},
  booktitle = {Advances in neural information processing systems},
  pages     = {2277--2287},
  year      = {2017},
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016},
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {{Microsoft} {COCO}: Common Objects in Context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer},
}

Results on COCO val2017 without multi-scale test

Arch Input Size AP AP50 AP75 AR AR50 ckpt log
pose_resnet_50 512x512 0.466 0.742 0.479 0.552 0.797 ckpt log
pose_resnet_50 640x640 0.479 0.757 0.487 0.566 0.810 ckpt log
pose_resnet_101 512x512 0.554 0.807 0.599 0.622 0.841 ckpt log
pose_resnet_152 512x512 0.595 0.829 0.648 0.651 0.856 ckpt log

Results on COCO val2017 with multi-scale test. 3 default scales ([2, 1, 0.5]) are used

Arch Input Size AP AP50 AP75 AR AR50 ckpt log
pose_resnet_50 512x512 0.503 0.765 0.521 0.591 0.821 ckpt log
pose_resnet_50 640x640 0.525 0.784 0.542 0.610 0.832 ckpt log
pose_resnet_101 512x512 0.603 0.831 0.641 0.668 0.870 ckpt log
pose_resnet_152 512x512 0.660 0.860 0.713 0.709 0.889 ckpt log

Associative Embedding + Mobilenetv2 on Coco

Associative Embedding (NIPS'2017)
@inproceedings{newell2017associative,
  author    = {Newell, Alejandro and Huang, Zhiao and Deng, Jia},
  title     = {Associative embedding: End-to-end learning for joint detection and grouping},
  booktitle = {Advances in neural information processing systems},
  pages     = {2277--2287},
  year      = {2017},
}
MobilenetV2 (CVPR'2018)
@inproceedings{sandler2018mobilenetv2,
  author    = {Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
  title     = {{MobileNetV2}: Inverted Residuals and Linear Bottlenecks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {4510--4520},
  year      = {2018},
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {{Microsoft} {COCO}: Common Objects in Context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer},
}

Results on COCO val2017 without multi-scale test

Arch Input Size AP AP50 AP75 AR AR50 ckpt log
pose_mobilenetv2 512x512 0.380 0.671 0.368 0.473 0.741 ckpt log

Results on COCO val2017 with multi-scale test. 3 default scales ([2, 1, 0.5]) are used

Arch Input Size AP AP50 AP75 AR AR50 ckpt log
pose_mobilenetv2 512x512 0.442 0.696 0.422 0.517 0.766 ckpt log

Associative Embedding + Higherhrnet + Udp on Coco

Associative Embedding (NIPS'2017)
@inproceedings{newell2017associative,
  author    = {Newell, Alejandro and Huang, Zhiao and Deng, Jia},
  title     = {Associative embedding: End-to-end learning for joint detection and grouping},
  booktitle = {Advances in neural information processing systems},
  pages     = {2277--2287},
  year      = {2017},
}
HigherHRNet (CVPR'2020)
@inproceedings{cheng2020higherhrnet,
  author    = {Cheng, Bowen and Xiao, Bin and Wang, Jingdong and Shi, Honghui and Huang, Thomas S and Zhang, Lei},
  title     = {{HigherHRNet}: Scale-Aware Representation Learning for Bottom-Up Human Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {5386--5395},
  year      = {2020},
}
UDP (CVPR'2020)
@inproceedings{Huang_2020_CVPR,
  author    = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
  title     = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
  booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2020},
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {{Microsoft} {COCO}: Common Objects in Context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer},
}

Results on COCO val2017 without multi-scale test

Arch Input Size AP AP50 AP75 AR AR50 ckpt log
HigherHRNet-w32_udp 512x512 0.678 0.862 0.736 0.724 0.890 ckpt log
HigherHRNet-w48_udp 512x512 0.690 0.872 0.750 0.734 0.891 ckpt log

Associative Embedding + Hrnet on Coco

Associative Embedding (NIPS'2017)
@inproceedings{newell2017associative,
  author    = {Newell, Alejandro and Huang, Zhiao and Deng, Jia},
  title     = {Associative embedding: End-to-end learning for joint detection and grouping},
  booktitle = {Advances in neural information processing systems},
  pages     = {2277--2287},
  year      = {2017},
}
HRNet (CVPR'2019)
@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {5693--5703},
  year      = {2019},
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {{Microsoft} {COCO}: Common Objects in Context},
  booktitle = {European conference on computer vision},
  pages     = {740--755},
  year      = {2014},
  publisher = {Springer},
}

Results on COCO val2017 without multi-scale test

Arch Input Size AP AP50 AP75 AR AR50 ckpt log
HRNet-w32 512x512 0.654 0.863 0.720 0.710 0.892 ckpt log
HRNet-w48 512x512 0.665 0.860 0.727 0.716 0.889 ckpt log

Results on COCO val2017 with multi-scale test. 3 default scales ([2, 1, 0.5]) are used

Arch Input Size AP AP50 AP75 AR AR50 ckpt log
HRNet-w32 512x512 0.698 0.877 0.760 0.748 0.907 ckpt log
HRNet-w48 512x512 0.712 0.880 0.771 0.757 0.909 ckpt log

Associative Embedding + Higherhrnet on Crowdpose

Associative Embedding (NIPS'2017)
@inproceedings{newell2017associative,
  author    = {Newell, Alejandro and Huang, Zhiao and Deng, Jia},
  title     = {Associative embedding: End-to-end learning for joint detection and grouping},
  booktitle = {Advances in neural information processing systems},
  pages     = {2277--2287},
  year      = {2017},
}
HigherHRNet (CVPR'2020)
@inproceedings{cheng2020higherhrnet,
  author    = {Cheng, Bowen and Xiao, Bin and Wang, Jingdong and Shi, Honghui and Huang, Thomas S and Zhang, Lei},
  title     = {{HigherHRNet}: Scale-Aware Representation Learning for Bottom-Up Human Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {5386--5395},
  year      = {2020},
}
CrowdPose (CVPR'2019)
@article{li2018crowdpose,
  author  = {Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
  title   = {{CrowdPose}: Efficient Crowded Scenes Pose Estimation and a New Benchmark},
  journal = {arXiv preprint arXiv:1812.00324},
  year    = {2018},
}

Results on CrowdPose test without multi-scale test

Arch Input Size AP AP50 AP75 AP (E) AP (M) AP (H) ckpt log
HigherHRNet-w32 512x512 0.655 0.859 0.705 0.728 0.660 0.577 ckpt log

Results on CrowdPose test with multi-scale test. 2 scales ([2, 1]) are used

Arch Input Size AP AP50 AP75 AP (E) AP (M) AP (H) ckpt log
HigherHRNet-w32 512x512 0.661 0.864 0.710 0.742 0.670 0.566 ckpt log

Associative Embedding + Hrnet on MHP

Associative Embedding (NIPS'2017)
@inproceedings{newell2017associative,
  author    = {Newell, Alejandro and Huang, Zhiao and Deng, Jia},
  title     = {Associative embedding: End-to-end learning for joint detection and grouping},
  booktitle = {Advances in neural information processing systems},
  pages     = {2277--2287},
  year      = {2017},
}
HRNet (CVPR'2019)
@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {5693--5703},
  year      = {2019},
}
MHP (ACM MM'2018)
@inproceedings{zhao2018understanding,
  author    = {Zhao, Jian and Li, Jianshu and Cheng, Yu and Sim, Terence and Yan, Shuicheng and Feng, Jiashi},
  title     = {Understanding humans in crowded scenes: Deep nested adversarial learning and a new benchmark for multi-human parsing},
  booktitle = {Proceedings of the 26th ACM international conference on Multimedia},
  pages     = {792--800},
  year      = {2018},
}

Results on MHP v2.0 validation set without multi-scale test

Arch Input Size AP AP50 AP75 AR AR50 ckpt log
HRNet-w48 512x512 0.583 0.895 0.666 0.656 0.931 ckpt log

Results on MHP v2.0 validation set with multi-scale test. 3 default scales ([2, 1, 0.5]) are used

Arch Input Size AP AP50 AP75 AR AR50 ckpt log
HRNet-w48 512x512 0.592 0.898 0.673 0.664 0.932 ckpt log

Associative Embedding + Higherhrnet on Coco-Wholebody

Associative Embedding (NIPS'2017)
@inproceedings{newell2017associative,
  author    = {Newell, Alejandro and Huang, Zhiao and Deng, Jia},
  title     = {Associative embedding: End-to-end learning for joint detection and grouping},
  booktitle = {Advances in neural information processing systems},
  pages     = {2277--2287},
  year      = {2017},
}
HigherHRNet (CVPR'2020)
@inproceedings{cheng2020higherhrnet,
  author    = {Cheng, Bowen and Xiao, Bin and Wang, Jingdong and Shi, Honghui and Huang, Thomas S and Zhang, Lei},
  title     = {{HigherHRNet}: Scale-Aware Representation Learning for Bottom-Up Human Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {5386--5395},
  year      = {2020},
}
COCO-WholeBody (ECCV'2020)
@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020},
}

Results on COCO-WholeBody v1.0 val without multi-scale test

Arch Input Size Body AP Body AR Foot AP Foot AR Face AP Face AR Hand AP Hand AR Whole AP Whole AR ckpt log
HigherHRNet-w32+ 512x512 0.590 0.672 0.185 0.335 0.676 0.721 0.212 0.298 0.401 0.493 ckpt log
HigherHRNet-w48+ 512x512 0.630 0.706 0.440 0.573 0.730 0.777 0.389 0.477 0.487 0.574 ckpt log

Note: + means the model is first pre-trained on original COCO dataset, and then fine-tuned on COCO-WholeBody dataset. We find this will lead to better performance.


Associative Embedding + Hrnet on Coco-Wholebody

Associative Embedding (NIPS'2017)
@inproceedings{newell2017associative,
  author    = {Newell, Alejandro and Huang, Zhiao and Deng, Jia},
  title     = {Associative embedding: End-to-end learning for joint detection and grouping},
  booktitle = {Advances in neural information processing systems},
  pages     = {2277--2287},
  year      = {2017},
}
HRNet (CVPR'2019)
@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {5693--5703},
  year      = {2019},
}
COCO-WholeBody (ECCV'2020)
@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020},
}

Results on COCO-WholeBody v1.0 val without multi-scale test

Arch Input Size Body AP Body AR Foot AP Foot AR Face AP Face AR Hand AP Hand AR Whole AP Whole AR ckpt log
HRNet-w32+ 512x512 0.551 0.650 0.271 0.451 0.564 0.618 0.159 0.238 0.342 0.453 ckpt log
HRNet-w48+ 512x512 0.592 0.686 0.443 0.595 0.619 0.674 0.347 0.438 0.422 0.532 ckpt log

Note: + means the model is first pre-trained on original COCO dataset, and then fine-tuned on COCO-WholeBody dataset. We find this will lead to better performance.




SimpleBaseline2D (ECCV’2018)


Topdown Heatmap + Resnet on Animalpose

SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018},
}
Animal-Pose (ICCV'2019)
@inproceedings{Cao_2019_ICCV,
    author    = {Cao, Jinkun and Tang, Hongyang and Fang, Hao-Shu and Shen, Xiaoyong and Lu, Cewu and Tai, Yu-Wing},
    title     = {Cross-Domain Adaptation for Animal Pose Estimation},
    booktitle = {The IEEE International Conference on Computer Vision (ICCV)},
    month     = oct,
    year      = {2019},
}

Results on AnimalPose validation set (1117 instances)

Arch Input Size AP AP50 AP75 AR AR50 ckpt log
pose_resnet_50 256x256 0.688 0.945 0.772 0.733 0.952 ckpt log
pose_resnet_101 256x256 0.696 0.948 0.785 0.737 0.954 ckpt log
pose_resnet_152 256x256 0.709 0.948 0.797 0.749 0.951 ckpt log

Topdown Heatmap + Resnet on Atrw

SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018},
}
ATRW (ACM MM'2020)
@inproceedings{li2020atrw,
  author    = {Li, Shuyuan and Li, Jianguo and Tang, Hanlin and Qian, Rui and Lin, Weiyao},
  title     = {{ATRW}: A Benchmark for {Amur} Tiger Re-identification in the Wild},
  booktitle = {Proceedings of the 28th ACM International Conference on Multimedia},
  pages     = {2590--2598},
  year      = {2020},
}

Results on ATRW validation set

Arch Input Size AP AP50 AP75 AR AR50 ckpt log
pose_resnet_50 256x256 0.900 0.973 0.932 0.929 0.985 ckpt log
pose_resnet_101 256x256 0.898 0.973 0.936 0.927 0.985 ckpt log
pose_resnet_152 256x256 0.896 0.973 0.931 0.927 0.985 ckpt log

Topdown Heatmap + Resnet on Fly

SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018},
}
Vinegar Fly (Nature Methods'2019)
@article{pereira2019fast,
  author    = {Pereira, Talmo D and Aldarondo, Diego E and Willmore, Lindsay and Kislin, Mikhail and Wang, Samuel S-H and Murthy, Mala and Shaevitz, Joshua W},
  title     = {Fast animal pose estimation using deep neural networks},
  journal   = {Nature Methods},
  volume    = {16},
  number    = {1},
  pages     = {117--125},
  year      = {2019},
  publisher = {Nature Publishing Group},
}

Results on Vinegar Fly test set

Arch Input Size PCK@0.2 AUC EPE ckpt log
pose_resnet_50 192x192 0.996 0.910 2.00 ckpt log
pose_resnet_101 192x192 0.996 0.912 1.95 ckpt log
pose_resnet_152 192x192 0.997 0.917 1.78 ckpt log

Topdown Heatmap + Resnet on Horse10

SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018},
}
Horse-10 (WACV'2021)
@inproceedings{mathis2021pretraining,
  author    = {Mathis, Alexander and Biasi, Thomas and Schneider, Steffen and Yuksekgonul, Mert and Rogers, Byron and Bethge, Matthias and Mathis, Mackenzie W},
  title     = {Pretraining boosts out-of-domain robustness for pose estimation},
  booktitle = {Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision},
  pages     = {1859--1868},
  year      = {2021},
}

Results on Horse-10 test set

Set Arch Input Size PCK@0.3 NME ckpt log
split1 pose_resnet_50 256x256 0.956 0.113 ckpt log
split2 pose_resnet_50 256x256 0.954 0.111 ckpt log
split3 pose_resnet_50 256x256 0.946 0.129 ckpt log
split1 pose_resnet_101 256x256 0.958 0.115 ckpt log
split2 pose_resnet_101 256x256 0.955 0.115 ckpt log
split3 pose_resnet_101 256x256 0.946 0.126 ckpt log
split1 pose_resnet_152 256x256 0.969 0.105 ckpt log
split2 pose_resnet_152 256x256 0.970 0.103 ckpt log
split3 pose_resnet_152 256x256 0.957 0.131 ckpt log

Topdown Heatmap + Resnet on Locust

SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018},
}
Desert Locust (Elife'2019)
@article{graving2019deepposekit,
  author    = {Graving, Jacob M and Chae, Daniel and Naik, Hemal and Li, Liang and Koger, Benjamin and Costelloe, Blair R and Couzin, Iain D},
  title     = {{DeepPoseKit}, a software toolkit for fast and robust animal pose estimation using deep learning},
  journal   = {eLife},
  volume    = {8},
  pages     = {e47994},
  year      = {2019},
  publisher = {eLife Sciences Publications Limited},
}

Results on Desert Locust test set

Arch Input Size PCK@0.2 AUC EPE ckpt log
pose_resnet_50 160x160 0.999 0.899 2.27 ckpt log
pose_resnet_101 160x160 0.999 0.907 2.03 ckpt log
pose_resnet_152 160x160 1.000 0.926 1.48 ckpt log

Topdown Heatmap + Resnet on Macaque

SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018},
}
MacaquePose (bioRxiv'2020)
@article{labuguen2020macaquepose,
  author    = {Labuguen, Rollyn and Matsumoto, Jumpei and Negrete, Salvador and Nishimaru, Hiroshi and Nishijo, Hisao and Takada, Masahiko and Go, Yasuhiro and Inoue, Ken-ichi and Shibata, Tomohiro},
  title     = {{MacaquePose}: A novel `in the wild' macaque monkey pose dataset for markerless motion capture},
  journal   = {bioRxiv},
  year      = {2020},
  publisher = {Cold Spring Harbor Laboratory},
}

Results on MacaquePose with ground-truth detection bounding boxes

Arch Input Size AP AP50 AP75 AR AR50 ckpt log
pose_resnet_50 256x192 0.799 0.952 0.919 0.837 0.964 ckpt log
pose_resnet_101 256x192 0.790 0.953 0.908 0.828 0.967 ckpt log
pose_resnet_152 256x192 0.794 0.951 0.915 0.834 0.968 ckpt log

Topdown Heatmap + Resnet on Zebra

SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018},
}
Grévy’s Zebra (Elife'2019)
@article{graving2019deepposekit,
  author    = {Graving, Jacob M and Chae, Daniel and Naik, Hemal and Li, Liang and Koger, Benjamin and Costelloe, Blair R and Couzin, Iain D},
  title     = {{DeepPoseKit}, a software toolkit for fast and robust animal pose estimation using deep learning},
  journal   = {eLife},
  volume    = {8},
  pages     = {e47994},
  year      = {2019},
  publisher = {eLife Sciences Publications Limited},
}

Results on Grévy’s Zebra test set

Arch Input Size PCK@0.2 AUC EPE ckpt log
pose_resnet_50 160x160 1.000 0.914 1.86 ckpt log
pose_resnet_101 160x160 1.000 0.916 1.82 ckpt log
pose_resnet_152 160x160 1.000 0.921 1.66 ckpt log

Topdown Heatmap + Resnet on Aic

SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018},
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016},
}
AI Challenger (ArXiv'2017)
@article{wu2017ai,
  author  = {Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
  title   = {{AI} {Challenger}: A Large-Scale Dataset for Going Deeper in Image Understanding},
  journal = {arXiv preprint arXiv:1711.06475},
  year    = {2017},
}

Results on AIC val set with ground-truth bounding boxes

Arch Input Size AP AP50 AP75 AR AR50 ckpt log
pose_resnet_101 256x192 0.294 0.736 0.174 0.337 0.763 ckpt log

Topdown Heatmap + Resnet on Coco

SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018},
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016},
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author       = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title        = {Microsoft {COCO}: Common Objects in Context},
  booktitle    = {European conference on computer vision},
  pages        = {740--755},
  year         = {2014},
  organization = {Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch Input Size AP AP50 AP75 AR AR50 ckpt log
pose_resnet_50 256x192 0.718 0.898 0.795 0.773 0.937 ckpt log
pose_resnet_50 384x288 0.731 0.900 0.799 0.783 0.931 ckpt log
pose_resnet_101 256x192 0.726 0.899 0.806 0.781 0.939 ckpt log
pose_resnet_101 384x288 0.748 0.905 0.817 0.798 0.940 ckpt log
pose_resnet_152 256x192 0.735 0.905 0.812 0.790 0.943 ckpt log
pose_resnet_152 384x288 0.750 0.908 0.821 0.800 0.942 ckpt log

Topdown Heatmap + Resnet + Dark on Coco

SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016}
}
DarkPose (CVPR'2020)
@inproceedings{zhang2020distribution,
  author    = {Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
  title     = {Distribution-aware coordinate representation for human pose estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {7093--7102},
  year      = {2020}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author       = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title        = {Microsoft {COCO}: Common Objects in Context},
  booktitle    = {European conference on computer vision},
  pages        = {740--755},
  year         = {2014},
  organization = {Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch Input Size AP AP50 AP75 AR AR50 ckpt log
pose_resnet_50_dark 256x192 0.724 0.898 0.800 0.777 0.936 ckpt log
pose_resnet_50_dark 384x288 0.735 0.900 0.801 0.785 0.937 ckpt log
pose_resnet_101_dark 256x192 0.732 0.899 0.808 0.786 0.938 ckpt log
pose_resnet_101_dark 384x288 0.749 0.902 0.816 0.799 0.939 ckpt log
pose_resnet_152_dark 256x192 0.745 0.905 0.821 0.797 0.942 ckpt log
pose_resnet_152_dark 384x288 0.757 0.909 0.826 0.806 0.943 ckpt log

Topdown Heatmap + Resnet + Fp16 on Coco

SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016}
}
FP16 (ArXiv'2017)
@article{micikevicius2017mixed,
  author  = {Micikevicius, Paulius and Narang, Sharan and Alben, Jonah and Diamos, Gregory and Elsen, Erich and Garcia, David and Ginsburg, Boris and Houston, Michael and Kuchaiev, Oleksii and Venkatesh, Ganesh and others},
  title   = {Mixed precision training},
  journal = {arXiv preprint arXiv:1710.03740},
  year    = {2017}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author       = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title        = {Microsoft {COCO}: Common Objects in Context},
  booktitle    = {European conference on computer vision},
  pages        = {740--755},
  year         = {2014},
  organization = {Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch Input Size AP AP50 AP75 AR AR50 ckpt log
pose_resnet_50_fp16 256x192 0.717 0.898 0.793 0.772 0.936 ckpt log

Topdown Heatmap + Resnet on Crowdpose

SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016}
}
CrowdPose (CVPR'2019)
@article{li2018crowdpose,
  author  = {Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
  title   = {{CrowdPose}: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
  journal = {arXiv preprint arXiv:1812.00324},
  year    = {2018}
}

Results on CrowdPose test with YOLOv3 human detector

Arch Input Size AP AP50 AP75 AP (E) AP (M) AP (H) ckpt log
pose_resnet_50 256x192 0.637 0.808 0.692 0.739 0.650 0.506 ckpt log
pose_resnet_101 256x192 0.647 0.810 0.703 0.744 0.658 0.522 ckpt log
pose_resnet_101 320x256 0.661 0.821 0.714 0.759 0.671 0.536 ckpt log
pose_resnet_152 256x192 0.656 0.818 0.712 0.754 0.666 0.532 ckpt log

Topdown Heatmap + Resnet on JHMDB

SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016}
}
JHMDB (ICCV'2013)
@inproceedings{Jhuang:ICCV:2013,
  author    = {H. Jhuang and J. Gall and S. Zuffi and C. Schmid and M. J. Black},
  title     = {Towards understanding action recognition},
  booktitle = {International Conf. on Computer Vision (ICCV)},
  month     = dec,
  pages     = {3192--3199},
  year      = {2013}
}

Results on Sub-JHMDB dataset

The models are pre-trained on the MPII dataset only. No test-time augmentation (multi-scale/rotation testing) is used.

  • Normalized by Person Size

Split Arch Input Size Head Sho Elb Wri Hip Knee Ank Mean ckpt log
Sub1 pose_resnet_50 256x256 99.1 98.0 93.8 91.3 99.4 96.5 92.8 96.1 ckpt log
Sub2 pose_resnet_50 256x256 99.3 97.1 90.6 87.0 98.9 96.3 94.1 95.0 ckpt log
Sub3 pose_resnet_50 256x256 99.0 97.9 94.0 91.6 99.7 98.0 94.7 96.7 ckpt log
Average pose_resnet_50 256x256 99.2 97.7 92.8 90.0 99.3 96.9 93.9 96.0 - -
Sub1 pose_resnet_50 (2 Deconv.) 256x256 99.1 98.5 94.6 92.0 99.4 94.6 92.5 96.1 ckpt log
Sub2 pose_resnet_50 (2 Deconv.) 256x256 99.3 97.8 91.0 87.0 99.1 96.5 93.8 95.2 ckpt log
Sub3 pose_resnet_50 (2 Deconv.) 256x256 98.8 98.4 94.3 92.1 99.8 97.5 93.8 96.7 ckpt log
Average pose_resnet_50 (2 Deconv.) 256x256 99.1 98.2 93.3 90.4 99.4 96.2 93.4 96.0 - -
  • Normalized by Torso Size

Split Arch Input Size Head Sho Elb Wri Hip Knee Ank Mean ckpt log
Sub1 pose_resnet_50 256x256 93.3 83.2 74.4 72.7 85.0 81.2 78.9 81.9 ckpt log
Sub2 pose_resnet_50 256x256 94.1 74.9 64.5 62.5 77.9 71.9 78.6 75.5 ckpt log
Sub3 pose_resnet_50 256x256 97.0 82.2 74.9 70.7 84.7 83.7 84.2 82.9 ckpt log
Average pose_resnet_50 256x256 94.8 80.1 71.3 68.6 82.5 78.9 80.6 80.1 - -
Sub1 pose_resnet_50 (2 Deconv.) 256x256 92.4 80.6 73.2 70.5 82.3 75.4 75.0 79.2 ckpt log
Sub2 pose_resnet_50 (2 Deconv.) 256x256 93.4 73.6 63.8 60.5 75.1 68.4 75.5 73.7 ckpt log
Sub3 pose_resnet_50 (2 Deconv.) 256x256 96.1 81.2 72.6 67.9 83.6 80.9 81.5 81.2 ckpt log
Average pose_resnet_50 (2 Deconv.) 256x256 94.0 78.5 69.9 66.3 80.3 74.9 77.3 78.0 - -

Topdown Heatmap + Resnet on MHP

SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016}
}
MHP (ACM MM'2018)
@inproceedings{zhao2018understanding,
  author    = {Zhao, Jian and Li, Jianshu and Cheng, Yu and Sim, Terence and Yan, Shuicheng and Feng, Jiashi},
  title     = {Understanding humans in crowded scenes: Deep nested adversarial learning and a new benchmark for multi-human parsing},
  booktitle = {Proceedings of the 26th ACM international conference on Multimedia},
  pages     = {792--800},
  year      = {2018}
}

Results on MHP v2.0 val set

Arch Input Size AP AP50 AP75 AR AR50 ckpt log
pose_resnet_101 256x192 0.583 0.897 0.669 0.636 0.918 ckpt log

Note that the evaluation metric used here is mAP (adapted from COCO), which may differ from the official evaluation code. Please be cautious if you use these results in papers.


Topdown Heatmap + Resnet on Mpii

SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016}
}
MPII (CVPR'2014)
@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2014}
}

Results on MPII val set

Arch Input Size Mean Mean@0.1 ckpt log
pose_resnet_50 256x256 0.882 0.286 ckpt log
pose_resnet_101 256x256 0.888 0.290 ckpt log
pose_resnet_152 256x256 0.889 0.303 ckpt log

Topdown Heatmap + Resnet + Mpii on Mpii_trb

SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016}
}
MPII-TRB (ICCV'2019)
@inproceedings{duan2019trb,
  author    = {Duan, Haodong and Lin, Kwan-Yee and Jin, Sheng and Liu, Wentao and Qian, Chen and Ouyang, Wanli},
  title     = {{TRB}: A Novel Triplet Representation for Understanding {2D} Human Body},
  booktitle = {Proceedings of the IEEE International Conference on Computer Vision},
  pages     = {9479--9488},
  year      = {2019}
}

Results on MPII-TRB val set

Arch Input Size Skeleton Acc Contour Acc Mean Acc ckpt log
pose_resnet_50 256x256 0.887 0.858 0.868 ckpt log
pose_resnet_101 256x256 0.890 0.863 0.873 ckpt log
pose_resnet_152 256x256 0.897 0.868 0.879 ckpt log

Topdown Heatmap + Resnet on Ochuman

SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016}
}
OCHuman (CVPR'2019)
@inproceedings{zhang2019pose2seg,
  author    = {Zhang, Song-Hai and Li, Ruilong and Dong, Xin and Rosin, Paul and Cai, Zixi and Han, Xi and Yang, Dingcheng and Huang, Haozhi and Hu, Shi-Min},
  title     = {{Pose2Seg}: Detection Free Human Instance Segmentation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {889--898},
  year      = {2019}
}

Results on OCHuman test dataset with ground-truth bounding boxes

Following the common setting, the models are trained on the COCO train dataset and evaluated on the OCHuman dataset.

Arch Input Size AP AP50 AP75 AR AR50 ckpt log
pose_resnet_50 256x192 0.546 0.726 0.593 0.592 0.755 ckpt log
pose_resnet_50 384x288 0.539 0.723 0.574 0.588 0.756 ckpt log
pose_resnet_101 256x192 0.559 0.724 0.606 0.605 0.751 ckpt log
pose_resnet_101 384x288 0.571 0.715 0.615 0.615 0.748 ckpt log
pose_resnet_152 256x192 0.570 0.725 0.617 0.616 0.754 ckpt log
pose_resnet_152 384x288 0.582 0.723 0.627 0.627 0.752 ckpt log

Topdown Heatmap + Resnet on Posetrack18

SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016}
}
PoseTrack18 (CVPR'2018)
@inproceedings{andriluka2018posetrack,
  author    = {Andriluka, Mykhaylo and Iqbal, Umar and Insafutdinov, Eldar and Pishchulin, Leonid and Milan, Anton and Gall, Juergen and Schiele, Bernt},
  title     = {{PoseTrack}: A Benchmark for Human Pose Estimation and Tracking},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {5167--5176},
  year      = {2018}
}

Results on PoseTrack2018 val with ground-truth bounding boxes

Arch Input Size Head Shou Elb Wri Hip Knee Ankl Total ckpt log
pose_resnet_50 256x192 86.5 87.5 82.3 75.6 79.9 78.6 74.0 81.0 ckpt log

The models are first pre-trained on the COCO dataset and then fine-tuned on PoseTrack18.

Results on PoseTrack2018 val with MMDetection pre-trained Cascade R-CNN (X-101-64x4d-FPN) human detector

Arch Input Size Head Shou Elb Wri Hip Knee Ankl Total ckpt log
pose_resnet_50 256x192 78.9 81.9 77.8 70.8 75.3 73.2 66.4 75.2 ckpt log

The models are first pre-trained on the COCO dataset and then fine-tuned on PoseTrack18.


Topdown Heatmap + Resnet on Deepfashion

SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016}
}
DeepFashion (CVPR'2016)
@inproceedings{liuLQWTcvpr16DeepFashion,
  author    = {Liu, Ziwei and Luo, Ping and Qiu, Shi and Wang, Xiaogang and Tang, Xiaoou},
  title     = {{DeepFashion}: Powering Robust Clothes Recognition and Retrieval with Rich Annotations},
  booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2016}
}
DeepFashion (ECCV'2016)
@inproceedings{liuYLWTeccv16FashionLandmark,
  author    = {Liu, Ziwei and Yan, Sijie and Luo, Ping and Wang, Xiaogang and Tang, Xiaoou},
  title     = {Fashion Landmark Detection in the Wild},
  booktitle = {European Conference on Computer Vision (ECCV)},
  month     = oct,
  year      = {2016}
}

Results on DeepFashion val set

Set Arch Input Size PCK@0.2 AUC EPE ckpt log
upper pose_resnet_50 256x256 0.954 0.578 16.8 ckpt log
lower pose_resnet_50 256x256 0.965 0.744 10.5 ckpt log
full pose_resnet_50 256x256 0.977 0.664 12.7 ckpt log

Topdown Heatmap + Resnet on Freihand2d

SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016}
}
FreiHand (ICCV'2019)
@inproceedings{zimmermann2019freihand,
  author    = {Zimmermann, Christian and Ceylan, Duygu and Yang, Jimei and Russell, Bryan and Argus, Max and Brox, Thomas},
  title     = {{FreiHAND}: A Dataset for Markerless Capture of Hand Pose and Shape from Single {RGB} Images},
  booktitle = {Proceedings of the IEEE International Conference on Computer Vision},
  pages     = {813--822},
  year      = {2019}
}

Results on FreiHand val & test set

Set Arch Input Size PCK@0.2 AUC EPE ckpt log
val pose_resnet_50 224x224 0.993 0.868 3.25 ckpt log
test pose_resnet_50 224x224 0.992 0.868 3.27 ckpt log

Topdown Heatmap + Resnet on Interhand2d

SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016}
}
InterHand2.6M (ECCV'2020)
@inproceedings{Moon_2020_ECCV_InterHand2.6M,
  author    = {Moon, Gyeongsik and Yu, Shoou-I and Wen, He and Shiratori, Takaaki and Lee, Kyoung Mu},
  title     = {{InterHand2.6M}: A Dataset and Baseline for {3D} Interacting Hand Pose Estimation from a Single {RGB} Image},
  booktitle = {European Conference on Computer Vision (ECCV)},
  year      = {2020}
}

Results on InterHand2.6M val & test set

Train Set Set Arch Input Size PCK@0.2 AUC EPE ckpt log
Human_annot val(M) pose_resnet_50 256x256 0.973 0.828 5.15 ckpt log
Human_annot test(H) pose_resnet_50 256x256 0.973 0.826 5.27 ckpt log
Human_annot test(M) pose_resnet_50 256x256 0.975 0.841 4.90 ckpt log
Human_annot test(H+M) pose_resnet_50 256x256 0.975 0.839 4.97 ckpt log
Machine_annot val(M) pose_resnet_50 256x256 0.970 0.824 5.39 ckpt log
Machine_annot test(H) pose_resnet_50 256x256 0.969 0.821 5.52 ckpt log
Machine_annot test(M) pose_resnet_50 256x256 0.972 0.838 5.03 ckpt log
Machine_annot test(H+M) pose_resnet_50 256x256 0.972 0.837 5.11 ckpt log
All val(M) pose_resnet_50 256x256 0.977 0.840 4.66 ckpt log
All test(H) pose_resnet_50 256x256 0.979 0.839 4.65 ckpt log
All test(M) pose_resnet_50 256x256 0.979 0.838 4.42 ckpt log
All test(H+M) pose_resnet_50 256x256 0.979 0.851 4.46 ckpt log

Topdown Heatmap + Resnet on Onehand10k

SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016}
}
OneHand10K (TCSVT'2019)
@article{wang2018mask,
  author    = {Wang, Yangang and Peng, Cong and Liu, Yebin},
  title     = {Mask-Pose Cascaded {CNN} for {2D} Hand Pose Estimation from Single Color Image},
  journal   = {IEEE Transactions on Circuits and Systems for Video Technology},
  volume    = {29},
  number    = {11},
  pages     = {3258--3268},
  year      = {2019},
  publisher = {IEEE}
}

Results on OneHand10K val set

Arch Input Size PCK@0.2 AUC EPE ckpt log
pose_resnet_50 256x256 0.989 0.555 25.19 ckpt log

Topdown Heatmap + Resnet on Panoptic2d

SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016}
}
CMU Panoptic HandDB (CVPR'2017)
@inproceedings{simon2017hand,
  author    = {Simon, Tomas and Joo, Hanbyul and Matthews, Iain and Sheikh, Yaser},
  title     = {Hand keypoint detection in single images using multiview bootstrapping},
  booktitle = {Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
  pages     = {1145--1153},
  year      = {2017}
}

Results on CMU Panoptic (MPII+NZSL val set)

Arch Input Size PCKh@0.7 AUC EPE ckpt log
pose_resnet_50 256x256 0.999 0.713 9.00 ckpt log

Topdown Heatmap + Resnet on Rhd2d

SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016}
}
RHD (ICCV'2017)
@techreport{zb2017hand,
  author      = {Zimmermann, Christian and Brox, Thomas},
  title       = {Learning to Estimate {3D} Hand Pose from Single {RGB} Images},
  institution = {arXiv:1705.01389},
  year        = {2017},
  note        = {https://arxiv.org/abs/1705.01389},
  url         = {https://lmb.informatik.uni-freiburg.de/projects/hand3d/}
}

Results on RHD test set

Arch Input Size PCK@0.2 AUC EPE ckpt log
pose_hrnetv2_w18_udp 256x256 0.992 0.902 2.21 ckpt log

Topdown Heatmap + Resnet on Coco-Wholebody

SimpleBaseline2D (ECCV'2018)
@inproceedings{xiao2018simple,
  author    = {Xiao, Bin and Wu, Haiping and Wei, Yichen},
  title     = {Simple baselines for human pose estimation and tracking},
  booktitle = {Proceedings of the European conference on computer vision (ECCV)},
  pages     = {466--481},
  year      = {2018}
}
COCO-WholeBody (ECCV'2020)
@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020}
}

Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset

Arch Input Size Body AP Body AR Foot AP Foot AR Face AP Face AR Hand AP Hand AR Whole AP Whole AR ckpt log
pose_resnet_50 256x192 0.652 0.739 0.614 0.746 0.608 0.716 0.460 0.584 0.457 0.578 ckpt log
pose_resnet_50 384x288 0.666 0.747 0.635 0.763 0.732 0.812 0.537 0.647 0.573 0.671 ckpt log
pose_resnet_101 256x192 0.670 0.754 0.640 0.767 0.611 0.723 0.463 0.589 0.533 0.647 ckpt log
pose_resnet_101 384x288 0.692 0.770 0.680 0.798 0.747 0.822 0.549 0.658 0.597 0.692 ckpt log
pose_resnet_152 256x192 0.682 0.764 0.662 0.788 0.624 0.728 0.482 0.606 0.548 0.661 ckpt log
pose_resnet_152 384x288 0.703 0.780 0.693 0.813 0.751 0.825 0.559 0.667 0.610 0.705 ckpt log



DeepPose (CVPR’2014)


Deeppose + Resnet on Coco

DeepPose (CVPR'2014)
@inproceedings{toshev2014deeppose,
  author    = {Toshev, Alexander and Szegedy, Christian},
  title     = {{DeepPose}: Human Pose Estimation via Deep Neural Networks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {1653--1660},
  year      = {2014}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016}
}
COCO (ECCV'2014)
@inproceedings{lin2014microsoft,
  author       = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C. Lawrence},
  title        = {Microsoft {COCO}: Common Objects in Context},
  booktitle    = {European conference on computer vision},
  pages        = {740--755},
  year         = {2014},
  organization = {Springer}
}

Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset

Arch Input Size AP AP50 AP75 AR AR50 ckpt log
deeppose_resnet_50 256x192 0.526 0.816 0.586 0.638 0.887 ckpt log
deeppose_resnet_101 256x192 0.560 0.832 0.628 0.668 0.900 ckpt log
deeppose_resnet_152 256x192 0.583 0.843 0.659 0.686 0.907 ckpt log

Deeppose + Resnet on Mpii

DeepPose (CVPR'2014)
@inproceedings{toshev2014deeppose,
  author    = {Toshev, Alexander and Szegedy, Christian},
  title     = {{DeepPose}: Human Pose Estimation via Deep Neural Networks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {1653--1660},
  year      = {2014}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016}
}
MPII (CVPR'2014)
@inproceedings{andriluka14cvpr,
  author    = {Andriluka, Mykhaylo and Pishchulin, Leonid and Gehler, Peter and Schiele, Bernt},
  title     = {{2D} Human Pose Estimation: New Benchmark and State of the Art Analysis},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2014}
}

Results on MPII val set

Arch Input Size Mean Mean@0.1 ckpt log
deeppose_resnet_50 256x256 0.825 0.174 ckpt log
deeppose_resnet_101 256x256 0.841 0.193 ckpt log
deeppose_resnet_152 256x256 0.850 0.198 ckpt log

Deeppose + Resnet + Wingloss on WFLW

DeepPose (CVPR'2014)
@inproceedings{toshev2014deeppose,
  author    = {Toshev, Alexander and Szegedy, Christian},
  title     = {{DeepPose}: Human Pose Estimation via Deep Neural Networks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {1653--1660},
  year      = {2014}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016}
}
Wingloss (CVPR'2018)
@inproceedings{feng2018wing,
  author       = {Feng, Zhen-Hua and Kittler, Josef and Awais, Muhammad and Huber, Patrik and Wu, Xiao-Jun},
  title        = {Wing Loss for Robust Facial Landmark Localisation with Convolutional Neural Networks},
  booktitle    = {Computer Vision and Pattern Recognition (CVPR), 2018 IEEE Conference on},
  pages        = {2235--2245},
  year         = {2018},
  organization = {IEEE}
}
WFLW (CVPR'2018)
@inproceedings{wu2018look,
  author    = {Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
  title     = {Look at boundary: A boundary-aware face alignment algorithm},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {2129--2138},
  year      = {2018}
}

Results on WFLW dataset

The model is trained on WFLW train.

Arch Input Size NMEtest NMEpose NMEillumination NMEocclusion NMEblur NMEmakeup NMEexpression ckpt log
deeppose_res50_wingloss 256x256 4.64 8.25 4.59 5.56 5.26 4.59 5.07 ckpt log

Deeppose + Resnet on WFLW

DeepPose (CVPR'2014)
@inproceedings{toshev2014deeppose,
  author    = {Toshev, Alexander and Szegedy, Christian},
  title     = {{DeepPose}: Human Pose Estimation via Deep Neural Networks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {1653--1660},
  year      = {2014}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016}
}
WFLW (CVPR'2018)
@inproceedings{wu2018look,
  author    = {Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
  title     = {Look at boundary: A boundary-aware face alignment algorithm},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {2129--2138},
  year      = {2018}
}

Results on WFLW dataset

The model is trained on WFLW train.

Arch Input Size NMEtest NMEpose NMEillumination NMEocclusion NMEblur NMEmakeup NMEexpression ckpt log
deeppose_res50 256x256 4.85 8.50 4.81 5.69 5.45 4.82 5.20 ckpt log

Deeppose + Resnet on Deepfashion

DeepPose (CVPR'2014)
@inproceedings{toshev2014deeppose,
  author    = {Toshev, Alexander and Szegedy, Christian},
  title     = {{DeepPose}: Human Pose Estimation via Deep Neural Networks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {1653--1660},
  year      = {2014}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016}
}
DeepFashion (CVPR'2016)
@inproceedings{liuLQWTcvpr16DeepFashion,
  author    = {Liu, Ziwei and Luo, Ping and Qiu, Shi and Wang, Xiaogang and Tang, Xiaoou},
  title     = {{DeepFashion}: Powering Robust Clothes Recognition and Retrieval with Rich Annotations},
  booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2016}
}
DeepFashion (ECCV'2016)
@inproceedings{liuYLWTeccv16FashionLandmark,
  author    = {Liu, Ziwei and Yan, Sijie and Luo, Ping and Wang, Xiaogang and Tang, Xiaoou},
  title     = {Fashion Landmark Detection in the Wild},
  booktitle = {European Conference on Computer Vision (ECCV)},
  month     = oct,
  year      = {2016}
}

Results on DeepFashion val set

Set Arch Input Size PCK@0.2 AUC EPE ckpt log
upper deeppose_resnet_50 256x256 0.965 0.535 17.2 ckpt log
lower deeppose_resnet_50 256x256 0.971 0.678 11.8 ckpt log
full deeppose_resnet_50 256x256 0.983 0.602 14.0 ckpt log

Deeppose + Resnet on Onehand10k

DeepPose (CVPR'2014)
@inproceedings{toshev2014deeppose,
  title={{DeepPose}: Human Pose Estimation via Deep Neural Networks},
  author={Toshev, Alexander and Szegedy, Christian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={1653--1660},
  year={2014}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778},
}
OneHand10K (TCSVT'2019)
@article{wang2018mask,
  title={Mask-Pose Cascaded {CNN} for {2D} Hand Pose Estimation from Single Color Image},
  author={Wang, Yangang and Peng, Cong and Liu, Yebin},
  journal={IEEE Transactions on Circuits and Systems for Video Technology},
  volume={29},
  number={11},
  pages={3258--3268},
  year={2019},
  publisher={IEEE}
}

Results on OneHand10K val set

Arch Input Size PCK@0.2 AUC EPE ckpt log
deeppose_resnet_50 256x256 0.990 0.486 34.28 ckpt log

Deeppose + Resnet on Panoptic2d

DeepPose (CVPR'2014)
@inproceedings{toshev2014deeppose,
  title={{DeepPose}: Human Pose Estimation via Deep Neural Networks},
  author={Toshev, Alexander and Szegedy, Christian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={1653--1660},
  year={2014}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778},
}
CMU Panoptic HandDB (CVPR'2017)
@inproceedings{simon2017hand,
  author    = {Simon, Tomas and Joo, Hanbyul and Matthews, Iain and Sheikh, Yaser},
  title     = {Hand keypoint detection in single images using multiview bootstrapping},
  booktitle = {Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
  year      = {2017},
  pages     = {1145--1153},
}

Results on CMU Panoptic (MPII+NZSL val set)

Arch Input Size PCKh@0.7 AUC EPE ckpt log
deeppose_resnet_50 256x256 0.999 0.686 9.36 ckpt log

Deeppose + Resnet on Rhd2d

DeepPose (CVPR'2014)
@inproceedings{toshev2014deeppose,
  title={{DeepPose}: Human Pose Estimation via Deep Neural Networks},
  author={Toshev, Alexander and Szegedy, Christian},
  booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages={1653--1660},
  year={2014}
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778},
}
RHD (ICCV'2017)
@inproceedings{zb2017hand,
  author={Zimmermann, Christian and Brox, Thomas},
  title={Learning to Estimate {3D} Hand Pose from Single {RGB} Images},
  booktitle={IEEE International Conference on Computer Vision (ICCV)},
  year={2017},
  eprint={1705.01389},
  archivePrefix={arXiv},
  url={https://lmb.informatik.uni-freiburg.de/projects/hand3d/}
}

Results on RHD test set

Arch Input Size PCK@0.2 AUC EPE ckpt log
deeppose_resnet_50 256x256 0.988 0.865 3.29 ckpt log



HMR (CVPR’2018)


HMR + Resnet on Mixed

HMR (CVPR'2018)
@inproceedings{kanazawaHMR18,
  author    = {Kanazawa, Angjoo and Black, Michael J. and Jacobs, David W. and Malik, Jitendra},
  title     = {End-to-end Recovery of Human Shape and Pose},
  booktitle = {Computer Vision and Pattern Recognition (CVPR)},
  year      = {2018},
}
ResNet (CVPR'2016)
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778},
}
Human3.6M (TPAMI'2014)
@article{h36m_pami,
  author = {Ionescu, Catalin and Papava, Dragos and Olaru, Vlad and Sminchisescu, Cristian},
  title = {{Human3.6M}: Large Scale Datasets and Predictive Methods for {3D} Human Sensing in Natural Environments},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  publisher = {IEEE Computer Society},
  volume = {36},
  number = {7},
  pages = {1325--1339},
  month = jul,
  year = {2014}
}

Results on Human3.6M with ground-truth bounding boxes, achieving an MPJPE-PA of 52.60 mm on Protocol 2

Arch Input Size MPJPE (P1) MPJPE-PA (P1) MPJPE (P2) MPJPE-PA (P2) ckpt log
hmr_resnet_50 224x224 80.75 55.08 80.35 52.60 ckpt log