Algorithms

@inproceedings{newell2017associative,
  author    = {Newell, Alejandro and Huang, Zhiao and Deng, Jia},
  title     = {Associative embedding: End-to-end learning for joint detection and grouping},
  booktitle = {Advances in neural information processing systems},
  year      = {2017},
  pages     = {2277--2287},
}

HigherHRNet (CVPR'2020) and AI Challenger (ArXiv'2017)

@inproceedings{cheng2020higherhrnet,
  author    = {Cheng, Bowen and Xiao, Bin and Wang, Jingdong and Shi, Honghui and Huang, Thomas S and Zhang, Lei},
  title     = {{HigherHRNet}: Scale-Aware Representation Learning for Bottom-Up Human Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {5386--5395},
}

@article{wu2017ai,
  author  = {Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
  title   = {{AI} {Challenger}: A large-scale dataset for going deeper in image understanding},
  journal = {arXiv preprint arXiv:1711.06475},
  year    = {2017},
}

Results on AIC validation set without multi-scale test

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
HigherHRNet-w32	512x512	0.315	0.710	0.243	0.379	0.757	ckpt	log

Results on AIC validation set with multi-scale test. 3 default scales ([2, 1, 0.5]) are used

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
HigherHRNet-w32	512x512	0.323	0.718	0.254	0.379	0.758	ckpt	log

Associative Embedding + HRNet on AIC

@inproceedings{newell2017associative,
  author    = {Newell, Alejandro and Huang, Zhiao and Deng, Jia},
  title     = {Associative embedding: End-to-end learning for joint detection and grouping},
  booktitle = {Advances in neural information processing systems},
  year      = {2017},
  pages     = {2277--2287},
}

HRNet (CVPR'2019) and AI Challenger (ArXiv'2017)

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703},
}

@article{wu2017ai,
  author  = {Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
  title   = {{AI} {Challenger}: A large-scale dataset for going deeper in image understanding},
  journal = {arXiv preprint arXiv:1711.06475},
  year    = {2017},
}

Results on AIC validation set without multi-scale test

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
HRNet-w32	512x512	0.303	0.697	0.225	0.373	0.755	ckpt	log

Results on AIC validation set with multi-scale test. 3 default scales ([2, 1, 0.5]) are used

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
HRNet-w32	512x512	0.318	0.717	0.246	0.379	0.764	ckpt	log

Associative Embedding + HigherHRNet on COCO

@inproceedings{newell2017associative,
  author    = {Newell, Alejandro and Huang, Zhiao and Deng, Jia},
  title     = {Associative embedding: End-to-end learning for joint detection and grouping},
  booktitle = {Advances in neural information processing systems},
  year      = {2017},
  pages     = {2277--2287},
}

@inproceedings{cheng2020higherhrnet,
  author    = {Cheng, Bowen and Xiao, Bin and Wang, Jingdong and Shi, Honghui and Huang, Thomas S and Zhang, Lei},
  title     = {{HigherHRNet}: Scale-Aware Representation Learning for Bottom-Up Human Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {5386--5395},
}

@inproceedings{lin2014microsoft,
  author       = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title        = {Microsoft {COCO}: Common objects in context},
  booktitle    = {European conference on computer vision},
  year         = {2014},
  pages        = {740--755},
  organization = {Springer},
}

Results on COCO val2017 without multi-scale test

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
HigherHRNet-w32	512x512	0.677	0.870	0.738	0.723	0.890	ckpt	log
HigherHRNet-w32	640x640	0.686	0.871	0.747	0.733	0.898	ckpt	log
HigherHRNet-w48	512x512	0.686	0.873	0.741	0.731	0.892	ckpt	log

Results on COCO val2017 with multi-scale test. 3 default scales ([2, 1, 0.5]) are used

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
HigherHRNet-w32	512x512	0.706	0.881	0.771	0.747	0.901	ckpt	log
HigherHRNet-w32	640x640	0.706	0.880	0.770	0.749	0.902	ckpt	log
HigherHRNet-w48	512x512	0.716	0.884	0.775	0.755	0.901	ckpt	log

Associative Embedding + HRNet + UDP on COCO

@inproceedings{newell2017associative,
  author    = {Newell, Alejandro and Huang, Zhiao and Deng, Jia},
  title     = {Associative embedding: End-to-end learning for joint detection and grouping},
  booktitle = {Advances in neural information processing systems},
  year      = {2017},
  pages     = {2277--2287},
}

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703},
}

UDP (CVPR'2020)

@inproceedings{Huang_2020_CVPR,
  author    = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
  title     = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
  booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2020},
}

@inproceedings{lin2014microsoft,
  author       = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title        = {Microsoft {COCO}: Common objects in context},
  booktitle    = {European conference on computer vision},
  year         = {2014},
  pages        = {740--755},
  organization = {Springer},
}

Results on COCO val2017 without multi-scale test

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
HRNet-w32_udp	512x512	0.671	0.863	0.729	0.717	0.889	ckpt	log
HRNet-w48_udp	512x512	0.681	0.872	0.741	0.725	0.892	ckpt	log

Associative Embedding + ResNet on COCO

@inproceedings{newell2017associative,
  author    = {Newell, Alejandro and Huang, Zhiao and Deng, Jia},
  title     = {Associative embedding: End-to-end learning for joint detection and grouping},
  booktitle = {Advances in neural information processing systems},
  year      = {2017},
  pages     = {2277--2287},
}

ResNet (CVPR'2016)

@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778},
}

@inproceedings{lin2014microsoft,
  author       = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title        = {Microsoft {COCO}: Common objects in context},
  booktitle    = {European conference on computer vision},
  year         = {2014},
  pages        = {740--755},
  organization = {Springer},
}

Results on COCO val2017 without multi-scale test

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_resnet_50	512x512	0.466	0.742	0.479	0.552	0.797	ckpt	log
pose_resnet_50	640x640	0.479	0.757	0.487	0.566	0.810	ckpt	log
pose_resnet_101	512x512	0.554	0.807	0.599	0.622	0.841	ckpt	log
pose_resnet_152	512x512	0.595	0.829	0.648	0.651	0.856	ckpt	log

Results on COCO val2017 with multi-scale test. 3 default scales ([2, 1, 0.5]) are used

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_resnet_50	512x512	0.503	0.765	0.521	0.591	0.821	ckpt	log
pose_resnet_50	640x640	0.525	0.784	0.542	0.610	0.832	ckpt	log
pose_resnet_101	512x512	0.603	0.831	0.641	0.668	0.870	ckpt	log
pose_resnet_152	512x512	0.660	0.860	0.713	0.709	0.889	ckpt	log

Associative Embedding + MobilenetV2 on COCO

@inproceedings{newell2017associative,
  author    = {Newell, Alejandro and Huang, Zhiao and Deng, Jia},
  title     = {Associative embedding: End-to-end learning for joint detection and grouping},
  booktitle = {Advances in neural information processing systems},
  year      = {2017},
  pages     = {2277--2287},
}

MobilenetV2 (CVPR'2018)

@inproceedings{sandler2018mobilenetv2,
  author    = {Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
  title     = {{MobileNetV2}: Inverted residuals and linear bottlenecks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2018},
  pages     = {4510--4520},
}

@inproceedings{lin2014microsoft,
  author       = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title        = {Microsoft {COCO}: Common objects in context},
  booktitle    = {European conference on computer vision},
  year         = {2014},
  pages        = {740--755},
  organization = {Springer},
}

Results on COCO val2017 without multi-scale test

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_mobilenetv2	512x512	0.380	0.671	0.368	0.473	0.741	ckpt	log

Results on COCO val2017 with multi-scale test. 3 default scales ([2, 1, 0.5]) are used

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
pose_mobilenetv2	512x512	0.442	0.696	0.422	0.517	0.766	ckpt	log

Associative Embedding + HigherHRNet + UDP on COCO

@inproceedings{newell2017associative,
  author    = {Newell, Alejandro and Huang, Zhiao and Deng, Jia},
  title     = {Associative embedding: End-to-end learning for joint detection and grouping},
  booktitle = {Advances in neural information processing systems},
  year      = {2017},
  pages     = {2277--2287},
}

@inproceedings{cheng2020higherhrnet,
  author    = {Cheng, Bowen and Xiao, Bin and Wang, Jingdong and Shi, Honghui and Huang, Thomas S and Zhang, Lei},
  title     = {{HigherHRNet}: Scale-Aware Representation Learning for Bottom-Up Human Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {5386--5395},
}

UDP (CVPR'2020)

@inproceedings{Huang_2020_CVPR,
  author    = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
  title     = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
  booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2020},
}

@inproceedings{lin2014microsoft,
  author       = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title        = {Microsoft {COCO}: Common objects in context},
  booktitle    = {European conference on computer vision},
  year         = {2014},
  pages        = {740--755},
  organization = {Springer},
}

Results on COCO val2017 without multi-scale test

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
HigherHRNet-w32_udp	512x512	0.678	0.862	0.736	0.724	0.890	ckpt	log
HigherHRNet-w48_udp	512x512	0.690	0.872	0.750	0.734	0.891	ckpt	log

Associative Embedding + HRNet on COCO

@inproceedings{newell2017associative,
  author    = {Newell, Alejandro and Huang, Zhiao and Deng, Jia},
  title     = {Associative embedding: End-to-end learning for joint detection and grouping},
  booktitle = {Advances in neural information processing systems},
  year      = {2017},
  pages     = {2277--2287},
}

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703},
}

@inproceedings{lin2014microsoft,
  author       = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title        = {Microsoft {COCO}: Common objects in context},
  booktitle    = {European conference on computer vision},
  year         = {2014},
  pages        = {740--755},
  organization = {Springer},
}

Results on COCO val2017 without multi-scale test

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
HRNet-w32	512x512	0.654	0.863	0.720	0.710	0.892	ckpt	log
HRNet-w48	512x512	0.665	0.860	0.727	0.716	0.889	ckpt	log

Results on COCO val2017 with multi-scale test. 3 default scales ([2, 1, 0.5]) are used

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
HRNet-w32	512x512	0.698	0.877	0.760	0.748	0.907	ckpt	log
HRNet-w48	512x512	0.712	0.880	0.771	0.757	0.909	ckpt	log

Associative Embedding + HigherHRNet on CrowdPose

@inproceedings{newell2017associative,
  author    = {Newell, Alejandro and Huang, Zhiao and Deng, Jia},
  title     = {Associative embedding: End-to-end learning for joint detection and grouping},
  booktitle = {Advances in neural information processing systems},
  year      = {2017},
  pages     = {2277--2287},
}

@inproceedings{cheng2020higherhrnet,
  author    = {Cheng, Bowen and Xiao, Bin and Wang, Jingdong and Shi, Honghui and Huang, Thomas S and Zhang, Lei},
  title     = {{HigherHRNet}: Scale-Aware Representation Learning for Bottom-Up Human Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {5386--5395},
}

CrowdPose (CVPR'2019)

@article{li2018crowdpose,
  author  = {Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
  title   = {{CrowdPose}: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
  journal = {arXiv preprint arXiv:1812.00324},
  year    = {2018},
}

Results on CrowdPose test without multi-scale test

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AP (E)	AP (M)	AP (H)	ckpt	log
HigherHRNet-w32	512x512	0.655	0.859	0.705	0.728	0.660	0.577	ckpt	log

Results on CrowdPose test with multi-scale test. 2 scales ([2, 1]) are used

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AP (E)	AP (M)	AP (H)	ckpt	log
HigherHRNet-w32	512x512	0.661	0.864	0.710	0.742	0.670	0.566	ckpt	log

Associative Embedding + HRNet on MHP

@inproceedings{newell2017associative,
  author    = {Newell, Alejandro and Huang, Zhiao and Deng, Jia},
  title     = {Associative embedding: End-to-end learning for joint detection and grouping},
  booktitle = {Advances in neural information processing systems},
  year      = {2017},
  pages     = {2277--2287},
}

@inproceedings{sun2019deep,
  author    = {Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
  title     = {Deep high-resolution representation learning for human pose estimation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2019},
  pages     = {5693--5703},
}

MHP (ACM MM'2018)

@inproceedings{zhao2018understanding,
  author    = {Zhao, Jian and Li, Jianshu and Cheng, Yu and Sim, Terence and Yan, Shuicheng and Feng, Jiashi},
  title     = {Understanding humans in crowded scenes: Deep nested adversarial learning and a new benchmark for multi-human parsing},
  booktitle = {Proceedings of the 26th ACM international conference on Multimedia},
  year      = {2018},
  pages     = {792--800},
}

Results on MHP v2.0 validation set without multi-scale test

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
HRNet-w48	512x512	0.583	0.895	0.666	0.656	0.931	ckpt	log

Results on MHP v2.0 validation set with multi-scale test. 3 default scales ([2, 1, 0.5]) are used

Arch	Input Size	AP	AP⁵⁰	AP⁷⁵	AR	AR⁵⁰	ckpt	log
HRNet-w48	512x512	0.592	0.898	0.673	0.664	0.932	ckpt	log

Associative Embedding + HigherHRNet on COCO-WholeBody

@inproceedings{newell2017associative,
  author    = {Newell, Alejandro and Huang, Zhiao and Deng, Jia},
  title     = {Associative embedding: End-to-end learning for joint detection and grouping},
  booktitle = {Advances in neural information processing systems},
  year      = {2017},
  pages     = {2277--2287},
}

HigherHRNet (CVPR'2020) and COCO-WholeBody (ECCV'2020)

@inproceedings{cheng2020higherhrnet,
  author    = {Cheng, Bowen and Xiao, Bin and Wang, Jingdong and Shi, Honghui and Huang, Thomas S and Zhang, Lei},
  title     = {{HigherHRNet}: Scale-Aware Representation Learning for Bottom-Up Human Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2020},
  pages     = {5386--5395},
}

@inproceedings{jin2020whole,
  author    = {Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
  title     = {Whole-Body Human Pose Estimation in the Wild},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020},
}

Results on COCO-WholeBody v1.0 val without multi-scale test

Arch	Input Size	Body AP	Body AR	Foot AP	Foot AR	Face AP	Face AR	Hand AP	Hand AR	Whole AP	Whole AR	ckpt	log
HigherHRNet-w32+	512x512	0.590	0.672	0.185	0.335	0.676	0.721	0.212	0.298	0.401	0.493	ckpt	log
HigherHRNet-w48+	512x512	0.630	0.706	0.440	0.573	0.730	0.777	0.389	0.477	0.487	0.574	ckpt	log

Note: + means the model is first pre-trained on original COCO dataset, and then fine-tuned on COCO-WholeBody dataset. We find this will lead to better performance.

Associative Embedding + HRNet on COCO-WholeBody

@inproceedings{newell2017associative,
  author    = {Newell, Alejandro and Huang, Zhiao and Deng, Jia},
  title     = {Associative embedding: End-to-end learning for joint detection and grouping},
  booktitle = {Advances in neural information processing systems},
  year      = {2017},
  pages     = {2277--2287},
}