# Source code for mmpose.models.detectors.interhand_3d

# Copyright (c) OpenMMLab. All rights reserved.
import mmcv
import numpy as np
from mmcv.utils.misc import deprecated_api_warning

from mmpose.core import imshow_keypoints, imshow_keypoints_3d
from ..builder import POSENETS
from .top_down import TopDown


@POSENETS.register_module()
class Interhand3D(TopDown):
    """Top-down interhand 3D pose detector of paper ref: Gyeongsik Moon.

    "InterHand2.6M: A Dataset and Baseline for 3D Interacting Hand Pose
    Estimation from a Single RGB Image".

    A child class of TopDown detector.
    """

    def forward(self,
                img,
                target=None,
                target_weight=None,
                img_metas=None,
                return_loss=True,
                **kwargs):
        """Call either forward_train or forward_test depending on
        ``return_loss``.

        Note this setting will change the expected inputs. When
        ``return_loss=True``, img and img_meta are single-nested (i.e.
        Tensor and List[dict]), and when ``return_loss=False``, img and
        img_meta should be double nested (i.e. list[Tensor],
        list[list[dict]]), with the outer list indicating test time
        augmentations.

        Note:
            - batch_size: N
            - num_keypoints: K
            - num_img_channel: C (Default: 3)
            - img height: imgH
            - img width: imgW
            - heatmaps height: H
            - heatmaps weight: W

        Args:
            img (torch.Tensor[NxCximgHximgW]): Input images.
            target (list[torch.Tensor]): Target heatmaps, relative hand
                root depth and hand type.
            target_weight (list[torch.Tensor]): Weights for target
                heatmaps, relative hand root depth and hand type.
            img_metas (list(dict)): Information about data augmentation
                By default this includes:

                - "image_file": path to the image file
                - "center": center of the bbox
                - "scale": scale of the bbox
                - "rotation": rotation of the bbox
                - "bbox_score": score of bbox
                - "heatmap3d_depth_bound": depth bound of hand keypoint
                  3D heatmap
                - "root_depth_bound": depth bound of relative root depth
                  1D heatmap
            return_loss (bool): Option to `return loss`. `return loss=True`
                for training, `return loss=False` for validation & test.

        Returns:
            dict|tuple: if `return_loss` is true, then return losses. \
                Otherwise, return predicted poses, boxes, image paths, \
                heatmaps, relative hand root depth and hand type.
        """
        if return_loss:
            return self.forward_train(img, target, target_weight, img_metas,
                                      **kwargs)
        return self.forward_test(img, img_metas, **kwargs)

    def forward_test(self, img, img_metas, **kwargs):
        """Defines the computation performed at every call when testing."""
        assert img.size(0) == len(img_metas)
        batch_size, _, img_height, img_width = img.shape
        if batch_size > 1:
            # bbox_id is required to disambiguate multiple boxes per image
            # when aggregating batched results downstream.
            assert 'bbox_id' in img_metas[0]
        features = self.backbone(img)
        if self.with_neck:
            features = self.neck(features)
        if self.with_keypoint:
            output = self.keypoint_head.inference_model(
                features, flip_pairs=None)

        if self.test_cfg.get('flip_test', True):
            # Test-time augmentation: run the horizontally flipped image
            # through the model and average the two predictions.
            img_flipped = img.flip(3)
            features_flipped = self.backbone(img_flipped)
            if self.with_neck:
                features_flipped = self.neck(features_flipped)
            if self.with_keypoint:
                output_flipped = self.keypoint_head.inference_model(
                    features_flipped, img_metas[0]['flip_pairs'])
                output = [(out + out_flipped) * 0.5
                          for out, out_flipped in zip(output, output_flipped)]

        if self.with_keypoint:
            result = self.keypoint_head.decode(
                img_metas, output, img_size=[img_width, img_height])
        else:
            result = {}
        return result

    @deprecated_api_warning({'pose_limb_color': 'pose_link_color'},
                            cls_name='Interhand3D')
    def show_result(self,
                    result,
                    img=None,
                    skeleton=None,
                    kpt_score_thr=0.3,
                    radius=8,
                    bbox_color='green',
                    thickness=2,
                    pose_kpt_color=None,
                    pose_link_color=None,
                    vis_height=400,
                    num_instances=-1,
                    axis_azimuth=-115,
                    win_name='',
                    show=False,
                    wait_time=0,
                    out_file=None):
        """Visualize 3D pose estimation results.

        Args:
            result (list[dict]): The pose estimation results containing:

                - "keypoints_3d" ([K,4]): 3D keypoints
                - "keypoints" ([K,3] or [T,K,3]): Optional for visualizing
                  2D inputs. If a sequence is given, only the last frame
                  will be used for visualization
                - "bbox" ([4,] or [T,4]): Optional for visualizing 2D inputs
                - "title" (str): title for the subplot
            img (str or Tensor): Optional. The image to visualize 2D inputs
                on.
            skeleton (list of [idx_i,idx_j]): Skeleton described by a list
                of links, each is a pair of joint indices.
            kpt_score_thr (float, optional): Minimum score of keypoints to
                be shown. Default: 0.3.
            radius (int): Radius of circles.
            bbox_color (str or tuple or :obj:`Color`): Color of bbox lines.
            thickness (int): Thickness of lines.
            pose_kpt_color (np.array[Nx3]): Color of N keypoints. If None,
                do not draw keypoints.
            pose_link_color (np.array[Mx3]): Color of M limbs. If None, do
                not draw limbs.
            vis_height (int): The image height of the visualization. The
                width will be N*vis_height depending on the number of
                visualized items.
            num_instances (int): Number of instances to be shown in 3D. If
                smaller than 0, all the instances in the pose_result will
                be shown. Otherwise, pad or truncate the pose_result to a
                length of num_instances.
            axis_azimuth (float): axis azimuth angle for 3D visualizations.
            win_name (str): The window name.
            show (bool): Whether to show the image. Default: False.
            wait_time (int): Value of waitKey param. Default: 0.
            out_file (str or None): The filename to write the image.
                Default: None.

        Returns:
            Tensor: Visualized img, only if not `show` or `out_file`.
        """
        if num_instances < 0:
            assert len(result) > 0
        result = sorted(result, key=lambda x: x.get('track_id', 0))

        # draw image and 2d poses
        if img is not None:
            img = mmcv.imread(img)

            bbox_result = []
            pose_2d = []
            for res in result:
                if 'bbox' in res:
                    bbox = np.array(res['bbox'])
                    if bbox.ndim != 1:
                        assert bbox.ndim == 2
                        bbox = bbox[-1]  # Get bbox from the last frame
                    bbox_result.append(bbox)
                if 'keypoints' in res:
                    kpts = np.array(res['keypoints'])
                    if kpts.ndim != 2:
                        assert kpts.ndim == 3
                        kpts = kpts[-1]  # Get 2D keypoints from the last
                        # frame
                    pose_2d.append(kpts)

            if len(bbox_result) > 0:
                bboxes = np.vstack(bbox_result)
                mmcv.imshow_bboxes(
                    img,
                    bboxes,
                    colors=bbox_color,
                    top_k=-1,
                    # Fix: honor the `thickness` argument instead of the
                    # previously hard-coded value 2 (default is still 2,
                    # so existing callers are unaffected).
                    thickness=thickness,
                    show=False)
            if len(pose_2d) > 0:
                imshow_keypoints(
                    img,
                    pose_2d,
                    skeleton,
                    kpt_score_thr=kpt_score_thr,
                    pose_kpt_color=pose_kpt_color,
                    pose_link_color=pose_link_color,
                    radius=radius,
                    thickness=thickness)
            img = mmcv.imrescale(img, scale=vis_height / img.shape[0])

        img_vis = imshow_keypoints_3d(
            result,
            img,
            skeleton,
            pose_kpt_color,
            pose_link_color,
            vis_height,
            axis_limit=300,
            axis_azimuth=axis_azimuth,
            axis_elev=15,
            kpt_score_thr=kpt_score_thr,
            num_instances=num_instances)

        if show:
            mmcv.visualization.imshow(img_vis, win_name, wait_time)

        if out_file is not None:
            mmcv.imwrite(img_vis, out_file)

        return img_vis