├── utils ├── README.md ├── __init__.py └── ai_utils.py ├── data_lib ├── __init__.py ├── dataset_preprocess │ ├── __init__.py │ ├── count_mean_std.py │ ├── gen_dataset_thumbnail.py │ └── gen_dataset_txt.py ├── dataset_tools.py ├── dataset_convert │ ├── coco-annotator_2_coco-mmpose.py │ ├── eric_yolov3_2_coco-mmdet.py │ └── coco-annotator_2_coco-mmdet.py └── dataset_vis │ └── coco_detect_vis.py ├── hand_lib ├── __init__.py ├── hand_mesh │ ├── __init__.py │ └── minimal_hands │ │ ├── __init__.py │ │ ├── ik_model.py │ │ └── kinematics.py ├── hand_detector │ ├── hand_detecotr_21kp │ │ └── __init__.py │ ├── hand_detector_d2 │ │ ├── __init__.py │ │ └── hand_detector_d2_api.py │ ├── hand_detector_mediapipe │ │ ├── __init__.py │ │ └── hand_detector_mediapipe.py │ ├── hand_detector_yolox │ │ ├── __init__.py │ │ └── hand_detector_yolox.py │ └── __init__.py ├── README.md └── hand_detect_and_estimate.py ├── mocap_lib ├── calibration │ ├── __init__.py │ ├── example_boards │ │ ├── charuco_9x16.yaml │ │ ├── intrinsic │ │ │ ├── charuco_A1_44_intri_1.yaml │ │ │ ├── charuco_A1_88_intri_4.yaml │ │ │ └── charuco_A1_88_intri_1.yaml │ │ ├── charuco_7x5.yaml │ │ ├── charuco_A1_44.yaml │ │ └── charuco_A1_88.yaml │ ├── calibration_by_multical.sh │ ├── gopro_wifi_reader.py │ └── calibration_w_human.py ├── middleware │ ├── __init__.py │ └── VMCApi.py ├── visualize │ ├── __init__.py │ └── poseviz_demo │ │ ├── __init__.py │ │ └── holistic_demo.py ├── body_wholebody │ ├── __init__.py │ ├── mediapipe_holistic.py │ └── wholebody_kp_detector_mmpose.py ├── triangulate │ ├── utils │ │ └── __init__.py │ ├── __init__.py │ └── anipose_triangulate.py ├── bbox_tracking │ ├── __init__.py │ └── bbox_tracking.py ├── smooth_filter │ ├── __init__.py │ ├── smooth_filter.py │ └── one_euro_api.py ├── __init__.py ├── skeleton_transfer │ ├── __init__.py │ ├── openpose_lib.py │ └── keypoints_map.py ├── body_regress │ └── spin_onnx.py ├── README.md └── get_body_bbox_kps.py ├── body_lib ├── body_kp_detector │ ├── __init__.py │ ├── body_detector_lightweight │ │ ├── __init__.py │ │ └── body_detector_lightweight_api.py │ ├── body_detector_movenet │ │ ├── __init__.py │ │ └── movenet_api_onnx.py │ ├── blazepose_mediapipe │ │ ├── __init__.py │ │ ├── pose_landmark_origin.py │ │ ├── body_bbox_detector.py │ │ └── pose_landmark_yolox.py │ ├── kp_detector_mmpose.py │ └── body_kp_detector_kapao │ │ └── body_kp_detector_kapao.py ├── __init__.py └── body_bbox_detector │ ├── __init__.py │ └── body_bbox_detector_mmdet.py ├── art_lib ├── style_transfer │ └── dct_net │ │ ├── __init__.py │ │ ├── utils.py │ │ └── dct_net.py ├── talking_head │ ├── __init__.py │ ├── wav2lip │ │ └── audio_encoder.py │ └── sadtalker │ │ └── audio_2_pose.py ├── README.md ├── inpainting │ └── lama.py └── optical_flow_estimate │ └── raft │ ├── raft_api.py │ └── utils.py ├── sd_lib ├── prompt2prompt │ └── __init__.py ├── ip_adapter │ ├── models │ │ ├── __init__.py │ │ └── resampler.py │ └── ip_adapter_api.py ├── README.md ├── controlnet │ ├── utils.py │ └── controlnet_api.py ├── tagger │ └── tagger_api.py ├── inversion_api.py └── clip_encoder.py ├── requirements.txt ├── gpt_lib ├── code_example │ ├── huggingface_demo.py │ ├── chatglm6b_demo.py │ ├── openai_gpt3_demo.py │ └── openai_azure_demo.py ├── langchain │ ├── utils.py │ └── model_config.py ├── models │ ├── llm_base.py │ ├── llama.py │ └── chatglm_6b.py ├── textsplitter │ └── chinese_text_splitter.py ├── chatglm6b_finetune │ ├── tokenize_dataset_rows.py │ └── finetune.py └── lora_finetune │ └── chatglm6b_lora_deepspeed.py ├── 
audio_lib ├── tts │ └── bark_example.py └── svc │ └── sovits_infer.py ├── seg_lib ├── README.md ├── carvekit │ └── carvekit_api.py ├── segformer_b2_clothes │ └── segformer_api.py ├── ppmattingv2 │ └── ppmattingv2_api.py ├── cihp_pgn │ └── cihp_pgn_api.py └── u2net │ ├── u2net_api.py │ └── u2net_cloth_api.py ├── math_lib ├── affine_matrix.py ├── k_means.py ├── wrap_affine.py └── gaussian_filter.py ├── ocr_lib ├── paddle_excel.py └── paddle_ocr.py ├── .gitignore ├── SPEEDTABLE.md └── sr_lab └── realesrgan └── realesrgan_onnx_api.py /utils/README.md: -------------------------------------------------------------------------------- 1 | ### Dataset Preprocess 2 | 3 | - count imgs mean & std 4 | - generate img names from dir to txt 5 | -------------------------------------------------------------------------------- /data_lib/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/8/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /hand_lib/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/6/22 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /hand_lib/hand_mesh/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /mocap_lib/calibration/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/5/25 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /mocap_lib/middleware/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /mocap_lib/visualize/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /body_lib/body_kp_detector/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/7/21 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /mocap_lib/body_wholebody/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/7/21 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /mocap_lib/triangulate/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: 
utf-8 -- 2 | # @Time : 2022/8/8 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /art_lib/style_transfer/dct_net/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/1/17 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /hand_lib/hand_mesh/minimal_hands/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /mocap_lib/visualize/poseviz_demo/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /hand_lib/hand_detector/hand_detecotr_21kp/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/7/4 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /hand_lib/hand_detector/hand_detector_d2/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /hand_lib/hand_detector/hand_detector_mediapipe/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /hand_lib/hand_detector/hand_detector_yolox/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /body_lib/body_kp_detector/body_detector_lightweight/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /body_lib/body_kp_detector/body_detector_movenet/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /art_lib/talking_head/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/2/22 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from 
.tpsmm.tpsmm import TPSMM, KPDetector 6 | -------------------------------------------------------------------------------- /mocap_lib/bbox_tracking/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/8/15 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from .bbox_tracking import BboxTracking 6 | -------------------------------------------------------------------------------- /mocap_lib/smooth_filter/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/8/16 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from .smooth_filter import SmoothFilter 6 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2021/11/19 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from .ai_utils import MyTimer, get_path_by_ext, make_random_name 6 | -------------------------------------------------------------------------------- /data_lib/dataset_preprocess/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from gen_dataset_txt import gen_txt_from_path 6 | -------------------------------------------------------------------------------- /body_lib/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/7/21 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from .body_bbox_detector.body_bbox_detector_mmdet import BodyBboxDetector 6 | -------------------------------------------------------------------------------- /body_lib/body_bbox_detector/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/7/21 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from .body_bbox_detector_mmdet import BodyBboxDetector 6 | -------------------------------------------------------------------------------- /mocap_lib/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/3 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from .body_wholebody.wholebody_kp_detector_mmpose import BodyWholebodyDetector 6 | -------------------------------------------------------------------------------- /mocap_lib/skeleton_transfer/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/8/15 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from .cocowholebody_2_openpose import cocowb_2_openpose 6 | -------------------------------------------------------------------------------- /hand_lib/hand_detector/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from .hand_detector_yolox.hand_detector_yolox import HandDetectorYolox 
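6 | # Minimal usage sketch (mirrors hand_lib/hand_detect_and_estimate.py further below; assumes the
7 | # self-trained yolox hand weights mentioned in hand_lib/README.md are in place):
8 | #   from hand_lib.hand_detector import HandDetectorYolox
9 | #   detector = HandDetectorYolox(thres=0.3)
10 | #   hand_bboxes, vis_image = detector.forward(frame, show=True)  # frame: BGR ndarray, e.g. from CVVideoLoader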
-------------------------------------------------------------------------------- /sd_lib/prompt2prompt/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/9/4 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from .ddim_inversion import ddim_inversion, null_optimization, EmptyControl 6 | -------------------------------------------------------------------------------- /body_lib/body_kp_detector/blazepose_mediapipe/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/7/21 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | # from pose_landmark_lite_full_heavy import LandmarkDetector -------------------------------------------------------------------------------- /sd_lib/ip_adapter/models/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/8/23 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from .ip_adapter import IPAdapter, IPAdapterPlus, ImageProjModel 6 | from .resampler import Resampler 7 | -------------------------------------------------------------------------------- /mocap_lib/triangulate/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/8/8 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from .easymocap_triangulate import EasyMocapTriangulate 6 | from .anipose_triangulate import AniposeTriangulate 7 | -------------------------------------------------------------------------------- /mocap_lib/calibration/example_boards/charuco_9x16.yaml: -------------------------------------------------------------------------------- 1 | boards: 2 | charuco_9x16: 3 | _type_: charuco 4 | size: [9, 16] 5 | aruco_dict: 4X4_1000 6 | 7 | square_length: 0.0173 8 | marker_length: 0.013 9 | 10 | min_rows: 2 11 | min_points: 10 12 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cv2box 2 | apstone 3 | numpy 4 | onnxruntime 5 | numba==0.54.1 6 | nvidia_tensorrt==8.0.0.3 7 | onnx==1.10.1 8 | scipy==1.6.2 9 | torchvision==0.9.1+cu111 10 | scikit_image==0.18.3 11 | tqdm==4.61.1 12 | torch==1.8.1+cu111 13 | pycuda==2021.1 14 | matplotlib==3.4.3 15 | Pillow==8.4.0 16 | -------------------------------------------------------------------------------- /sd_lib/README.md: -------------------------------------------------------------------------------- 1 | ## StableDiffusion Lib 2 | 3 | 4 | ### CLIP encoder 5 | 6 | - [CLIP encoder](https://huggingface.co/openai/clip-vit-base-patch32) including text&image encoder 7 | 8 | ### IP-Adapter 9 | 10 | - [ip_adapter](https://github.com/tencent-ailab/IP-Adapter/blob/main/ip_adapter/ip_adapter.py) 11 | -------------------------------------------------------------------------------- /mocap_lib/calibration/example_boards/intrinsic/charuco_A1_44_intri_1.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | _type_: 'charuco' 3 | size: [4, 4] 4 | aruco_dict: '4X4_250' 5 | square_length: 0.28 6 | marker_length: 0.224 7 | 8 | min_rows: 2 9 | min_points: 4 10 | 11 | boards: 12 | charuco_A1_0: 13 | aruco_offset: 0 14 | 15 | 
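16 | # Single-board intrinsic config; board files like this are consumed by multical, e.g.
17 | #   multical intrinsic --image_path ./ --boards ./example_boards/intrinsic/charuco_A1_44_intri_1.yaml
18 | # (see mocap_lib/calibration/calibration_by_multical.sh).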
-------------------------------------------------------------------------------- /gpt_lib/code_example/huggingface_demo.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/4/17 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | hugging face use case 7 | """ 8 | 9 | from huggingface_hub import Repository 10 | repo = Repository(local_dir="/mnt/models/hugging_face/Alpaca-CoT", clone_from="QingyiSi/Alpaca-CoT") -------------------------------------------------------------------------------- /art_lib/README.md: -------------------------------------------------------------------------------- 1 | ### Inpainting 2 | 3 | - [lama](https://github.com/Sanster/lama-cleaner) 4 | 5 | ### Style Transfer 6 | 7 | - [DctNet](https://www.modelscope.cn/models/damo/cv_unet_person-image-cartoon_compound-models/summary) 8 | 9 | ### Talking Head 10 | 11 | - [SadTalker](https://github.com/OpenTalker/SadTalker) 12 | - [TPSMM](https://github.com/yoyo-nb/Thin-Plate-Spline-Motion-Model) 13 | - [Wav2lip](https://github.com/Rudrabha/Wav2Lip) -------------------------------------------------------------------------------- /mocap_lib/body_regress/spin_onnx.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/4/14 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | import torch 7 | from apstone import ONNXModel 8 | 9 | # https://github.com/nkolot/SPIN 10 | 11 | onnx_model_p = 'pretrain_models/body_regressor_spin/body_regressor_spin-eft-agora.onnx' 12 | 13 | spin = ONNXModel(onnx_model_p) 14 | print(spin.forward(torch.randn(1, 3, 224, 224).numpy())) 15 | -------------------------------------------------------------------------------- /mocap_lib/calibration/example_boards/intrinsic/charuco_A1_88_intri_4.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | _type_: 'charuco' 3 | size: [8, 8] 4 | aruco_dict: '4X4_1000' 5 | square_length: 0.07 6 | marker_length: 0.056 7 | 8 | min_rows: 1 9 | min_points: 6 10 | 11 | boards: 12 | charuco_A1_0: 13 | aruco_offset: 0 14 | 15 | charuco_A1_1: 16 | aruco_offset: 220 17 | 18 | charuco_A1_2: 19 | aruco_offset: 440 20 | 21 | charuco_A1_3: 22 | aruco_offset: 660 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /mocap_lib/calibration/example_boards/charuco_7x5.yaml: -------------------------------------------------------------------------------- 1 | boards: 2 | charuco_7x5: 3 | _type_: charuco 4 | size: [7, 5] 5 | aruco_dict: 4X4_250 6 | 7 | square_length: 0.13 8 | marker_length: 0.104 9 | 10 | min_rows: 1 11 | min_points: 6 12 | 13 | aruco_params: 14 | adaptiveThreshWinSizeMin: 3 15 | adaptiveThreshWinSizeMax: 23 16 | adaptiveThreshWinSizeStep: 1 17 | minMarkerPerimeterRate: 0.01 18 | maxMarkerPerimeterRate: 4.0 19 | perspectiveRemovePixelPerCell: 1 20 | -------------------------------------------------------------------------------- /mocap_lib/calibration/example_boards/charuco_A1_44.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | _type_: 'charuco' 3 | size: [4, 4] 4 | aruco_dict: '4X4_250' 5 | square_length: 0.28 6 | marker_length: 0.224 7 | 8 | min_rows: 2 9 | min_points: 4 10 | 11 | boards: 12 | charuco_A1_0: 13 | aruco_offset: 0 14 | 15 | charuco_A1_1: 16 | aruco_offset: 50 17 | 18 | charuco_A1_2: 19 | 
aruco_offset: 100 20 | 21 | charuco_A1_3: 22 | aruco_offset: 150 23 | 24 | charuco_A1_4: 25 | aruco_offset: 200 26 | 27 | -------------------------------------------------------------------------------- /mocap_lib/calibration/example_boards/charuco_A1_88.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | _type_: 'charuco' 3 | size: [8, 8] 4 | aruco_dict: '4X4_1000' 5 | square_length: 0.07 6 | marker_length: 0.056 7 | 8 | min_rows: 1 9 | min_points: 6 10 | 11 | boards: 12 | charuco_A1_0: 13 | aruco_offset: 0 14 | 15 | charuco_A1_1: 16 | aruco_offset: 220 17 | 18 | charuco_A1_2: 19 | aruco_offset: 440 20 | 21 | charuco_A1_3: 22 | aruco_offset: 660 23 | 24 | charuco_A1_4: 25 | aruco_offset: 880 26 | 27 | 28 | -------------------------------------------------------------------------------- /mocap_lib/calibration/example_boards/intrinsic/charuco_A1_88_intri_1.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | _type_: 'charuco' 3 | size: [8, 8] 4 | aruco_dict: '4X4_1000' 5 | square_length: 0.07 6 | marker_length: 0.056 7 | 8 | min_rows: 1 9 | min_points: 6 10 | 11 | boards: 12 | charuco_A1_0: 13 | aruco_offset: 0 14 | 15 | 16 | #aruco_params: 17 | # adaptiveThreshWinSizeMin: 3 18 | # adaptiveThreshWinSizeMax: 23 19 | # adaptiveThreshWinSizeStep: 1 20 | # minMarkerPerimeterRate: 0.01 21 | # maxMarkerPerimeterRate: 4.0 22 | # perspectiveRemovePixelPerCell: 1 23 | -------------------------------------------------------------------------------- /gpt_lib/code_example/chatglm6b_demo.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/4/12 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from transformers import AutoTokenizer, AutoModel 6 | 7 | tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) 8 | model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda() 9 | model = model.eval() 10 | response, history = model.chat(tokenizer, "你好", history=[]) 11 | print(response) 12 | response, history = model.chat(tokenizer, "晚上睡不着应该怎么办", history=history) 13 | print(response) -------------------------------------------------------------------------------- /gpt_lib/langchain/utils.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/5/9 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import torch 6 | 7 | def torch_gc(): 8 | if torch.cuda.is_available(): 9 | # with torch.cuda.device(DEVICE): 10 | torch.cuda.empty_cache() 11 | torch.cuda.ipc_collect() 12 | elif torch.backends.mps.is_available(): 13 | try: 14 | from torch.mps import empty_cache 15 | empty_cache() 16 | except Exception as e: 17 | print(e) 18 | print("如果您使用的是 macOS 建议将 pytorch 版本升级至 2.0.0 或更高版本,以支持及时清理 torch 产生的内存占用。") -------------------------------------------------------------------------------- /hand_lib/README.md: -------------------------------------------------------------------------------- 1 | ### Hand Detect 2 | 3 | - hand detector d2 (from [detector.d2](https://github.com/ddshan/hand_detector.d2) based detectron2) 4 | - hand detector from [mediapipe](https://github.com/google/mediapipe) 5 | - hand detector based by yolox (self-trained by [mmdetection](https://github.com/ykk648/mmdetection)) 6 | 7 | ### Hand Mesh Recovery 8 | 9 | - IK model from 
[minimal-hands](https://github.com/MengHao666/Minimal-Hand-pytorch) 10 | 11 | ### Hand Regress 12 | 13 | - H3DWModel from [frankmocap](https://github.com/facebookresearch/frankmocap/blob/bb05b851bc3f1e27a55fd15e9f46093e7c05fc12/handmocap/hand_mocap_api.py#L44) (convert 2 onnx) -------------------------------------------------------------------------------- /audio_lib/tts/bark_example.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/4/27 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | https://github.com/suno-ai/bark 7 | """ 8 | from bark import SAMPLE_RATE, generate_audio, preload_models 9 | from IPython.display import Audio 10 | 11 | # download and load all models 12 | preload_models() 13 | 14 | # generate audio from text 15 | text_prompt = """ 16 | Hello, my name is Suno. And, uh — and I like pizza. [laughs] 17 | But I also have other interests such as playing tic tac toe. 18 | """ 19 | audio_array = generate_audio(text_prompt) 20 | 21 | # play text in notebook 22 | Audio(audio_array, rate=SAMPLE_RATE) -------------------------------------------------------------------------------- /seg_lib/README.md: -------------------------------------------------------------------------------- 1 | ## Segmentation Lib 2 | 3 | 4 | ### carvekit 5 | 6 | - [image-background-remove-tool](https://github.com/OPHoperHPO/image-background-remove-tool) API example, for cloth seg. 7 | 8 | ### cihp_pgn 9 | 10 | - [CIHP_PGN](https://github.com/Engineering-Course/CIHP_PGN) Human segmentation, model converted to onnx 11 | 12 | ### u2net 13 | 14 | - [rembg](https://github.com/danielgatis/rembg) 15 | - [cv_u2net_salient-detection](https://www.modelscope.cn/models/damo/cv_u2net_salient-detection/summary) 16 | 17 | ### ppmattingv2 18 | 19 | - [pp_mattingv2](https://github.com/jiachen0212/pp_mattingv2) 20 | 21 | ### RAFT 22 | 23 | - [RAFT](https://github.com/princeton-vl/RAFT) 24 | -------------------------------------------------------------------------------- /data_lib/dataset_tools.py: -------------------------------------------------------------------------------- 1 | from data_lib.dataset_preprocess import gen_txt_from_path 2 | 3 | """ 4 | generate dataset list as follow formats: 5 | |-- dataset 6 | |-- train 7 | |--label1 8 | |--dataset prefix 1 9 | |--*.jpg 10 | |--dataset prefix 2 11 | |--*.jpg 12 | |--label2 13 | |--dataset prefix 14 | |--*.jpg 15 | |--label 16 | |--dataset prefix 17 | |--*.jpg 18 | ... 
19 | 20 | |-- val 21 | |--*.jpg 22 | |--train.txt 23 | |--val.txt 24 | """ 25 | 26 | if __name__ == '__main__': 27 | test_path = '' 28 | # count_mean_std(test_path) 29 | gen_txt_from_path(test_path, img_format='jpg', train_ratio=0.8) 30 | -------------------------------------------------------------------------------- /mocap_lib/calibration/calibration_by_multical.sh: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/5/16 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | # install 7 | pip install multical 8 | 9 | # separate 10 | multical intrinsic --image_path ./ --boards ./example_boards/intrinsic/charuco_A1_88_intri_4.yaml 11 | multical calibrate --image_path ./ --calibration ./intrinsic.json --fix_intrinsic --boards ./example_boards/charuco_A1_44.yaml 12 | 13 | # generate board 14 | multical boards --boards ./example_boards/charuco_A1_44.yaml --paper_size A1 --pixels_mm 10 --write my_images 15 | 16 | # intrinsic and extrinsic 17 | multical calibrate --image_path ./ --boards ./example_boards/charuco_A1_44.yaml --limit_images 200 --fix_aspect 18 | multical vis --workspace_file calibration.pkl 19 | -------------------------------------------------------------------------------- /sd_lib/controlnet/utils.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/10/23 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import numpy as np 6 | import torch 7 | from cv2box import CVImage 8 | 9 | 10 | def make_inpaint_condition(image_p, image_mask_p): 11 | image = CVImage(image_p).pillow() 12 | image_mask = CVImage(image_mask_p).pillow() 13 | image = np.array(image.convert("RGB")).astype(np.float32) / 255.0 14 | image_mask = np.array(image_mask.convert("L")).astype(np.float32) / 255.0 15 | assert image.shape[0:1] == image_mask.shape[0:1], "image and image_mask must have the same image size" 16 | image[image_mask > 0.5] = -1.0 # set as masked pixel 17 | image = np.expand_dims(image, 0).transpose((0, 3, 1, 2)) 18 | image = torch.from_numpy(image) 19 | return image 20 | -------------------------------------------------------------------------------- /art_lib/style_transfer/dct_net/utils.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/1/17 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import numpy as np 6 | import cv2 7 | 8 | 9 | def resize_size(image, size=720): 10 | h, w, c = np.shape(image) 11 | if min(h, w) > size: 12 | if h > w: 13 | h, w = int(size * h / w), size 14 | else: 15 | h, w = size, int(size * w / h) 16 | image = cv2.resize(image, (w, h), interpolation=cv2.INTER_AREA) 17 | return image 18 | 19 | 20 | def padTo16x(image): 21 | h, w, c = np.shape(image) 22 | if h % 16 == 0 and w % 16 == 0: 23 | return image, h, w 24 | nh, nw = (h // 16 + 1) * 16, (w // 16 + 1) * 16 25 | img_new = np.ones((nh, nw, 3), np.uint8) * 255 26 | img_new[:h, :w, :] = image 27 | 28 | return img_new, h, w 29 | -------------------------------------------------------------------------------- /math_lib/affine_matrix.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/11/11 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import numpy as np 6 | 7 | 8 | def inverse_cv_affine(mat): 9 | """ 10 | 
similar to mat_rev = cv2.invertAffineTransform(mat) 11 | Args: 12 | mat: 13 | Returns: 14 | """ 15 | # inverse the Affine transformation matrix 16 | mat_rev = np.zeros([2, 3]) 17 | div1 = mat[0][0] * mat[1][1] - mat[0][1] * mat[1][0] 18 | mat_rev[0][0] = mat[1][1] / div1 19 | mat_rev[0][1] = -mat[0][1] / div1 20 | mat_rev[0][2] = -(mat[0][2] * mat[1][1] - mat[0][1] * mat[1][2]) / div1 21 | div2 = mat[0][1] * mat[1][0] - mat[0][0] * mat[1][1] 22 | mat_rev[1][0] = mat[1][0] / div2 23 | mat_rev[1][1] = -mat[0][0] / div2 24 | mat_rev[1][2] = -(mat[0][2] * mat[1][0] - mat[0][0] * mat[1][2]) / div2 25 | -------------------------------------------------------------------------------- /mocap_lib/README.md: -------------------------------------------------------------------------------- 1 | ## Mocap Lib 2 | 3 | ### Body Regress 4 | 5 | - [SPIN](https://github.com/open-mmlab/mmhuman3d/tree/main/configs/spin/) onnx model 6 | 7 | ### Whole Body Keypoints Detect 8 | 9 | - mediapipe wrapper / [mmpose](https://github.com/open-mmlab/mmpose) model support 10 | 11 | ### Calibration 12 | 13 | - [multical](https://github.com/oliver-batchelor/multical) 14 | 15 | ### Middleware 16 | 17 | - [VMC](https://protocol.vmc.info/) protocol demo. 18 | 19 | ### Smooth Filter 20 | 21 | - [SmoothNet](https://github.com/cure-lab/SmoothNet) 22 | - OneEuro 23 | 24 | ### Triangulation 25 | 26 | - [anipose method](https://github.com/lambdaloop/anipose) 27 | - [easymocap method](https://github.com/zju3dv/EasyMocap) 28 | - [pose2sim method](https://github.com/perfanalytics/pose2sim/blob/main/Pose2Sim/triangulate_3d.py) 29 | 30 | ### Visualize 31 | 32 | - [poseviz](https://github.com/isarandi/poseviz) demo for mediapipe. -------------------------------------------------------------------------------- /ocr_lib/paddle_excel.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/6/12 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import cv2 6 | from paddleocr import PPStructure, draw_structure_result, save_structure_res 7 | import os 8 | 9 | os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" 10 | table_engine = PPStructure(show_log=True) 11 | 12 | save_folder = 'output' 13 | img_path = '' 14 | img = cv2.imread(img_path) 15 | result = table_engine(img) 16 | save_structure_res(result, save_folder, os.path.basename(img_path).split('.')[0]) 17 | 18 | for line in result: 19 | line.pop('img') 20 | print(line) 21 | 22 | from PIL import Image 23 | 24 | font_path = './pretrain_models/ocr_lib/simfang.ttf' # PaddleOCR下提供字体包 25 | image = Image.open(img_path).convert('RGB') 26 | im_show = draw_structure_result(image, result, font_path=font_path) 27 | im_show = Image.fromarray(im_show) 28 | im_show.save('result_structure.jpg') -------------------------------------------------------------------------------- /math_lib/k_means.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/5/13 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import numpy as np 6 | import pandas as pd 7 | 8 | 9 | def cal_dis(points, centroids, k): 10 | cal_dis_res = [] 11 | for point in points: 12 | dis = np.linalg.norm(np.tile(point, (k, 1)) - centroids) 13 | cal_dis_res.append(dis) 14 | return cal_dis_res 15 | 16 | 17 | def update_centroids(points, centroids, k): 18 | cal_dis_list = cal_dis(points, centroids, k) 19 | min_cal_dis_list = np.argmin(cal_dis_list, axis=1) 20 | 
new_centroids = pd.DataFrame(points).groupby(min_cal_dis_list).mean() 21 | diff = new_centroids - centroids 22 | return new_centroids, diff 23 | 24 | 25 | def k_means(points, k): 26 | centroids = points.sample(k) 27 | # use min diff or optim fix rounds 28 | for i in range(100): 29 | centroids, _ = update_centroids(points, centroids, k) 30 | return centroids 31 | -------------------------------------------------------------------------------- /mocap_lib/calibration/gopro_wifi_reader.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/5/23 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | # This is the script without the need of a FFmpeg installation, pure OpenCV 7 | # This is not useful for image processing (eg: find faces) as there will be more lag, around 6 seconds added. 8 | import cv2 9 | import numpy as np 10 | from time import time 11 | import socket 12 | from goprocam import GoProCamera 13 | from goprocam import constants 14 | gpCam = GoProCamera.GoPro() 15 | sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 16 | t = time() 17 | gpCam.livestream("start") 18 | cap = cv2.VideoCapture("udp://10.5.5.9:8554") 19 | while True: 20 | nmat, frame = cap.read() 21 | cv2.imshow("GoPro OpenCV", frame) 22 | if cv2.waitKey(1) & 0xFF == ord('q'): 23 | break 24 | if time() - t >= 2.5: 25 | sock.sendto("_GPHD_:0:0:2:0.000000\n".encode(), ("10.5.5.9", 8554)) 26 | t = time() 27 | 28 | cap.release() 29 | cv2.destroyAllWindows() -------------------------------------------------------------------------------- /mocap_lib/skeleton_transfer/openpose_lib.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/7/25 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | mediapipe33_to_openpose25 = [0, 0, 12, 14, 16, 11, 13, 15, 0, 24, 26, 28, 23, 25, 27, 5, 2, 8, 7, 31, 31, 29, 7 | 32, 32, 30] 8 | 9 | alphapose17_to_openpose25 = [9, 8, 14, 15, 16, 11, 12, 13, 0, 1, 2, 3, 4, 5, 6, -1, -1, -1, 10 | -1, -1, -1, -1, -1, -1, -1, ] 11 | 12 | 13 | class Openpose25: 14 | def __init__(self, poses=None): 15 | self.poses = poses 16 | 17 | def from_mediapipe_33(self, poses): 18 | """ 19 | 20 | Args: 21 | poses: 33 * 3 22 | 23 | Returns: 25 * 3 24 | 25 | """ 26 | poses = poses[mediapipe33_to_openpose25] 27 | poses[8, :2] = poses[[9, 12], :2].mean(axis=0) 28 | poses[8, 2] = poses[[9, 12], 2].min(axis=0) 29 | poses[1, :2] = poses[[2, 5], :2].mean(axis=0) 30 | poses[1, 2] = poses[[2, 5], 2].min(axis=0) 31 | return poses 32 | -------------------------------------------------------------------------------- /art_lib/talking_head/wav2lip/audio_encoder.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/7/25 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import os 6 | # import numba 7 | import numpy as np 8 | from apstone import ModelBase 9 | from cv2box import CVImage 10 | from scipy.spatial import ConvexHull 11 | import cv2 12 | """ 13 | input_name:['input_1'], shape:[[1, 32, 1, 80, 16]] 14 | output_name:['output_1'], shape:[[1, 32, 512]] 15 | """ 16 | 17 | MODEL_ZOO = { 18 | # 32 frame , mel spectrogram 19 | # input_name: ['input_1'], shape: [[1, 32, 1, 80, 16]] 20 | # output_name: ['output_1'], shape: [[1, 32, 512]] 21 | 'audio_encoder': { 22 | 'model_path': 
'pretrain_models/talking_head/wav2lip/audio_encoder.onnx', 23 | }, 24 | } 25 | 26 | 27 | class Audio2PoseDecoder(ModelBase): 28 | def __init__(self, model_type='audio_encoder', provider='cpu'): 29 | super().__init__(MODEL_ZOO[model_type], provider) 30 | self.model_type = model_type 31 | 32 | def forward(self, img_source, img_driving, pass_drive_kp=False): 33 | pass -------------------------------------------------------------------------------- /ocr_lib/paddle_ocr.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/5/23 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | https://github.com/PaddlePaddle/PaddleOCR 7 | """ 8 | from paddleocr import PaddleOCR, draw_ocr 9 | 10 | # Paddleocr目前支持的多语言语种可以通过修改lang参数进行切换 11 | # 例如`ch`, `en`, `fr`, `german`, `korean`, `japan` 12 | ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory 13 | img_path = '' 14 | result = ocr.ocr(img_path, cls=True) 15 | for idx in range(len(result)): 16 | res = result[idx] 17 | for line in res: 18 | print(line) 19 | 20 | # 显示结果 21 | # 如果本地没有simfang.ttf,可以在doc/fonts目录下下载 22 | from PIL import Image 23 | result = result[0] 24 | image = Image.open(img_path).convert('RGB') 25 | boxes = [line[0] for line in result] 26 | txts = [line[1][0] for line in result] 27 | scores = [line[1][1] for line in result] 28 | im_show = draw_ocr(image, boxes, txts, scores, font_path='./pretrain_models/ocr_lib/simfang.ttf') 29 | im_show = Image.fromarray(im_show) 30 | im_show.save('result2.jpg') 31 | -------------------------------------------------------------------------------- /utils/ai_utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import PIL 3 | import torch 4 | import torch.nn.functional as F 5 | import time 6 | import numpy as np 7 | import uuid 8 | from pathlib import Path 9 | import pickle 10 | 11 | 12 | def load_checkpoint(model, filename): 13 | return model.load_state_dict(torch.load(filename)) 14 | 15 | 16 | def make_random_name(f_name): 17 | return uuid.uuid4().hex + '.' 
+ f_name.split('.')[-1] 18 | 19 | 20 | def down_sample(target_, size): 21 | return F.interpolate(target_, size=size, mode='bilinear', align_corners=True) 22 | 23 | 24 | class MyTimer(object): 25 | """ 26 | timer 27 | """ 28 | 29 | def __enter__(self): 30 | self.t0 = time.time() 31 | 32 | def __exit__(self, exc_type, exc_val, exc_tb): 33 | print('[finished, spent time: {time:.2f}s]'.format(time=time.time() - self.t0)) 34 | 35 | 36 | def get_path_by_ext(this_dir, ext_list=None): 37 | if ext_list is None: 38 | print('Use image ext as default !') 39 | ext_list = [".jpg", ".png", ".JPG", ".webp", ".jpeg"] 40 | return [p for p in Path(this_dir).rglob('*') if p.suffix in ext_list] 41 | -------------------------------------------------------------------------------- /art_lib/talking_head/sadtalker/audio_2_pose.py: -------------------------------------------------------------------------------- 1 | import os 2 | # import numba 3 | import numpy as np 4 | from apstone import ModelBase 5 | from cv2box import CVImage 6 | from scipy.spatial import ConvexHull 7 | import cv2 8 | 9 | """ 10 | ref https://github.com/OpenTalker/SadTalker/blob/main/src/audio2pose_models/cvae.py 11 | """ 12 | 13 | MODEL_ZOO = { 14 | # input: 32frames z(random) class(style) ref(reference coeff) audio_emb(audio feature) 15 | # input_name: ['input_1', 'input_2', 'input_3', 'input_4'], shape: [[1, 6], [1], [1, 64], [1, 32, 512]] 16 | # output: pose_motion_pred 17 | # output_name: ['output_1'], shape: [[1, 32, 6]] 18 | 'audio2pose_decoder': { 19 | 'model_path': 'pretrain_models/art_lib/talking_head/sadtalker/audio_2_pose.onnx', 20 | }, 21 | } 22 | 23 | 24 | class Audio2PoseDecoder(ModelBase): 25 | def __init__(self, model_type='audio2pose_decoder', provider='cpu'): 26 | super().__init__(MODEL_ZOO[model_type], provider) 27 | self.model_type = model_type 28 | 29 | def forward(self, img_source, img_driving, pass_drive_kp=False): 30 | pass 31 | 32 | 33 | if __name__ == '__main__': 34 | pass 35 | -------------------------------------------------------------------------------- /data_lib/dataset_preprocess/count_mean_std.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | from tqdm import tqdm 5 | from utils import get_path_by_ext 6 | 7 | 8 | def count_mean_std(img_dir_path): 9 | # path = img_path 10 | means = [0, 0, 0] 11 | stdevs = [0, 0, 0] 12 | 13 | # index = 1 14 | num_imgs = 0 15 | # img_names = os.listdir(path) 16 | for img_path in tqdm(get_path_by_ext(img_dir_path)): 17 | num_imgs += 1 18 | # print(img_name) 19 | img = cv2.imread(str(img_path)) 20 | img = np.asarray(img) 21 | img = img.astype(np.float32) # / 255. 
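        # per-channel means/stds are accumulated over 0-255 pixel values here and averaged by num_imgs below;
        # re-enable the commented "/ 255." above if normalized (0-1) statistics are wanted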
22 | for i in range(3): 23 | means[i] += img[:, :, i].mean() 24 | stdevs[i] += img[:, :, i].std() 25 | # print(num_imgs) 26 | means.reverse() 27 | stdevs.reverse() 28 | 29 | means = np.asarray(means) / num_imgs 30 | stdevs = np.asarray(stdevs) / num_imgs 31 | 32 | print("normMean = {}".format(means)) 33 | print("normStd = {}".format(stdevs)) 34 | print('transforms.Normalize(normMean={},normStd = {})'.format(means, stdevs).replace(' ', ',')) 35 | 36 | 37 | if __name__ == '__main__': 38 | count_mean_std('') 39 | -------------------------------------------------------------------------------- /data_lib/dataset_convert/coco-annotator_2_coco-mmpose.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/8/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | from cv2box import CVFile 7 | from tqdm import tqdm 8 | 9 | 10 | def get_coco_bbox_gt(json_in_, json_out_): 11 | json_data = CVFile(json_in_).data 12 | 13 | out_list = [] 14 | for i in tqdm(range(len(json_data['annotations']))): 15 | dummy = json_data['annotations'][i] 16 | if dummy['category_id'] == 1: 17 | out_list.append({ 18 | 'bbox': dummy['bbox'], 19 | 'category_id': dummy['category_id'], 20 | 'image_id': dummy['image_id'], 21 | 'score': 1.0, 22 | }) 23 | print(len(out_list)) 24 | CVFile(json_out_).json_write(out_list) 25 | 26 | 27 | def del_other_category(json_in_, json_out_): 28 | json_data = CVFile(json_in_).data 29 | out_data = json_data.copy() 30 | out_data['annotations'] = [] 31 | print(len(json_data['annotations'])) 32 | for i in tqdm(range(len(json_data['annotations']))): 33 | dummy = json_data['annotations'][i] 34 | if dummy['category_id'] == 1 and 'keypoints' in dummy.keys(): 35 | out_data['annotations'].append(dummy) 36 | 37 | print(len(out_data['annotations'])) 38 | CVFile(json_out_).json_write(out_data) 39 | 40 | 41 | if __name__ == '__main__': 42 | json_in = '' 43 | json_out = '' 44 | # get_coco_bbox_gt(json_in, json_out) 45 | del_other_category(json_in, json_out) 46 | -------------------------------------------------------------------------------- /gpt_lib/code_example/openai_gpt3_demo.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/2/6 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | import os 7 | import openai as ai 8 | 9 | 10 | # Get the key from an environment variable on the machine it is running on 11 | # ai.api_key = os.environ.get("OPENAI_API_KEY") 12 | 13 | 14 | def generate_gpt3_response(user_text, print_output=False): 15 | """ 16 | Query OpenAI GPT-3 for the specific key and get back a response 17 | :type user_text: str the user's text to query for 18 | :type print_output: boolean whether or not to print the raw output JSON 19 | """ 20 | completions = ai.Completion.create( 21 | engine='text-davinci-003', # Determines the quality, speed, and cost. 
22 | temperature=0.5, # Level of creativity in the response 23 | prompt=user_text, # What the user typed in 24 | max_tokens=3500, # Maximum tokens in the prompt AND response 25 | n=1, # The number of completions to generate 26 | stop=None, # An optional setting to control response generation 27 | ) 28 | 29 | # Displaying the output can be helpful if things go wrong 30 | if print_output: 31 | print(completions) 32 | 33 | # Return the first choice's text 34 | return completions.choices[0].text 35 | 36 | 37 | if __name__ == '__main__': 38 | os.environ.setdefault("OPENAI_API_KEY", '') 39 | ai.api_key = os.environ.get("OPENAI_API_KEY") 40 | 41 | text = '写一篇乔·拜登的演讲稿' 42 | results = generate_gpt3_response(text) 43 | print(results) 44 | # print(results.decode()) 45 | -------------------------------------------------------------------------------- /hand_lib/hand_mesh/minimal_hands/ik_model.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/2/14 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from apstone import ONNXModel 6 | import numpy as np 7 | from hand_lib.hand_mesh.minimal_hands.kinematics import xyz_to_delta, MPIIHandJoints, mano_to_mpii 8 | from cv2box import CVFile 9 | 10 | IK_UNIT_LENGTH = 0.09473151311686484 11 | mano_ref_xyz = CVFile('pretrain_models/digital_human/minimal_hands/hand_mesh_model.pkl').data['joints'] 12 | # convert the kinematic definition to MPII style, and normalize it 13 | mpii_ref_xyz = mano_to_mpii(mano_ref_xyz) / IK_UNIT_LENGTH 14 | mpii_ref_xyz -= mpii_ref_xyz[9:10] 15 | # get bone orientations in the reference pose 16 | mpii_ref_delta, mpii_ref_length = xyz_to_delta(mpii_ref_xyz, MPIIHandJoints) 17 | mpii_ref_delta = mpii_ref_delta * mpii_ref_length 18 | 19 | 20 | class IKModel: 21 | def __init__(self): 22 | self.ik_model = ONNXModel('pretrain_models/digital_human/minimal_hands/iknet/iknet.onnx') 23 | 24 | def forward_np(self, hand_np): 25 | # xyz = np.array(hand_np) 26 | delta, length = xyz_to_delta(hand_np, MPIIHandJoints) 27 | delta *= length 28 | pack = np.concatenate( 29 | [hand_np, delta, mpii_ref_xyz, mpii_ref_delta], 0 30 | ) 31 | return self.forward(pack) 32 | 33 | def forward(self, pack): 34 | pack = np.expand_dims(pack, 0) 35 | theta = self.ik_model.forward(pack.astype(np.float32))[0] 36 | # theta_mano = mpii_to_mano(theta) 37 | if len(theta.shape) == 3: 38 | theta = theta[0] 39 | return theta 40 | -------------------------------------------------------------------------------- /math_lib/wrap_affine.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/12/28 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import cv2 6 | import numpy as np 7 | from cv2box import CVImage, MyTimer 8 | import cvcuda 9 | import nvcv 10 | import torch 11 | 12 | # opencv default version 13 | img_p = '' 14 | img = CVImage(img_p).bgr 15 | crop_size = 256 16 | mat_ = np.array([[1.77893761e-01, 2.47390154e-03, -9.42742635e+01], [-2.47390154e-03, 1.77893761e-01, -3.40511541e+01]]) 17 | mat_rev = cv2.invertAffineTransform(mat_) 18 | with MyTimer() as mfc: 19 | for i in range(10000): # 31.5 fps 20 | warped = cv2.warpAffine(img, mat_, (crop_size, crop_size), borderValue=0.0) 21 | CVImage(warped).show() 22 | 23 | 24 | # opencv cuda version 25 | image_tensors = torch.tensor(CVImage(img_p).bgr).unsqueeze(0).cuda() 26 | # print(image_tensors) 27 | print(image_tensors.size()) 
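# CV-CUDA variant: the NHWC torch tensor is wrapped as a cvcuda tensor and warp_affine_into
# fills the preallocated (1, 256, 256, 3) output on the GPU; note it warps with mat_rev
# (the inverted matrix), unlike the OpenCV call above which uses mat_.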
28 | cvcuda_input_tensor = cvcuda.as_tensor(image_tensors, "NHWC") 29 | cvcuda_output_tensor = cvcuda.Tensor([1, 256, 256, 3], np.uint8, "NHWC") 30 | print(cvcuda_input_tensor.shape) 31 | cvcuda_affine_tensor = cvcuda.warp_affine_into(src=cvcuda_input_tensor, dst=cvcuda_output_tensor, xform=mat_rev, 32 | flags=cvcuda.Interp.LINEAR, border_mode=cvcuda.Border.CONSTANT, 33 | border_value=[0]) 34 | print(cvcuda_affine_tensor.shape) 35 | print(type(cvcuda_affine_tensor)) 36 | print(np.array(cvcuda_affine_tensor)) 37 | 38 | # torch.tensor(cvcuda_output_tensor.cuda()).data_ptr() 39 | img_out = cvcuda.as_image(cvcuda_output_tensor.cuda(), format=cvcuda.Format.BGR8) 40 | img_out = img_out.cpu() 41 | print(img_out.shape) 42 | CVImage(img_out).show() 43 | -------------------------------------------------------------------------------- /gpt_lib/code_example/openai_azure_demo.py: -------------------------------------------------------------------------------- 1 | 2 | import openai # 需要pip安装 3 | import prompt_toolkit # 需要额外安装这个库,用于命令行交互 4 | 5 | openai.api_type = "azure" 6 | openai.api_base = "https://shanghai-free-test.openai.azure.com/" # 这里需要根据自己的资源进行更改 7 | # openai.api_version = "2022-12-01" 8 | openai.api_version = "2023-03-15-preview" 9 | 10 | # 配置OpenAI API密钥 11 | openai.api_key = '' # 这里根据自己的API KEY更改 12 | # 设定OpenAI的模型和引擎 13 | model_engine = "gpt-35-turbo" # 这里就是创建的模型名称更改 14 | prompt_prefix = "我: " 15 | response_prefix = "AI: " 16 | 17 | # import requests 18 | # url = openai.api_base + "/openai/deployments?api-version=2022-12-01" 19 | # r = requests.get(url, headers={"api-key": openai.api_key}) 20 | # print(r.text) 21 | 22 | # 定义一个函数,用于向OpenAI API发送请求并返回结果 23 | def generate_response(prompt): 24 | response = openai.Completion.create( 25 | engine=model_engine, 26 | prompt=prompt, 27 | max_tokens=1024, 28 | n=1, 29 | stop=["\n"], 30 | temperature=0, 31 | ) 32 | message = response.choices[0].text 33 | return message.strip() 34 | 35 | 36 | # 通过Prompt Toolkit库来实现命令行交互 37 | def prompt_user(): 38 | while True: 39 | try: 40 | # 读取用户输入的信息 41 | user_input = prompt_toolkit.prompt(prompt_prefix) 42 | # user_input = 'What\'s the difference between garbanzo beans and chickpeas? 
' 43 | print(user_input) 44 | # 将用户输入发送给OpenAI API,并返回结果 45 | response = generate_response(user_input) 46 | # 打印OpenAI API返回的结果 47 | print(response_prefix + response) 48 | except KeyboardInterrupt: 49 | # 如果用户按下Ctrl-C,则退出程序 50 | print("\n再见!") 51 | break 52 | 53 | 54 | # 运行程序 55 | if __name__ == "__main__": 56 | prompt_user() 57 | -------------------------------------------------------------------------------- /seg_lib/carvekit/carvekit_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/6/27 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | ref https://github.com/OPHoperHPO/image-background-remove-tool 7 | """ 8 | import os 9 | import numpy as np 10 | from PIL import Image, ImageOps 11 | from carvekit.web.schemas.config import MLConfig 12 | from carvekit.web.utils.init_utils import init_interface 13 | 14 | SHOW_FULLSIZE = False # param {type:"boolean"} 15 | PREPROCESSING_METHOD = "none" # param ["stub", "none"] 16 | SEGMENTATION_NETWORK = "tracer_b7" # param ["u2net", "deeplabv3", "basnet", "tracer_b7"] 17 | POSTPROCESSING_METHOD = "fba" # param ["fba", "none"] 18 | SEGMENTATION_MASK_SIZE = 640 # param ["640", "320"] {type:"raw", allow-input: true} 19 | TRIMAP_DILATION = 30 # param {type:"integer"} 20 | TRIMAP_EROSION = 5 # param {type:"integer"} 21 | DEVICE = 'cuda' # 'cuda' 'cpu' 22 | 23 | config = MLConfig(segmentation_network=SEGMENTATION_NETWORK, 24 | preprocessing_method=PREPROCESSING_METHOD, 25 | postprocessing_method=POSTPROCESSING_METHOD, 26 | seg_mask_size=SEGMENTATION_MASK_SIZE, 27 | trimap_dilation=TRIMAP_DILATION, 28 | trimap_erosion=TRIMAP_EROSION, 29 | device=DEVICE) 30 | 31 | interface = init_interface(config) 32 | 33 | imgs = [''] 34 | 35 | images = interface(imgs) 36 | for i, im in enumerate(images): 37 | img = np.array(im) 38 | img = img[..., :3] # no transparency 39 | idx = (img[..., 0] == 130) & (img[..., 1] == 130) & (img[..., 2] == 130) # background 0 or 130, just try it 40 | img = np.ones(idx.shape) * 255 41 | img[idx] = 0 42 | im = Image.fromarray(np.uint8(img), 'L') 43 | im.save(f'./{imgs[i].split("/")[-1].split(".")[0]}.jpg') 44 | -------------------------------------------------------------------------------- /gpt_lib/models/llm_base.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/5/12 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import torch 6 | from transformers import GenerationConfig 7 | 8 | 9 | class LLM: 10 | def __init__(self, model_info, load_in_8bit, device_map): 11 | self.model_path = model_info['model_path'] 12 | if 'self.config' not in locals(): 13 | self.config = \ 14 | model_info['config'].from_pretrained(self.model_path, return_unused_kwargs=True, 15 | trust_remote_code=True)[0] 16 | self.model = model_info['model'].from_pretrained(self.model_path, trust_remote_code=True, 17 | load_in_8bit=load_in_8bit, device_map=device_map()) 18 | self.tokenizer = model_info['tokenizer'].from_pretrained(self.model_path, trust_remote_code=True) 19 | self.prompt_template = model_info['prompt_template'] 20 | 21 | def generate(self, *args, **kwargs): 22 | pass 23 | 24 | def generate_prompt(self, prompt_in): 25 | return self.prompt_template.format(instruction=prompt_in) 26 | 27 | def generate_base(self, prompt, generation_config=None, **kwargs, ): 28 | prompt_format = self.generate_prompt(prompt) 29 | inputs = 
self.tokenizer(prompt_format, return_tensors="pt") 30 | 31 | with torch.no_grad(): 32 | generation_output = self.model.generate( 33 | input_ids=inputs["input_ids"].cuda(), 34 | generation_config=generation_config, 35 | return_dict_in_generate=True, 36 | output_scores=True, 37 | ) 38 | s = generation_output.sequences[0] 39 | output = self.tokenizer.decode(s) 40 | return output.split("### Response:")[1].strip() 41 | -------------------------------------------------------------------------------- /mocap_lib/bbox_tracking/bbox_tracking.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/8/15 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | from cv2box import CVBbox 7 | import numpy as np 8 | 9 | 10 | class BboxTracking: 11 | def __init__(self, image_shape_, batch=1): 12 | """ 13 | Args: 14 | image_shape_: (W,H) 15 | """ 16 | self.image_shape_ = image_shape_ 17 | self.batch_ = batch 18 | self.last_bbox_array = None 19 | 20 | def reset_condition(self, area_limit): 21 | """ 22 | judge bbox area by px^2 23 | """ 24 | if self.last_bbox_array is None: 25 | return self.last_bbox_array 26 | else: 27 | bbox_area = CVBbox(self.last_bbox_array).area() 28 | reset_index = np.where(bbox_area < area_limit) 29 | self.last_bbox_array[reset_index] = np.array([0, 0, self.image_shape_[0], self.image_shape_[1]]) 30 | return self.last_bbox_array 31 | 32 | def forward(self, keypoints_batch=None, margin=0.1, area_limit=1000): 33 | """ 34 | Args: 35 | keypoints_batch: N_view*N_kp*N_axis [N*3, N*3, ...] 36 | margin: 37 | area_limit: 38 | Returns: 39 | """ 40 | 41 | if not keypoints_batch: 42 | # part_w = int(self.image_shape_[0] * 1 / 3) 43 | # return [part_w, 0, 2 * part_w, self.image_shape_[1]] 44 | return np.array([[0, 0, self.image_shape_[0], self.image_shape_[1]]]).repeat(self.batch_, axis=0) 45 | else: 46 | self.last_bbox_array = np.array( 47 | [CVBbox(None).get_bbox_from_points(keypoints[:, :2], self.image_shape_, margin_ratio=margin) for 48 | keypoints in keypoints_batch]) 49 | return self.reset_condition(area_limit) 50 | -------------------------------------------------------------------------------- /gpt_lib/textsplitter/chinese_text_splitter.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/5/9 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | ref https://www.modelscope.cn/models/damo/nlp_bert_document-segmentation_chinese-base/summary 7 | """ 8 | from langchain.text_splitter import CharacterTextSplitter 9 | import re 10 | from typing import List 11 | from modelscope.pipelines import pipeline 12 | 13 | p = pipeline( 14 | task="document-segmentation", 15 | model='damo/nlp_bert_document-segmentation_chinese-base', 16 | device="cuda") 17 | 18 | 19 | class ChineseTextSplitter(CharacterTextSplitter): 20 | def __init__(self, pdf: bool = False, **kwargs): 21 | super().__init__(**kwargs) 22 | self.pdf = pdf 23 | 24 | def split_text(self, text: str, use_document_segmentation: bool = False) -> List[str]: 25 | # use_document_segmentation参数指定是否用语义切分文档,此处采取的文档语义分割模型为达摩院开源的nlp_bert_document-segmentation_chinese-base,论文见https://arxiv.org/abs/2107.09278 26 | # 如果使用模型进行文档语义切分,那么需要安装modelscope[nlp]:pip install "modelscope[nlp]" -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html 27 | if self.pdf: 28 | text = re.sub(r"\n{3,}", "\n", text) 29 | text = re.sub('\s', ' ', text) 30 | text = 
text.replace("\n\n", "") 31 | if use_document_segmentation: 32 | result = p(documents=text) 33 | sent_list = [i for i in result["text"].split("\n\t") if i] 34 | else: 35 | sent_sep_pattern = re.compile('([﹒﹔﹖﹗.。!?]["’”」』]{0,2}|(?=["‘“「『]{1,2}|$))') # del :; 36 | sent_list = [] 37 | for ele in sent_sep_pattern.split(text): 38 | if sent_sep_pattern.match(ele) and sent_list: 39 | sent_list[-1] += ele 40 | elif ele: 41 | sent_list.append(ele) 42 | return sent_list 43 | -------------------------------------------------------------------------------- /art_lib/inpainting/lama.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/7/28 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from cv2box import CVImage, MyFpsCounter 6 | from apstone import ModelBase 7 | import numpy as np 8 | 9 | MODEL_ZOO = { 10 | # ref https://github.com/Sanster/lama-cleaner/blob/main/lama_cleaner/model/lama.py 11 | # pytorch do not support ifft op: https://github.com/pytorch/pytorch/issues/81075 12 | # input : RGB 0-1 (1,3,H,W) (1,1,H,W) 13 | 'big_lama': { 14 | 'model_path': 'pretrain_models/art_lib/inpainting/big-lama.tjm' 15 | }, 16 | } 17 | 18 | 19 | class LAMA(ModelBase): 20 | def __init__(self, model_type='big_lama', provider='gpu'): 21 | super().__init__(MODEL_ZOO[model_type], provider) 22 | self.model_type = model_type 23 | 24 | def forward(self, image_, mask_): 25 | """ 26 | Args: 27 | image_: CVImage acceptable class (path BGR tensor byte PIL etc.) 28 | mask_: [H, W] 29 | Returns: [H, W, C] BGR 30 | """ 31 | 32 | image_in = CVImage(CVImage(image_).rgb()).tensor() 33 | mask_ = CVImage(CVImage(mask_).mask(rgb=True)).tensor() 34 | mask_in = (mask_ > 0) * 1 35 | inpainted_image_ = self.model.forward([image_in, mask_in]) 36 | inpainted_image_ = inpainted_image_[0].permute(1, 2, 0).detach().cpu().numpy() 37 | inpainted_image_ = np.clip(inpainted_image_ * 255, 0, 255).astype("uint8") 38 | inpainted_image_ = CVImage(inpainted_image_).rgb() # rgb2bgr 39 | return inpainted_image_ 40 | 41 | 42 | if __name__ == '__main__': 43 | image_p = 'resources/inpainting/dog_chair.png' 44 | mask_p = 'resources/inpainting/dog_chair_mask.png' 45 | 46 | fb_cur = LAMA(model_type='big_lama', provider='gpu') 47 | inpaint_result = fb_cur.forward(image_p, mask_p) 48 | print(inpaint_result.shape) 49 | CVImage(inpaint_result).show() 50 | -------------------------------------------------------------------------------- /hand_lib/hand_detect_and_estimate.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/3 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | from hand_lib.hand_detector import HandDetectorYolox # MediapipeHand, IKModel, ThirdViewDetector 7 | from cv2box import CVVideoLoader, CVImage, CVFile, MyFpsCounter 8 | from tqdm import tqdm 9 | import cv2 10 | import numpy as np 11 | 12 | if __name__ == '__main__': 13 | video_p = '' 14 | 15 | hdy = HandDetectorYolox(thres=0.3) 16 | # hdd2 = ThirdViewDetector() 17 | # mph = MediapipeHand() 18 | # ikm = IKModel() 19 | 20 | with CVVideoLoader(video_p) as cvvl: 21 | for i in tqdm(range(len(cvvl))): 22 | _, frame = cvvl.get() 23 | # CVImage(frame).show() 24 | bboxs, show_image = hdy.forward(frame, show=True) 25 | # hdd2_results, show_image = hdd2.forward(frame, show=True) 26 | # CVImage(frame).show() 27 | # bboxs = np.array(hdd2_results["instances"].pred_boxes.tensor.to('cpu')) 28 | 29 
| if len(bboxs) != 2: 30 | print(i) 31 | 32 | # frame_info = {'left': None, 'right': None} 33 | # for bbox_ in bboxs: 34 | # # print(bbox_) 35 | # frame_crop = crop_padding_and_resize(frame, bbox_) 36 | # # CVImage(frame_crop).show() 37 | # hand_xyz, side_label, side_label_score = mph.forward(frame_crop) 38 | # # print(hand_xyz, side_label) 39 | # if hand_xyz is None: 40 | # continue 41 | # theta = ikm.forward_np(np.array(hand_xyz)) 42 | # # print(theta) 43 | # 44 | # if side_label == 'Left': 45 | # frame_info['left'] = theta 46 | # elif side_label == 'Right': 47 | # frame_info['right'] = theta 48 | # CVFile('./yolox_mp_thres0_world_xyz/frame{}.pkl'.format(i)).pickle_write(frame_info) 49 | -------------------------------------------------------------------------------- /sd_lib/tagger/tagger_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/10/25 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | from apstone import ModelBase 7 | from cv2box import CVImage, CVFile 8 | import numpy as np 9 | 10 | MODEL_ZOO = { 11 | # input_name:['input_1:0'], shape:[[1, 448, 448, 3]] 12 | # output_name:['predictions_sigmoid'], shape:[[1, 9083]] 13 | 'moat': { 14 | 'model_path': 'sd_models/tagger/SmilingWolf_wd-v1-4-moat-tagger-v2.onnx', 15 | 'tag_path': 'sd_models/tagger/selected_tags.csv', 16 | }, 17 | } 18 | 19 | del_list = ['no_humans', 'english_text', 'monochrome', 'greyscale', 'blurry', 'solo', 'horse'] 20 | 21 | 22 | class Tagger(ModelBase): 23 | def __init__(self, model_name='moat', provider='gpu'): 24 | super().__init__(MODEL_ZOO[model_name], provider) 25 | self.input_size = (448, 448) 26 | self.tags = CVFile(MODEL_ZOO[model_name]['tag_path']).data 27 | # print(self.tag_data) 28 | 29 | def forward(self, image_in_): 30 | image_in_ = CVImage(image_in_).resize(self.input_size).bgr 31 | image_in_ = image_in_[None, :].astype(np.float32) 32 | outputs = self.model.forward(image_in_)[0] 33 | outputs = 1 / (1 + np.exp(-outputs)) 34 | tags = {tag: float(conf) for tag, conf in zip(self.tags['name'][4:], outputs.flatten()[4:]) if 35 | float(conf) > 0.6} 36 | tags = sorted(tags.items(), key=lambda x: x[1], reverse=True) 37 | tags = [tag[0] for tag in tags] 38 | 39 | # for tag in tags: 40 | # if tag in del_list or tag.find('background') > 0: 41 | # tags.remove(tag) 42 | 43 | return ','.join(tags) 44 | 45 | 46 | if __name__ == '__main__': 47 | image_p = 'resources/for_sd/An_astronaut_is_riding_a_horse_on_Mars_seed-444264997.png' 48 | image_in = CVImage(image_p).bgr 49 | 50 | tagger = Tagger(model_name='moat') 51 | 52 | output = tagger.forward(image_in) 53 | print(output) 54 | -------------------------------------------------------------------------------- /mocap_lib/calibration/calibration_w_human.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/9/5 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | using human keypoints instead of charuco boards to do calibration 7 | """ 8 | import numpy as np 9 | from cv2box import CVFile, CVCamera 10 | import cv2 11 | import aniposelib 12 | 13 | human_body_height = 1.82 - 0.25 14 | side_bias = 1 / 15 * human_body_height 15 | 16 | used_kps_3d = np.array( 17 | [[0, -human_body_height, 0], [-side_bias, -1 / 2 * human_body_height, 0], 18 | [side_bias, -1 / 2 * human_body_height, 0], [-side_bias, -1 / 4 * human_body_height, 0], 19 | [side_bias, -1 / 4 * 
human_body_height, 0], [-side_bias, 0, 0], [side_bias, 0, 0], ]) 20 | 21 | frame = 120 22 | all_kps = [] 23 | used_kps_index = [1, 9, 12, 10, 13, 11, 14] 24 | cameras = [] 25 | cvc = CVCamera( 26 | multical_pkl_path='./0809cal/front_4_0809_window_1080.pkl') 27 | for camera_name in ['268', '617', '728', '886']: 28 | kp_p = './0906_pm/stand1/{}_2dkp.pkl'.format(camera_name) 29 | kps = CVFile(kp_p).data[frame][used_kps_index, :2] 30 | 31 | kp_p_2 = './0906_pm/walk2/{}_2dkp.pkl'.format( 32 | camera_name) 33 | all_kps.append(CVFile(kp_p_2).data) 34 | 35 | results = cv2.solvePnP(used_kps_3d, kps, cvc.intri_matrix()[camera_name], cvc.dist()[camera_name]) 36 | 37 | camera = aniposelib.cameras.Camera(name=camera_name, 38 | size=cvc.image_size()[camera_name], 39 | matrix=cvc.intri_matrix()[camera_name], 40 | rvec=results[1], 41 | tvec=results[2], 42 | dist=cvc.dist()[camera_name]) 43 | cameras.append(camera) 44 | 45 | cvc_empty = CVCamera() 46 | cvc_empty.camera_group = aniposelib.cameras.CameraGroup(cameras) 47 | final_camera_group = cvc_empty.bundle_adjust_iter(np.array(all_kps)) 48 | CVFile('./0906_pm/cgroup_from_human.pkl').pickle_write(final_camera_group) 49 | -------------------------------------------------------------------------------- /data_lib/dataset_convert/eric_yolov3_2_coco-mmdet.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/9/1 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | https://gitcode.net/EricLee/yolo_v3 7 | coco hand 8 | some error may exists, use 'yolo2coco.py' instead 9 | """ 10 | from cv2box import get_path_by_ext, CVFile, CVImage 11 | from tqdm import tqdm 12 | 13 | image_p = '' 14 | labels_p = '' 15 | coco_temp = '' 16 | 17 | coco_out = CVFile(coco_temp).data 18 | 19 | coco_out['images'] = [] 20 | coco_out['annotations'] = [] 21 | count = 10010 22 | count2 = 100101 23 | for image_p in tqdm(get_path_by_ext(image_p)): 24 | file_name = str(image_p.stem + image_p.suffix) 25 | image_p = str(image_p) 26 | height, width = CVImage(image_p).bgr.shape[0:2] 27 | image_label_p = image_p.replace('/images', '/labels').replace('.jpg', '.txt') 28 | # print(image_label_p) 29 | labels = CVFile(image_label_p).data 30 | # print(labels) 31 | coco_out['images'].append({ 32 | 'id': count, 33 | 'path': image_p[68:], 34 | 'width': width, 35 | 'height': height, 36 | 'file_name': file_name, 37 | }) 38 | for i in range(len(labels)): 39 | coco_out['annotations'].append({ 40 | 'image_id': count, 41 | 'id': count2, 42 | 'category_id': 0, 43 | 'bbox': [ 44 | int(float(str(labels[i]).split(' ')[1]) * width - float(str(labels[i]).split(' ')[3]) * width * 0.5), 45 | int(float(str(labels[i]).split(' ')[2]) * height - float(str(labels[i]).split(' ')[4][:-5]) * height * 0.5), 46 | int(float(str(labels[i]).split(' ')[3]) * width), 47 | int(float(str(labels[i]).split(' ')[4][:-5]) * height)], 48 | 'iscrowd': False, 49 | 'isbbox': True, 50 | 'area': int(float(str(labels[i]).split(' ')[3]) * width) * int(float(str(labels[i]).split(' ')[4][:-5]) * height) 51 | }) 52 | count2 += 1 53 | 54 | count += 1 55 | 56 | CVFile(coco_temp.replace('.json', '_out.json')).json_write(coco_out) 57 | -------------------------------------------------------------------------------- /body_lib/body_kp_detector/kp_detector_mmpose.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/8/17 3 | # @Author : ykk648 4 | # @Project : 
https://github.com/ykk648/AI_power 5 | """ 6 | https://modelscope.cn/models/damo/cv_hrnetv2w32_body-2d-keypoints_image/summary 7 | """ 8 | from cv2box import CVImage, MyFpsCounter 9 | from apstone import KpDetectorBase 10 | import cv2 11 | import numpy as np 12 | 13 | MODEL_ZOO = { 14 | # input_name:['input_1'], shape:[[1, 3, 128, 128]] 15 | # output_name:['output1'], shape:[[1, 15, 32, 32]] 16 | 'hrnetv2w32': { 17 | 'model_path': 'pretrain_models/body_lib/body_kp_detector/modelscope_hrnetv2w32.onnx', 18 | 'model_input_size': (128, 128) 19 | }, # w h 20 | } 21 | 22 | class BodyDetectorModelScope(KpDetectorBase): 23 | def __init__(self, model_type='r50', provider='gpu'): 24 | super().__init__(MODEL_ZOO[model_type], provider) 25 | self.dark_flag = model_type.find('dark') > 0 26 | 27 | def forward(self, image_in_, bbox_, show=False, mirror_test=False): 28 | if len(bbox_) == 0: 29 | return [[0, 0, 0]] * 133 30 | 31 | model_results = self.model.forward(self.preprocess(image_in_, bbox_)) 32 | 33 | kp_results = self.post_process_default(model_results[0], self.ratio, self.left, self.top) 34 | 35 | if show: 36 | self.show(image_in_, kp_results) 37 | 38 | return kp_results 39 | 40 | 41 | if __name__ == '__main__': 42 | image_path = 'resources/for_pose/t_pose_1080p.jpeg' 43 | image_in = CVImage(image_path).bgr 44 | bbox = [493, 75, 1427, 1044] 45 | 46 | bwd = BodyDetectorModelScope(model_type='hrnetv2w32', provider='gpu') 47 | kps = bwd.forward(image_in, bbox, show=True, mirror_test=False) 48 | # print(kps) 49 | 50 | # with MyFpsCounter('model forward 10 times fps: ') as mfc: 51 | # for i in range(10): 52 | # kps = bwd.forward(image_in, bbox) 53 | 54 | # # for video 55 | # from cv2box import CVVideoLoader 56 | # from tqdm import tqdm 57 | # 58 | # with CVVideoLoader('') as cvvl: 59 | # for _ in tqdm(range(len(cvvl))): 60 | # _, frame = cvvl.get() 61 | # kps = bwd.forward(image_in, bbox, show=True, mirror_test=False) 62 | -------------------------------------------------------------------------------- /art_lib/optical_flow_estimate/raft/raft_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/8/21 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | import numpy as np 7 | from art_lib.optical_flow_estimate.raft.utils import flow_to_image 8 | from cv2box import CVImage 9 | from apstone import ModelBase 10 | 11 | MODEL_ZOO = { 12 | # https://github.com/ibaiGorordo/ONNX-RAFT-Optical-Flow-Estimation 13 | # 0-255 RGB 14 | # input_name:['0', '1'], shape:[[1, 3, 480, 640], [1, 3, 480, 640]] 15 | # output_name:['23437', '23436'], shape:[[1, 2, 60, 80], [1, 2, 480, 640]] 16 | 'raft_kitti_iter20_480x640': { 17 | 'model_path': 'pretrain_models/art_lib/optical_flow_estimate/raft/iter20/raft_kitti_iter20_480x640.onnx' 18 | }, 19 | } 20 | 21 | 22 | class Raft(ModelBase): 23 | 24 | def __init__(self, model_name='raft_kitti_iter20_480x640', provider='gpu'): 25 | super(Raft, self).__init__(MODEL_ZOO[model_name], provider) 26 | self.input_width = 640 27 | self.input_height = 480 28 | self.mean = 0 29 | self.std = 1 30 | 31 | def forward(self, img1_, img2_): 32 | img_width, img_height = CVImage(img1_).bgr.shape[:-1][::-1] 33 | img1_input = CVImage(img1_).blob((self.input_width, self.input_height), self.mean, self.std, rgb=True) 34 | img2_input = CVImage(img2_).blob((self.input_width, self.input_height), self.mean, self.std, rgb=True) 35 | outputs = self.model.forward([img1_input, img2_input]) 36 | outputs = 
outputs[1][0].transpose(1, 2, 0) 37 | # draw 38 | flow_img_ = flow_to_image(outputs) 39 | flow_img_ = CVImage(flow_img_).resize((img_width, img_height)).rgb() 40 | return flow_img_ 41 | 42 | 43 | if __name__ == '__main__': 44 | # Initialize model 45 | model_name_ = 'raft_kitti_iter20_480x640' 46 | raft = Raft(model_name_) 47 | 48 | # Read inference image 49 | img1 = CVImage("resources/for_optical_flow/frame_0016.png").rgb() 50 | img2 = CVImage("resources/for_optical_flow/frame_0025.png").rgb() 51 | 52 | # Estimate flow and colorize it 53 | flow_map = raft.forward(img1, img2) 54 | combined_img = np.hstack((img1, img2, flow_map)) 55 | 56 | CVImage(combined_img).show(0, "Estimated flow") 57 | 58 | -------------------------------------------------------------------------------- /gpt_lib/langchain/model_config.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/5/9 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import torch.cuda 6 | import torch.backends 7 | import os 8 | 9 | embedding_model_dict = { 10 | "ernie-tiny": "nghuyong/ernie-3.0-nano-zh", 11 | "ernie-base": "nghuyong/ernie-3.0-base-zh", 12 | "text2vec-base": "shibing624/text2vec-base-chinese", 13 | "text2vec": "GanymedeNil/text2vec-large-chinese", 14 | } 15 | 16 | # Embedding model name 17 | EMBEDDING_MODEL = "text2vec" 18 | 19 | # Embedding running device 20 | EMBEDDING_DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu" 21 | 22 | # supported LLM models 23 | llm_model_dict = { 24 | "chatyuan": "ClueAI/ChatYuan-large-v2", 25 | "chatglm-6b-int4-qe": "THUDM/chatglm-6b-int4-qe", 26 | "chatglm-6b-int4": "THUDM/chatglm-6b-int4", 27 | "chatglm-6b-int8": "THUDM/chatglm-6b-int8", 28 | "chatglm-6b": "/mnt/ljt/models/hugging_face/chatglm-6b", 29 | } 30 | 31 | # LLM model name 32 | LLM_MODEL = "chatglm-6b" 33 | 34 | # LLM lora path,默认为空,如果有请直接指定文件夹路径 35 | LLM_LORA_PATH = "" 36 | USE_LORA = True if LLM_LORA_PATH else False 37 | 38 | # LLM streaming reponse 39 | STREAMING = True 40 | 41 | # Use p-tuning-v2 PrefixEncoder 42 | USE_PTUNING_V2 = False 43 | 44 | # LLM running device 45 | LLM_DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu" 46 | 47 | VS_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "vector_store") 48 | 49 | UPLOAD_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "content") 50 | 51 | # 基于上下文的prompt模版,请务必保留"{question}"和"{context}" 52 | PROMPT_TEMPLATE = """已知信息: 53 | {context} 54 | 55 | 根据上述已知信息,简洁和专业的来回答用户的问题。如果无法从中得到答案,请说 “根据已知信息无法回答该问题” 或 “没有提供足够的相关信息”,不允许在答案中添加编造成分,答案请使用中文。 问题是:{question}""" 56 | 57 | # PROMPT_TEMPLATE = """你是刘润,以刘润的语气回答问题,刘润是一个成功学大师,善于商业分析,喜欢使用商业术语解释问题,以下是他说过的一些话,尽量模仿这个口吻和说话方式: 58 | # {context} 59 | # 问题是:{question}""" 60 | 61 | # 匹配后单段上下文长度 62 | CHUNK_SIZE = 250 63 | 64 | # LLM input history length 65 | LLM_HISTORY_LEN = 3 66 | 67 | # return top-k text chunk from vector store 68 | VECTOR_SEARCH_TOP_K = 5 69 | 70 | NLTK_DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "nltk_data") 71 | -------------------------------------------------------------------------------- /art_lib/style_transfer/dct_net/dct_net.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/1/17 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import cv2 6 | import numpy 
as np 7 | from apstone import ModelBase 8 | from cv2box import CVImage 9 | 10 | from art_lib.style_transfer.dct_net.utils import resize_size, padTo16x 11 | """ 12 | ref https://www.modelscope.cn/models/damo/cv_unet_person-image-cartoon_compound-models/summary 13 | """ 14 | 15 | MODEL_ZOO = { 16 | '3d': { 17 | 'model_path': 'pretrain_models/art_lib/style_transfer/dctnet/3d_h.onnx', 18 | 'input_dynamic_shape': (720, 720, 3), 19 | }, 20 | 'anime': { 21 | 'model_path': 'pretrain_models/art_lib/style_transfer/dctnet/anime_h.onnx', 22 | 'input_dynamic_shape': (720, 720, 3), 23 | }, 24 | 'artstyle': { 25 | 'model_path': 'pretrain_models/art_lib/style_transfer/dctnet/artstyle_h.onnx', 26 | 'input_dynamic_shape': (720, 720, 3), 27 | }, 28 | 'handdrawn': { 29 | 'model_path': 'pretrain_models/art_lib/style_transfer/dctnet/handdrawn_h.onnx', 30 | 'input_dynamic_shape': (720, 720, 3), 31 | }, 32 | 'sketch': { 33 | 'model_path': 'pretrain_models/art_lib/style_transfer/dctnet/sketch_h.onnx', 34 | 'input_dynamic_shape': (720, 720, 3), 35 | }, 36 | } 37 | 38 | 39 | class DCTNet(ModelBase): 40 | def __init__(self, model_type='anime', provider='cpu'): 41 | super().__init__(MODEL_ZOO[model_type], provider) 42 | self.model_type = model_type 43 | 44 | def forward(self, img_in): 45 | # img: BGR input 46 | img_bgr = CVImage(img_in).bgr 47 | ori_h, ori_w, _ = img_bgr.shape 48 | img_bgr = resize_size(img_bgr, size=720).astype(np.float32) 49 | pad_bg, pad_h, pad_w = padTo16x(img_bgr) 50 | pad_bg = pad_bg.astype(np.float32) 51 | bg_res = self.model.forward(pad_bg)[0] 52 | res = bg_res[:pad_h, :pad_w, :] 53 | 54 | res = cv2.resize(res, (ori_w, ori_h), interpolation=cv2.INTER_AREA) 55 | res = np.clip(res, 0, 255).astype(np.uint8) 56 | return res 57 | 58 | 59 | if __name__ == '__main__': 60 | image_p = 'resources/test3.jpg' 61 | dct = DCTNet(model_type='3d') 62 | out_img = dct.forward(image_p) 63 | CVImage(out_img).show() 64 | -------------------------------------------------------------------------------- /mocap_lib/get_body_bbox_kps.py: -------------------------------------------------------------------------------- 1 | from body_lib import BodyBboxDetector 2 | from mocap_lib import BodyWholebodyDetector 3 | from mocap_lib.skeleton_transfer.cocowholebody_2_openpose import cocowb_2_openpose 4 | from cv2box import CVImage, CVFile 5 | from cv2box.cv_gears import CVVideoThread, Consumer, Linker, Queue 6 | import numpy as np 7 | 8 | 9 | class BodyBboxThread(Linker): 10 | def __init__(self, queue_list: list, fps_counter): 11 | super().__init__(queue_list, fps_counter=fps_counter) 12 | 13 | self.bbd = BodyBboxDetector(model='yolox_tiny_trt16', threshold=0.5, provider='gpu') 14 | 15 | def forward_func(self, something_in): 16 | # do your work here. 17 | image_in = something_in 18 | something_out = [image_in, self.bbd.forward(image_in, max_bbox_num=1)[0]] 19 | return something_out 20 | 21 | 22 | class BodyKpThread(Consumer): 23 | def __init__(self, queue_list: list, out_pkl_path_, fps_counter): 24 | super().__init__(queue_list, fps_counter=fps_counter) 25 | # add init here 26 | self.bwd = BodyWholebodyDetector(model_type='hrnet_w48_384_dark', provider='gpu') 27 | self.out_pkl_path = out_pkl_path_ 28 | self.kp_list = [] 29 | 30 | def exit_func(self): 31 | super(BodyKpThread, self).exit_func() 32 | if self.out_pkl_path: 33 | CVFile(self.out_pkl_path).pickle_write(np.array(self.kp_list)) 34 | 35 | def forward_func(self, something_in): 36 | # do your work here. 
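        # something_in is the [frame, person_bbox] pair emitted by BodyBboxThread;
        # run the wholebody keypoint model on it, remap the coco-wholebody output
        # to openpose body-25 plus both hands, and buffer the stacked keypoints
        # so exit_func() can dump them to the optional pkl file.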
37 | something_out = self.bwd.forward(something_in[0], something_in[1], show=False, mirror_test=False) 38 | 39 | left_hand_kps, right_hand_kps, openpose_25_kps = cocowb_2_openpose(something_out) 40 | whole_kps = np.concatenate((openpose_25_kps, left_hand_kps, right_hand_kps), 0) 41 | if self.out_pkl_path: 42 | self.kp_list.append(whole_kps) 43 | # print(whole_kps.shape) 44 | # print(right_hand_kps) 45 | # print(openpose_25_kps) 46 | 47 | 48 | if __name__ == '__main__': 49 | # 4K 67 fps 50 | video_p = '' 51 | out_pkl_path = None 52 | 53 | q1 = Queue(5) 54 | q2 = Queue(5) 55 | c1 = CVVideoThread(video_p, [q1], fps_counter=False) 56 | b1 = BodyBboxThread([q1, q2], fps_counter=True) 57 | b2 = BodyKpThread([q2], out_pkl_path, fps_counter=True) 58 | c1.start() 59 | b1.start() 60 | b2.start() 61 | -------------------------------------------------------------------------------- /mocap_lib/triangulate/anipose_triangulate.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/7/15 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | from cv2box import CVFile, CVCamera 7 | import numpy as np 8 | 9 | from mocap_lib.skeleton_transfer.bone_links import BONE_CONFIG 10 | 11 | 12 | class AniposeTriangulate: 13 | def __init__(self, pkl_path_, pkl_mode_='anipose'): 14 | if pkl_mode_ == 'multical': 15 | self.c_group = CVCamera(pkl_path_).load_camera_group() 16 | elif pkl_mode_ == 'anipose': 17 | self.c_group = CVFile(pkl_path_).data 18 | 19 | def triangulate(self, multi_view_kps): 20 | """ 21 | 22 | Args: 23 | multi_view_kps: N_view * N_kps * 3 24 | 25 | Returns: 26 | kps_3d: N_kps * 3 27 | 28 | """ 29 | # multi_view_kps = multi_view_kps[:, :, 0:2] 30 | 31 | # # default total 60fps 32 | # kps_3d = self.c_group.triangulate(multi_view_kps, undistort=True, progress=False) 33 | 34 | # # ransac slow total 10fps 35 | # kps_3d = self.c_group.triangulate_ransac(multi_view_kps, progress=False)[0] 36 | 37 | # # offline shape CxNxJx2 38 | nframes = multi_view_kps.shape[0] 39 | kps_3d = self.c_group.triangulate_optim(multi_view_kps, 40 | constraints=BONE_CONFIG['openpose_bodyhand67']['kintree'], 41 | verbose=True, init_progress=True) 42 | 43 | return kps_3d 44 | 45 | def project(self, kps_3d_): 46 | """ 47 | 48 | Args: 49 | kps_3d_: N_kps * 3 50 | 51 | Returns: 52 | kps_2d: N_view * N_kps * 2 53 | 54 | """ 55 | kps_2d = self.c_group.project(kps_3d_) 56 | return kps_2d 57 | 58 | 59 | if __name__ == '__main__': 60 | from cv2box.utils import get_path_by_ext 61 | 62 | # triangulate 63 | at = AniposeTriangulate('cgroup.pkl') 64 | kp2ds = [] 65 | for file in get_path_by_ext('', ['.pkl']): 66 | # 坐标变换 67 | kp2ds.append(np.load(file, allow_pickle=True)) 68 | # kp3ds.append(np.load(file, allow_pickle=True)[None]) 69 | kp2ds = np.array(kp2ds).transpose((1, 0, 2, 3))[:, 600:900, :, :2] 70 | results = at.triangulate(kp2ds) 71 | CVFile('3dkp_optim.pkl').pickle_write(results) 72 | 73 | -------------------------------------------------------------------------------- /body_lib/body_bbox_detector/body_bbox_detector_mmdet.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/6/27 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | from cv2box import CVImage, MyFpsCounter 7 | from apstone.wrappers.mmlab_wrapper import BboxDetectorBase 8 | 9 | MODEL_ZOO = { 10 | # gpu 55fps 11 | 'yolox_tiny': { 12 | 'model_path': 
'pretrain_models/body_lib/body_bbox_detector/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906_dynamic.onnx', 13 | 'model_input_size': (416, 416), 14 | }, 15 | # 207fps 16 | 'yolox_tiny_trt16': { 17 | 'model_path': 'pretrain_models/body_lib/body_bbox_detector/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906_static.engine', 18 | 'model_input_size': (416, 416), 19 | }, 20 | # gpu 49fps 21 | 'yolox_s': { 22 | 'model_path': 'pretrain_models/body_lib/body_bbox_detector/yolox_s_8x8_300e_coco_20211121_095711-4592a793_dynamic.onnx', 23 | 'model_input_size': (640, 640), 24 | }, 25 | } 26 | 27 | 28 | class BodyBboxDetector(BboxDetectorBase): 29 | def __init__(self, model='yolox_tiny', threshold=0.5, provider='gpu'): 30 | self.threshold = threshold 31 | super().__init__(MODEL_ZOO[model], provider) 32 | 33 | def forward(self, image_in_, show=False, max_bbox_num=1): 34 | """ 35 | Args: 36 | image_in_: 37 | show: 38 | max_bbox_num: 39 | Returns: N*4 40 | """ 41 | model_results = self.model.forward(self.preprocess(image_in_)) 42 | results_after = self.postprocess(model_results, self.threshold, max_bbox_num=max_bbox_num) 43 | if show: 44 | self.show(image_in_, results_after) 45 | return results_after 46 | 47 | 48 | if __name__ == '__main__': 49 | 50 | image_path = 'resources/for_pose/t_pose_1080p.jpeg' 51 | image_in = CVImage(image_path).bgr 52 | 53 | # yolox_tiny yolox_s yolox_tiny_static_trt 54 | bbd = BodyBboxDetector(model='yolox_s', provider='gpu') 55 | 56 | bboxes = bbd.forward(image_in, show=True, max_bbox_num=3) 57 | 58 | from cv2box import CVBbox 59 | 60 | bboxes = CVBbox(bboxes).area_center_filter(image_in.shape) 61 | print(bboxes) 62 | 63 | with MyFpsCounter('model forward 10 times fps:') as mfc: 64 | for i in range(10): 65 | bboxes = bbd.forward(image_in, max_bbox_num=3) 66 | -------------------------------------------------------------------------------- /seg_lib/segformer_b2_clothes/segformer_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/7/19 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from apstone import ModelBase 6 | import numpy as np 7 | from cv2box import CVImage 8 | from PIL import Image 9 | 10 | """ 11 | 0-1 RGB 12 | input_name:['pixel_values'], shape:[['batch', 'num_channels', 'height', 'width']] 13 | output_name:['last_hidden_state'], shape:[['batch', 'sequence', 'Transposelast_hidden_state_dim_2', 'Transposelast_hidden_state_dim_3']] 14 | 15 | background 0 16 | hat 1 17 | hair 2 18 | sunglass 3 19 | upper-clothes 4 20 | skirt 5 21 | pants 6 22 | dress 7 23 | belt 8 24 | left-shoe 9 25 | right-shoe 10 26 | face 11 27 | left-leg 12 28 | right-leg 13 29 | left-arm 14 30 | right-arm 15 31 | bag 16 32 | scarf 17 33 | """ 34 | 35 | MODEL_ZOO = { 36 | # https://huggingface.co/mattmdjaga/segformer_b2_clothes 37 | 'segformer_b2_clothes': { 38 | 'model_path': 'pretrain_models/seg_lib/segformer_clothes/segformer_b2_clothes.onnx' 39 | }, 40 | } 41 | 42 | 43 | class SegFormer(ModelBase): 44 | def __init__(self, model_name='segformer_b2_clothes', provider='gpu'): 45 | super(SegFormer, self).__init__(MODEL_ZOO[model_name], provider) 46 | self.mean = [0.485, 0.456, 0.406] 47 | self.std = [0.229, 0.224, 0.225] 48 | self.input_size = (512, 512) 49 | 50 | def forward(self, image_in): 51 | """ 52 | Args: 53 | image_in: CVImage class H*W*C 54 | Returns: h*w*1 55 | """ 56 | input_size_ = CVImage(image_in).bgr.shape[:2] 57 | input_image = 
CVImage(image_in).blob_innormal(self.input_size, input_mean=self.mean, input_std=self.std, 58 | rgb=True) 59 | pred_mask = self.model.forward(input_image)[0] 60 | pred_mask = np.transpose(pred_mask[0], (1, 2, 0)) 61 | pred_mask = CVImage(pred_mask).resize(input_size_[::-1]).bgr 62 | pred_mask = pred_mask.argmax(axis=2)[..., np.newaxis] 63 | return pred_mask.astype(np.int8) 64 | 65 | 66 | if __name__ == '__main__': 67 | img_p = 'resources/for_pose/yoga2.jpg' 68 | 69 | sf = SegFormer(model_name='segformer_b2_clothes') 70 | mask_img = sf.forward(img_p) 71 | CVImage(mask_img).show() 72 | print(mask_img.shape) 73 | -------------------------------------------------------------------------------- /gpt_lib/models/llama.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/5/12 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from transformers import LlamaForCausalLM, LlamaTokenizer 6 | import torch 7 | import os 8 | import platform 9 | from accelerate import init_empty_weights 10 | from accelerate.utils import get_balanced_memory, infer_auto_device_map 11 | from transformers import AutoConfig, GenerationConfig 12 | from transformers.dynamic_module_utils import get_class_from_dynamic_module 13 | from transformers.modeling_utils import no_init_weights 14 | from transformers.utils import ContextManagers 15 | 16 | from gpt_lib.models.llm_base import LLM 17 | 18 | MODEL_ZOO = { 19 | 'llama-7b': { 20 | 'model_path': '/mnt/ljt/models/hugging_face/llama-7b-hf', 21 | 'model': LlamaForCausalLM, 22 | 'tokenizer': LlamaTokenizer, 23 | 'config': AutoConfig, 24 | 'prompt_template': """Below is an instruction that describes a task. Write a response that appropriately completes the request. 
25 | ### Instruction: 26 | {instruction} 27 | ### Response:""", 28 | }, 29 | } 30 | 31 | 32 | class LLAMA(LLM): 33 | def __init__(self, model_name, load_in_8bit): 34 | self.model_path = MODEL_ZOO[model_name]['model_path'] 35 | self.config = MODEL_ZOO[model_name]['config'].from_pretrained(self.model_path, return_unused_kwargs=True, 36 | trust_remote_code=True)[0] 37 | super().__init__(MODEL_ZOO[model_name], load_in_8bit, self.get_device_map) 38 | 39 | def get_device_map(self): 40 | return 'auto' 41 | 42 | def generate(self, prompt): 43 | generation_config = GenerationConfig(temperature=0.1, 44 | top_p=0.75, 45 | top_k=40, 46 | num_beams=4, 47 | max_new_tokens=512, 48 | do_sample=True, 49 | no_repeat_ngram_size=6, 50 | repetition_penalty=1.8, 51 | ) 52 | result = self.generate_base(prompt, generation_config) 53 | return result 54 | 55 | 56 | if __name__ == '__main__': 57 | llama = LLAMA('llama-7b', load_in_8bit=True) 58 | print(llama.generate('你好')) 59 | -------------------------------------------------------------------------------- /seg_lib/ppmattingv2/ppmattingv2_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/2/3 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from apstone import ModelBase 6 | import cv2 7 | import numpy as np 8 | import copy 9 | from cv2box import CVImage 10 | 11 | MODEL_ZOO = { 12 | # https://github.com/jiachen0212/pp_mattingv2 13 | '384x480': { 14 | 'model_path': 'pretrain_models/seg_lib/ppmattingv2/ppmattingv2_stdc1_human_384x480.onnx' 15 | }, 16 | } 17 | 18 | 19 | class PPMattingV2(ModelBase): 20 | def __init__(self, model_name='384x480', provider='gpu'): 21 | super(PPMattingV2, self).__init__(MODEL_ZOO[model_name], provider) 22 | self.conf_threshold = 0.65 23 | 24 | def prepare_input(self, image): 25 | input_image = cv2.resize(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), dsize=(self.input_width, self.input_height)) 26 | input_image = input_image.astype(np.float32) / 255.0 27 | input_image = input_image.transpose(2, 0, 1) 28 | input_image = np.expand_dims(input_image, axis=0) 29 | return input_image 30 | 31 | def forward(self, image): 32 | input_image = self.prepare_input(image) 33 | 34 | # Perform inference on the image 35 | result = self.forward(input_image) 36 | 37 | # Post process:squeeze 38 | segmentation_map = result[0] 39 | segmentation_map = np.squeeze(segmentation_map) 40 | 41 | image_width, image_height = image.shape[1], image.shape[0] 42 | dst_image = copy.deepcopy(image) 43 | segmentation_map = cv2.resize( 44 | segmentation_map, 45 | dsize=(image_width, image_height), 46 | interpolation=cv2.INTER_LINEAR, 47 | ) 48 | 49 | # color list 50 | color_image_list = [] 51 | # ID 0:BackGround 52 | bg_image = np.zeros(image.shape, dtype=np.uint8) 53 | bg_image[:] = (0, 0, 0) 54 | color_image_list.append(bg_image) 55 | # ID 1:Human 56 | bg_image = np.zeros(image.shape, dtype=np.uint8) 57 | bg_image[:] = (0, 255, 0) 58 | color_image_list.append(bg_image) 59 | 60 | mask = np.where(segmentation_map > self.conf_threshold, 0, 1) 61 | mask = np.stack((mask,) * 3, axis=-1).astype('uint8') 62 | mask_image = np.where(mask, dst_image, color_image_list[1]) 63 | dst_image = cv2.addWeighted(dst_image, 0.5, mask_image, 0.5, 1.0) 64 | return dst_image 65 | 66 | 67 | if __name__ == '__main__': 68 | ppm = PPMattingV2() 69 | img_p = '' 70 | ppm.forward(CVImage(img_p).bgr) 71 | -------------------------------------------------------------------------------- 
/hand_lib/hand_detector/hand_detector_d2/hand_detector_d2_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/2/23 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from detectron2.config import get_cfg 6 | from detectron2.engine import DefaultPredictor 7 | from detectron2.data import DatasetCatalog, MetadataCatalog 8 | from detectron2.utils.visualizer import Visualizer 9 | from cv2box import CVImage 10 | import numpy as np 11 | 12 | class ThirdViewDetector: 13 | """ 14 | Hand Detector for third-view input.(https://github.com/ddshan/hand_detector.d2) 15 | """ 16 | 17 | def __init__(self): 18 | print("Loading Third View Hand Detector") 19 | self.__load_hand_detector() 20 | self.cfg = None 21 | 22 | def __load_hand_detector(self): 23 | # load cfg and model 24 | self.cfg = get_cfg() 25 | self.cfg.merge_from_file("pretrain_models/digital_human/hand_detector_d2/faster_rcnn_X_101_32x8d_FPN_3x_100DOH.yaml") 26 | self.cfg.MODEL.WEIGHTS = 'pretrain_models/digital_human/hand_detector_d2/model_0529999.pth' # add model weight here 27 | self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3 # 0.5 , use low thresh to increase recall 28 | self.hand_detector = DefaultPredictor(self.cfg) 29 | 30 | def get_cfg(self): 31 | return self.cfg 32 | 33 | def forward(self, img, show=False): 34 | results = self.hand_detector(img) 35 | final_image = None 36 | if show: 37 | v = Visualizer(img[:, :, ::-1], MetadataCatalog.get("100DOH_hand_trainval"), scale=1.2) 38 | v = v.draw_instance_predictions(results["instances"].to("cpu")) 39 | final_image = v.get_image()[:, :, ::-1] 40 | CVImage(final_image).show(1) 41 | return results, final_image 42 | 43 | def get_hand_bbox(self, img): 44 | bbox_tensor = self.hand_detector(img)['instances'].pred_boxes 45 | bboxes = bbox_tensor.tensor.cpu().numpy() 46 | return bboxes 47 | 48 | 49 | if __name__ == '__main__': 50 | # data path 51 | test_img = 'test_img/test1.jpg' 52 | im = CVImage(test_img).bgr 53 | 54 | tvd = ThirdViewDetector() 55 | outputs = tvd.forward(im) 56 | 57 | v = Visualizer(im[:, :, ::-1], MetadataCatalog.get("100DOH_hand_trainval"), scale=1.2) 58 | v = v.draw_instance_predictions(outputs["instances"].to("cpu")) 59 | CVImage(v.get_image()[:, :, ::-1]).show() 60 | 61 | # print 62 | print(outputs["instances"].pred_classes) 63 | bboxs = np.array(outputs["instances"].pred_boxes.tensor.to('cpu')) 64 | print(bboxs) 65 | -------------------------------------------------------------------------------- /mocap_lib/skeleton_transfer/keypoints_map.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/6/15 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | 7 | # Just informative (from, e.g., https://www.immersivelimit.com/tutorials/create-coco-annotations-from-scratch) 8 | COCO_JOINTS = { 9 | 0: "Nose", 10 | 1: "LEye", 11 | 2: "REye", 12 | 3: "LEar", 13 | 4: "REar", 14 | 5: "LShoulder", 15 | 6: "RShoulder", 16 | 7: "LElbow", 17 | 8: "RElbow", 18 | 9: "LWrist", 19 | 10: "RWrist", 20 | 11: "LHip", 21 | 12: "RHip", 22 | 13: "LKnee", 23 | 14: "RKnee", 24 | 15: "LAnkle", 25 | 16: "RAnkle" 26 | # It has no neck, you can add it (pos 17) for drawing or for converting to openpose 27 | } 28 | 29 | COCO_WHOLE_BODY_JOINTS = { 30 | 0: "Nose", 31 | 1: "LEye", 32 | 2: "REye", 33 | 3: "LEar", 34 | 4: "REar", 35 | 5: "LShoulder", 36 | 6: "RShoulder", 37 | 7: "LElbow", 38 | 8: "RElbow", 39 
| 9: "LWrist", 40 | 10: "RWrist", 41 | 11: "LHip", 42 | 12: "RHip", 43 | 13: "LKnee", 44 | 14: "RKnee", 45 | 15: "LAnkle", 46 | 16: "RAnkle", 47 | 17: "LBigToe", 48 | 18: "LSmallToe", 49 | 19: "LHeel", 50 | 20: "RBigToe", 51 | 21: "RSmallToe", 52 | 22: "RHeel", 53 | # It has no neck, you can add it (pos 17) for drawing or for converting to openpose 54 | } 55 | 56 | HALPE_JOINTS = { 57 | 0: "Nose", 58 | 1: "LEye", 59 | 2: "REye", 60 | 3: "LEar", 61 | 4: "REar", 62 | 5: "LShoulder", 63 | 6: "RShoulder", 64 | 7: "LElbow", 65 | 8: "RElbow", 66 | 9: "LWrist", 67 | 10: "RWrist", 68 | 11: "LHip", 69 | 12: "RHip", 70 | 13: "LKnee", 71 | 14: "Rknee", 72 | 15: "LAnkle", 73 | 16: "RAnkle", 74 | 17: "Head", 75 | 18: "Neck", 76 | 19: "Hip", 77 | 20: "LBigToe", 78 | 21: "RBigToe", 79 | 22: "LSmallToe", 80 | 23: "RSmallToe", 81 | 24: "LHeel", 82 | 25: "RHeel", 83 | } 84 | 85 | POSE_BODY_25_BODY_PARTS = [ 86 | "Nose", 87 | "Neck", 88 | "RShoulder", 89 | "RElbow", 90 | "RWrist", 91 | "LShoulder", 92 | "LElbow", 93 | "LWrist", 94 | "MidHip", 95 | "RHip", 96 | "RKnee", 97 | "RAnkle", 98 | "LHip", 99 | "LKnee", 100 | "LAnkle", 101 | "REye", 102 | "LEye", 103 | "REar", 104 | "LEar", 105 | "LBigToe", 106 | "LSmallToe", 107 | "LHeel", 108 | "RBigToe", 109 | "RSmallToe", 110 | "RHeel", 111 | "Background" 112 | ] 113 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # my ignore 132 | cache/ 133 | pretrain_models/ 134 | private_models/ 135 | sd_models/ 136 | skeleton_imgs/ 137 | resources/ 138 | -------------------------------------------------------------------------------- /SPEEDTABLE.md: -------------------------------------------------------------------------------- 1 | ### Notes 2 | 3 | - '-' means use last result from top 4 | - empty means no test results 5 | - onnx convert based on [mmdeploy](https://github.com/open-mmlab/mmdeploy) 6 | - model infer fps does not contain pre/post-process time cost 7 | - trt & trt16 based on onnxruntime tensorrt EP 8 | - input array located on cpu (io-binding done by onnxruntime itself) 9 | - more info check [onnx test codes]() 10 | 11 | 12 | 13 | ### Environment 14 | 15 | | Name | Attr | 16 | | ---- | ----------------------------------------- | 17 | | Sys | Ubuntu 20.04 | 18 | | GPU | NVIDIA GeForce RTX 3080 10G | 19 | | CPU | Intel® Core™ i9-10850K CPU @ 3.60GHz × 20 | 20 | | MEM | 32G | 21 | | Libs | onnxruntime-gpu=1.13 | 22 | 23 | 24 | 25 | ### Pose Detect 26 | 27 | 28 | 29 | | MMPose | input shape | size | cpu infer fps | gpu infer fps | trt infer fps | trt16 infer fps | 30 | | ------------------------------------------------------------ | ---------------- | ------ | ------------- | ------------- | ------------- | --------------- | 31 | | [pvtv2-b2_static_coco](https://github.com/open-mmlab/mmpose/tree/master/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/pvtv2-b2_coco_256x192.py) | [1, 3, 256, 192] | 116.3m | 4.9 | 73 | 184 | 257 | 32 | | | [4, 3, 256, 192] | - | 2.5 | 47 | 106 | 178 | 33 | | | | | | | | | 34 | | [hrnet_w48_dark+_dynamic_cocowholebody](https://github.com/open-mmlab/mmpose/tree/master/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark_plus.py) | [4, 3, 384, 288] | 254m | 2.9 | 31 | 39 | 83 | 35 | | | | | | | | | 36 | | **MMPose Post-process** | | | | | | | 37 | | gaussian_blur_k17 | [4, 133, 96, 72] | 0.19m | 7.9 | 119 | 147 | 142 | 38 | | | | | | | | | 39 | 40 | -------------------------------------------------------------------------------- /sd_lib/ip_adapter/ip_adapter_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/8/23 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import torch 6 | from PIL import Image 7 | from cv2box import CVImage 8 | from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, StableDiffusionInpaintPipelineLegacy, \ 9 | DDIMScheduler, AutoencoderKL 10 | 11 | from sd_lib.ip_adapter.models import IPAdapter, IPAdapterPlus 12 | 13 | SD_PRETRAIN = './sd_models/stable-diffusion-v1-5' 14 | VAE_PRETRAIN = './sd_models/stabilityai_sd-vae-ft-mse' 15 | CLIP_IMAGE_PRETRAIN = './sd_models/clip_image_encoder' 16 | IMAGE_PROJ_PRETRAIN = './sd_models/ip_adapter_image_proj/ip-adapter_sd15.bin' 17 | 
IMAGE_PROJ_PLUS_PRETRAIN = './sd_models/ip_adapter_image_proj/ip-adapter-plus_sd15.bin' 18 | 19 | 20 | def image_grid(imgs, rows, cols): 21 | assert len(imgs) == rows * cols 22 | 23 | w, h = imgs[0].size 24 | grid = Image.new('RGB', size=(cols * w, rows * h)) 25 | grid_w, grid_h = grid.size 26 | 27 | for i, img in enumerate(imgs): 28 | grid.paste(img, box=(i % cols * w, i // cols * h)) 29 | return grid 30 | 31 | 32 | class IpAdapterAPI: 33 | def __init__(self, device="cuda"): 34 | # load SD pipeline 35 | noise_scheduler = DDIMScheduler( 36 | num_train_timesteps=1000, 37 | beta_start=0.00085, 38 | beta_end=0.012, 39 | beta_schedule="scaled_linear", 40 | clip_sample=False, 41 | set_alpha_to_one=False, 42 | steps_offset=1, 43 | ) 44 | vae = AutoencoderKL.from_pretrained(VAE_PRETRAIN).to(dtype=torch.float16) 45 | 46 | sd_pipe = StableDiffusionPipeline.from_pretrained( 47 | SD_PRETRAIN, 48 | torch_dtype=torch.float16, 49 | scheduler=noise_scheduler, 50 | vae=vae, 51 | feature_extractor=None, 52 | safety_checker=None 53 | ) 54 | 55 | # load ip-adapter 56 | # self.ip_model = IPAdapter(sd_pipe, CLIP_IMAGE_PRETRAIN, IMAGE_PROJ_PRETRAIN, device) 57 | self.ip_model = IPAdapterPlus(sd_pipe, CLIP_IMAGE_PRETRAIN, IMAGE_PROJ_PLUS_PRETRAIN, device, num_tokens=16) 58 | 59 | def forward(self, image_pil): 60 | # generate image variations 61 | images = self.ip_model.generate(pil_image=image_pil, num_samples=4, num_inference_steps=50, seed=42) 62 | grid = image_grid(images, 1, 4) 63 | grid.show() 64 | return grid 65 | 66 | 67 | if __name__ == '__main__': 68 | # read image prompt 69 | ia = IpAdapterAPI() 70 | image = Image.open('resources/for_sd/girl_reading_512_crop.png') 71 | image.resize((256, 256)) 72 | out_image = ia.forward(image) 73 | -------------------------------------------------------------------------------- /data_lib/dataset_convert/coco-annotator_2_coco-mmdet.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/8/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | for hand detect 7 | """ 8 | from cv2box import CVFile 9 | from tqdm import tqdm 10 | 11 | 12 | def del_other_category(json_in_, json_out_): 13 | json_data = CVFile(json_in_).data 14 | out_data = json_data.copy() 15 | out_data['annotations'] = [] 16 | out_data['categories'] = json_data['categories'][:1] 17 | out_data['categories'][0]['name'] = 'hand' 18 | out_data['categories'][0]['id'] = 1 19 | print(len(json_data['annotations'])) 20 | for i in tqdm(range(len(json_data['annotations']))): 21 | dummy = json_data['annotations'][i] 22 | if dummy['category_id'] != 1 and 'bbox' in dummy.keys(): 23 | dummy['category_id'] = 1 24 | out_data['annotations'].append(dummy) 25 | 26 | print(len(out_data['annotations'])) 27 | CVFile(json_out_).json_write(out_data) 28 | 29 | 30 | def del_some_name(json_in_, json_out_): 31 | json_data = CVFile(json_in_).data 32 | out_data = json_data.copy() 33 | out_data['annotations'] = [] 34 | out_data['images'] = [] 35 | print(len(json_data['images'])) 36 | print(len(json_data['annotations'])) 37 | del_image_id_list = [] 38 | 39 | for i in tqdm(range(len(json_data['images']))): 40 | dummy = json_data['images'][i] 41 | if '0707_4_' in dummy['file_name']: 42 | del_image_id_list.append(dummy['id']) 43 | else: 44 | out_data['images'].append(dummy) 45 | 46 | for i in tqdm(range(len(json_data['annotations']))): 47 | dummy = json_data['annotations'][i] 48 | if dummy['image_id'] not in del_image_id_list: 49 | 
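            # keep only the annotations whose parent image survived the
            # '0707_4_' filename filter above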
out_data['annotations'].append(dummy) 50 | 51 | print(len(out_data['images'])) 52 | print(len(out_data['annotations'])) 53 | CVFile(json_out_).json_write(out_data) 54 | 55 | 56 | def concat_2_json(json_in_1, json_in_2, json_out_): 57 | json_data_1 = CVFile(json_in_1).data 58 | json_data_2 = CVFile(json_in_2).data 59 | out_data = json_data_1.copy() 60 | out_data['annotations'] += json_data_2['annotations'] 61 | out_data['images'] += json_data_2['images'] 62 | print(len(out_data['annotations'])) 63 | print(len(out_data['images'])) 64 | CVFile(json_out_).json_write(out_data) 65 | 66 | 67 | if __name__ == '__main__': 68 | json_in = '/datasets_TVCOCO_hand_train/annotations/train.json' 69 | json_out = '/datasets_TVCOCO_hand_train/annotations/train_out.json' 70 | del_other_category(json_in, json_out) 71 | 72 | # json_in = '' 73 | # json_out = '' 74 | # del_some_name(json_in, json_out) 75 | 76 | # json_in_1 = '' 77 | # json_in_2 = '' 78 | # json_out = '' 79 | # concat_2_json(json_in_1, json_in_2, json_out) 80 | -------------------------------------------------------------------------------- /mocap_lib/smooth_filter/smooth_filter.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/7/6 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from .smoothnet_api import SmoothNetFilter 6 | from .one_euro_api import OneEuroFilter 7 | import numpy as np 8 | 9 | SMOOTH_NET_8 = 'pretrain_models/smooth_filter/smoothnet_ws8_h36m.pth' 10 | SMOOTH_NET_16 = 'pretrain_models/smooth_filter/smoothnet_ws16_h36m.pth' 11 | SMOOTH_NET_32 = 'pretrain_models/smooth_filter/smoothnet_ws32_h36m.pth' 12 | SMOOTH_NET_64 = 'pretrain_models/smooth_filter/smoothnet_ws64_h36m.pth' 13 | 14 | 15 | class SmoothFilter: 16 | def __init__(self, filter_type, **kwargs): 17 | 18 | if filter_type == 'one_euro': 19 | self.filter = OneEuroFilter() 20 | # can not use ! 
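        # Only the one-euro path above is usable for now; the SmoothNetFilter
        # variants below (window sizes 8/16/32/64, H36M checkpoints) are kept
        # commented out for reference.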
21 | # if filter_type == 'smooth_net_8': 22 | # self.window = 8 23 | # self.filter = SmoothNetFilter(8, SMOOTH_NET_8, root_index=kwargs['root_index']) 24 | # elif filter_type == 'smooth_net_16': 25 | # self.window = 16 26 | # self.filter = SmoothNetFilter(16, SMOOTH_NET_16, root_index=kwargs['root_index']) 27 | # elif filter_type == 'smooth_net_32': 28 | # self.window = 32 29 | # self.filter = SmoothNetFilter(32, SMOOTH_NET_32, root_index=kwargs['root_index']) 30 | # elif filter_type == 'smooth_net_64': 31 | # self.window = 64 32 | # self.filter = SmoothNetFilter(64, SMOOTH_NET_64, root_index=kwargs['root_index']) 33 | 34 | # self.history_list = [[], [], []] 35 | # self.thres_list = [[], [], []] 36 | 37 | def forward(self, x): 38 | """ 39 | 40 | Args: 41 | x: [N, 2] or [N, 3] 42 | 43 | Returns: 44 | 45 | """ 46 | # history_now = self.history_list[id] 47 | # thres_list_now = self.thres_list[id] 48 | 49 | # if x.shape[1] == 3: 50 | # x_new = x[:, :2] 51 | # thres = x[:, 2] 52 | # else: 53 | # x_new = x 54 | # thres = 0 55 | 56 | results = self.filter.forward(np.array([x.copy()])) 57 | return results[0] 58 | # return np.concatenate((results[0], thres.reshape(-1, 1)), 1) 59 | 60 | # if len(self.history_list[id]) < self.window: 61 | # self.history_list[id].append(x_new) 62 | # self.thres_list[id].append(thres) 63 | # return x 64 | # else: 65 | # self.history_list[id].append(x_new) 66 | # self.thres_list[id].append(thres) 67 | # self.history_list[id] = self.history_list[id][-self.window:] 68 | # self.thres_list[id] = self.thres_list[id][-self.window:] 69 | # results = self.filter.forward(np.array(self.history_list[id].copy())) 70 | # 71 | # return np.concatenate((results[-1], self.thres_list[id][-1].reshape(-1,1)), 1) 72 | -------------------------------------------------------------------------------- /gpt_lib/chatglm6b_finetune/tokenize_dataset_rows.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/4/24 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | ref https://github.com/mymusise/ChatGLM-Tuning/blob/master/tokenize_dataset_rows.py 7 | """ 8 | import argparse 9 | import json 10 | from tqdm import tqdm 11 | import datasets 12 | import transformers 13 | from cv2box import CVFile 14 | 15 | # init chatglm-6b model 16 | # model_name = 'THUDM/chatglm-6b' 17 | model_name = '/mnt/ljt/models/hugging_face/chatglm-6b' 18 | tokenizer = transformers.AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) 19 | config = transformers.AutoConfig.from_pretrained(model_name, trust_remote_code=True, device_map='auto') 20 | 21 | # global init 22 | prompt_row_name = 'instruction' 23 | target_row_name = 'output' 24 | 25 | 26 | def preprocess(example: dict, max_seq_length): 27 | prompt = example[prompt_row_name] 28 | target = example[target_row_name] 29 | prompt_ids = tokenizer.encode(prompt, max_length=max_seq_length, truncation=True) 30 | target_ids = tokenizer.encode( 31 | target, 32 | max_length=max_seq_length, 33 | truncation=True, 34 | add_special_tokens=False) 35 | input_ids = prompt_ids + target_ids + [config.eos_token_id] 36 | return {"input_ids": input_ids, "seq_len": len(prompt_ids)} 37 | 38 | 39 | # def read_jsonl(path, max_seq_length, skip_overlength=False): 40 | # with open(path, "r") as f: 41 | # for line in tqdm(f.readlines()): 42 | # example = json.loads(line) 43 | # feature = preprocess(example, max_seq_length) 44 | # if skip_overlength and len(feature["input_ids"]) > 
max_seq_length: 45 | # continue 46 | # feature["input_ids"] = feature["input_ids"][:max_seq_length] 47 | # yield feature 48 | 49 | 50 | def read_json(path, max_seq_length, skip_overlength=False): 51 | """ 52 | for alpaca-COT(https://github.com/PhoebusSi/Alpaca-CoT) format datasets 53 | """ 54 | json_data = CVFile(path).data 55 | for example in tqdm(json_data): 56 | feature = preprocess(example, max_seq_length) 57 | if skip_overlength and len(feature["input_ids"]) > max_seq_length: 58 | continue 59 | feature["input_ids"] = feature["input_ids"][:max_seq_length] 60 | yield feature 61 | 62 | 63 | 64 | def main(): 65 | parser = argparse.ArgumentParser() 66 | parser.add_argument("--data_path", type=str, default="/mnt/ljt/dataset/NLP/liurun_99.json") 67 | parser.add_argument("--save_path", type=str, default="/mnt/ljt/dataset/NLP/liurun_99") 68 | parser.add_argument("--max_seq_length", type=int, default=384) 69 | parser.add_argument("--skip_overlength", type=bool, default=False) 70 | args = parser.parse_args() 71 | 72 | dataset = datasets.Dataset.from_generator( 73 | lambda: read_json(args.data_path, args.max_seq_length, args.skip_overlength) 74 | ) 75 | dataset.save_to_disk(args.save_path) 76 | 77 | 78 | if __name__ == "__main__": 79 | main() 80 | -------------------------------------------------------------------------------- /gpt_lib/models/chatglm_6b.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/5/9 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from transformers import AutoModel, AutoTokenizer 6 | from transformers import AutoConfig 7 | from transformers.dynamic_module_utils import get_class_from_dynamic_module 8 | from transformers.modeling_utils import no_init_weights 9 | from transformers.utils import ContextManagers 10 | from accelerate import init_empty_weights 11 | from accelerate.utils import get_balanced_memory, infer_auto_device_map 12 | 13 | import torch 14 | import os 15 | import platform 16 | import signal 17 | 18 | MODEL_ZOO = { 19 | 'moss': { 20 | 'model_path': '/mnt/ljt/models/hugging_face/moss-moon-003-sft', 21 | 'model': AutoModel, 22 | 'tokenizer': AutoTokenizer, 23 | 'config': AutoConfig, 24 | }, 25 | } 26 | 27 | os_name = platform.system() 28 | clear_command = 'cls' if os_name == 'Windows' else 'clear' 29 | stop_stream = False 30 | 31 | 32 | def build_prompt(history): 33 | prompt = "欢迎使用 ChatGLM-6B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序" 34 | for query, response in history: 35 | prompt += f"\n\n用户:{query}" 36 | prompt += f"\n\nChatGLM-6B:{response}" 37 | return prompt 38 | 39 | 40 | def signal_handler(signal, frame): 41 | global stop_stream 42 | stop_stream = True 43 | 44 | 45 | class ChatGLM(LLM): 46 | def __init__(self, model_name, load_in_8bit=False): 47 | self.model_path = MODEL_ZOO[model_name]['model_path'] 48 | self.config = MODEL_ZOO[model_name]['config'].from_pretrained(self.model_path, return_unused_kwargs=True, 49 | trust_remote_code=True)[0] 50 | super().__init__(MODEL_ZOO[model_name], load_in_8bit, self.get_device_map) 51 | self.model = self.model.half.cuda() 52 | 53 | def get_device_map(self): 54 | return 'auto' 55 | 56 | def stream_chat(self): 57 | self.model = self.model.eval() 58 | history = [] 59 | global stop_stream 60 | print("欢迎使用 ChatGLM-6B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序") 61 | while True: 62 | query = input("\n用户:") 63 | if query.strip() == "stop": 64 | break 65 | if query.strip() == "clear": 66 | history = [] 67 | 
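                # wipe the console and reprint the welcome banner before
                # waiting for the next query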
os.system(clear_command) 68 | print("欢迎使用 ChatGLM-6B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序") 69 | continue 70 | count = 0 71 | for response, history in self.model.stream_chat(self.tokenizer, query, history=history): 72 | if stop_stream: 73 | stop_stream = False 74 | break 75 | else: 76 | count += 1 77 | if count % 8 == 0: 78 | os.system(clear_command) 79 | print(build_prompt(history), flush=True) 80 | signal.signal(signal.SIGINT, signal_handler) 81 | os.system(clear_command) 82 | print(build_prompt(history), flush=True) 83 | -------------------------------------------------------------------------------- /sr_lab/realesrgan/realesrgan_onnx_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/11/11 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import cv2 6 | from cv2box import CVImage, MyFpsCounter 7 | from apstone import ModelBase 8 | 9 | MODEL_ZOO = { 10 | # https://github.com/xinntao/Real-ESRGAN 11 | # input_name: ['input_1'], shape: [[1, 3, w, h]] 12 | # output_name: ['output_1'], shape: [[1, 3, w*4, h*4]] 13 | 'realesr-general-x4v3': { 14 | 'model_path': 'pretrain_models/sr_lib/realesr-general-x4v3-dynamic.onnx' 15 | }, 16 | # onnx will raise alloc memory error when big image input 17 | 'RealESRGAN_x4plus-dynamic': { 18 | 'model_path': 'pretrain_models/sr_lib/RealESRGAN_x4plus-dynamic.onnx' 19 | }, 20 | 'RealESRGAN_x2plus-dynamic': { 21 | 'model_path': 'pretrain_models/sr_lib/RealESRGAN_x2plus-dynamic.onnx' 22 | }, 23 | } 24 | 25 | 26 | class GFPGAN(ModelBase): 27 | def __init__(self, model_type='realesr-general-x4v3', provider='gpu'): 28 | super().__init__(MODEL_ZOO[model_type], provider) 29 | self.model_type = model_type 30 | self.input_std = 255 31 | self.input_mean = 0 32 | self.mod_pad_h = 0 33 | self.mod_pad_w = 0 34 | self.scale = 4 35 | 36 | def pad_for_scale_2(self, image_in_): 37 | self.scale = 2 38 | h, w, _ = image_in_.shape 39 | if h % self.scale != 0: 40 | self.mod_pad_h = (self.scale - h % self.scale) 41 | if w % self.scale != 0: 42 | self.mod_pad_w = (self.scale - w % self.scale) 43 | image_out_ = cv2.copyMakeBorder(image_in_, 0, self.mod_pad_h, 0, self.mod_pad_w, cv2.BORDER_REPLICATE) 44 | return image_out_ 45 | 46 | def forward(self, input_image): 47 | """ 48 | Args: 49 | input_image: cv2 image 0-255 BGR 50 | Returns: 51 | BGR 512x512x3 0-1 52 | """ 53 | if self.model_type == 'RealESRGAN_x2plus-dynamic': 54 | input_image = self.pad_for_scale_2(CVImage(input_image).bgr) 55 | ori_size = CVImage(input_image).bgr.shape[:2][::-1] 56 | # print(ori_size) 57 | image_in = CVImage(input_image).blob(ori_size, self.input_mean, self.input_std, rgb=True) 58 | image_out = self.model.forward(image_in) 59 | output_image = (image_out[0][0])[::-1].transpose(1, 2, 0).clip(0, 1) 60 | if self.model_type == 'RealESRGAN_x2plus-dynamic': 61 | output_h, output_w, _ = output_image.shape 62 | output_image = output_image[0:output_h - self.mod_pad_h * self.scale, 0:output_w - self.mod_pad_w * self.scale, :] 63 | # https://docs.opencv.org/4.x/da/d54/group__imgproc__transform.html 64 | output_image = CVImage(output_image).resize(ori_size, interpolation=cv2.INTER_LANCZOS4).bgr 65 | return output_image 66 | 67 | 68 | if __name__ == '__main__': 69 | face_img_p = 'resources/test1.jpg' 70 | fa = GFPGAN(model_type='RealESRGAN_x2plus-dynamic', provider='gpu') 71 | face = fa.forward(face_img_p) 72 | # CVImage(face, image_format='cv2').save('./gfpgan.jpg') 73 | CVImage(face, 
image_format='cv2').show() 74 | -------------------------------------------------------------------------------- /mocap_lib/middleware/VMCApi.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/2/14 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from osc4py3.as_eventloop import * # osc module 6 | from osc4py3 import oscbuildparse 7 | 8 | # import time 9 | 10 | LEFT_MPII_HAND_LABELS = [ 11 | 'LEFT_WRIST', # 0 12 | 'LEFT_THUMB_CMC', 'LEFT_THUMB_MCP', 'LEFT_THUMB_IP', 'LEFT_THUMB_TIP', 13 | 'LEFT_INDEX_FINGER_MCP', 'LEFT_INDEX_FINGER_PIP', 'LEFT_INDEX_FINGER_DIP', 'LEFT_INDEX_FINGER_TIP', 14 | 'LEFT_MIDDLE_FINGER_MCP', 'LEFT_MIDDLE_FINGER_PIP', 'LEFT_MIDDLE_FINGER_DIP', 'LEFT_MIDDLE_FINGER_TIP', 15 | 'LEFT_RING_FINGER_MCP', 'LEFT_RING_FINGER_PIP', 'LEFT_RING_FINGER_DIP', 'LEFT_RING_FINGER_TIP', 16 | 'LEFT_PINKY_MCP', 'LEFT_PINKY_PIP', 'LEFT_PINKY_DIP', 'LEFT_PINKY_TIP', 17 | ] 18 | 19 | RIGHT_MPII_HAND_LABELS = [ 20 | 'RIGHT_WRIST', # 0 21 | 'RIGHT_THUMB_CMC', 'RIGHT_THUMB_MCP', 'RIGHT_THUMB_IP', 'RIGHT_THUMB_TIP', 22 | 'RIGHT_INDEX_FINGER_MCP', 'RIGHT_INDEX_FINGER_PIP', 'RIGHT_INDEX_FINGER_DIP', 'RIGHT_INDEX_FINGER_TIP', 23 | 'RIGHT_MIDDLE_FINGER_MCP', 'RIGHT_MIDDLE_FINGER_PIP', 'RIGHT_MIDDLE_FINGER_DIP', 'RIGHT_MIDDLE_FINGER_TIP', 24 | 'RIGHT_RING_FINGER_MCP', 'RIGHT_RING_FINGER_PIP', 'RIGHT_RING_FINGER_DIP', 'RIGHT_RING_FINGER_TIP', 25 | 'RIGHT_PINKY_MCP', 'RIGHT_PINKY_PIP', 'RIGHT_PINKY_DIP', 'RIGHT_PINKY_TIP', 26 | ] 27 | 28 | LEFT_UNI_HAND_LABELS = [ 29 | 'LEFT_WRIST', # 0 30 | 'LEFT_THUMB_CMC', 'LeftThumbProximal', 'LeftThumbIntermediate', 'LeftThumbDistal', 31 | 'LEFT_INDEX_FINGER_MCP', 'LeftIndexProximal', 'LeftIndexIntermediate', 'LeftIndexDistal', 32 | 'LEFT_MIDDLE_FINGER_MCP', 'LeftMiddleProximal', 'LeftMiddleIntermediate', 'LeftMiddleDistal', 33 | 'LEFT_RING_FINGER_MCP', 'LeftRingProximal', 'LeftRingIntermediate', 'LeftRingDistal', 34 | 'LEFT_PINKY_MCP', 'LeftLittleProximal', 'LeftLittleIntermediate', 'LeftLittleDistal', 35 | ] 36 | 37 | RIGHT_UNI_HAND_LABELS = [ 38 | 'Right_WRIST', # 0 39 | 'Right_THUMB_CMC', 'RightThumbProximal', 'RightThumbIntermediate', 'RightThumbDistal', 40 | 'Right_INDEX_FINGER_MCP', 'RightIndexProximal', 'RightIndexIntermediate', 'RightIndexDistal', 41 | 'Right_MIDDLE_FINGER_MCP', 'RightMiddleProximal', 'RightMiddleIntermediate', 'RightMiddleDistal', 42 | 'Right_RING_FINGER_MCP', 'RightRingProximal', 'RightRingIntermediate', 'RightRingDistal', 43 | 'Right_PINKY_MCP', 'RightLittleProximal', 'RightLittleIntermediate', 'RightLittleDistal', 44 | ] 45 | 46 | 47 | class VMCApi: 48 | def __init__(self, ip_address, ip_port): 49 | # ip = '192.168.4.13' # ip address 50 | # port = 39539 # port number 51 | 52 | osc_startup() # starts osc protocol 53 | osc_udp_client(ip_address, ip_port, "VroidPoser") # initializes osc client 54 | 55 | def sendosc(self, bone, x, y, z, w): # condensed OSC message function 56 | 57 | msg = oscbuildparse.OSCMessage("/VMC/Ext/Bone/Pos", None, 58 | [bone, float(0), float(0), float(0), float(x), 59 | float(y), float(z), float(w)]) 60 | # print(msg) 61 | osc_send(msg, "VroidPoser") 62 | osc_process() 63 | -------------------------------------------------------------------------------- /data_lib/dataset_preprocess/gen_dataset_thumbnail.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/1/7 3 | # @Author : ykk648 4 | # @Project : 
https://github.com/ykk648/AI_power 5 | 6 | import os 7 | 8 | import PIL.Image as Image 9 | from cv2box.utils.util import get_path_by_ext 10 | 11 | 12 | def resize_by_width(infile, image_size): 13 | """按照宽度进行所需比例缩放""" 14 | im = Image.open(infile) 15 | (x, y) = im.size 16 | lv = round(x / image_size, 2) + 0.01 17 | x_s = int(x // lv) 18 | y_s = int(y // lv) 19 | print("x_s", x_s, y_s) 20 | out = im.resize((x_s, y_s), Image.ANTIALIAS) 21 | return out 22 | 23 | 24 | def get_new_img_xy(infile, image_size): 25 | """返回一个图片的宽、高像素""" 26 | im = Image.open(infile) 27 | (x, y) = im.size 28 | lv = round(x / image_size, 2) + 0.01 29 | x_s = x // lv 30 | y_s = y // lv 31 | # print("x_s", x_s, y_s) 32 | # out = im.resize((x_s, y_s), Image.ANTIALIAS) 33 | return x_s, y_s 34 | 35 | 36 | # 定义图像拼接函数 37 | def image_compose(image_colnum, image_size, image_rownum, image_names, image_save_path, x_new, y_new): 38 | to_image = Image.new('RGB', (image_colnum * x_new, image_rownum * y_new)) # 创建一个新图 39 | # 循环遍历,把每张图片按顺序粘贴到对应位置上 40 | total_num = 0 41 | for y in range(1, image_rownum + 1): 42 | for x in range(1, image_colnum + 1): 43 | from_image = resize_by_width(image_names[image_colnum * (y - 1) + x - 1], image_size) 44 | # from_image = Image.open(image_names[image_colnum * (y - 1) + x - 1]).resize((image_size,image_size ), Image.ANTIALIAS) 45 | to_image.paste(from_image, ((x - 1) * x_new, (y - 1) * y_new)) 46 | total_num += 1 47 | if total_num == len(image_names): 48 | break 49 | return to_image.save(image_save_path) # 保存新图 50 | 51 | 52 | def merge_images(image_dir_path,image_size,image_colnum): 53 | # 获取图片集地址下的所有图片名称 54 | image_fullpath_list = get_path_by_ext(image_dir_path)[:100] 55 | print("image_fullpath_list", len(image_fullpath_list), image_fullpath_list) 56 | 57 | image_save_path = r'{}_thumbnail.jpg'.format(image_dir_path) # 图片转换后的地址 58 | # image_rownum = 4 # 图片间隔,也就是合并成一张图后,一共有几行 59 | image_rownum_yu = len(image_fullpath_list) % image_colnum 60 | if image_rownum_yu == 0: 61 | image_rownum = len(image_fullpath_list) // image_colnum 62 | else: 63 | image_rownum = len(image_fullpath_list) // image_colnum + 1 64 | 65 | x_list = [] 66 | y_list = [] 67 | for img_file in image_fullpath_list: 68 | img_x, img_y = get_new_img_xy(str(img_file), image_size) 69 | x_list.append(img_x) 70 | y_list.append(img_y) 71 | 72 | print("x_list", sorted(x_list)) 73 | print("y_list", sorted(y_list)) 74 | x_new = int(x_list[len(x_list) // 5 * 4]) 75 | y_new = int(x_list[len(y_list) // 5 * 4]) 76 | image_compose(image_colnum, image_size, image_rownum, image_fullpath_list, image_save_path, x_new, y_new) # 调用函数 77 | 78 | 79 | if __name__ == '__main__': 80 | image_dir_path = '' # 图片集地址 81 | image_size = 128 # 每张小图片的大小 82 | image_colnum = 10 # 合并成一张图后,一行有几个小图 83 | merge_images(image_dir_path, image_size, image_colnum) 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /seg_lib/cihp_pgn/cihp_pgn_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/6/28 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from apstone import ModelBase 6 | import numpy as np 7 | from cv2box import CVImage 8 | from PIL import Image 9 | 10 | """ 11 | input_name:['create_inputs/sub:0'], shape:[['unk__40886', 'unk__40887', 3]] 12 | output_name:['ExpandDims_1:0', 'Max:0', 'Sigmoid:0'], shape:[[1, 'unk__40888', 'unk__40889', 1], [1, 'unk__40890', 'unk__40891'], [1, 'unk__40892', 'unk__40893', 1]] 13 | """ 14 
| MODEL_ZOO = { 15 | # https://github.com/Engineering-Course/CIHP_PGN 16 | 'cihp_pgn': { 17 | 'model_path': 'pretrain_models/seg_lib/cihp_pgn/cihp_pgn.onnx' 18 | }, 19 | } 20 | 21 | label_colours = [(0, 0, 0), (128, 0, 0), (255, 0, 0), (0, 85, 0), (170, 0, 51), (255, 85, 0), (0, 0, 85), (0, 119, 221), 22 | (85, 85, 0), (0, 85, 85), (85, 51, 0), (52, 86, 128), (0, 128, 0), (0, 0, 255), (51, 170, 221), 23 | (0, 255, 255), (85, 255, 170), (170, 255, 85), (255, 255, 0), (255, 170, 0)] 24 | N_CLASSES = 20 25 | 26 | 27 | def decode_labels(mask, num_images=1, num_classes=21): 28 | """Decode batch of segmentation masks. 29 | 30 | Args: 31 | mask: result of inference after taking argmax. 32 | num_images: number of images to decode from the batch. 33 | num_classes: number of classes to predict (including background). 34 | 35 | Returns: 36 | A batch with num_images RGB images of the same size as the input. 37 | """ 38 | n, h, w, c = mask.shape 39 | assert (n >= num_images), 'Batch size %d should be greater or equal than number of images to save %d.' % ( 40 | n, num_images) 41 | outputs = np.zeros((num_images, h, w, 3), dtype=np.uint8) 42 | for i in range(num_images): 43 | img = Image.new('RGB', (len(mask[i, 0]), len(mask[i]))) 44 | pixels = img.load() 45 | for j_, j in enumerate(mask[i, :, :, 0]): 46 | for k_, k in enumerate(j): 47 | if k < num_classes: 48 | pixels[k_, j_] = label_colours[k] 49 | outputs[i] = np.array(img) 50 | return outputs 51 | 52 | 53 | class CIHPPGN(ModelBase): 54 | def __init__(self, model_name='cihp_pgn', provider='gpu'): 55 | super(CIHPPGN, self).__init__(MODEL_ZOO[model_name], provider) 56 | self.mean = [125.0, 114.4, 107.9] 57 | self.std = [1, 1, 1] 58 | 59 | def forward(self, image_in): 60 | input_size_ = CVImage(image_in).bgr.shape[:2] 61 | input_image = CVImage(image_in).blob_innormal(input_size_, input_mean=self.mean, input_std=self.std) 62 | # h,w,3 63 | input_image = input_image[0].transpose(1, 2, 0) 64 | parsing_, scores, edge_ = self.model.forward(input_image) 65 | mask_ = decode_labels(parsing_, num_classes=N_CLASSES) 66 | return mask_[0], parsing_[0].astype(np.uint8), (edge_[0] * 255).astype(np.uint8) 67 | 68 | 69 | if __name__ == '__main__': 70 | cihp = CIHPPGN(model_name='cihp_pgn', provider='gpu') 71 | 72 | img_p = 'resources/for_pose/girl_640x480.jpg' 73 | # decrease size to reduce GPU mem 74 | img_p = CVImage(img_p).resize((320, 180)).bgr 75 | mask, parsing, edge = cihp.forward(img_p) 76 | print(mask.shape) 77 | print(parsing.shape) 78 | print(edge.shape) 79 | CVImage(mask).show() 80 | CVImage(parsing).show() 81 | CVImage(edge).show() 82 | -------------------------------------------------------------------------------- /hand_lib/hand_detector/hand_detector_mediapipe/hand_detector_mediapipe.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/2/23 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | import mediapipe as mp 7 | import numpy as np 8 | from cv2box import CVImage, MyFpsCounter, mfc 9 | 10 | 11 | class MediapipeHand: 12 | def __init__(self): 13 | # mp_drawing = mp.solutions.drawing_utils 14 | # mp_drawing_styles = mp.solutions.drawing_styles 15 | self.mp_hands = mp.solutions.hands.Hands( 16 | static_image_mode=True, 17 | model_complexity=1, 18 | max_num_hands=1, 19 | min_detection_confidence=0) 20 | 21 | # @mfc('mediapipe') 22 | def forward(self, image): 23 | # height, width = image.shape[0], image.shape[1] 24 | 25 | image.flags.writeable = 
False 26 | 27 | results = self.mp_hands.process(image) 28 | 29 | try: 30 | hand = results.multi_hand_world_landmarks[0] 31 | multi_handedness = results.multi_handedness 32 | except TypeError: 33 | return None, None, None 34 | # print(len(hand)) 35 | # print(multi_handedness[0].classification[0].label) 36 | hand_np = [] 37 | for i in range(21): 38 | hand_np.append( 39 | [hand.landmark[i].x, hand.landmark[i].y, hand.landmark[i].z]) 40 | return hand_np, multi_handedness[0].classification[0].label, multi_handedness[0].classification[0].score 41 | 42 | 43 | class MediapipeHolistic: 44 | def __init__(self): 45 | # mp_drawing = mp.solutions.drawing_utils 46 | # mp_drawing_styles = mp.solutions.drawing_styles 47 | self.mp_holistic = mp.solutions.holistic 48 | 49 | def forward(self, image): 50 | 51 | height, width = image.shape[0], image.shape[1] 52 | 53 | with self.mp_holistic.Holistic( 54 | model_complexity=1, 55 | min_detection_confidence=0.5, 56 | min_tracking_confidence=0.5) as holistic: 57 | # To improve performance, optionally mark the image as not writeable to 58 | # pass by reference. 59 | image.flags.writeable = False 60 | 61 | results = holistic.process(image) 62 | 63 | left_hand = results.left_hand_landmarks 64 | right_hand = results.right_hand_landmarks 65 | results = [] 66 | for hand in [right_hand, left_hand]: 67 | if hand is not None: 68 | hand_np = [] 69 | for i in range(21): 70 | hand_np.append( 71 | [hand.landmark[i].x * width, hand.landmark[i].y * height, hand.landmark[i].z * width]) 72 | box_left_top_x = np.min(hand_np, axis=0)[0] 73 | box_left_top_y = np.min(hand_np, axis=0)[1] 74 | box_right_bottle_x = np.max(hand_np, axis=0)[0] 75 | box_right_bottle_y = np.max(hand_np, axis=0)[1] 76 | results.append([box_left_top_x, box_left_top_y, box_right_bottle_x, box_right_bottle_y]) 77 | return np.array(results) 78 | 79 | 80 | if __name__ == '__main__': 81 | image_in = CVImage('').rgb 82 | # [[1113.7602996826172, 539.147379398346, 1374.1822814941406, 850.5021500587463]] 83 | # CVImage('').show() 84 | hdm = MediapipeHand() 85 | print(hdm.forward(image_in)) 86 | -------------------------------------------------------------------------------- /body_lib/body_kp_detector/blazepose_mediapipe/pose_landmark_origin.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/7/21 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import numpy as np 6 | from cv2box import CVImage, MyFpsCounter 7 | 8 | from apstone import ONNXModel 9 | from body_lib.body_kp_detector.blazepose_mediapipe.blaze_utils import postprocess, denormalize_landmarks, detection2roi, \ 10 | extract_roi 11 | from body_lib.body_kp_detector.blazepose_mediapipe.body_bbox_detector import BodyDetector 12 | 13 | # input 1*256*256*3 output , 1*1 , , , 14 | LITE_MODEL = 'pretrain_models/body_lib/body_kp_detector/blazepose_mediapipe/pose_landmark_lite.onnx' 15 | FULL_MODEL = 'pretrain_models/body_lib/body_kp_detector/blazepose_mediapipe/pose_landmark_full.onnx' 16 | HEAVY_MODEL = 'pretrain_models/body_lib/body_kp_detector/blazepose_mediapipe/pose_landmark_heavy.onnx' 17 | 18 | 19 | class LandmarkDetectorOrigin: 20 | def __init__(self, model_complexity=0, provider='gpu'): 21 | self.bd = BodyDetector(provider=provider) 22 | 23 | model_path_list = [LITE_MODEL, FULL_MODEL, HEAVY_MODEL] 24 | self.model = ONNXModel(model_path_list[model_complexity], provider=provider) 25 | 26 | def forward(self, image_in_, show=False): 27 | """ 28 | 29 | Args: 30 
| image_in_: 31 | 32 | Returns: 33 | landmarks: 33*4 34 | 35 | """ 36 | filtered_detections = self.bd.forward(image_in_) 37 | if show: 38 | print(filtered_detections) 39 | if filtered_detections.shape == (0, 13): 40 | return np.zeros((33, 4)) 41 | elif len(filtered_detections) > 1: 42 | filtered_detections = filtered_detections[0].reshape(1, 13) 43 | 44 | xc, yc, scale, theta = detection2roi(filtered_detections, detection2roi_method='alignment') 45 | img, affine, box = extract_roi(CVImage(image_in_).bgr, xc, yc, theta, scale) 46 | if show: 47 | CVImage(img[0]).show(0, 'img_in') 48 | normalized_landmarks, f, _, _, _ = self.model.forward(img.astype(np.float32)) 49 | normalized_landmarks = postprocess(normalized_landmarks) 50 | landmarks_ = denormalize_landmarks(normalized_landmarks, affine)[0] 51 | 52 | # CVImage(img[0].cpu().numpy().transpose(2, 1, 0)).show() 53 | # print(normalized_landmarks) 54 | if show: 55 | show_img = CVImage(image_in_).draw_landmarks(landmarks_) 56 | CVImage(show_img).show(0, 'results') 57 | 58 | return landmarks_ 59 | 60 | 61 | if __name__ == '__main__': 62 | image_path = 'resources/for_pose/girl_640x480.jpg' 63 | image_in = CVImage(image_path).bgr 64 | 65 | """ 66 | model 0 67 | gpu 70fps trt 133-196fps trt16 235-278fps t_pose_1500x 68 | gpu trt 221fps trt16 269fps t_pose_1080p 69 | model 1 282fps 70 | """ 71 | ld = LandmarkDetectorOrigin(model_complexity=2, provider='gpu') 72 | 73 | # landmarks = ld.forward(image_in, show=True) 74 | # print(landmarks) 75 | 76 | # with MyFpsCounter('model forward 10 times fps: ') as mfc: 77 | # for i in range(10): 78 | # filtered_detections = ld.forward(image_in) 79 | 80 | # video tracking test 81 | from cv2box import CVVideoLoader 82 | with CVVideoLoader('') as cvvl: 83 | for _ in range(len(cvvl)): 84 | _, frame = cvvl.get() 85 | landmarks = ld.forward(frame, show=True) 86 | -------------------------------------------------------------------------------- /body_lib/body_kp_detector/body_kp_detector_kapao/body_kp_detector_kapao.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/7/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | model from https://github.com/wmcnally/kapao 7 | """ 8 | import torch 9 | from cv2box import CVImage, MyFpsCounter, mfc 10 | import numpy as np 11 | from torchvision import transforms 12 | 13 | from apstone.wrappers.mmlab_wrapper import KpDetectorBase 14 | from body_lib.body_kp_detector.body_kp_detector_kapao.utils import non_max_suppression_kp, post_process_batch, letterbox 15 | 16 | MODEL_ZOO = { 17 | # gpu 30fps trt 39fps trt16 43fps 18 | # input_name:['actual_input_1'], shape:[[1, 3, 768, 1280]] 19 | # output_name:['output1'], shape:[[1, 61200, 57]] 20 | 'kapao_s_coco_1080': { 21 | 'model_path': 'pretrain_models/body_lib/body_kp_detector/kapao/kapao_s_coco_static_1280x768.onnx', 22 | 'model_input_size': (1280, 768) 23 | }, 24 | } 25 | 26 | 27 | class KaPao(KpDetectorBase): 28 | def __init__(self, model, provider): 29 | super().__init__(MODEL_ZOO[model], provider) 30 | self.origin_shape = None 31 | 32 | def preprocess(self, image_in_, bbox_, mirror=False): 33 | image_in_ = CVImage(image_in_).bgr 34 | # Padded resize 35 | image_in_ = letterbox(image_in_, self.model_input_size[::-1], stride=64)[0] 36 | 37 | if self.model_type == 'trt': 38 | transform = transforms.Compose([ 39 | transforms.ToTensor(), 40 | ]) 41 | image_in_ = 
dict(input=CVImage(image_in_.astype(np.float32)).tensor(transform).cuda()) 42 | else: 43 | # HWC -> CHW BGR -> RGB 44 | image_in_ = image_in_.astype(np.float32).transpose(2, 0, 1)[::-1][np.newaxis, :]/255 45 | 46 | return image_in_ 47 | 48 | @mfc('postprocess') 49 | def postprocess(self, model_results): 50 | kp_flip = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] 51 | data = {'num_coords': 34, 'use_kp_dets': True, 'conf_thres_kp_person': 0.3, 52 | 'overwrite_tol': 50, 'count_fused': False} 53 | # lazy to rewrite torch nms to numpy 54 | model_results = torch.Tensor(model_results[0]) 55 | person_dets = non_max_suppression_kp(model_results, conf_thres=0.7, iou_thres=0.45, 56 | classes=[0], 57 | num_coords=34) 58 | kp_dets = non_max_suppression_kp(model_results, conf_thres=0.5, iou_thres=0.45, 59 | classes=list(range(1, 1 + len(kp_flip))), 60 | num_coords=34) 61 | _, poses, _, _, _ = post_process_batch(data, self.model_input_size, self.origin_shape, person_dets, kp_dets) 62 | 63 | return poses[0] 64 | 65 | def forward(self, image_in_, show=False, max_bbox_num=1): 66 | self.origin_shape = image_in_.shape 67 | model_results = self.model.forward(self.preprocess(image_in_, None)) 68 | results_after = self.postprocess(model_results) 69 | if show: 70 | self.show(image_in_, results_after) 71 | return results_after 72 | 73 | 74 | if __name__ == '__main__': 75 | 76 | image_path = 'resources/for_pose/t_pose_1080p.jpeg' 77 | image_in = CVImage(image_path).bgr 78 | kp = KaPao(model='kapao_s_coco_1080', provider='gpu') 79 | 80 | kps = kp.forward(image_in, show=True, max_bbox_num=3) 81 | 82 | -------------------------------------------------------------------------------- /mocap_lib/body_wholebody/mediapipe_holistic.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/6/6 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | import numpy as np 7 | import mediapipe as mp 8 | from cv2box import CVImage, CVVideoLoader 9 | import cv2 10 | from tqdm import tqdm 11 | 12 | 13 | class MediapipeHolistic: 14 | def __init__(self): 15 | self.mp_drawing = mp.solutions.drawing_utils 16 | self.mp_drawing_styles = mp.solutions.drawing_styles 17 | self.mp_holistic = mp.solutions.holistic 18 | self.holistic = self.mp_holistic.Holistic( 19 | # model_complexity=2, 20 | smooth_landmarks=True, 21 | # refine_face_landmarks=True, 22 | min_detection_confidence=0.5, 23 | min_tracking_confidence=0.5) 24 | 25 | def draw_show(self, image_in_, results): 26 | self.mp_drawing.draw_landmarks( 27 | image_in_, 28 | results.face_landmarks, 29 | self.mp_holistic.FACEMESH_CONTOURS, 30 | landmark_drawing_spec=None, 31 | connection_drawing_spec=self.mp_drawing_styles.get_default_face_mesh_contours_style()) 32 | self.mp_drawing.draw_landmarks( 33 | image_in_, 34 | results.pose_landmarks, 35 | self.mp_holistic.POSE_CONNECTIONS, 36 | landmark_drawing_spec=self.mp_drawing_styles.get_default_pose_landmarks_style()) 37 | self.mp_drawing.draw_landmarks( 38 | image_in_, 39 | results.left_hand_landmarks, 40 | self.mp_holistic.HAND_CONNECTIONS, 41 | landmark_drawing_spec=self.mp_drawing_styles.get_default_hand_landmarks_style()) 42 | self.mp_drawing.draw_landmarks( 43 | image_in_, 44 | results.right_hand_landmarks, 45 | self.mp_holistic.HAND_CONNECTIONS, 46 | landmark_drawing_spec=self.mp_drawing_styles.get_default_hand_landmarks_style()) 47 | CVImage(image_in_).show(wait_time=1) 48 | # CVImage(image).save( 49 | # '/{}.jpg'.format( 50 | # 
i), create_path=True) 51 | 52 | @staticmethod 53 | def result_convert(results_in, image_in_shape): 54 | results_out = [] 55 | for i in range(len(results_in)): 56 | # print(results_in[i].x) 57 | results_out.append([results_in[i].x * image_in_shape[1], results_in[i].y * image_in_shape[0], 1.]) 58 | return results_out 59 | 60 | def forward(self, image_in_, draw_show=False): 61 | image_in_ = CVImage(image_in_).rgb 62 | image_in_.flags.writeable = False 63 | results = self.holistic.process(image_in_) 64 | image_in_.flags.writeable = True 65 | image_in_ = cv2.cvtColor(image_in_, cv2.COLOR_RGB2BGR) 66 | 67 | if draw_show: 68 | self.draw_show(image_in_, results) 69 | 70 | body_kp = self.result_convert(results.pose_landmarks.landmark, image_in_.shape) 71 | left_hd_kp = self.result_convert(results.left_hand_landmarks.landmark, image_in_.shape) 72 | right_hd_kp = self.result_convert(results.right_hand_landmarks.landmark, image_in_.shape) 73 | 74 | return body_kp, left_hd_kp, right_hd_kp 75 | 76 | 77 | if __name__ == '__main__': 78 | mh = MediapipeHolistic() 79 | 80 | with CVVideoLoader( 81 | '') as cvvl: 82 | for i in tqdm(range(len(cvvl))): 83 | _, image = cvvl.get() 84 | body_kp, left_h, right_h = mh.forward(image) 85 | print(len(left_h)) 86 | -------------------------------------------------------------------------------- /body_lib/body_kp_detector/body_detector_movenet/movenet_api_onnx.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/16 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | from cv2box import CVImage, CVVideoLoader 7 | import numpy as np 8 | from tqdm import tqdm 9 | from body_lib.body_kp_detector.body_detector_movenet.movenet_utils import crop_and_resize, init_crop_region, \ 10 | determine_crop_region, \ 11 | draw_prediction_on_image 12 | from apstone import ONNXModel 13 | 14 | # https://tfhub.dev/s?q=movenet 15 | 16 | class MoveNet: 17 | def __init__(self, image_height, image_width): 18 | self.crop_region = init_crop_region(image_height, image_width) 19 | 20 | # ONNX 21 | self.movenet = ONNXModel( 22 | 'pretrain_models/digital_human/body_detector_movenet/movenet_singlepose_thunder_4.onnx') 23 | 24 | def forward(self, image): 25 | image_height, image_width, _ = image.shape 26 | 27 | image = crop_and_resize( 28 | np.expand_dims(image, axis=0), self.crop_region, crop_size=(256, 256)) 29 | 30 | input_image = np.array(image, dtype=np.int32) 31 | 32 | outputs = self.movenet.forward(input_image) 33 | # Output is a [1, 1, 17, 3] tensor. 
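        # (editor note, hedged) For MoveNet the last axis is (y, x, score), with y/x
        # normalized to [0, 1] relative to the cropped input; the loop below maps them
        # back to full-frame normalized coordinates using the current crop_region.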
34 | # keypoints_with_scores = np.array(outputs['output_0']) 35 | keypoints_with_scores = np.array(outputs[0]) 36 | 37 | # print(keypoints_with_scores) 38 | 39 | for idx in range(17): 40 | keypoints_with_scores[0, 0, idx, 0] = ( 41 | self.crop_region['y_min'] * image_height + 42 | self.crop_region['height'] * image_height * 43 | keypoints_with_scores[0, 0, idx, 0]) / image_height 44 | keypoints_with_scores[0, 0, idx, 1] = ( 45 | self.crop_region['x_min'] * image_width + 46 | self.crop_region['width'] * image_width * 47 | keypoints_with_scores[0, 0, idx, 1]) / image_width 48 | self.crop_region = determine_crop_region( 49 | keypoints_with_scores, image_height, image_width) 50 | return keypoints_with_scores, self.crop_region 51 | 52 | 53 | if __name__ == '__main__': 54 | 55 | mn = MoveNet(image_height=1920, image_width=1080) 56 | 57 | with CVVideoLoader('') as cvvl: 58 | for i in tqdm(range(len(cvvl))): 59 | _, image_bgr = cvvl.get() 60 | 61 | keypoints_with_scores, crop_region = mn.forward(image_bgr) 62 | 63 | # Visualize the predictions with image. 64 | display_image = np.expand_dims(image_bgr, axis=0) 65 | # display_image = tf.cast(tf.image.resize_with_pad( 66 | # display_image, 1080, 1080), dtype=tf.int32) 67 | output_overlay = draw_prediction_on_image( 68 | np.squeeze(display_image, axis=0), keypoints_with_scores, crop_region=crop_region) 69 | 70 | CVImage(output_overlay).save( 71 | ''.format(i), create_path=True) 72 | 73 | # kp_list = [] 74 | # for kp in kps: 75 | # kp_list.append([kp[1] * 1080, kp[0] * 1920]) 76 | # 77 | # image_bgr = cv2.drawKeypoints(image_bgr, cv2.KeyPoint_convert(kp_list), None, color=(0, 0, 255), flags=0) 78 | # 79 | # CVImage(output_overlay).show(1) 80 | -------------------------------------------------------------------------------- /audio_lib/svc/sovits_infer.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/5/25 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | https://github.com/voicepaw/so-vits-svc-fork 7 | svc infer -c logs/44k/config.json -m logs/44k/G_2400.pth "*.wav" 8 | """ 9 | 10 | import json 11 | import os 12 | import subprocess 13 | from pathlib import Path 14 | 15 | # import gradio as gr 16 | import librosa 17 | import numpy as np 18 | import torch 19 | from demucs.apply import apply_model 20 | from demucs.pretrained import DEFAULT_MODEL, get_model 21 | from huggingface_hub import hf_hub_download, list_repo_files 22 | import soundfile as sf 23 | 24 | from so_vits_svc_fork.hparams import HParams 25 | from so_vits_svc_fork.inference.core import Svc 26 | 27 | # Limit on duration of audio at inference time. 
increase if you can 28 | # In this parent app, we set the limit with an env var to 30 seconds 29 | # If you didnt set env var + you go OOM try changing 9e9 to <=300ish 30 | duration_limit = int(os.environ.get("MAX_DURATION_SECONDS", 9e9)) 31 | 32 | 33 | class SoVits: 34 | def __init__(self, generator_path, config_path, cluster_model_path): 35 | hparams = HParams(**json.loads(Path(config_path).read_text())) 36 | self.speaker = list(hparams.spk.keys())[0] 37 | device = "cuda" if torch.cuda.is_available() else "cpu" 38 | self.model = Svc(net_g_path=generator_path, config_path=config_path, device=device, 39 | cluster_model_path=cluster_model_path) 40 | 41 | def forward(self, 42 | audio, 43 | output_path, 44 | transpose: int = 0, 45 | auto_predict_f0: bool = False, 46 | cluster_infer_ratio: float = 0, 47 | noise_scale: float = 0.4, 48 | f0_method: str = "crepe", 49 | db_thresh: int = -40, 50 | pad_seconds: float = 0.5, 51 | chunk_seconds: float = 0.5, 52 | absolute_thresh: bool = False, 53 | ): 54 | audio, _ = librosa.load(audio, sr=self.model.target_sample, duration=duration_limit) 55 | audio = self.model.infer_silence( 56 | audio.astype(np.float32), 57 | speaker=self.speaker, 58 | transpose=transpose, 59 | auto_predict_f0=auto_predict_f0, 60 | cluster_infer_ratio=cluster_infer_ratio, 61 | noise_scale=noise_scale, 62 | f0_method=f0_method, 63 | db_thresh=db_thresh, 64 | pad_seconds=pad_seconds, 65 | chunk_seconds=chunk_seconds, 66 | absolute_thresh=absolute_thresh, 67 | ) 68 | 69 | sf.write(output_path, audio, self.model.target_sample, 'PCM_24') 70 | return audio 71 | 72 | 73 | if __name__ == "__main__": 74 | generator_path = './G_329600.pth' 75 | config_path = "./config.json" 76 | cluster_model_path = None 77 | sv = SoVits(generator_path, config_path, cluster_model_path) 78 | 79 | input_path = 'test.wav' 80 | 81 | # output_path = input_path.replace('.wav', '_swap.wav') 82 | # _ = sv.forward(input_path, output_path, auto_predict_f0=False) 83 | 84 | for f0_predict_method in ['crepe', 'parselmouth', 'dio', 'harvest']: 85 | output_path = input_path.replace('.wav', f'_swap_auto_predict_{f0_predict_method}.wav') 86 | _ = sv.forward(input_path, output_path, auto_predict_f0=True, f0_method=f0_predict_method) 87 | 88 | for i in range(-2, 2, 4): 89 | output_path = input_path.replace('.wav', f'_swap_t_{i}.wav') 90 | _ = sv.forward(input_path, output_path, auto_predict_f0=False, transpose=i) 91 | -------------------------------------------------------------------------------- /gpt_lib/lora_finetune/chatglm6b_lora_deepspeed.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/4/20 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import os 6 | import tqdm 7 | import json 8 | import torch 9 | import loralib as lora 10 | # import lora_utils.insert_lora 11 | # import dataset.GLM as GLM_Data 12 | from torch.utils.data import DataLoader 13 | from transformers import AutoTokenizer, AutoModel 14 | from accelerate import Accelerator, DeepSpeedPlugin 15 | from transformers import get_linear_schedule_with_warmup 16 | 17 | checkpoint = "THUDM/chatglm-6b" 18 | mixed_precision = 'bf16' 19 | lora_config = { 20 | 'r': 32, 21 | 'lora_alpha': 32, 22 | 'lora_dropout': 0.1, 23 | 'enable_lora': [True, True, True], 24 | } 25 | max_length = 256 26 | LR = 2e-5 27 | NUM_EPOCHS = 2 28 | batch = 1 29 | accumulate_step = 8 30 | warm_up_ratio = 0.1 31 | 32 | tokenizer = AutoTokenizer.from_pretrained(checkpoint, 
trust_remote_code=True, revision='main') 33 | model = AutoModel.from_pretrained(checkpoint, trust_remote_code=True, revision='main') 34 | model = lora_utils.insert_lora.get_lora_model(model, lora_config) 35 | 36 | deepspeed_plugin = DeepSpeedPlugin(zero_stage=2, gradient_accumulation_steps=accumulate_step) 37 | accelerator = Accelerator(mixed_precision=mixed_precision, gradient_accumulation_steps=accumulate_step, 38 | deepspeed_plugin=deepspeed_plugin) 39 | device = accelerator.device 40 | GLM_Data.device = device 41 | 42 | import dataset.Alpaca as Alpaca_Data 43 | 44 | pairs = Alpaca_Data.load('./data/alpaca_data.json') 45 | pairs_encoded = GLM_Data.encode_pairs(pairs, tokenizer) 46 | pairs_encoded = list(filter(lambda pair: len(pair['prompt']) + len(pair['completion']) <= max_length, pairs_encoded)) 47 | train_dataset = GLM_Data.GLMDataset(pairs_encoded) 48 | train_dataloader = DataLoader(dataset=train_dataset, collate_fn=GLM_Data.collate_fn, shuffle=True, batch_size=batch) 49 | 50 | optimizer = torch.optim.AdamW(model.parameters(), lr=LR) 51 | 52 | lr_scheduler = get_linear_schedule_with_warmup( 53 | optimizer=optimizer, 54 | num_warmup_steps=int(len(train_dataloader) / accumulate_step * warm_up_ratio), 55 | num_training_steps=(int(len(train_dataloader) / accumulate_step) * NUM_EPOCHS), 56 | ) 57 | 58 | model, optimizer, train_dataloader = accelerator.prepare(model, optimizer, train_dataloader) 59 | model.to(device).train() 60 | 61 | total_step = 0 62 | effective_step = 0 63 | 64 | for epoch in range(NUM_EPOCHS): 65 | epoch_loss_local = 0 66 | for step, batch in enumerate(t := tqdm.tqdm(train_dataloader)): 67 | outputs = model(**batch) 68 | loss_d = outputs.loss.detach() 69 | epoch_loss_local += loss_d 70 | t.set_description(f"loss: {epoch_loss_local.cpu().float() / step}") 71 | loss = outputs.loss / accumulate_step 72 | accelerator.backward(loss) 73 | if (step + 1) % accumulate_step == 0: 74 | optimizer.step() 75 | lr_scheduler.step() 76 | optimizer.zero_grad() 77 | 78 | accelerator.wait_for_everyone() 79 | all_epoch_loss, all_step = accelerator.gather((epoch_loss_local, torch.tensor(step, device=device))) 80 | 81 | if accelerator.is_main_process: 82 | model_id = f"finetune_{epoch}" 83 | accelerator.save(lora.lora_state_dict(accelerator.unwrap_model(model)), '/saved/' + model_id + '.pt') 84 | 85 | epoch_loss = all_epoch_loss.float().sum() / (all_step + 1).sum() 86 | total_step += (all_step + 1).sum() 87 | effective_step += ((all_step + 1) // accumulate_step).sum() 88 | print(f'epoch: {epoch}, step {effective_step.cpu().numpy()}, training_loss: {epoch_loss.cpu().numpy()}') 89 | 90 | accelerator.wait_for_everyone() -------------------------------------------------------------------------------- /mocap_lib/visualize/poseviz_demo/holistic_demo.py: -------------------------------------------------------------------------------- 1 | import poseviz 2 | import numpy as np 3 | import mediapipe as mp 4 | from cv2box import CVImage 5 | 6 | mp_drawing = mp.solutions.drawing_utils 7 | mp_drawing_styles = mp.solutions.drawing_styles 8 | mp_holistic = mp.solutions.holistic 9 | 10 | 11 | def main(): 12 | joint_names = ['nose', 13 | 'left_eye_inner', 'left_eye', 'left_eye_outer', 14 | "right_eye_inner", "right_eye", "right_eye_outer", 15 | "left_ear", "right_ear", 16 | "mouth_left", "mouth_right", 17 | "left_shoulder", "right_shoulder", 18 | "left_elbow", "right_elbow", 19 | "left_wrist", "right_wrist", 20 | "left_pinky", "right_pinky", 21 | "left_index", "right_index", 22 | "left_thumb", 
"right_thumb", 23 | "left_hip", "right_hip", 24 | "left_knee", "right_knee", 25 | "left_ankle", "right_ankle", 26 | "left_heel", "right_heel", 27 | "left_foot_index", "right_foot_index"] 28 | # joint_edges = [[0, 1], [0, 4], [1, 2], [2, 3], [3, 7], [4, 5], [5, 6], [6, 8], [9, 10], [18, 20], [20, 16], 29 | # [18, 16], [16, 22], [16, 14], [14, 12], [12, 11], [11, 13], [13, 15], [15, 21], [15, 17], [17, 19], 30 | # [12, 24], [11, 23], [23, 24], [24, 26], [23, 25], [26, 28], [25, 27], [28, 32], [28, 30], [30, 32], 31 | # [27, 29], [27, 31], [29, 31]] 32 | 33 | viz = poseviz.PoseViz(joint_names, mp_holistic.HAND_CONNECTIONS, world_up=(0, -1, 0)) 34 | 35 | with mp_holistic.Holistic( 36 | model_complexity=1, 37 | min_detection_confidence=0.5, 38 | min_tracking_confidence=0.5) as holistic: 39 | image = CVImage('').rgb 40 | height = image.shape[0] 41 | width = image.shape[1] 42 | print(height, width) 43 | results = holistic.process(image) 44 | # pose_33 = results.pose_world_landmarks.landmark 45 | right_hand_21 = results.right_hand_landmarks.landmark 46 | # pose_33 = results.pose_world_landmarks.landmark 47 | # pose_33_np = [] 48 | right_hand_21_np = [] 49 | # for i in range(33): 50 | # pose_33_np.append([pose_33[i].x*1000, pose_33[i].y*1000, pose_33[i].z*1000+3000]) 51 | 52 | for i in range(21): 53 | right_hand_21_np.append( 54 | [right_hand_21[i].x * 5 * width - (width // 2), right_hand_21[i].y * 5 * height - (height // 2), 55 | right_hand_21[i].z * width * 5]) 56 | 57 | print(right_hand_21_np) 58 | # center_x = pose_33_np[23][0] - pose_33_np[24][0] 59 | # center_y = pose_33_np[23][1] - pose_33_np[24][1] 60 | # center_z = pose_33_np[24][2] + (pose_33_np[23][2] - pose_33_np[24][2]) 61 | # print(center_x, center_y, center_z) 62 | 63 | # mp_drawing.plot_landmarks( 64 | # results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) 65 | 66 | # # Iterate over the frames of e.g. a video 67 | for i in range(1): 68 | # # Get the current frame 69 | # frame = np.zeros([512, 512, 3], np.uint8) 70 | frame = image 71 | 72 | # Make predictions here 73 | # ... 
74 | 75 | # Update the visualization 76 | viz.update( 77 | frame=frame, 78 | boxes=np.array([[10, 20, 100, 100]], np.float32), 79 | poses=[right_hand_21_np], 80 | camera=poseviz.Camera.from_fov(55, frame.shape[:2])) 81 | 82 | 83 | if __name__ == '__main__': 84 | main() 85 | -------------------------------------------------------------------------------- /sd_lib/inversion_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/9/4 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import torch 6 | from cv2box import CVImage 7 | 8 | from diffusers import StableDiffusionPipeline, AutoencoderKL, DDPMScheduler, DDIMScheduler 9 | from diffusers.image_processor import VaeImageProcessor 10 | 11 | from sd_lib.prompt2prompt import ddim_inversion, null_optimization, EmptyControl 12 | 13 | SD_PRETRAIN = './sd_models/stable-diffusion-v1-5' 14 | 15 | 16 | class DDIMInversion: 17 | def __init__(self, device='cuda', num_inv_steps=50): 18 | self.device = device 19 | self.num_inv_steps = num_inv_steps 20 | noise_scheduler = DDIMScheduler.from_pretrained(SD_PRETRAIN, subfolder='scheduler') 21 | noise_scheduler.set_timesteps(self.num_inv_steps) 22 | # noise_scheduler = DDIMScheduler( 23 | # num_train_timesteps=1000, 24 | # beta_start=0.00085, 25 | # beta_end=0.012, 26 | # beta_schedule="linear", 27 | # clip_sample=False, 28 | # set_alpha_to_one=False, 29 | # steps_offset=1, 30 | # ) 31 | self.sd_pipe = StableDiffusionPipeline.from_pretrained( 32 | SD_PRETRAIN, 33 | torch_dtype=torch.float16, 34 | scheduler=noise_scheduler, 35 | # unet=unet, 36 | feature_extractor=None, 37 | safety_checker=None 38 | ).to(self.device) 39 | 40 | # self.sd_pipe.enable_model_cpu_offload() 41 | 42 | self.vae_scale_factor = 2 ** (len(self.sd_pipe.vae.config.block_out_channels) - 1) 43 | self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) 44 | 45 | def forward(self, image_in, prompt, null_optim=True): 46 | image_in_pil = CVImage(image_in).pillow() 47 | image_in_pt = self.image_processor.preprocess(image_in_pil) 48 | 49 | latents = self.sd_pipe.vae.encode( 50 | image_in_pt.to(self.device, dtype=self.sd_pipe.vae.dtype)).latent_dist.sample() 51 | latents = latents * 0.18215 52 | 53 | ddim_inv_latents = ddim_inversion( 54 | self.sd_pipe, self.sd_pipe.scheduler, latents=latents, 55 | num_inv_steps=self.num_inv_steps, prompt=prompt) 56 | 57 | if null_optim: 58 | num_inner_steps = 10 59 | uncond_embeddings = null_optimization(self.sd_pipe, self.sd_pipe.scheduler, ddim_inv_latents, self.num_inv_steps, num_inner_steps, prompt) 60 | # null_text_rec, _ = ptp_utils.text2image_ldm_stable(StableDiffuser, [prompt], EmptyControl(), latent=x_t, 61 | # uncond_embeddings=uncond_embeddings) 62 | # ptp_utils.view_images(null_text_rec) 63 | return ddim_inv_latents[-1], uncond_embeddings 64 | else: 65 | return ddim_inv_latents[-1], None 66 | 67 | 68 | if __name__ == '__main__': 69 | image_p = 'resources/for_sd/girl_reading_512_crop.png' 70 | blip_prompt = 'a woman reading a book' 71 | ddimi = DDIMInversion(device='cuda', num_inv_steps=25) 72 | latent_out, uncond_embedding = ddimi.forward(image_p, blip_prompt, null_optim=False) 73 | 74 | print(latent_out.shape) 75 | print(uncond_embedding.shape) 76 | 77 | regenerate_image = ddimi.sd_pipe( 78 | height=512, 79 | width=512, 80 | prompt=blip_prompt, 81 | num_inference_steps=25, 82 | guidance_scale=7.5, 83 | generator=EmptyControl(), 84 | negative_prompt_embeds=uncond_embedding, 
85 | latents=latent_out, 86 | return_dict=False, 87 | )[0][0] 88 | print(regenerate_image.size) # pillow 89 | CVImage(regenerate_image, 'pillow').show() 90 | -------------------------------------------------------------------------------- /data_lib/dataset_vis/coco_detect_vis.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/9/21 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | https://github.com/wukaishuns/Coco-datasets-Visualization-and-change-tools/blob/main/viscoco.py 7 | """ 8 | import os 9 | import sys 10 | 11 | # if "/opt/ros/kinetic/lib/python2.7/dist-packages" in sys.path: 12 | # sys.path.remove("/opt/ros/kinetic/lib/python2.7/dist-packages") 13 | import cv2 14 | import numpy as np 15 | from skimage import io 16 | import matplotlib 17 | import matplotlib.pyplot as plt 18 | from matplotlib import patches, lines 19 | from matplotlib.patches import Polygon, Rectangle 20 | from matplotlib.collections import PatchCollection 21 | 22 | from pycocotools.coco import COCO 23 | 24 | matplotlib.use('TkAgg') 25 | annfile = '/annotations/annotations.json' 26 | imgroot = '/images' 27 | 28 | 29 | def showAnns(anns): 30 | if len(anns) == 0: 31 | return 0 32 | ax = plt.gca() 33 | ax.set_autoscale_on(False) 34 | captions = [] 35 | polygons = [] 36 | rectangles = [] 37 | color = [] 38 | for ann in anns: 39 | c = (np.random.random((1, 3)) * 0.6 + 0.4).tolist()[0] 40 | if 'segmentation' in ann: 41 | if type(ann['segmentation']) == list: 42 | # polygon 43 | for seg in ann['segmentation']: 44 | # print(132131,ann['category_id']) 45 | # print(cat_names[0]) 46 | captions.append(cat_names[ann['category_id'] - 1]) 47 | poly = np.array(seg).reshape((int(len(seg) / 2), 2)) 48 | l_corner, w, h = (ann['bbox'][0], ann['bbox'][1]), ann['bbox'][2], ann['bbox'][3] 49 | rectangles.append(Rectangle(l_corner, w, h)) 50 | polygons.append(Polygon(poly)) 51 | color.append(c) 52 | 53 | p = PatchCollection(rectangles, facecolor='none', edgecolors=color, alpha=1, linestyle='--', linewidths=2) 54 | ax.add_collection(p) 55 | 56 | for i in range(len(captions)): 57 | x = rectangles[i].xy[0] 58 | y = rectangles[i].xy[1] 59 | ax.text(x, y, captions[i], size=10, verticalalignment='top', color='w', backgroundcolor="none") 60 | 61 | p = PatchCollection(polygons, facecolor=color, linewidths=0, alpha=0.6) 62 | ax.add_collection(p) 63 | # p = PatchCollection(polygons, facecolor='none', edgecolors=color, linewidths=2) 64 | p = PatchCollection(polygons, facecolor='none', edgecolors='b', linewidths=0.5) 65 | ax.add_collection(p) 66 | print('Ok!') 67 | 68 | 69 | import random 70 | 71 | coco = COCO(annfile) 72 | cats = coco.loadCats(coco.getCatIds()) 73 | cat_names = [cat['name'] for cat in cats] 74 | print(cat_names) 75 | catids = coco.getCatIds(catNms=random.randint(0, len(cat_names) - 1)) 76 | imgids = coco.getImgIds(catIds=catids) 77 | 78 | 79 | def draw(m, n, i): 80 | img = coco.loadImgs(imgids[np.random.randint(0, len(imgids))])[0] 81 | I = io.imread(os.path.join(imgroot, img['file_name'])) 82 | plt.subplot(m, n, i) 83 | plt.axis('off') 84 | plt.title(img['file_name'], fontsize=8, color='blue') 85 | plt.imshow(I, aspect='equal') 86 | annids = coco.getAnnIds(imgIds=img['id']) 87 | anns = coco.loadAnns(annids) 88 | showAnns(anns) 89 | 90 | 91 | if 1: 92 | m = 4 93 | n = 4 94 | plt.figure(figsize=(m * 6, n * 4)) 95 | plt.subplots_adjust(top=1, bottom=0, left=0, right=1, hspace=0, wspace=0) 96 | plt.margins(0, 0) 97 | # fig 
= plt.figure(figsize=(18*m,12*n)) 98 | for i in range(1, m * n + 1): 99 | draw(m, n, i) 100 | plt.savefig('detect_example.png') 101 | plt.show() 102 | -------------------------------------------------------------------------------- /math_lib/gaussian_filter.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/8/12 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from scipy import ndimage 10 | import numpy as np 11 | 12 | 13 | def make_gaussian_kernel(sigma, kernel_size): 14 | """ 15 | Args: 16 | sigma: 17 | kernel_size: 18 | Returns: torch tensor 1*kernel_size 19 | """ 20 | ts = torch.linspace(-kernel_size // 2, kernel_size // 2 + 1, kernel_size) 21 | gauss = torch.exp((-(ts / sigma) ** 2 / 2)) 22 | kernel = gauss / gauss.sum() 23 | return kernel 24 | 25 | 26 | def init_model(num_keypoints, kernel): 27 | seq = nn.Sequential( 28 | nn.ReflectionPad2d(kernel // 2), 29 | nn.Conv2d(num_keypoints, num_keypoints, kernel, stride=1, padding=0, bias=None, 30 | groups=num_keypoints)) 31 | return seq 32 | 33 | 34 | class GaussianLayer(nn.Module): 35 | def __init__(self, num_keypoints, kernel): 36 | """ 37 | batch gaussian layer 38 | Args: 39 | num_keypoints: cocowholebody 133 40 | kernel: 41 | """ 42 | super(GaussianLayer, self).__init__() 43 | self.kernel = kernel 44 | self.seq = nn.Sequential( 45 | nn.ReflectionPad2d(kernel // 2), 46 | nn.Conv2d(num_keypoints, num_keypoints, kernel, stride=1, padding=0, bias=None, 47 | groups=num_keypoints)) 48 | self.weights_init() 49 | 50 | def forward(self, x): 51 | """ 52 | Args: 53 | x: N keypoints number B*N*H*W 54 | Returns: 55 | """ 56 | return self.seq(x) 57 | 58 | def weights_init(self): 59 | # check mmpose /mmpose/mmpose/core/evaluation/top_down_eval.py 60 | sigma = 0.3 * ((self.kernel - 1) * 0.5 - 1) + 0.8 61 | n = np.zeros((self.kernel, self.kernel)) 62 | n[self.kernel // 2, self.kernel // 2] = 1 63 | k = ndimage.gaussian_filter(n, sigma=sigma) 64 | for name, f in self.named_parameters(): 65 | f.data.copy_(torch.from_numpy(k)) 66 | 67 | 68 | class GaussianLayerPicklable(nn.Module): 69 | def __init__(self, num_keypoints, kernel): 70 | super().__init__() 71 | self.num_keypoints = num_keypoints 72 | self.kernel = kernel 73 | # self.seq = init_model(self.num_keypoints, self.kernel).cuda() 74 | # self.weights_init() 75 | 76 | def forward(self, x): 77 | return self.seq(x) 78 | 79 | def weights_init(self): 80 | # check mmpose /mmpose/mmpose/core/evaluation/top_down_eval.py 81 | sigma = 0.3 * ((self.kernel - 1) * 0.5 - 1) + 0.8 82 | n = np.zeros((self.kernel, self.kernel)) 83 | n[self.kernel // 2, self.kernel // 2] = 1 84 | k = ndimage.gaussian_filter(n, sigma=sigma) 85 | for name, f in self.named_parameters(): 86 | # f.data.copy_(k) 87 | f.data.copy_(torch.from_numpy(k).cuda()) 88 | 89 | def __getstate__(self): 90 | return { 91 | 'num_keypoints': self.num_keypoints, 92 | 'kernel': self.kernel, 93 | } 94 | 95 | def __setstate__(self, values): 96 | super().__init__() 97 | self.num_keypoints = values['num_keypoints'] 98 | self.kernel = values['kernel'] 99 | self.seq = init_model(self.num_keypoints, self.kernel).cuda() 100 | self.weights_init() 101 | 102 | 103 | if __name__ == '__main__': 104 | sigma = 2.9 105 | kernel = 17 106 | n = np.zeros((kernel, kernel)) 107 | n[kernel // 2, kernel // 2] = 1 108 | k = ndimage.gaussian_filter(n, sigma=sigma) 109 | print(k) 110 | 
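# Hedged usage sketch added by the editor (not in the original file): shows the
# intended call pattern for GaussianLayer on a batch of keypoint heatmaps
# (B x N x H x W). The 133 channels (COCO-WholeBody) and the 64x48 heatmap size
# are illustrative assumptions; only the shapes matter here.
def _gaussian_layer_demo():
    heatmaps = torch.rand(2, 133, 64, 48)                 # B x N x H x W
    smoother = GaussianLayer(num_keypoints=133, kernel=17)
    with torch.no_grad():
        smoothed = smoother(heatmaps)                     # depthwise Gaussian blur, shape preserved
    print(smoothed.shape)                                 # torch.Size([2, 133, 64, 48])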
-------------------------------------------------------------------------------- /seg_lib/u2net/u2net_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/1/13 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | from cv2box import CVImage, MyFpsCounter 7 | from apstone import ModelBase 8 | import cv2 9 | import numpy as np 10 | 11 | MODEL_ZOO = { 12 | # https://github.com/danielgatis/rembg 13 | # input_name:['input.1'], shape:[[1, 3, 320, 320]] 14 | # output_name:['1959', '1960', '1961', '1962', '1963', '1964', '1965'], shape:[[1, 1, 320, 320], [1, 1, 320, 320], [1, 1, 320, 320], [1, 1, 320, 320], [1, 1, 320, 320], [1, 1, 320, 320], [1, 1, 320, 320]] 15 | 'u2net': { 16 | 'model_path': 'pretrain_models/seg_lib/u2net/u2net.onnx' 17 | }, 18 | 'u2net_human_seg': { 19 | 'model_path': 'pretrain_models/seg_lib/u2net/u2net_human_seg.onnx' 20 | }, 21 | # same as u2net, smaller 22 | 'u2netp': { 23 | 'model_path': 'pretrain_models/seg_lib/u2net/u2netp.onnx' 24 | }, 25 | # quantization from onnx-runtime 26 | # https://github.com/xuebinqin/U-2-Net/issues/295#issuecomment-1083041216 27 | 'silueta': { 28 | 'model_path': 'pretrain_models/seg_lib/u2net/silueta.onnx' 29 | }, 30 | # from https://www.modelscope.cn/models/damo/cv_u2net_salient-detection/summary 31 | 'u2net-salient-detection_damo': { 32 | 'model_path': 'pretrain_models/seg_lib/u2net/u2net-salient-detection_damo.onnx' 33 | }, 34 | } 35 | 36 | 37 | class U2netSeg(ModelBase): 38 | def __init__(self, model_type='u2net', provider='gpu'): 39 | super().__init__(MODEL_ZOO[model_type], provider) 40 | self.model_type = model_type 41 | 42 | self.input_mean = (0.485, 0.456, 0.406) 43 | self.input_std = (0.229, 0.224, 0.225) 44 | self.input_size = (320, 320) 45 | 46 | def forward(self, image_in, post_process=False): 47 | """ 48 | Args: 49 | image_in: CVImage access type 50 | post_process: Post Process the mask for a smooth boundary by applying Morphological Operations 51 | Research based on paper: https://www.sciencedirect.com/science/article/pii/S2352914821000757 52 | Returns: mask 0-1 53 | """ 54 | image_in_size = CVImage(image_in).bgr.shape 55 | image_in_pre = CVImage(image_in).blob_innormal(self.input_size, self.input_mean, self.input_std, rgb=True, 56 | interpolation=cv2.INTER_LANCZOS4) 57 | pred = self.model.forward(image_in_pre)[0][:, 0, :, :].transpose(1, 2, 0) 58 | ma = np.max(pred) 59 | mi = np.min(pred) 60 | pred = (pred - mi) / (ma - mi) 61 | pred = (pred*255).astype(np.uint8) 62 | if post_process: 63 | # 开操作 平滑mask边缘 64 | pred = cv2.morphologyEx(pred, cv2.MORPH_OPEN, 65 | cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))) 66 | pred = cv2.GaussianBlur(pred, (5, 5), sigmaX=2, sigmaY=2, borderType=cv2.BORDER_DEFAULT) 67 | pred = np.where(pred < 0.5, 0, 1)[..., np.newaxis].astype(np.float32) 68 | pred = CVImage(pred).resize(image_in_size[:-1][::-1], interpolation=cv2.INTER_LANCZOS4).bgr 69 | 70 | # First create the image with alpha channel 71 | rgba = cv2.cvtColor(CVImage(image_in).bgr, cv2.COLOR_RGB2RGBA) 72 | # Then assign the mask to the last channel of the image 73 | rgba[:, :, 3] = pred 74 | # CVImage(rgba).show() 75 | 76 | # rgb = cv2.bitwise_and(rgba, rgba, mask=mask) 77 | 78 | return pred, rgba 79 | 80 | 81 | if __name__ == '__main__': 82 | fb_cur = U2netSeg(model_type='u2net-salient-detection_damo', provider='gpu') 83 | mask, rgba = fb_cur.forward('resources/test1.jpg', post_process=False) 84 | CVImage(mask).show() 85 | 
CVImage(rgba).save('output.png') 86 | -------------------------------------------------------------------------------- /sd_lib/clip_encoder.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/8/19 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import numpy as np 6 | import torch 7 | from PIL import Image 8 | 9 | from transformers import CLIPTextModel, CLIPTokenizer 10 | from transformers import CLIPVisionModelWithProjection, CLIPImageProcessor 11 | 12 | """ 13 | CLIP from openai 14 | clip image encoder from: 15 | https://github.com/tencent-ailab/IP-Adapter/blob/00cbac222600928f68103c16ed9931074fca9edd/ip_adapter/ip_adapter.py#L45 16 | """ 17 | 18 | CLIP_TEXT_PRETRAIN = './sd_models/stable-diffusion-v1-5' 19 | CLIP_IMAGE_PRETRAIN = './sd_models/clip_image_encoder' 20 | IMAGE_PROJ_PRETRAIN = './sd_models/ip_adapter_image_proj/ip-adapter_sd15.bin' 21 | 22 | 23 | class ClipText: 24 | def __init__(self): 25 | self.tokenizer = CLIPTokenizer.from_pretrained(CLIP_TEXT_PRETRAIN, subfolder="tokenizer") 26 | self.text_encoder = CLIPTextModel.from_pretrained(CLIP_TEXT_PRETRAIN, subfolder="text_encoder").cuda() 27 | self.text_encoder.requires_grad_(False) 28 | 29 | def forward(self, prompt: list[str]): 30 | # (b,77) 31 | prompt_ids = self.tokenizer( 32 | prompt, max_length=self.tokenizer.model_max_length, padding="max_length", truncation=True, 33 | return_tensors="pt" 34 | ).input_ids 35 | # (b,77,768) 36 | encoder_hidden_states = self.text_encoder(prompt_ids[0][np.newaxis, :].cuda())[0] 37 | return encoder_hidden_states 38 | 39 | 40 | class ClipImage: 41 | def __init__(self): 42 | self.device = 'cuda' 43 | self.image_encoder = CLIPVisionModelWithProjection.from_pretrained(CLIP_IMAGE_PRETRAIN).to(self.device, 44 | dtype=torch.float16) 45 | self.clip_image_processor = CLIPImageProcessor() 46 | 47 | def forward(self, pil_image): 48 | """ 49 | Args: 50 | pil_image: RGB 51 | Returns: torch.Size([1, 1024]) 52 | """ 53 | if isinstance(pil_image, Image.Image): 54 | pil_image = [pil_image] 55 | clip_image = self.clip_image_processor(images=pil_image, return_tensors="pt").pixel_values 56 | clip_image_embeds = self.image_encoder(clip_image.to(self.device, dtype=torch.float16)).image_embeds 57 | return clip_image_embeds 58 | 59 | 60 | class ImageProj: 61 | def __init__(self, num_tokens=4): 62 | from sd_lib.models import ImageProjModel 63 | self.device = "cuda" 64 | self.image_proj_model = ImageProjModel( 65 | cross_attention_dim=768, # self.pipe.unet.config.cross_attention_dim 66 | clip_embeddings_dim=1024, # self.image_encoder.config.projection_dim 67 | clip_extra_context_tokens=num_tokens, 68 | ).to(self.device, dtype=torch.float16) 69 | state_dict = torch.load(IMAGE_PROJ_PRETRAIN, map_location="cpu") 70 | self.image_proj_model.load_state_dict(state_dict["image_proj"]) 71 | 72 | def forward(self, clip_image_embeds): 73 | """ 74 | Args: 75 | clip_image_embeds: torch.Size([1, 1024]) 76 | Returns: torch.Size([1, 4, 768]) 77 | """ 78 | image_prompt_embeds = self.image_proj_model(clip_image_embeds) 79 | uncond_image_prompt_embeds = self.image_proj_model(torch.zeros_like(clip_image_embeds)) 80 | return image_prompt_embeds, uncond_image_prompt_embeds 81 | 82 | 83 | if __name__ == '__main__': 84 | image_p = 'resources/for_sd/girl_reading_512_crop.png' 85 | clip_image = ClipImage() 86 | image_embedding = clip_image.forward(Image.open(image_p)) 87 | print(image_embedding.shape) 88 | 89 | ip = ImageProj() 90 | 
image_proj_embedding, _ = ip.forward(image_embedding) 91 | print(image_proj_embedding.shape) 92 | -------------------------------------------------------------------------------- /body_lib/body_kp_detector/body_detector_lightweight/body_detector_lightweight_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/10 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch 7 | """ 8 | import cv2 9 | import numpy as np 10 | from cv2box import CVImage 11 | import math 12 | from apstone import ONNXModel 13 | 14 | model_path = 'pretrain_models/digital_human/body_detector_lightweight/body_detector_dynamic.onnx' 15 | 16 | 17 | class BodyDetectorLightweight: 18 | def __init__(self, input_height_size=256, pad_value=(0, 0, 0), stride=8, upsample_ratio=4): 19 | self.input_height_size = input_height_size 20 | self.pad_value = pad_value 21 | self.stride = stride 22 | self.upsample_ratio = upsample_ratio 23 | self.input_std = 256 24 | self.input_mean = 128 25 | 26 | self.model = ONNXModel(model_path) 27 | 28 | @staticmethod 29 | def pad_width(img, stride, pad_value, min_dims): 30 | h, w, _ = img.shape 31 | h = min(min_dims[0], h) 32 | min_dims[0] = math.ceil(min_dims[0] / float(stride)) * stride 33 | min_dims[1] = max(min_dims[1], w) 34 | min_dims[1] = math.ceil(min_dims[1] / float(stride)) * stride 35 | pad = [] 36 | pad.append(int(math.floor((min_dims[0] - h) / 2.0))) 37 | pad.append(int(math.floor((min_dims[1] - w) / 2.0))) 38 | pad.append(int(min_dims[0] - h - pad[0])) 39 | pad.append(int(min_dims[1] - w - pad[1])) 40 | padded_img = cv2.copyMakeBorder(img, pad[0], pad[2], pad[1], pad[3], 41 | cv2.BORDER_CONSTANT, value=pad_value) 42 | return padded_img, pad 43 | 44 | # def post_process(self): 45 | # total_keypoints_num = 0 46 | # all_keypoints_by_type = [] 47 | # num_keypoints = 18 48 | # for kpt_idx in range(num_keypoints): # 19th for bg 49 | # total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type, 50 | # total_keypoints_num) 51 | # 52 | # pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs, demo=True) 53 | # for kpt_id in range(all_keypoints.shape[0]): 54 | # all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale 55 | # all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale 56 | 57 | def forward(self, img): 58 | height, width, _ = img.shape 59 | scale = self.input_height_size / height 60 | scaled_img = cv2.resize(img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC) 61 | 62 | min_dims = [self.input_height_size, max(scaled_img.shape[1], self.input_height_size)] 63 | padded_img, pad = self.pad_width(scaled_img, self.stride, self.pad_value, min_dims) 64 | 65 | stages_output = self.model.forward( 66 | CVImage(padded_img).set_blob(self.input_std, self.input_mean, input_size=None).blob_rgb) 67 | 68 | stage2_heatmaps = stages_output[-2] 69 | heatmaps = np.transpose(stage2_heatmaps[0], (1, 2, 0)) 70 | heatmaps = cv2.resize(heatmaps, (0, 0), fx=self.upsample_ratio, fy=self.upsample_ratio, 71 | interpolation=cv2.INTER_CUBIC) 72 | 73 | stage2_pafs = stages_output[-1] 74 | pafs = np.transpose(stage2_pafs.squeeze(), (1, 2, 0)) 75 | pafs = cv2.resize(pafs, (0, 0), fx=self.upsample_ratio, fy=self.upsample_ratio, interpolation=cv2.INTER_CUBIC) 76 | 77 | return heatmaps, pafs, scale, 
pad 78 | 79 | if __name__ == '__main__': 80 | img_p = 'test_img/t_pose.jpeg' 81 | bdl = BodyDetectorLightweight() 82 | results = bdl.forward(CVImage(img_p).bgr) 83 | print(results) 84 | -------------------------------------------------------------------------------- /data_lib/dataset_preprocess/gen_dataset_txt.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import sys 4 | import random 5 | from pathlib import Path 6 | from utils import get_path_by_ext 7 | from cv2box import CVFile 8 | 9 | 10 | def gen_txt_from_path(base_path, img_format='jpg', train_ratio=0.8): 11 | train_data_path = os.path.join(base_path, 'dataset') 12 | 13 | labels = os.listdir(train_data_path) 14 | 15 | for index, label in enumerate(labels): 16 | print('label: {}\t index: {}'.format(label, index)) 17 | # img_list = glob.glob(os.path.join(train_data_path, label, '*.{}'.format(img_format))) 18 | img_list = list(Path(os.path.join(train_data_path, label)).glob('*/*.{}'.format(img_format))) 19 | random.shuffle(img_list) 20 | print(len(img_list)) 21 | train_list = img_list[:int(train_ratio * len(img_list))] 22 | val_list = img_list[(int(train_ratio * len(img_list)) + 1):] 23 | with open(os.path.join(base_path, 'train.txt'), 'a') as f: 24 | for img in train_list: 25 | img = str(img).replace(base_path, '') 26 | # print(img) 27 | f.write(img + ' ' + str(index)) 28 | f.write('\n') 29 | 30 | with open(os.path.join(base_path, 'val.txt'), 'a') as f: 31 | for img in val_list: 32 | img = str(img).replace(base_path, '') 33 | # print(img + ' ' + str(index)) 34 | f.write(img + ' ' + str(index)) 35 | f.write('\n') 36 | 37 | # imglist = glob.glob(os.path.join(valdata_path, '*.jpg')) 38 | # with open(txtpath + 'test.txt', 'a') as f: 39 | # for img in imglist: 40 | # f.write(img) 41 | # f.write('\n') 42 | 43 | 44 | """ 45 | 'female','male', 46 | 'front', 'side', 47 | 'clean','occlusion', 48 | 'super_hq', 'hq', 'blur', 49 | 'nonhuman' 50 | """ 51 | 52 | 53 | def gen_txt_from_json(base_path, train_ratio=0.8): 54 | # multi label , labelme 55 | train_list = {} 56 | test_list = {} 57 | 58 | for img_path in get_path_by_ext(base_path): 59 | label = '' 60 | img_path_str = str(img_path)[70:] 61 | json_path = str(img_path.parent / (str(img_path.stem) + '.json')) 62 | # print(json_path) 63 | json_data = CVFile(json_path).data 64 | try: 65 | label += '01' if json_data['flags']['男'] else '10' 66 | label += '01' if json_data['flags']['侧脸'] else '10' 67 | label += '01' if json_data['flags']['遮挡'] else '10' 68 | if json_data['flags']['非常清晰']: 69 | label += '100' 70 | elif json_data['flags']['清晰']: 71 | label += '010' 72 | else: 73 | label += '001' 74 | label += '1' if json_data['flags']['非人脸'] else '0' 75 | except TypeError: 76 | print(json_path, json_data) 77 | continue 78 | 79 | if random.random() > train_ratio: 80 | test_list[img_path_str] = label 81 | else: 82 | train_list[img_path_str] = label 83 | 84 | with open(os.path.join(base_path, 'train.txt'), 'a') as f: 85 | for k, v in train_list.items(): 86 | f.write(k + ' ' + v) 87 | f.write('\n') 88 | 89 | with open(os.path.join(base_path, 'val.txt'), 'a') as f: 90 | for k, v in test_list.items(): 91 | f.write(k + ' ' + v) 92 | f.write('\n') 93 | 94 | 95 | if __name__ == '__main__': 96 | gen_txt_from_json('') 97 | 98 | # from tqdm import tqdm 99 | # for img_path in tqdm(get_path_by_ext('')): 100 | # json_p = str(img_path).replace('.jpg', '.json') 101 | # if not Path(json_p).exists(): 102 | # os.remove(str(img_path)) 103 | # # raise '11' 
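    # Note (descriptive): the commented-out loop above is a one-off cleanup pass
    # that deletes images with no paired labelme .json annotation (an example
    # filename is kept in the comment below).
    # Labelme flag keys used by gen_txt_from_json: 男 = male, 侧脸 = side face,
    # 遮挡 = occlusion, 非常清晰 = super clear, 清晰 = clear, 非人脸 = non-face.
    # Each line written to train.txt / val.txt is
    # "<relative img path> <10-char multi-hot label>", following the label order
    # in the docstring above, e.g. (illustrative) "some_dir/face_001.jpg 0110100100".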
104 | # # ef93c75f17c544ccbb436f5cfeb6e656.json -------------------------------------------------------------------------------- /hand_lib/hand_mesh/minimal_hands/kinematics.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/2/11 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import numpy as np 6 | 7 | 8 | class MANOHandJoints: 9 | n_joints = 21 10 | 11 | labels = [ 12 | 'W', # 0 13 | 'I0', 'I1', 'I2', # 3 14 | 'M0', 'M1', 'M2', # 6 15 | 'L0', 'L1', 'L2', # 9 16 | 'R0', 'R1', 'R2', # 12 17 | 'T0', 'T1', 'T2', # 15 18 | 'I3', 'M3', 'L3', 'R3', 'T3' # 20, tips are manually added (not in MANO) 19 | ] 20 | 21 | # finger tips are not joints in MANO, we label them on the mesh manually 22 | mesh_mapping = {16: 333, 17: 444, 18: 672, 19: 555, 20: 744} 23 | 24 | parents = [ 25 | None, 26 | 0, 1, 2, 27 | 0, 4, 5, 28 | 0, 7, 8, 29 | 0, 10, 11, 30 | 0, 13, 14, 31 | 3, 6, 9, 12, 15 32 | ] 33 | 34 | 35 | class MPIIHandJoints: 36 | n_joints = 21 37 | 38 | labels = [ 39 | 'W', # 0 40 | 'T0', 'T1', 'T2', 'T3', # 4 41 | 'I0', 'I1', 'I2', 'I3', # 8 42 | 'M0', 'M1', 'M2', 'M3', # 12 43 | 'R0', 'R1', 'R2', 'R3', # 16 44 | 'L0', 'L1', 'L2', 'L3', # 20 45 | ] 46 | 47 | parents = [ 48 | None, 49 | 0, 1, 2, 3, 50 | 0, 5, 6, 7, 51 | 0, 9, 10, 11, 52 | 0, 13, 14, 15, 53 | 0, 17, 18, 19 54 | ] 55 | 56 | 57 | def mpii_to_mano(mpii): 58 | """ 59 | Map data from MPIIHandJoints order to MANOHandJoints order. 60 | Parameters 61 | ---------- 62 | mpii : np.ndarray, [21, ...] 63 | Data in MPIIHandJoints order. Note that the joints are along axis 0. 64 | Returns 65 | ------- 66 | np.ndarray 67 | Data in MANOHandJoints order. 68 | """ 69 | mano = [] 70 | for j in range(MANOHandJoints.n_joints): 71 | mano.append( 72 | mpii[MPIIHandJoints.labels.index(MANOHandJoints.labels[j])] 73 | ) 74 | mano = np.stack(mano, 0) 75 | return mano 76 | 77 | 78 | def mano_to_mpii(mano): 79 | """ 80 | Map data from MANOHandJoints order to MPIIHandJoints order. 81 | Parameters 82 | ---------- 83 | mano : np.ndarray, [21, ...] 84 | Data in MANOHandJoints order. Note that the joints are along axis 0. 85 | Returns 86 | ------- 87 | np.ndarray 88 | Data in MPIIHandJoints order. 89 | """ 90 | mpii = [] 91 | for j in range(MPIIHandJoints.n_joints): 92 | mpii.append( 93 | mano[MANOHandJoints.labels.index(MPIIHandJoints.labels[j])] 94 | ) 95 | mpii = np.stack(mpii, 0) 96 | return mpii 97 | 98 | 99 | def xyz_to_delta(xyz, joints_def): 100 | """ 101 | Compute bone orientations from joint coordinates (child joint - parent joint). 102 | The returned vectors are normalized. 103 | For the root joint, it will be a zero vector. 104 | Parameters 105 | ---------- 106 | xyz : np.ndarray, shape [J, 3] 107 | Joint coordinates. 108 | joints_def : object 109 | An object that defines the kinematic skeleton, e.g. MPIIHandJoints. 110 | Returns 111 | ------- 112 | np.ndarray, shape [J, 3] 113 | The **unit** vectors from each child joint to its parent joint. 114 | For the root joint, it's are zero vector. 115 | np.ndarray, shape [J, 1] 116 | The length of each bone (from child joint to parent joint). 117 | For the root joint, it's zero. 
118 | """ 119 | delta = [] 120 | for j in range(joints_def.n_joints): 121 | p = joints_def.parents[j] 122 | if p is None: 123 | delta.append(np.zeros(3)) 124 | else: 125 | delta.append(xyz[j] - xyz[p]) 126 | delta = np.stack(delta, 0) 127 | lengths = np.linalg.norm(delta, axis=-1, keepdims=True) 128 | delta /= np.maximum(lengths, np.finfo(xyz.dtype).eps) 129 | return delta, lengths 130 | -------------------------------------------------------------------------------- /mocap_lib/smooth_filter/one_euro_api.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Adapted from https://github.com/HoBeom/OneEuroFilter-Numpy 3 | # Original licence: Copyright (c) HoBeom Jeon, under the MIT License. 4 | # ------------------------------------------------------------------------------ 5 | import math 6 | 7 | import numpy as np 8 | 9 | 10 | def smoothing_factor(t_e, cutoff): 11 | r = 2 * math.pi * cutoff * t_e 12 | return r / (r + 1) 13 | 14 | 15 | def exponential_smoothing(a, x, x_prev): 16 | return a * x + (1 - a) * x_prev 17 | 18 | 19 | class OneEuro: 20 | 21 | def __init__(self, t0, x0, dx0, min_cutoff, beta, d_cutoff=1.0): 22 | super(OneEuro, self).__init__() 23 | """Initialize the one euro filter.""" 24 | # The parameters. 25 | self.min_cutoff = float(min_cutoff) 26 | self.beta = float(beta) 27 | self.d_cutoff = float(d_cutoff) 28 | # Previous values. 29 | self.x_prev = x0 30 | self.dx_prev = dx0 31 | self.t_prev = t0 32 | 33 | def __call__(self, x, t=None): 34 | """Compute the filtered signal.""" 35 | 36 | if t is None: 37 | # Assume input is feed frame by frame if not specified 38 | t = self.t_prev + 1 39 | 40 | t_e = t - self.t_prev 41 | 42 | # The filtered derivative of the signal. 43 | a_d = smoothing_factor(t_e, self.d_cutoff) # [k, c] 44 | dx = (x - self.x_prev) / t_e 45 | dx_hat = exponential_smoothing(a_d, dx, self.dx_prev) 46 | 47 | # The filtered signal. 48 | cutoff = self.min_cutoff + self.beta * np.abs(dx_hat) 49 | a = smoothing_factor(t_e, cutoff) 50 | x_hat = exponential_smoothing(a, x, self.x_prev) 51 | # Memorize the previous values. 52 | self.x_prev = x_hat 53 | self.dx_prev = dx_hat 54 | self.t_prev = t 55 | return x_hat 56 | 57 | 58 | class OneEuroFilter: 59 | """Oneeuro filter, source code: https://github.com/mkocabas/VIBE/blob/c0 60 | c3f77d587351c806e901221a9dc05d1ffade4b/lib/utils/smooth_pose.py. 61 | 62 | Args: 63 | min_cutoff (float, optional): Decreasing the minimum cutoff frequency 64 | decreases slow speed jitter 65 | beta (float, optional): Increasing the speed coefficient(beta) 66 | decreases speed lag. 
67 | """ 68 | 69 | # # Not shareable because the filter holds status of a specific target 70 | # _shareable: bool = False 71 | 72 | def __init__(self, min_cutoff=0.004, beta=0.7): 73 | # OneEuroFilter has Markov Property and maintains status variables 74 | # within the class, thus has a windows_size of 1 75 | # super().__init__(window_size=1) 76 | self.min_cutoff = min_cutoff 77 | self.beta = beta 78 | self._one_euro = None 79 | 80 | def forward(self, x: np.ndarray): 81 | assert x.ndim == 3, ('Input should be an array with shape [T, K, C]' 82 | f', but got invalid shape {x.shape}') 83 | 84 | pred_pose_hat = x.copy() 85 | 86 | if self._one_euro is None: 87 | # The filter is invoked for the first time 88 | # Initialize the filter 89 | self._one_euro = OneEuro( 90 | np.zeros_like(x[0]), 91 | x[0], 92 | dx0=0.0, 93 | min_cutoff=self.min_cutoff, 94 | beta=self.beta, 95 | ) 96 | t0 = 1 97 | else: 98 | # The filter has been invoked 99 | t0 = 0 100 | 101 | for t, pose in enumerate(x): 102 | if t < t0: 103 | # If the filter is invoked for the first time 104 | # set pred_pose_hat[0] = x[0] 105 | continue 106 | pose = self._one_euro(pose) 107 | pred_pose_hat[t] = pose 108 | 109 | return pred_pose_hat 110 | -------------------------------------------------------------------------------- /mocap_lib/body_wholebody/wholebody_kp_detector_mmpose.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/8/17 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | from cv2box import CVImage, MyFpsCounter 7 | from apstone.wrappers.mmlab_wrapper import KpDetectorBase 8 | 9 | MODEL_ZOO = { 10 | # API 62fps trt16 154fps 11 | 'r50': { 12 | 'model_path': 'pretrain_models/mocap_lib/coco_whole_body/res50_coco_wholebody_256x192-9e37ed88_20201004_remove_initializer.onnx', 13 | 'model_input_size': (192, 256) 14 | }, # w h 15 | # 195fps 16 | 'r50_trt': { 17 | 'model_path': 'pretrain_models/mocap_lib/coco_whole_body/res50_coco_wholebody_256x192-9e37ed88_20201004.engine', 18 | 'model_input_size': (192, 256) 19 | }, 20 | # API 34fps 21 | 'vipnas_mbv3_dark': { 22 | 'model_path': 'pretrain_models/mocap_lib/coco_whole_body/vipnas_mbv3_coco_wholebody_256x192_dark-e2158108_20211205_remove_initializer.onnx', 23 | 'model_input_size': (192, 256) 24 | }, 25 | # API 38fps 26 | 'vipnas_r50_dark': { 27 | 'model_path': 'pretrain_models/mocap_lib/coco_whole_body/vipnas_res50_wholebody_256x192_dark-67c0ce35_20211112_remove_initializer.onnx', 28 | 'model_input_size': (192, 256) 29 | }, 30 | # trt16 50fps 31 | 'hrnet_w48_384_dark': { 32 | 'model_path': 'pretrain_models/mocap_lib/coco_whole_body/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918_remove_initializer.onnx', 33 | 'model_input_size': (288, 384), 34 | 'kernel': 17}, 35 | 'hrnet_w48_384_dark_dynamic': { 36 | 'model_path': 'pretrain_models/mocap_lib/coco_whole_body/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918_dynamic.onnx', 37 | 'model_input_size': (288, 384), 38 | 'input_dynamic_shape': (4, 3, 288, 384), 39 | 'kernel': 17}, 40 | # 48fps 41 | 'hrnet_w48_384_dark_trt': { 42 | 'model_path': 'pretrain_models/mocap_lib/coco_whole_body/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918.engine', 43 | 'model_input_size': (288, 384), 44 | 'kernel': 17}, 45 | } 46 | 47 | # 用于镜像翻转的pair对 48 | flip_pairs = [ 49 | [1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16], [17, 20], [18, 21], [19, 22] 50 | ] 51 | for i in range(91, 112): 52 | flip_pairs.append([i, i 
+ 21]) 53 | 54 | 55 | class BodyWholebodyDetector(KpDetectorBase): 56 | def __init__(self, model_type='r50', provider='gpu'): 57 | super().__init__(MODEL_ZOO[model_type], provider) 58 | self.dark_flag = model_type.find('dark') > 0 59 | 60 | def forward(self, image_in_, bbox_, show=False, mirror_test=False): 61 | if len(bbox_) == 0: 62 | return [[0, 0, 0]] * 133 63 | 64 | outputs = self.model.forward(self.preprocess(image_in_, bbox_)) 65 | 66 | if mirror_test: 67 | outputs_mirror = self.model.forward(self.preprocess(image_in_, bbox_, mirror=mirror_test)) 68 | kp_results = self.postprocess_mirror(outputs, outputs_mirror, flip_pairs) 69 | else: 70 | kp_results = self.postprocess(outputs) 71 | 72 | if show: 73 | self.show(image_in_, kp_results) 74 | 75 | return kp_results 76 | 77 | 78 | if __name__ == '__main__': 79 | image_path = 'resources/for_pose/t_pose_1080p.jpeg' 80 | image_in = CVImage(image_path).bgr 81 | bbox = [493, 75, 1427, 1044] 82 | 83 | bwd = BodyWholebodyDetector(model_type='hrnet_w48_384_dark_dynamic', provider='trt') 84 | kps = bwd.forward(image_in, bbox, show=True, mirror_test=False) 85 | # print(kps) 86 | 87 | with MyFpsCounter('model forward 10 times fps: ') as mfc: 88 | for i in range(10): 89 | kps = bwd.forward(image_in, bbox) 90 | 91 | # # for video 92 | # from cv2box import CVVideoLoader 93 | # from tqdm import tqdm 94 | # 95 | # with CVVideoLoader('') as cvvl: 96 | # for _ in tqdm(range(len(cvvl))): 97 | # _, frame = cvvl.get() 98 | # kps = bwd.forward(image_in, bbox, show=True, mirror_test=False) 99 | -------------------------------------------------------------------------------- /art_lib/optical_flow_estimate/raft/utils.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/8/21 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | # Ref: https://github.com/liruoteng/OpticalFlowToolkit/blob/5cf87b947a0032f58c922bbc22c0afb30b90c418/lib/flowlib.py#L249 6 | 7 | import numpy as np 8 | 9 | UNKNOWN_FLOW_THRESH = 1e7 10 | 11 | 12 | def make_color_wheel(): 13 | """ 14 | Generate color wheel according Middlebury color code 15 | :return: Color wheel 16 | """ 17 | RY = 15 18 | YG = 6 19 | GC = 4 20 | CB = 11 21 | BM = 13 22 | MR = 6 23 | 24 | ncols = RY + YG + GC + CB + BM + MR 25 | 26 | colorwheel = np.zeros([ncols, 3]) 27 | 28 | col = 0 29 | 30 | # RY 31 | colorwheel[0:RY, 0] = 255 32 | colorwheel[0:RY, 1] = np.transpose(np.floor(255 * np.arange(0, RY) / RY)) 33 | col += RY 34 | 35 | # YG 36 | colorwheel[col:col + YG, 0] = 255 - np.transpose(np.floor(255 * np.arange(0, YG) / YG)) 37 | colorwheel[col:col + YG, 1] = 255 38 | col += YG 39 | 40 | # GC 41 | colorwheel[col:col + GC, 1] = 255 42 | colorwheel[col:col + GC, 2] = np.transpose(np.floor(255 * np.arange(0, GC) / GC)) 43 | col += GC 44 | 45 | # CB 46 | colorwheel[col:col + CB, 1] = 255 - np.transpose(np.floor(255 * np.arange(0, CB) / CB)) 47 | colorwheel[col:col + CB, 2] = 255 48 | col += CB 49 | 50 | # BM 51 | colorwheel[col:col + BM, 2] = 255 52 | colorwheel[col:col + BM, 0] = np.transpose(np.floor(255 * np.arange(0, BM) / BM)) 53 | col += + BM 54 | 55 | # MR 56 | colorwheel[col:col + MR, 2] = 255 - np.transpose(np.floor(255 * np.arange(0, MR) / MR)) 57 | colorwheel[col:col + MR, 0] = 255 58 | 59 | return colorwheel 60 | 61 | 62 | colorwheel = make_color_wheel() 63 | 64 | 65 | def compute_color(u, v): 66 | """ 67 | compute optical flow color map 68 | :param u: optical flow horizontal map 69 | :param v: optical flow 
vertical map 70 | :return: optical flow in color code 71 | """ 72 | [h, w] = u.shape 73 | img = np.zeros([h, w, 3]) 74 | nanIdx = np.isnan(u) | np.isnan(v) 75 | u[nanIdx] = 0 76 | v[nanIdx] = 0 77 | 78 | ncols = np.size(colorwheel, 0) 79 | 80 | rad = np.sqrt(u ** 2 + v ** 2) 81 | 82 | a = np.arctan2(-v, -u) / np.pi 83 | 84 | fk = (a + 1) / 2 * (ncols - 1) + 1 85 | 86 | k0 = np.floor(fk).astype(int) 87 | 88 | k1 = k0 + 1 89 | k1[k1 == ncols + 1] = 1 90 | f = fk - k0 91 | 92 | for i in range(0, np.size(colorwheel, 1)): 93 | tmp = colorwheel[:, i] 94 | col0 = tmp[k0 - 1] / 255 95 | col1 = tmp[k1 - 1] / 255 96 | col = (1 - f) * col0 + f * col1 97 | 98 | idx = rad <= 1 99 | col[idx] = 1 - rad[idx] * (1 - col[idx]) 100 | notidx = np.logical_not(idx) 101 | 102 | col[notidx] *= 0.75 103 | img[:, :, i] = np.uint8(np.floor(255 * col * (1 - nanIdx))) 104 | 105 | return img 106 | 107 | 108 | def flow_to_image(flow): 109 | """ 110 | Convert flow into middlebury color code image 111 | :param flow: optical flow map 112 | :return: optical flow image in middlebury color 113 | """ 114 | u = flow[:, :, 0] 115 | v = flow[:, :, 1] 116 | 117 | # maxu = -999. 118 | # maxv = -999. 119 | # minu = 999. 120 | # minv = 999. 121 | 122 | idxUnknow = (abs(u) > UNKNOWN_FLOW_THRESH) | (abs(v) > UNKNOWN_FLOW_THRESH) 123 | u[idxUnknow] = 0 124 | v[idxUnknow] = 0 125 | 126 | # maxu = max(maxu, np.max(u)) 127 | # minu = min(minu, np.min(u)) 128 | # 129 | # maxv = max(maxv, np.max(v)) 130 | # minv = min(minv, np.min(v)) 131 | 132 | rad = np.sqrt(u ** 2 + v ** 2) 133 | maxrad = max(-1, np.max(rad)) 134 | 135 | u = u / (maxrad + np.finfo(float).eps) 136 | v = v / (maxrad + np.finfo(float).eps) 137 | 138 | img = compute_color(u, v) 139 | 140 | idx = np.repeat(idxUnknow[:, :, np.newaxis], 3, axis=2) 141 | img[idx] = 0 142 | 143 | return np.uint8(img) 144 | -------------------------------------------------------------------------------- /hand_lib/hand_detector/hand_detector_yolox/hand_detector_yolox.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/1/7 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from cv2box import CVImage, MyFpsCounter, CVVideoLoader, CVVideoMaker, CVFile 6 | from apstone.mmlab_wrapper import BboxDetectorBase 7 | from tqdm import tqdm 8 | 9 | # input 1*3*640*640 output 1*N*5 1*N 10 | MODEL_ZOO = { 11 | # gpu 68fps 12 | 'yolox_s': { 13 | 'model_path': 'private_models/hand_lib/hand_detector_yolox/yolox_s_100DOH_epoch90_mmdeploy_dynamic.onnx', 14 | 'input_dynamic_shape': (1, 3, 640, 640), 15 | 'model_input_size': (640, 640), 16 | 'label': 1, 17 | }, 18 | 'yolox_s_local': { 19 | 'model_path': 'private_models/hand_lib/hand_detector_yolox/yolox_s_2dataset_epoch127_0922_dynamic.onnx', 20 | 'input_dynamic_shape': (1, 3, 640, 640), 21 | 'model_input_size': (640, 640), 22 | 'label': 0, 23 | }, 24 | # 260 fps 25 | 'yolox_s_trt16': { 26 | 'model_path': 'private_models/hand_lib/hand_detector_yolox/yolox_s_100DOH_epoch90_mmdeploy_static.engine', 27 | 'input_dynamic_shape': (1, 3, 640, 640), 28 | 'model_input_size': (640, 640), 29 | 'label': 1, 30 | }, 31 | } 32 | 33 | 34 | class HandDetectorYolox(BboxDetectorBase): 35 | def __init__(self, model='yolox_s', threshold=0.5, provider='gpu'): 36 | super().__init__(MODEL_ZOO[model], provider) 37 | self.threshold = threshold 38 | self.label = MODEL_ZOO[model]['label'] 39 | 40 | def forward(self, image_in_, show=False): 41 | model_results = 
self.model.forward(self.preprocess(image_in_)) 42 | results_after = self.postprocess(model_results, self.threshold, label=self.label, max_bbox_num=5) 43 | if show: 44 | _ = self.show(image_in_, results_after) 45 | return results_after 46 | 47 | 48 | if __name__ == '__main__': 49 | # image_p = 'resources/for_pose/t_pose_1080p.jpeg' 50 | # img_bgr = CVImage(image_p).bgr 51 | # hd = HandDetectorYolox(model='yolox_s_trt16', threshold=0.5, provider='gpu') # yolox_s_trt16 52 | # hd_result = hd.forward(img_bgr, show=True) 53 | # print(hd_result) 54 | # 55 | # with MyFpsCounter('model forward 10 times fps:') as mfc: 56 | # for i in range(10): 57 | # bboxes = hd.forward(img_bgr) 58 | 59 | # # video detect and show 60 | # hd = HandDetectorYolox(model='yolox_s_local', threshold=0.5) 61 | # with CVVideoLoader('') as cvvl: 62 | # for _ in tqdm(range(len(cvvl))): 63 | # _, img = cvvl.get() 64 | # hd_result = hd.forward(img, show=True) 65 | 66 | # video detect and show to video 67 | hd = HandDetectorYolox(model='yolox_s_local', threshold=0.5) 68 | count = 0 69 | with CVVideoLoader('') as cvvl: 70 | for _ in tqdm(range(len(cvvl))): 71 | _, img = cvvl.get() 72 | hd_result = hd.forward(img, show=False) 73 | out_img = hd.show(img, hd_result) 74 | CVImage(out_img).save(f'./cache/hand_out/{count}.jpg', create_path=True) 75 | count += 1 76 | 77 | # # video 2 pkl 78 | # from cv2box import CVFile 79 | # 80 | # for video_name in ['268', '617', '728', '886']: 81 | # result_list = [] 82 | # video_p = '/{}.mp4'.format( 83 | # video_name) 84 | # cap = cv2.VideoCapture(video_p) 85 | # hd = HandDetectorYolox(0.5) 86 | # while True: 87 | # success, img = cap.read() 88 | # if not success: 89 | # break 90 | # hd_result = hd.forward(img, show=True) 91 | # 92 | # person_results = [] 93 | # for bbox in hd_result[0]: 94 | # person = {'bbox': np.concatenate([bbox, [1]])} 95 | # person_results.append(person) 96 | # result_list.append(person_results) 97 | # 98 | # # result_list.append(hd_result[0]) 99 | # 100 | # CVFile(video_p.replace('.mp4', '_hand_bbox_out.pkl')).pickle_write(result_list) 101 | -------------------------------------------------------------------------------- /sd_lib/ip_adapter/models/resampler.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/8/23 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | # modified from https://github.com/mlfoundations/open_flamingo/blob/main/open_flamingo/src/helpers.py 6 | import math 7 | 8 | import torch 9 | import torch.nn as nn 10 | 11 | 12 | # FFN 13 | def FeedForward(dim, mult=4): 14 | inner_dim = int(dim * mult) 15 | return nn.Sequential( 16 | nn.LayerNorm(dim), 17 | nn.Linear(dim, inner_dim, bias=False), 18 | nn.GELU(), 19 | nn.Linear(inner_dim, dim, bias=False), 20 | ) 21 | 22 | 23 | def reshape_tensor(x, heads): 24 | bs, length, width = x.shape 25 | # (bs, length, width) --> (bs, length, n_heads, dim_per_head) 26 | x = x.view(bs, length, heads, -1) 27 | # (bs, length, n_heads, dim_per_head) --> (bs, n_heads, length, dim_per_head) 28 | x = x.transpose(1, 2) 29 | # (bs, n_heads, length, dim_per_head) --> (bs*n_heads, length, dim_per_head) 30 | x = x.reshape(bs, heads, length, -1) 31 | return x 32 | 33 | 34 | class PerceiverAttention(nn.Module): 35 | def __init__(self, *, dim, dim_head=64, heads=8): 36 | super().__init__() 37 | self.scale = dim_head ** -0.5 38 | self.dim_head = dim_head 39 | self.heads = heads 40 | inner_dim = dim_head * heads 41 | 42 | self.norm1 = 
nn.LayerNorm(dim) 43 | self.norm2 = nn.LayerNorm(dim) 44 | 45 | self.to_q = nn.Linear(dim, inner_dim, bias=False) 46 | self.to_kv = nn.Linear(dim, inner_dim * 2, bias=False) 47 | self.to_out = nn.Linear(inner_dim, dim, bias=False) 48 | 49 | def forward(self, x, latents): 50 | """ 51 | Args: 52 | x (torch.Tensor): image features 53 | shape (b, n1, D) 54 | latent (torch.Tensor): latent features 55 | shape (b, n2, D) 56 | """ 57 | x = self.norm1(x) 58 | latents = self.norm2(latents) 59 | 60 | b, l, _ = latents.shape 61 | 62 | q = self.to_q(latents) 63 | kv_input = torch.cat((x, latents), dim=-2) 64 | k, v = self.to_kv(kv_input).chunk(2, dim=-1) 65 | 66 | q = reshape_tensor(q, self.heads) 67 | k = reshape_tensor(k, self.heads) 68 | v = reshape_tensor(v, self.heads) 69 | 70 | # attention 71 | scale = 1 / math.sqrt(math.sqrt(self.dim_head)) 72 | weight = (q * scale) @ (k * scale).transpose(-2, -1) # More stable with f16 than dividing afterwards 73 | weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype) 74 | out = weight @ v 75 | 76 | out = out.permute(0, 2, 1, 3).reshape(b, l, -1) 77 | 78 | return self.to_out(out) 79 | 80 | 81 | class Resampler(nn.Module): 82 | def __init__( 83 | self, 84 | dim=1024, 85 | depth=8, 86 | dim_head=64, 87 | heads=16, 88 | num_queries=8, 89 | embedding_dim=768, 90 | output_dim=1024, 91 | ff_mult=4, 92 | ): 93 | super().__init__() 94 | 95 | self.latents = nn.Parameter(torch.randn(1, num_queries, dim) / dim ** 0.5) 96 | 97 | self.proj_in = nn.Linear(embedding_dim, dim) 98 | 99 | self.proj_out = nn.Linear(dim, output_dim) 100 | self.norm_out = nn.LayerNorm(output_dim) 101 | 102 | self.layers = nn.ModuleList([]) 103 | for _ in range(depth): 104 | self.layers.append( 105 | nn.ModuleList( 106 | [ 107 | PerceiverAttention(dim=dim, dim_head=dim_head, heads=heads), 108 | FeedForward(dim=dim, mult=ff_mult), 109 | ] 110 | ) 111 | ) 112 | 113 | def forward(self, x): 114 | 115 | latents = self.latents.repeat(x.size(0), 1, 1) 116 | 117 | x = self.proj_in(x) 118 | 119 | for attn, ff in self.layers: 120 | latents = attn(x, latents) + latents 121 | latents = ff(latents) + latents 122 | 123 | latents = self.proj_out(latents) 124 | return self.norm_out(latents) 125 | -------------------------------------------------------------------------------- /body_lib/body_kp_detector/blazepose_mediapipe/body_bbox_detector.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/7/21 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | based on 7 | https://github.com/PINTO0309/PINTO_model_zoo/tree/main/053_BlazePose 8 | https://github.com/positive666/mediapipe_PoseEstimation_pytorch/blob/main/blazebase.py 9 | https://github.com/Azzallon/teste/tree/DPR/pose_estimation_3d/blazepose-fullbody 10 | """ 11 | 12 | import numpy as np 13 | from apstone import ONNXModel 14 | 15 | from cv2box import CVImage, MyFpsCounter 16 | from body_lib.body_kp_detector.blazepose_mediapipe.blaze_utils import denormalize_detections, \ 17 | resize_pad, raw_output_to_detections, weighted_non_max_suppression 18 | 19 | # from body_lib.body_kp_detector.blazepose_mediapipe.utils.blazepose_utils_numpy import raw_output_to_detections, \ 20 | # weighted_non_max_suppression 21 | 22 | # ANCHORS_128 = 'pretrain_models/body_lib/body_kp_detector/blazepose_mediapipe/anchors/anchors_896_128.npy' 23 | ANCHORS_224 = 'pretrain_models/body_lib/body_kp_detector/blazepose_mediapipe/anchors/anchors_2254_224.npy' 24 | 25 | # 
input: 1*3*224*224 output: score 1*2254*1 box 1*2254*12 26 | LITE_BLAZEPOSE_MODEL = 'pretrain_models/body_lib/body_kp_detector/blazepose_mediapipe/densify_full_body_detector.onnx' 27 | 28 | 29 | class BodyDetector: 30 | def __init__(self, provider='gpu'): 31 | super().__init__() 32 | self.anchors = np.load(ANCHORS_224) 33 | self.model = ONNXModel(LITE_BLAZEPOSE_MODEL, provider=provider) 34 | 35 | # self.input_std = 127.5 36 | # self.input_mean = 127.5 37 | # self.input_size = (224, 224) 38 | # 39 | # self.x_scale = self.y_scale = 224 40 | # self.w_scale = self.h_scale = 224 41 | # self.num_keypoints = 4 42 | # self.score_clipping_thresh = 100.0 43 | self.min_score_thresh = 0.5 44 | # self.min_suppression_threshold = 0.3 45 | # self.num_coords = 12 46 | 47 | # # These settings are for converting detections to ROIs which can then 48 | # # be extracted and feed into the landmark network 49 | # # use mediapipe/calculators/util/alignment_points_to_rects_calculator.cc 50 | # self.detection2roi_method = 'alignment' 51 | # self.kp1 = 2 52 | # self.kp2 = 3 53 | # self.theta0 = 90 * np.pi / 180 54 | # self.dscale = 1.5 55 | # self.dy = 0. 56 | 57 | def forward(self, img_in_, show=False): 58 | img_crop, scale, pad = resize_pad(CVImage(img_in_).bgr) 59 | image_blob = img_crop.astype(np.float32) / 255 60 | 61 | out = self.model.forward(image_blob.transpose((2, 1, 0))[np.newaxis, :]) 62 | 63 | detections = raw_output_to_detections(out[1], out[0], self.anchors, self.min_score_thresh) 64 | 65 | filtered_detections = [] 66 | for i in range(len(detections)): 67 | # faces = self._weighted_non_max_suppression(detections[i]) 68 | faces = weighted_non_max_suppression(detections[i]) 69 | faces = np.stack(faces) if len(faces) > 0 else np.zeros((0, 13)) 70 | filtered_detections.append(faces) 71 | 72 | filtered_detections = denormalize_detections(filtered_detections[0], scale, pad) 73 | 74 | if show and len(filtered_detections) > 0: 75 | print(filtered_detections) 76 | # kps 77 | image_show = CVImage(img_in_).draw_landmarks(filtered_detections[0, 4:12].reshape((4, 2))[:, ::-1]) 78 | # box 79 | image_show = CVImage(image_show).draw_landmarks(filtered_detections[0, 0:4].reshape((2, 2)), 80 | color=(0, 255, 255)) 81 | CVImage(image_show).show(0) 82 | return filtered_detections 83 | 84 | 85 | if __name__ == '__main__': 86 | image_path = 'resources/yoga2.webp' 87 | image_in = CVImage(image_path).bgr 88 | pd = BodyDetector(provider='gpu') 89 | filtered_detections = pd.forward(image_in, show=True) 90 | 91 | # with MyFpsCounter('model forward 10 times fps: ') as mfc: 92 | # for i in range(10): 93 | # filtered_detections = pd.forward(image_in) 94 | 95 | # img_in, ratio, pad_w, pad_h = CVImage('resources/t_pose.jpeg').resize_keep_ratio((128, 128)) 96 | # CVImage(img_in).show(0) 97 | -------------------------------------------------------------------------------- /gpt_lib/chatglm6b_finetune/finetune.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/4/25 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | from transformers.integrations import TensorBoardCallback 7 | from torch.utils.tensorboard import SummaryWriter 8 | from transformers import TrainingArguments 9 | from transformers import Trainer, HfArgumentParser 10 | from transformers import AutoTokenizer, AutoModel 11 | import torch 12 | import torch.nn as nn 13 | from peft import get_peft_model, LoraConfig, TaskType 14 | from dataclasses import dataclass, field 
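# Note (descriptive): this script LoRA-finetunes THUDM/chatglm-6b. The base model
# is loaded in 8-bit (load_in_8bit=True), wrapped with a peft LoraConfig
# (task_type=CAUSAL_LM, r=lora_rank), and trained with the HF Trainer plus a
# TensorBoard callback. data_collator pads each batch to its longest sequence and
# sets the prompt and padding positions of the labels to -100, so the loss is
# only computed on the response tokens.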
15 | import datasets 16 | import os 17 | 18 | 19 | tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) 20 | 21 | 22 | @dataclass 23 | class FinetuneArguments: 24 | dataset_path: str = field(default="data/alpaca") 25 | model_path: str = field(default="output") 26 | lora_rank: int = field(default=8) 27 | 28 | 29 | class CastOutputToFloat(nn.Sequential): 30 | def forward(self, x): 31 | return super().forward(x).to(torch.float32) 32 | 33 | 34 | def data_collator(features: list) -> dict: 35 | len_ids = [len(feature["input_ids"]) for feature in features] 36 | longest = max(len_ids) 37 | input_ids = [] 38 | labels_list = [] 39 | for ids_l, feature in sorted(zip(len_ids, features), key=lambda x: -x[0]): 40 | ids = feature["input_ids"] 41 | seq_len = feature["seq_len"] 42 | labels = ( 43 | [-100] * (seq_len - 1) + ids[(seq_len - 1) :] + [-100] * (longest - ids_l) 44 | ) 45 | ids = ids + [tokenizer.pad_token_id] * (longest - ids_l) 46 | _ids = torch.LongTensor(ids) 47 | labels_list.append(torch.LongTensor(labels)) 48 | input_ids.append(_ids) 49 | input_ids = torch.stack(input_ids) 50 | labels = torch.stack(labels_list) 51 | return { 52 | "input_ids": input_ids, 53 | "labels": labels, 54 | } 55 | 56 | 57 | # class ModifiedTrainer(Trainer): 58 | # def compute_loss(self, model, inputs, return_outputs=False): 59 | # return model( 60 | # input_ids=inputs["input_ids"], 61 | # labels=inputs["labels"], 62 | # ).loss 63 | # 64 | # def save_model(self, output_dir=None, _internal_call=False): 65 | # from transformers.trainer import TRAINING_ARGS_NAME 66 | # 67 | # os.makedirs(output_dir, exist_ok=True) 68 | # torch.save(self.args, os.path.join(output_dir, TRAINING_ARGS_NAME)) 69 | # saved_params = { 70 | # k: v.to("cpu") for k, v in self.model.named_parameters() if v.requires_grad 71 | # } 72 | # torch.save(saved_params, os.path.join(output_dir, "adapter_model.bin")) 73 | 74 | 75 | def main(): 76 | writer = SummaryWriter() 77 | finetune_args, training_args = HfArgumentParser( 78 | (FinetuneArguments, TrainingArguments) 79 | ).parse_args_into_dataclasses() 80 | 81 | # init model 82 | model = AutoModel.from_pretrained( 83 | "THUDM/chatglm-6b", load_in_8bit=True, trust_remote_code=True, device_map="auto" 84 | ) 85 | model.gradient_checkpointing_enable() 86 | model.enable_input_require_grads() 87 | model.is_parallelizable = True 88 | model.model_parallel = True 89 | model.lm_head = CastOutputToFloat(model.lm_head) 90 | model.config.use_cache = ( 91 | False # silence the warnings. Please re-enable for inference! 
92 | ) 93 | 94 | # setup peft 95 | peft_config = LoraConfig( 96 | task_type=TaskType.CAUSAL_LM, 97 | inference_mode=False, 98 | r=finetune_args.lora_rank, 99 | lora_alpha=32, 100 | lora_dropout=0.1, 101 | ) 102 | model = get_peft_model(model, peft_config) 103 | 104 | # load dataset 105 | dataset = datasets.load_from_disk(finetune_args.dataset_path) 106 | print(f"\n{len(dataset)=}\n") 107 | 108 | # start train 109 | trainer = Trainer( 110 | model=model, 111 | train_dataset=dataset, 112 | args=training_args, 113 | callbacks=[TensorBoardCallback(writer)], 114 | data_collator=data_collator, 115 | ) 116 | trainer.train() 117 | writer.close() 118 | # save model 119 | model.save_pretrained(training_args.output_dir) 120 | 121 | 122 | if __name__ == "__main__": 123 | main() -------------------------------------------------------------------------------- /seg_lib/u2net/u2net_cloth_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/1/13 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | from cv2box import CVImage, MyFpsCounter 7 | from apstone import ModelBase 8 | import cv2 9 | import numpy as np 10 | 11 | MODEL_ZOO = { 12 | # https://github.com/danielgatis/rembg/blob/main/rembg/session_cloth.py 13 | # input_name:['input'], shape:[['batch_size', 3, 768, 768]] 14 | # output_name:['output', 'd1', 'onnx::Concat_1876', 'onnx::Concat_1896', 'onnx::Concat_1916', 'onnx::Concat_1936', 'onnx::Concat_1956'], shape:[['batch_size', 4, 768, 768], ['Convd1_dim_0', 4, 768, 768], ['Resizeonnx::Concat_1876_dim_0', 'Resizeonnx::Concat_1876_dim_1', 'Resizeonnx::Concat_1876_dim_2', 'Resizeonnx::Concat_1876_dim_3'], ['Resizeonnx::Concat_1896_dim_0', 'Resizeonnx::Concat_1896_dim_1', 'Resizeonnx::Concat_1896_dim_2', 'Resizeonnx::Concat_1896_dim_3'], ['Resizeonnx::Concat_1916_dim_0', 'Resizeonnx::Concat_1916_dim_1', 'Resizeonnx::Concat_1916_dim_2', 'Resizeonnx::Concat_1916_dim_3'], ['Resizeonnx::Concat_1936_dim_0', 'Resizeonnx::Concat_1936_dim_1', 'Resizeonnx::Concat_1936_dim_2', 'Resizeonnx::Concat_1936_dim_3'], ['Resizeonnx::Concat_1956_dim_0', 'Resizeonnx::Concat_1956_dim_1', 'Resizeonnx::Concat_1956_dim_2', 'Resizeonnx::Concat_1956_dim_3']] 15 | 'u2net_cloth_seg': { 16 | 'model_path': 'pretrain_models/seg_lib/u2net/u2net_cloth_seg.onnx', 17 | 'input_dynamic_shape': (1, 3, 768, 768), 18 | }, 19 | } 20 | 21 | 22 | class U2netClothSeg(ModelBase): 23 | def __init__(self, model_type='u2net_cloth_seg', provider='gpu'): 24 | super().__init__(MODEL_ZOO[model_type], provider) 25 | self.model_type = model_type 26 | 27 | self.input_mean = (0.485, 0.456, 0.406) 28 | self.input_std = (0.229, 0.224, 0.225) 29 | self.input_size = (768, 768) 30 | 31 | def forward(self, image_in, **kwargs): 32 | """ 33 | Args: 34 | image_in: CVImage access type 35 | post_process: Post Process the mask for a smooth boundary by applying Morphological Operations 36 | Research based on paper: https://www.sciencedirect.com/science/article/pii/S2352914821000757 37 | Returns: mask 0-1 38 | """ 39 | image_in_size = CVImage(image_in).bgr.shape 40 | image_in_pre = CVImage(image_in).blob_innormal(self.input_size, self.input_mean, self.input_std, rgb=True, 41 | interpolation=cv2.INTER_LANCZOS4) 42 | pred_mask = self.model.forward(image_in_pre) 43 | from scipy.special import log_softmax 44 | pred_mask = log_softmax(pred_mask[0], 1) 45 | pred_mask = np.argmax(pred_mask, axis=1, keepdims=True) 46 | pred_mask = np.squeeze(pred_mask, 0) 47 | pred_mask = 
np.squeeze(pred_mask, 0) 48 | pred_mask = pred_mask.astype(np.uint8) 49 | 50 | pred_mask = CVImage(pred_mask).resize(image_in_size[:-1][::-1], interpolation=cv2.INTER_LANCZOS4).bgr 51 | 52 | # First create the image with alpha channel 53 | rgba = CVImage(image_in).bgr 54 | rgba = cv2.cvtColor(rgba, cv2.COLOR_BGR2RGBA) 55 | # Then assign the mask to the last channel of the image 56 | rgba[:, :, 3] = pred_mask 57 | 58 | upper_body_mask = pred_mask.copy() 59 | upper_body_mask[np.where(upper_body_mask != 1)] = 0 60 | upper_body_mask[np.where(upper_body_mask == 1)] = 255 61 | 62 | lower_body_mask = pred_mask.copy() 63 | lower_body_mask[np.where(lower_body_mask != 2)] = 0 64 | lower_body_mask[np.where(lower_body_mask == 2)] = 255 65 | 66 | full_body_mask = pred_mask.copy() 67 | full_body_mask[np.where(full_body_mask != 3)] = 0 68 | full_body_mask[np.where(full_body_mask == 3)] = 255 69 | 70 | return [upper_body_mask, lower_body_mask, full_body_mask] 71 | 72 | 73 | if __name__ == '__main__': 74 | fb_cur = U2netClothSeg(model_type='u2net_cloth_seg', provider='gpu') 75 | mask = fb_cur.forward('', post_process=False) 76 | CVImage(mask[0]).show() 77 | CVImage(mask[1]).show() 78 | CVImage(mask[2]).show() 79 | 80 | combined_mask = np.maximum(mask[0], mask[1]) 81 | # # reverse 82 | # combined_mask[np.where(combined_mask == 255)] = 233 83 | # combined_mask[np.where(combined_mask == 0)] = 255 84 | # combined_mask[np.where(combined_mask == 233)] = 0 85 | CVImage(combined_mask).save('') 86 | -------------------------------------------------------------------------------- /body_lib/body_kp_detector/blazepose_mediapipe/pose_landmark_yolox.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/7/21 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import numpy as np 6 | from cv2box import CVImage, MyFpsCounter, CVBbox 7 | 8 | from apstone import ONNXModel 9 | from body_lib.body_kp_detector.blazepose_mediapipe.blaze_utils import postprocess, denormalize_landmarks, detection2roi, \ 10 | extract_roi 11 | from body_lib.body_bbox_detector import BodyBboxDetector 12 | 13 | # input 1*256*256*3 output , 1*1 , , , 14 | LITE_MODEL = 'pretrain_models/body_lib/body_kp_detector/blazepose_mediapipe/pose_landmark_lite.onnx' 15 | FULL_MODEL = 'pretrain_models/body_lib/body_kp_detector/blazepose_mediapipe/pose_landmark_full.onnx' 16 | HEAVY_MODEL = 'pretrain_models/body_lib/body_kp_detector/blazepose_mediapipe/pose_landmark_heavy.onnx' 17 | 18 | 19 | class LandmarkDetectorYolox: 20 | def __init__(self, model_complexity=0, provider='gpu'): 21 | self.bbd = BodyBboxDetector(model='yolox_tiny_trt16', threshold=0.5) 22 | 23 | model_path_list = [LITE_MODEL, FULL_MODEL, HEAVY_MODEL] 24 | self.model = ONNXModel(model_path_list[model_complexity], provider=provider) 25 | 26 | self.need_bbox_flag = True 27 | self.history = [] 28 | 29 | def forward(self, image_in_, show=False): 30 | """ 31 | 32 | Args: 33 | image_in_: 34 | show: 35 | Returns: 36 | landmarks: 33*4 37 | 38 | """ 39 | bbox_result = self.bbd.forward(image_in_, show=False, max_bbox_num=1)[0] 40 | img, ratio, left, top = CVImage(image_in_).crop_keep_ratio(bbox_result, (256, 256), padding_ratio=1.) 
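        # crop_keep_ratio crops the detected body bbox to the 256x256 landmark-model
        # input while keeping the aspect ratio; the returned (ratio, left, top) are
        # what recover_from_crop uses below to map the normalized landmarks back to
        # the original image coordinates.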
41 | 42 | if show: 43 | CVImage(img).show(0, 'img_crop') 44 | 45 | blob = (img / 256).astype(np.float32)[np.newaxis, :] 46 | normalized_landmarks, f, ee, rr, tt = self.model.forward(blob) 47 | normalized_landmarks = postprocess(normalized_landmarks)[0] 48 | landmarks_ = CVImage(None).recover_from_crop(normalized_landmarks, ratio, left, top, (256, 256)) 49 | 50 | if show: 51 | show_img = CVImage(image_in_).draw_landmarks(landmarks_) 52 | CVImage(show_img).show(0, 'results') 53 | return landmarks_ 54 | 55 | def forward_w_tracking(self, image_in_, show=False): 56 | if self.need_bbox_flag: 57 | bbox_result = self.bbd.forward(image_in_, show=False, max_bbox_num=1)[0] 58 | else: 59 | reserve_points = [0, 7, 8, 11, 12, 23, 24, 25, 26, 27, 28] 60 | bbox_result = CVBbox(None).get_bbox_from_points(self.history[-1][reserve_points], image_in_.shape, 61 | margin_ratio=0.2) 62 | 63 | img, ratio, left, top = CVImage(image_in_).crop_keep_ratio(bbox_result, (256, 256), padding_ratio=1.) 64 | 65 | if show: 66 | CVImage(img).show(0, 'img_crop') 67 | 68 | blob = (img / 256).astype(np.float32)[np.newaxis, :] 69 | normalized_landmarks, f, _, _, _ = self.model.forward(blob) 70 | normalized_landmarks = postprocess(normalized_landmarks)[0] 71 | landmarks_ = CVImage(None).recover_from_crop(normalized_landmarks, ratio, left, top, (256, 256)) 72 | 73 | self.need_bbox_flag = False 74 | self.history.append(landmarks_) 75 | self.history = self.history[-2:] 76 | 77 | if show: 78 | show_img = CVImage(image_in_).draw_landmarks(landmarks_) 79 | CVImage(show_img).show(0, 'results') 80 | return landmarks_ 81 | 82 | 83 | if __name__ == '__main__': 84 | # image_path = 'resources/for_pose/t_pose_1080p.jpeg' 85 | # image_in = CVImage(image_path).bgr 86 | 87 | """ 88 | model 1 82fps trt16 trt 109fps 89 | model 2 67fps trt16 output Nan trt 97fps 90 | """ 91 | ld = LandmarkDetectorYolox(model_complexity=2, provider='trt') 92 | 93 | # landmarks = ld.forward(image_in, show=True) 94 | # print(landmarks) 95 | # 96 | # with MyFpsCounter('model forward 10 times fps: ') as mfc: 97 | # for i in range(10): 98 | # filtered_detections = ld.forward(image_in) 99 | 100 | # video tracking test 101 | from cv2box import CVVideoLoader 102 | from tqdm import tqdm 103 | 104 | with CVVideoLoader('') as cvvl: 105 | for _ in tqdm(range(len(cvvl))): 106 | _, frame = cvvl.get() 107 | landmarks = ld.forward_w_tracking(frame, show=False) 108 | -------------------------------------------------------------------------------- /sd_lib/controlnet/controlnet_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/10/20 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | from diffusers import ControlNetModel 7 | from diffusers.image_processor import VaeImageProcessor 8 | from cv2box import CVImage 9 | import numpy as np 10 | import torch 11 | 12 | MODEL_ZOO = { 13 | 'control_v11p_sd15_canny': { 14 | 'model_path': 'sd_models/controlnets/control_v11p_sd15_canny/', 15 | 'use_safetensors': False, 16 | }, 17 | 'control_v11p_sd15_normalbae': { 18 | 'model_path': 'sd_models/controlnets/control_v11p_sd15_normalbae/', 19 | 'use_safetensors': False, 20 | }, 21 | 'control_v11f1e_sd15_tile': { 22 | 'model_path': 'sd_models/controlnets/control_v11f1e_sd15_tile/', 23 | 'use_safetensors': False, 24 | }, 25 | 'control_v11e_sd15_ip2p': { 26 | 'model_path': 'sd_models/controlnets/control_v11e_sd15_ip2p/', 27 | 'use_safetensors': False, 28 | }, 29 | 'control_v11p_sd15_inpaint': 
{ 30 | 'model_path': 'sd_models/controlnets/control_v11p_sd15_inpaint/', 31 | 'use_safetensors': False, 32 | }, 33 | 34 | } 35 | 36 | 37 | class ControlNet: 38 | def __init__(self, model_name='control_v11p_sd15_canny', cond_scale=1, vae_scale_factor=8, height=512, width=512, 39 | device='cuda', dtype=torch.float32): 40 | self.cond_scale = cond_scale 41 | self.vae_scale_factor = vae_scale_factor 42 | self.height = height 43 | self.width = width 44 | self.device = device 45 | self.dtype = dtype 46 | self.condition_image = None 47 | 48 | self.model = ControlNetModel.from_pretrained(MODEL_ZOO[model_name]['model_path'], 49 | torch_dtype=dtype, 50 | use_safetensors=MODEL_ZOO[model_name]['use_safetensors'] 51 | ).to(self.device) 52 | self.control_image_processor = VaeImageProcessor( 53 | vae_scale_factor=vae_scale_factor, do_convert_rgb=True, do_normalize=False 54 | ) 55 | 56 | def preprocess(self, condition_image, do_classifier_free_guidance, guess_mode): 57 | if self.condition_image is None: 58 | if isinstance(condition_image, list): 59 | # inpainting 60 | from .utils import make_inpaint_condition 61 | condition_image = make_inpaint_condition(condition_image[0], condition_image[1]) 62 | else: 63 | condition_image = CVImage(condition_image).pillow() 64 | self.condition_image = self.control_image_processor.preprocess(condition_image, height=self.height, 65 | width=self.width).to(self.device, 66 | dtype=self.dtype) 67 | if do_classifier_free_guidance and not guess_mode: 68 | self.condition_image = torch.cat([self.condition_image] * 2) 69 | 70 | def forward(self, latent, t, condition_image, encoder_hidden_states, conditioning_scale=1, do_classifier_free_guidance=True, 71 | guess_mode=False): 72 | """ 73 | Args: 74 | latent: ([2,4,64,64]) 75 | t: int 76 | condition_image: str or list(inpainting img+mask 77 | encoder_hidden_states: ([2, 77, 768]) 78 | conditioning_scale: 79 | do_classifier_free_guidance: 80 | guess_mode: 81 | Returns: 82 | 83 | """ 84 | self.preprocess(condition_image, do_classifier_free_guidance, guess_mode) 85 | 86 | down_block_res_samples, mid_block_res_sample = self.model( 87 | latent.to(self.dtype), 88 | t, 89 | encoder_hidden_states=encoder_hidden_states, 90 | controlnet_cond=self.condition_image, 91 | conditioning_scale=conditioning_scale, 92 | guess_mode=guess_mode, 93 | return_dict=False, 94 | ) 95 | return down_block_res_samples, mid_block_res_sample 96 | 97 | 98 | if __name__ == '__main__': 99 | condition_image_p = 'resources/for_sd/controlnet/astronaut_canny.png' 100 | 101 | cn = ControlNet(model_name='control_v11p_sd15_inpaint', cond_scale=1, vae_scale_factor=8, height=512, width=512, 102 | dtype=torch.float32) 103 | 104 | down_block_res_samples_, mid_block_res_sample_ = cn.forward(torch.rand((2, 4, 64, 64)), 105 | 20, 106 | condition_image_p, 107 | torch.rand((2, 77, 768)) 108 | ) 109 | print(down_block_res_samples_[0].shape) 110 | print(mid_block_res_sample_.shape) 111 | --------------------------------------------------------------------------------
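# Usage note for sd_lib/controlnet/controlnet_api.py (illustrative sketch, with
# assumed names): the residuals returned by ControlNet.forward are meant to be
# injected into the SD UNet at each denoising step. Assuming a diffusers
# UNet2DConditionModel loaded as `unet` and text embeddings `text_embeds`
# (e.g. from sd_lib/clip_encoder.py ClipText), a step would look roughly like:
#
#     noise_pred = unet(
#         latent,                                   # (2, 4, 64, 64), uncond + cond
#         t,
#         encoder_hidden_states=text_embeds,        # (2, 77, 768)
#         down_block_additional_residuals=down_block_res_samples_,
#         mid_block_additional_residual=mid_block_res_sample_,
#     ).sample
#
# followed by the usual classifier-free-guidance split of noise_pred.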