├── utils ├── README.md ├── __init__.py └── ai_utils.py ├── data_lib ├── __init__.py ├── dataset_preprocess │ ├── __init__.py │ ├── count_mean_std.py │ ├── gen_dataset_thumbnail.py │ └── gen_dataset_txt.py ├── dataset_tools.py ├── dataset_convert │ ├── coco-annotator_2_coco-mmpose.py │ ├── eric_yolov3_2_coco-mmdet.py │ └── coco-annotator_2_coco-mmdet.py └── dataset_vis │ └── coco_detect_vis.py ├── hand_lib ├── __init__.py ├── hand_mesh │ ├── __init__.py │ └── minimal_hands │ │ ├── __init__.py │ │ ├── ik_model.py │ │ └── kinematics.py ├── hand_detector │ ├── hand_detecotr_21kp │ │ └── __init__.py │ ├── hand_detector_d2 │ │ ├── __init__.py │ │ └── hand_detector_d2_api.py │ ├── hand_detector_mediapipe │ │ ├── __init__.py │ │ └── hand_detector_mediapipe.py │ ├── hand_detector_yolox │ │ ├── __init__.py │ │ └── hand_detector_yolox.py │ └── __init__.py ├── README.md └── hand_detect_and_estimate.py ├── mocap_lib ├── calibration │ ├── __init__.py │ ├── example_boards │ │ ├── charuco_9x16.yaml │ │ ├── intrinsic │ │ │ ├── charuco_A1_44_intri_1.yaml │ │ │ ├── charuco_A1_88_intri_4.yaml │ │ │ └── charuco_A1_88_intri_1.yaml │ │ ├── charuco_7x5.yaml │ │ ├── charuco_A1_44.yaml │ │ └── charuco_A1_88.yaml │ ├── calibration_by_multical.sh │ ├── gopro_wifi_reader.py │ └── calibration_w_human.py ├── middleware │ ├── __init__.py │ └── VMCApi.py ├── visualize │ ├── __init__.py │ └── poseviz_demo │ │ ├── __init__.py │ │ └── holistic_demo.py ├── body_wholebody │ ├── __init__.py │ ├── mediapipe_holistic.py │ └── wholebody_kp_detector_mmpose.py ├── triangulate │ ├── utils │ │ └── __init__.py │ ├── __init__.py │ └── anipose_triangulate.py ├── bbox_tracking │ ├── __init__.py │ └── bbox_tracking.py ├── smooth_filter │ ├── __init__.py │ ├── smooth_filter.py │ └── one_euro_api.py ├── __init__.py ├── skeleton_transfer │ ├── __init__.py │ ├── openpose_lib.py │ └── keypoints_map.py ├── body_regress │ └── spin_onnx.py ├── README.md └── get_body_bbox_kps.py ├── body_lib ├── body_kp_detector │ ├── __init__.py │ ├── body_detector_lightweight │ │ ├── __init__.py │ │ └── body_detector_lightweight_api.py │ ├── body_detector_movenet │ │ ├── __init__.py │ │ └── movenet_api_onnx.py │ ├── blazepose_mediapipe │ │ ├── __init__.py │ │ ├── pose_landmark_origin.py │ │ ├── body_bbox_detector.py │ │ └── pose_landmark_yolox.py │ ├── kp_detector_mmpose.py │ └── body_kp_detector_kapao │ │ └── body_kp_detector_kapao.py ├── __init__.py └── body_bbox_detector │ ├── __init__.py │ └── body_bbox_detector_mmdet.py ├── art_lib ├── style_transfer │ └── dct_net │ │ ├── __init__.py │ │ ├── utils.py │ │ └── dct_net.py ├── talking_head │ ├── __init__.py │ ├── wav2lip │ │ └── audio_encoder.py │ └── sadtalker │ │ └── audio_2_pose.py ├── README.md ├── inpainting │ └── lama.py └── optical_flow_estimate │ └── raft │ ├── raft_api.py │ └── utils.py ├── sd_lib ├── prompt2prompt │ └── __init__.py ├── ip_adapter │ ├── models │ │ ├── __init__.py │ │ └── resampler.py │ └── ip_adapter_api.py ├── README.md ├── controlnet │ ├── utils.py │ └── controlnet_api.py ├── tagger │ └── tagger_api.py ├── inversion_api.py └── clip_encoder.py ├── requirements.txt ├── gpt_lib ├── code_example │ ├── huggingface_demo.py │ ├── chatglm6b_demo.py │ ├── openai_gpt3_demo.py │ └── openai_azure_demo.py ├── langchain │ ├── utils.py │ └── model_config.py ├── models │ ├── llm_base.py │ ├── llama.py │ └── chatglm_6b.py ├── textsplitter │ └── chinese_text_splitter.py ├── chatglm6b_finetune │ ├── tokenize_dataset_rows.py │ └── finetune.py └── lora_finetune │ └── chatglm6b_lora_deepspeed.py ├── 
audio_lib ├── tts │ └── bark_example.py └── svc │ └── sovits_infer.py ├── seg_lib ├── README.md ├── carvekit │ └── carvekit_api.py ├── segformer_b2_clothes │ └── segformer_api.py ├── ppmattingv2 │ └── ppmattingv2_api.py ├── cihp_pgn │ └── cihp_pgn_api.py └── u2net │ ├── u2net_api.py │ └── u2net_cloth_api.py ├── math_lib ├── affine_matrix.py ├── k_means.py ├── wrap_affine.py └── gaussian_filter.py ├── ocr_lib ├── paddle_excel.py └── paddle_ocr.py ├── .gitignore ├── SPEEDTABLE.md └── sr_lab └── realesrgan └── realesrgan_onnx_api.py /utils/README.md: -------------------------------------------------------------------------------- 1 | ### Dataset Preprocess 2 | 3 | - count imgs mean & std 4 | - generate img names from dir to txt 5 | -------------------------------------------------------------------------------- /data_lib/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/8/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /hand_lib/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/6/22 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /hand_lib/hand_mesh/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /mocap_lib/calibration/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/5/25 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /mocap_lib/middleware/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /mocap_lib/visualize/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /body_lib/body_kp_detector/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/7/21 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /mocap_lib/body_wholebody/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/7/21 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /mocap_lib/triangulate/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: 
utf-8 -- 2 | # @Time : 2022/8/8 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /art_lib/style_transfer/dct_net/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/1/17 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /hand_lib/hand_mesh/minimal_hands/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /mocap_lib/visualize/poseviz_demo/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /hand_lib/hand_detector/hand_detecotr_21kp/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/7/4 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /hand_lib/hand_detector/hand_detector_d2/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /hand_lib/hand_detector/hand_detector_mediapipe/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /hand_lib/hand_detector/hand_detector_yolox/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /body_lib/body_kp_detector/body_detector_lightweight/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /body_lib/body_kp_detector/body_detector_movenet/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | -------------------------------------------------------------------------------- /art_lib/talking_head/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/2/22 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from 
.tpsmm.tpsmm import TPSMM, KPDetector 6 | -------------------------------------------------------------------------------- /mocap_lib/bbox_tracking/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/8/15 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from .bbox_tracking import BboxTracking 6 | -------------------------------------------------------------------------------- /mocap_lib/smooth_filter/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/8/16 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from .smooth_filter import SmoothFilter 6 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2021/11/19 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from .ai_utils import MyTimer, get_path_by_ext, make_random_name 6 | -------------------------------------------------------------------------------- /data_lib/dataset_preprocess/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from gen_dataset_txt import gen_txt_from_path 6 | -------------------------------------------------------------------------------- /body_lib/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/7/21 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from .body_bbox_detector.body_bbox_detector_mmdet import BodyBboxDetector 6 | -------------------------------------------------------------------------------- /body_lib/body_bbox_detector/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/7/21 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from .body_bbox_detector_mmdet import BodyBboxDetector 6 | -------------------------------------------------------------------------------- /mocap_lib/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/3 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from .body_wholebody.wholebody_kp_detector_mmpose import BodyWholebodyDetector 6 | -------------------------------------------------------------------------------- /mocap_lib/skeleton_transfer/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/8/15 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from .cocowholebody_2_openpose import cocowb_2_openpose 6 | -------------------------------------------------------------------------------- /hand_lib/hand_detector/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from .hand_detector_yolox.hand_detector_yolox import HandDetectorYolox 
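6 | # Minimal usage sketch (mirrors hand_lib/hand_detect_and_estimate.py further below; assumes the
7 | # self-trained yolox hand weights mentioned in hand_lib/README.md are in place):
8 | #   from hand_lib.hand_detector import HandDetectorYolox
9 | #   detector = HandDetectorYolox(thres=0.3)
10 | #   hand_bboxes, vis_image = detector.forward(frame, show=True)  # frame: BGR ndarray, e.g. from CVVideoLoader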
-------------------------------------------------------------------------------- /sd_lib/prompt2prompt/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/9/4 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from .ddim_inversion import ddim_inversion, null_optimization, EmptyControl 6 | -------------------------------------------------------------------------------- /body_lib/body_kp_detector/blazepose_mediapipe/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/7/21 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | # from pose_landmark_lite_full_heavy import LandmarkDetector -------------------------------------------------------------------------------- /sd_lib/ip_adapter/models/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/8/23 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from .ip_adapter import IPAdapter, IPAdapterPlus, ImageProjModel 6 | from .resampler import Resampler 7 | -------------------------------------------------------------------------------- /mocap_lib/triangulate/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/8/8 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from .easymocap_triangulate import EasyMocapTriangulate 6 | from .anipose_triangulate import AniposeTriangulate 7 | -------------------------------------------------------------------------------- /mocap_lib/calibration/example_boards/charuco_9x16.yaml: -------------------------------------------------------------------------------- 1 | boards: 2 | charuco_9x16: 3 | _type_: charuco 4 | size: [9, 16] 5 | aruco_dict: 4X4_1000 6 | 7 | square_length: 0.0173 8 | marker_length: 0.013 9 | 10 | min_rows: 2 11 | min_points: 10 12 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cv2box 2 | apstone 3 | numpy 4 | onnxruntime 5 | numba==0.54.1 6 | nvidia_tensorrt==8.0.0.3 7 | onnx==1.10.1 8 | scipy==1.6.2 9 | torchvision==0.9.1+cu111 10 | scikit_image==0.18.3 11 | tqdm==4.61.1 12 | torch==1.8.1+cu111 13 | pycuda==2021.1 14 | matplotlib==3.4.3 15 | Pillow==8.4.0 16 | -------------------------------------------------------------------------------- /sd_lib/README.md: -------------------------------------------------------------------------------- 1 | ## StableDiffusion Lib 2 | 3 | 4 | ### CLIP encoder 5 | 6 | - [CLIP encoder](https://huggingface.co/openai/clip-vit-base-patch32) including text&image encoder 7 | 8 | ### IP-Adapter 9 | 10 | - [ip_adapter](https://github.com/tencent-ailab/IP-Adapter/blob/main/ip_adapter/ip_adapter.py) 11 | -------------------------------------------------------------------------------- /mocap_lib/calibration/example_boards/intrinsic/charuco_A1_44_intri_1.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | _type_: 'charuco' 3 | size: [4, 4] 4 | aruco_dict: '4X4_250' 5 | square_length: 0.28 6 | marker_length: 0.224 7 | 8 | min_rows: 2 9 | min_points: 4 10 | 11 | boards: 12 | charuco_A1_0: 13 | aruco_offset: 0 14 | 15 | 
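16 | # Single-board intrinsic config; board files like this are consumed by multical, e.g.
17 | #   multical intrinsic --image_path ./ --boards ./example_boards/intrinsic/charuco_A1_44_intri_1.yaml
18 | # (see mocap_lib/calibration/calibration_by_multical.sh).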
-------------------------------------------------------------------------------- /gpt_lib/code_example/huggingface_demo.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/4/17 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | hugging face use case 7 | """ 8 | 9 | from huggingface_hub import Repository 10 | repo = Repository(local_dir="/mnt/models/hugging_face/Alpaca-CoT", clone_from="QingyiSi/Alpaca-CoT") -------------------------------------------------------------------------------- /art_lib/README.md: -------------------------------------------------------------------------------- 1 | ### Inpainting 2 | 3 | - [lama](https://github.com/Sanster/lama-cleaner) 4 | 5 | ### Style Transfer 6 | 7 | - [DctNet](https://www.modelscope.cn/models/damo/cv_unet_person-image-cartoon_compound-models/summary) 8 | 9 | ### Talking Head 10 | 11 | - [SadTalker](https://github.com/OpenTalker/SadTalker) 12 | - [TPSMM](https://github.com/yoyo-nb/Thin-Plate-Spline-Motion-Model) 13 | - [Wav2lip](https://github.com/Rudrabha/Wav2Lip) -------------------------------------------------------------------------------- /mocap_lib/body_regress/spin_onnx.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/4/14 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | import torch 7 | from apstone import ONNXModel 8 | 9 | # https://github.com/nkolot/SPIN 10 | 11 | onnx_model_p = 'pretrain_models/body_regressor_spin/body_regressor_spin-eft-agora.onnx' 12 | 13 | spin = ONNXModel(onnx_model_p) 14 | print(spin.forward(torch.randn(1, 3, 224, 224).numpy())) 15 | -------------------------------------------------------------------------------- /mocap_lib/calibration/example_boards/intrinsic/charuco_A1_88_intri_4.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | _type_: 'charuco' 3 | size: [8, 8] 4 | aruco_dict: '4X4_1000' 5 | square_length: 0.07 6 | marker_length: 0.056 7 | 8 | min_rows: 1 9 | min_points: 6 10 | 11 | boards: 12 | charuco_A1_0: 13 | aruco_offset: 0 14 | 15 | charuco_A1_1: 16 | aruco_offset: 220 17 | 18 | charuco_A1_2: 19 | aruco_offset: 440 20 | 21 | charuco_A1_3: 22 | aruco_offset: 660 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /mocap_lib/calibration/example_boards/charuco_7x5.yaml: -------------------------------------------------------------------------------- 1 | boards: 2 | charuco_7x5: 3 | _type_: charuco 4 | size: [7, 5] 5 | aruco_dict: 4X4_250 6 | 7 | square_length: 0.13 8 | marker_length: 0.104 9 | 10 | min_rows: 1 11 | min_points: 6 12 | 13 | aruco_params: 14 | adaptiveThreshWinSizeMin: 3 15 | adaptiveThreshWinSizeMax: 23 16 | adaptiveThreshWinSizeStep: 1 17 | minMarkerPerimeterRate: 0.01 18 | maxMarkerPerimeterRate: 4.0 19 | perspectiveRemovePixelPerCell: 1 20 | -------------------------------------------------------------------------------- /mocap_lib/calibration/example_boards/charuco_A1_44.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | _type_: 'charuco' 3 | size: [4, 4] 4 | aruco_dict: '4X4_250' 5 | square_length: 0.28 6 | marker_length: 0.224 7 | 8 | min_rows: 2 9 | min_points: 4 10 | 11 | boards: 12 | charuco_A1_0: 13 | aruco_offset: 0 14 | 15 | charuco_A1_1: 16 | aruco_offset: 50 17 | 18 | charuco_A1_2: 19 | 
aruco_offset: 100 20 | 21 | charuco_A1_3: 22 | aruco_offset: 150 23 | 24 | charuco_A1_4: 25 | aruco_offset: 200 26 | 27 | -------------------------------------------------------------------------------- /mocap_lib/calibration/example_boards/charuco_A1_88.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | _type_: 'charuco' 3 | size: [8, 8] 4 | aruco_dict: '4X4_1000' 5 | square_length: 0.07 6 | marker_length: 0.056 7 | 8 | min_rows: 1 9 | min_points: 6 10 | 11 | boards: 12 | charuco_A1_0: 13 | aruco_offset: 0 14 | 15 | charuco_A1_1: 16 | aruco_offset: 220 17 | 18 | charuco_A1_2: 19 | aruco_offset: 440 20 | 21 | charuco_A1_3: 22 | aruco_offset: 660 23 | 24 | charuco_A1_4: 25 | aruco_offset: 880 26 | 27 | 28 | -------------------------------------------------------------------------------- /mocap_lib/calibration/example_boards/intrinsic/charuco_A1_88_intri_1.yaml: -------------------------------------------------------------------------------- 1 | common: 2 | _type_: 'charuco' 3 | size: [8, 8] 4 | aruco_dict: '4X4_1000' 5 | square_length: 0.07 6 | marker_length: 0.056 7 | 8 | min_rows: 1 9 | min_points: 6 10 | 11 | boards: 12 | charuco_A1_0: 13 | aruco_offset: 0 14 | 15 | 16 | #aruco_params: 17 | # adaptiveThreshWinSizeMin: 3 18 | # adaptiveThreshWinSizeMax: 23 19 | # adaptiveThreshWinSizeStep: 1 20 | # minMarkerPerimeterRate: 0.01 21 | # maxMarkerPerimeterRate: 4.0 22 | # perspectiveRemovePixelPerCell: 1 23 | -------------------------------------------------------------------------------- /gpt_lib/code_example/chatglm6b_demo.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/4/12 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from transformers import AutoTokenizer, AutoModel 6 | 7 | tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) 8 | model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda() 9 | model = model.eval() 10 | response, history = model.chat(tokenizer, "你好", history=[]) 11 | print(response) 12 | response, history = model.chat(tokenizer, "晚上睡不着应该怎么办", history=history) 13 | print(response) -------------------------------------------------------------------------------- /gpt_lib/langchain/utils.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/5/9 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import torch 6 | 7 | def torch_gc(): 8 | if torch.cuda.is_available(): 9 | # with torch.cuda.device(DEVICE): 10 | torch.cuda.empty_cache() 11 | torch.cuda.ipc_collect() 12 | elif torch.backends.mps.is_available(): 13 | try: 14 | from torch.mps import empty_cache 15 | empty_cache() 16 | except Exception as e: 17 | print(e) 18 | print("如果您使用的是 macOS 建议将 pytorch 版本升级至 2.0.0 或更高版本,以支持及时清理 torch 产生的内存占用。") -------------------------------------------------------------------------------- /hand_lib/README.md: -------------------------------------------------------------------------------- 1 | ### Hand Detect 2 | 3 | - hand detector d2 (from [detector.d2](https://github.com/ddshan/hand_detector.d2) based detectron2) 4 | - hand detector from [mediapipe](https://github.com/google/mediapipe) 5 | - hand detector based by yolox (self-trained by [mmdetection](https://github.com/ykk648/mmdetection)) 6 | 7 | ### Hand Mesh Recovery 8 | 9 | - IK model from 
[minimal-hands](https://github.com/MengHao666/Minimal-Hand-pytorch) 10 | 11 | ### Hand Regress 12 | 13 | - H3DWModel from [frankmocap](https://github.com/facebookresearch/frankmocap/blob/bb05b851bc3f1e27a55fd15e9f46093e7c05fc12/handmocap/hand_mocap_api.py#L44) (convert 2 onnx) -------------------------------------------------------------------------------- /audio_lib/tts/bark_example.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/4/27 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | https://github.com/suno-ai/bark 7 | """ 8 | from bark import SAMPLE_RATE, generate_audio, preload_models 9 | from IPython.display import Audio 10 | 11 | # download and load all models 12 | preload_models() 13 | 14 | # generate audio from text 15 | text_prompt = """ 16 | Hello, my name is Suno. And, uh — and I like pizza. [laughs] 17 | But I also have other interests such as playing tic tac toe. 18 | """ 19 | audio_array = generate_audio(text_prompt) 20 | 21 | # play text in notebook 22 | Audio(audio_array, rate=SAMPLE_RATE) -------------------------------------------------------------------------------- /seg_lib/README.md: -------------------------------------------------------------------------------- 1 | ## Segmentation Lib 2 | 3 | 4 | ### carvekit 5 | 6 | - [image-background-remove-tool](https://github.com/OPHoperHPO/image-background-remove-tool) API example, for cloth seg. 7 | 8 | ### cihp_pgn 9 | 10 | - [CIHP_PGN](https://github.com/Engineering-Course/CIHP_PGN) Human segmentation, model converted to onnx 11 | 12 | ### u2net 13 | 14 | - [rembg](https://github.com/danielgatis/rembg) 15 | - [cv_u2net_salient-detection](https://www.modelscope.cn/models/damo/cv_u2net_salient-detection/summary) 16 | 17 | ### ppmattingv2 18 | 19 | - [pp_mattingv2](https://github.com/jiachen0212/pp_mattingv2) 20 | 21 | ### RAFT 22 | 23 | - [RAFT](https://github.com/princeton-vl/RAFT) 24 | -------------------------------------------------------------------------------- /data_lib/dataset_tools.py: -------------------------------------------------------------------------------- 1 | from data_lib.dataset_preprocess import gen_txt_from_path 2 | 3 | """ 4 | generate dataset list as follow formats: 5 | |-- dataset 6 | |-- train 7 | |--label1 8 | |--dataset prefix 1 9 | |--*.jpg 10 | |--dataset prefix 2 11 | |--*.jpg 12 | |--label2 13 | |--dataset prefix 14 | |--*.jpg 15 | |--label 16 | |--dataset prefix 17 | |--*.jpg 18 | ... 
19 | 20 | |-- val 21 | |--*.jpg 22 | |--train.txt 23 | |--val.txt 24 | """ 25 | 26 | if __name__ == '__main__': 27 | test_path = '' 28 | # count_mean_std(test_path) 29 | gen_txt_from_path(test_path, img_format='jpg', train_ratio=0.8) 30 | -------------------------------------------------------------------------------- /mocap_lib/calibration/calibration_by_multical.sh: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/5/16 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | # install 7 | pip install multical 8 | 9 | # separate 10 | multical intrinsic --image_path ./ --boards ./example_boards/intrinsic/charuco_A1_88_intri_4.yaml 11 | multical calibrate --image_path ./ --calibration ./intrinsic.json --fix_intrinsic --boards ./example_boards/charuco_A1_44.yaml 12 | 13 | # generate board 14 | multical boards --boards ./example_boards/charuco_A1_44.yaml --paper_size A1 --pixels_mm 10 --write my_images 15 | 16 | # intrinsic and extrinsic 17 | multical calibrate --image_path ./ --boards ./example_boards/charuco_A1_44.yaml --limit_images 200 --fix_aspect 18 | multical vis --workspace_file calibration.pkl 19 | -------------------------------------------------------------------------------- /sd_lib/controlnet/utils.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/10/23 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import numpy as np 6 | import torch 7 | from cv2box import CVImage 8 | 9 | 10 | def make_inpaint_condition(image_p, image_mask_p): 11 | image = CVImage(image_p).pillow() 12 | image_mask = CVImage(image_mask_p).pillow() 13 | image = np.array(image.convert("RGB")).astype(np.float32) / 255.0 14 | image_mask = np.array(image_mask.convert("L")).astype(np.float32) / 255.0 15 | assert image.shape[0:1] == image_mask.shape[0:1], "image and image_mask must have the same image size" 16 | image[image_mask > 0.5] = -1.0 # set as masked pixel 17 | image = np.expand_dims(image, 0).transpose((0, 3, 1, 2)) 18 | image = torch.from_numpy(image) 19 | return image 20 | -------------------------------------------------------------------------------- /art_lib/style_transfer/dct_net/utils.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/1/17 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import numpy as np 6 | import cv2 7 | 8 | 9 | def resize_size(image, size=720): 10 | h, w, c = np.shape(image) 11 | if min(h, w) > size: 12 | if h > w: 13 | h, w = int(size * h / w), size 14 | else: 15 | h, w = size, int(size * w / h) 16 | image = cv2.resize(image, (w, h), interpolation=cv2.INTER_AREA) 17 | return image 18 | 19 | 20 | def padTo16x(image): 21 | h, w, c = np.shape(image) 22 | if h % 16 == 0 and w % 16 == 0: 23 | return image, h, w 24 | nh, nw = (h // 16 + 1) * 16, (w // 16 + 1) * 16 25 | img_new = np.ones((nh, nw, 3), np.uint8) * 255 26 | img_new[:h, :w, :] = image 27 | 28 | return img_new, h, w 29 | -------------------------------------------------------------------------------- /math_lib/affine_matrix.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/11/11 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import numpy as np 6 | 7 | 8 | def inverse_cv_affine(mat): 9 | """ 10 | 
similar to mat_rev = cv2.invertAffineTransform(mat) 11 | Args: 12 | mat: 13 | Returns: 14 | """ 15 | # inverse the Affine transformation matrix 16 | mat_rev = np.zeros([2, 3]) 17 | div1 = mat[0][0] * mat[1][1] - mat[0][1] * mat[1][0] 18 | mat_rev[0][0] = mat[1][1] / div1 19 | mat_rev[0][1] = -mat[0][1] / div1 20 | mat_rev[0][2] = -(mat[0][2] * mat[1][1] - mat[0][1] * mat[1][2]) / div1 21 | div2 = mat[0][1] * mat[1][0] - mat[0][0] * mat[1][1] 22 | mat_rev[1][0] = mat[1][0] / div2 23 | mat_rev[1][1] = -mat[0][0] / div2 24 | mat_rev[1][2] = -(mat[0][2] * mat[1][0] - mat[0][0] * mat[1][2]) / div2 25 | -------------------------------------------------------------------------------- /mocap_lib/README.md: -------------------------------------------------------------------------------- 1 | ## Mocap Lib 2 | 3 | ### Body Regress 4 | 5 | - [SPIN](https://github.com/open-mmlab/mmhuman3d/tree/main/configs/spin/) onnx model 6 | 7 | ### Whole Body Keypoints Detect 8 | 9 | - mediapipe wrapper / [mmpose](https://github.com/open-mmlab/mmpose) model support 10 | 11 | ### Calibration 12 | 13 | - [multical](https://github.com/oliver-batchelor/multical) 14 | 15 | ### Middleware 16 | 17 | - [VMC](https://protocol.vmc.info/) protocol demo. 18 | 19 | ### Smooth Filter 20 | 21 | - [SmoothNet](https://github.com/cure-lab/SmoothNet) 22 | - OneEuro 23 | 24 | ### Triangulation 25 | 26 | - [anipose method](https://github.com/lambdaloop/anipose) 27 | - [easymocap method](https://github.com/zju3dv/EasyMocap) 28 | - [pose2sim method](https://github.com/perfanalytics/pose2sim/blob/main/Pose2Sim/triangulate_3d.py) 29 | 30 | ### Visualize 31 | 32 | - [poseviz](https://github.com/isarandi/poseviz) demo for mediapipe. -------------------------------------------------------------------------------- /ocr_lib/paddle_excel.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/6/12 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import cv2 6 | from paddleocr import PPStructure, draw_structure_result, save_structure_res 7 | import os 8 | 9 | os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" 10 | table_engine = PPStructure(show_log=True) 11 | 12 | save_folder = 'output' 13 | img_path = '' 14 | img = cv2.imread(img_path) 15 | result = table_engine(img) 16 | save_structure_res(result, save_folder, os.path.basename(img_path).split('.')[0]) 17 | 18 | for line in result: 19 | line.pop('img') 20 | print(line) 21 | 22 | from PIL import Image 23 | 24 | font_path = './pretrain_models/ocr_lib/simfang.ttf' # PaddleOCR下提供字体包 25 | image = Image.open(img_path).convert('RGB') 26 | im_show = draw_structure_result(image, result, font_path=font_path) 27 | im_show = Image.fromarray(im_show) 28 | im_show.save('result_structure.jpg') -------------------------------------------------------------------------------- /math_lib/k_means.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/5/13 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import numpy as np 6 | import pandas as pd 7 | 8 | 9 | def cal_dis(points, centroids, k): 10 | cal_dis_res = [] 11 | for point in points: 12 | dis = np.linalg.norm(np.tile(point, (k, 1)) - centroids) 13 | cal_dis_res.append(dis) 14 | return cal_dis_res 15 | 16 | 17 | def update_centroids(points, centroids, k): 18 | cal_dis_list = cal_dis(points, centroids, k) 19 | min_cal_dis_list = np.argmin(cal_dis_list, axis=1) 20 | 
new_centroids = pd.DataFrame(points).groupby(min_cal_dis_list).mean() 21 | diff = new_centroids - centroids 22 | return new_centroids, diff 23 | 24 | 25 | def k_means(points, k): 26 | centroids = points.sample(k) 27 | # use min diff or optim fix rounds 28 | for i in range(100): 29 | centroids, _ = update_centroids(points, centroids, k) 30 | return centroids 31 | -------------------------------------------------------------------------------- /mocap_lib/calibration/gopro_wifi_reader.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/5/23 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | # This is the script without the need of a FFmpeg installation, pure OpenCV 7 | # This is not useful for image processing (eg: find faces) as there will be more lag, around 6 seconds added. 8 | import cv2 9 | import numpy as np 10 | from time import time 11 | import socket 12 | from goprocam import GoProCamera 13 | from goprocam import constants 14 | gpCam = GoProCamera.GoPro() 15 | sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 16 | t = time() 17 | gpCam.livestream("start") 18 | cap = cv2.VideoCapture("udp://10.5.5.9:8554") 19 | while True: 20 | nmat, frame = cap.read() 21 | cv2.imshow("GoPro OpenCV", frame) 22 | if cv2.waitKey(1) & 0xFF == ord('q'): 23 | break 24 | if time() - t >= 2.5: 25 | sock.sendto("_GPHD_:0:0:2:0.000000\n".encode(), ("10.5.5.9", 8554)) 26 | t = time() 27 | 28 | cap.release() 29 | cv2.destroyAllWindows() -------------------------------------------------------------------------------- /mocap_lib/skeleton_transfer/openpose_lib.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/7/25 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | mediapipe33_to_openpose25 = [0, 0, 12, 14, 16, 11, 13, 15, 0, 24, 26, 28, 23, 25, 27, 5, 2, 8, 7, 31, 31, 29, 7 | 32, 32, 30] 8 | 9 | alphapose17_to_openpose25 = [9, 8, 14, 15, 16, 11, 12, 13, 0, 1, 2, 3, 4, 5, 6, -1, -1, -1, 10 | -1, -1, -1, -1, -1, -1, -1, ] 11 | 12 | 13 | class Openpose25: 14 | def __init__(self, poses=None): 15 | self.poses = poses 16 | 17 | def from_mediapipe_33(self, poses): 18 | """ 19 | 20 | Args: 21 | poses: 33 * 3 22 | 23 | Returns: 25 * 3 24 | 25 | """ 26 | poses = poses[mediapipe33_to_openpose25] 27 | poses[8, :2] = poses[[9, 12], :2].mean(axis=0) 28 | poses[8, 2] = poses[[9, 12], 2].min(axis=0) 29 | poses[1, :2] = poses[[2, 5], :2].mean(axis=0) 30 | poses[1, 2] = poses[[2, 5], 2].min(axis=0) 31 | return poses 32 | -------------------------------------------------------------------------------- /art_lib/talking_head/wav2lip/audio_encoder.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/7/25 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import os 6 | # import numba 7 | import numpy as np 8 | from apstone import ModelBase 9 | from cv2box import CVImage 10 | from scipy.spatial import ConvexHull 11 | import cv2 12 | """ 13 | input_name:['input_1'], shape:[[1, 32, 1, 80, 16]] 14 | output_name:['output_1'], shape:[[1, 32, 512]] 15 | """ 16 | 17 | MODEL_ZOO = { 18 | # 32 frame , mel spectrogram 19 | # input_name: ['input_1'], shape: [[1, 32, 1, 80, 16]] 20 | # output_name: ['output_1'], shape: [[1, 32, 512]] 21 | 'audio_encoder': { 22 | 'model_path': 
'pretrain_models/talking_head/wav2lip/audio_encoder.onnx', 23 | }, 24 | } 25 | 26 | 27 | class Audio2PoseDecoder(ModelBase): 28 | def __init__(self, model_type='audio_encoder', provider='cpu'): 29 | super().__init__(MODEL_ZOO[model_type], provider) 30 | self.model_type = model_type 31 | 32 | def forward(self, img_source, img_driving, pass_drive_kp=False): 33 | pass -------------------------------------------------------------------------------- /ocr_lib/paddle_ocr.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/5/23 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | https://github.com/PaddlePaddle/PaddleOCR 7 | """ 8 | from paddleocr import PaddleOCR, draw_ocr 9 | 10 | # Paddleocr目前支持的多语言语种可以通过修改lang参数进行切换 11 | # 例如`ch`, `en`, `fr`, `german`, `korean`, `japan` 12 | ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory 13 | img_path = '' 14 | result = ocr.ocr(img_path, cls=True) 15 | for idx in range(len(result)): 16 | res = result[idx] 17 | for line in res: 18 | print(line) 19 | 20 | # 显示结果 21 | # 如果本地没有simfang.ttf,可以在doc/fonts目录下下载 22 | from PIL import Image 23 | result = result[0] 24 | image = Image.open(img_path).convert('RGB') 25 | boxes = [line[0] for line in result] 26 | txts = [line[1][0] for line in result] 27 | scores = [line[1][1] for line in result] 28 | im_show = draw_ocr(image, boxes, txts, scores, font_path='./pretrain_models/ocr_lib/simfang.ttf') 29 | im_show = Image.fromarray(im_show) 30 | im_show.save('result2.jpg') 31 | -------------------------------------------------------------------------------- /utils/ai_utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import PIL 3 | import torch 4 | import torch.nn.functional as F 5 | import time 6 | import numpy as np 7 | import uuid 8 | from pathlib import Path 9 | import pickle 10 | 11 | 12 | def load_checkpoint(model, filename): 13 | return model.load_state_dict(torch.load(filename)) 14 | 15 | 16 | def make_random_name(f_name): 17 | return uuid.uuid4().hex + '.' 
+ f_name.split('.')[-1] 18 | 19 | 20 | def down_sample(target_, size): 21 | return F.interpolate(target_, size=size, mode='bilinear', align_corners=True) 22 | 23 | 24 | class MyTimer(object): 25 | """ 26 | timer 27 | """ 28 | 29 | def __enter__(self): 30 | self.t0 = time.time() 31 | 32 | def __exit__(self, exc_type, exc_val, exc_tb): 33 | print('[finished, spent time: {time:.2f}s]'.format(time=time.time() - self.t0)) 34 | 35 | 36 | def get_path_by_ext(this_dir, ext_list=None): 37 | if ext_list is None: 38 | print('Use image ext as default !') 39 | ext_list = [".jpg", ".png", ".JPG", ".webp", ".jpeg"] 40 | return [p for p in Path(this_dir).rglob('*') if p.suffix in ext_list] 41 | -------------------------------------------------------------------------------- /art_lib/talking_head/sadtalker/audio_2_pose.py: -------------------------------------------------------------------------------- 1 | import os 2 | # import numba 3 | import numpy as np 4 | from apstone import ModelBase 5 | from cv2box import CVImage 6 | from scipy.spatial import ConvexHull 7 | import cv2 8 | 9 | """ 10 | ref https://github.com/OpenTalker/SadTalker/blob/main/src/audio2pose_models/cvae.py 11 | """ 12 | 13 | MODEL_ZOO = { 14 | # input: 32frames z(random) class(style) ref(reference coeff) audio_emb(audio feature) 15 | # input_name: ['input_1', 'input_2', 'input_3', 'input_4'], shape: [[1, 6], [1], [1, 64], [1, 32, 512]] 16 | # output: pose_motion_pred 17 | # output_name: ['output_1'], shape: [[1, 32, 6]] 18 | 'audio2pose_decoder': { 19 | 'model_path': 'pretrain_models/art_lib/talking_head/sadtalker/audio_2_pose.onnx', 20 | }, 21 | } 22 | 23 | 24 | class Audio2PoseDecoder(ModelBase): 25 | def __init__(self, model_type='audio2pose_decoder', provider='cpu'): 26 | super().__init__(MODEL_ZOO[model_type], provider) 27 | self.model_type = model_type 28 | 29 | def forward(self, img_source, img_driving, pass_drive_kp=False): 30 | pass 31 | 32 | 33 | if __name__ == '__main__': 34 | pass 35 | -------------------------------------------------------------------------------- /data_lib/dataset_preprocess/count_mean_std.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | from tqdm import tqdm 5 | from utils import get_path_by_ext 6 | 7 | 8 | def count_mean_std(img_dir_path): 9 | # path = img_path 10 | means = [0, 0, 0] 11 | stdevs = [0, 0, 0] 12 | 13 | # index = 1 14 | num_imgs = 0 15 | # img_names = os.listdir(path) 16 | for img_path in tqdm(get_path_by_ext(img_dir_path)): 17 | num_imgs += 1 18 | # print(img_name) 19 | img = cv2.imread(str(img_path)) 20 | img = np.asarray(img) 21 | img = img.astype(np.float32) # / 255. 
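        # per-channel means/stds are accumulated over 0-255 pixel values here and averaged by num_imgs below;
        # re-enable the commented "/ 255." above if normalized (0-1) statistics are wanted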
22 | for i in range(3): 23 | means[i] += img[:, :, i].mean() 24 | stdevs[i] += img[:, :, i].std() 25 | # print(num_imgs) 26 | means.reverse() 27 | stdevs.reverse() 28 | 29 | means = np.asarray(means) / num_imgs 30 | stdevs = np.asarray(stdevs) / num_imgs 31 | 32 | print("normMean = {}".format(means)) 33 | print("normStd = {}".format(stdevs)) 34 | print('transforms.Normalize(normMean={},normStd = {})'.format(means, stdevs).replace(' ', ',')) 35 | 36 | 37 | if __name__ == '__main__': 38 | count_mean_std('') 39 | -------------------------------------------------------------------------------- /data_lib/dataset_convert/coco-annotator_2_coco-mmpose.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/8/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | from cv2box import CVFile 7 | from tqdm import tqdm 8 | 9 | 10 | def get_coco_bbox_gt(json_in_, json_out_): 11 | json_data = CVFile(json_in_).data 12 | 13 | out_list = [] 14 | for i in tqdm(range(len(json_data['annotations']))): 15 | dummy = json_data['annotations'][i] 16 | if dummy['category_id'] == 1: 17 | out_list.append({ 18 | 'bbox': dummy['bbox'], 19 | 'category_id': dummy['category_id'], 20 | 'image_id': dummy['image_id'], 21 | 'score': 1.0, 22 | }) 23 | print(len(out_list)) 24 | CVFile(json_out_).json_write(out_list) 25 | 26 | 27 | def del_other_category(json_in_, json_out_): 28 | json_data = CVFile(json_in_).data 29 | out_data = json_data.copy() 30 | out_data['annotations'] = [] 31 | print(len(json_data['annotations'])) 32 | for i in tqdm(range(len(json_data['annotations']))): 33 | dummy = json_data['annotations'][i] 34 | if dummy['category_id'] == 1 and 'keypoints' in dummy.keys(): 35 | out_data['annotations'].append(dummy) 36 | 37 | print(len(out_data['annotations'])) 38 | CVFile(json_out_).json_write(out_data) 39 | 40 | 41 | if __name__ == '__main__': 42 | json_in = '' 43 | json_out = '' 44 | # get_coco_bbox_gt(json_in, json_out) 45 | del_other_category(json_in, json_out) 46 | -------------------------------------------------------------------------------- /gpt_lib/code_example/openai_gpt3_demo.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/2/6 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | import os 7 | import openai as ai 8 | 9 | 10 | # Get the key from an environment variable on the machine it is running on 11 | # ai.api_key = os.environ.get("OPENAI_API_KEY") 12 | 13 | 14 | def generate_gpt3_response(user_text, print_output=False): 15 | """ 16 | Query OpenAI GPT-3 for the specific key and get back a response 17 | :type user_text: str the user's text to query for 18 | :type print_output: boolean whether or not to print the raw output JSON 19 | """ 20 | completions = ai.Completion.create( 21 | engine='text-davinci-003', # Determines the quality, speed, and cost. 
22 | temperature=0.5, # Level of creativity in the response 23 | prompt=user_text, # What the user typed in 24 | max_tokens=3500, # Maximum tokens in the prompt AND response 25 | n=1, # The number of completions to generate 26 | stop=None, # An optional setting to control response generation 27 | ) 28 | 29 | # Displaying the output can be helpful if things go wrong 30 | if print_output: 31 | print(completions) 32 | 33 | # Return the first choice's text 34 | return completions.choices[0].text 35 | 36 | 37 | if __name__ == '__main__': 38 | os.environ.setdefault("OPENAI_API_KEY", '') 39 | ai.api_key = os.environ.get("OPENAI_API_KEY") 40 | 41 | text = '写一篇乔·拜登的演讲稿' 42 | results = generate_gpt3_response(text) 43 | print(results) 44 | # print(results.decode()) 45 | -------------------------------------------------------------------------------- /hand_lib/hand_mesh/minimal_hands/ik_model.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/2/14 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from apstone import ONNXModel 6 | import numpy as np 7 | from hand_lib.hand_mesh.minimal_hands.kinematics import xyz_to_delta, MPIIHandJoints, mano_to_mpii 8 | from cv2box import CVFile 9 | 10 | IK_UNIT_LENGTH = 0.09473151311686484 11 | mano_ref_xyz = CVFile('pretrain_models/digital_human/minimal_hands/hand_mesh_model.pkl').data['joints'] 12 | # convert the kinematic definition to MPII style, and normalize it 13 | mpii_ref_xyz = mano_to_mpii(mano_ref_xyz) / IK_UNIT_LENGTH 14 | mpii_ref_xyz -= mpii_ref_xyz[9:10] 15 | # get bone orientations in the reference pose 16 | mpii_ref_delta, mpii_ref_length = xyz_to_delta(mpii_ref_xyz, MPIIHandJoints) 17 | mpii_ref_delta = mpii_ref_delta * mpii_ref_length 18 | 19 | 20 | class IKModel: 21 | def __init__(self): 22 | self.ik_model = ONNXModel('pretrain_models/digital_human/minimal_hands/iknet/iknet.onnx') 23 | 24 | def forward_np(self, hand_np): 25 | # xyz = np.array(hand_np) 26 | delta, length = xyz_to_delta(hand_np, MPIIHandJoints) 27 | delta *= length 28 | pack = np.concatenate( 29 | [hand_np, delta, mpii_ref_xyz, mpii_ref_delta], 0 30 | ) 31 | return self.forward(pack) 32 | 33 | def forward(self, pack): 34 | pack = np.expand_dims(pack, 0) 35 | theta = self.ik_model.forward(pack.astype(np.float32))[0] 36 | # theta_mano = mpii_to_mano(theta) 37 | if len(theta.shape) == 3: 38 | theta = theta[0] 39 | return theta 40 | -------------------------------------------------------------------------------- /math_lib/wrap_affine.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/12/28 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import cv2 6 | import numpy as np 7 | from cv2box import CVImage, MyTimer 8 | import cvcuda 9 | import nvcv 10 | import torch 11 | 12 | # opencv default version 13 | img_p = '' 14 | img = CVImage(img_p).bgr 15 | crop_size = 256 16 | mat_ = np.array([[1.77893761e-01, 2.47390154e-03, -9.42742635e+01], [-2.47390154e-03, 1.77893761e-01, -3.40511541e+01]]) 17 | mat_rev = cv2.invertAffineTransform(mat_) 18 | with MyTimer() as mfc: 19 | for i in range(10000): # 31.5 fps 20 | warped = cv2.warpAffine(img, mat_, (crop_size, crop_size), borderValue=0.0) 21 | CVImage(warped).show() 22 | 23 | 24 | # opencv cuda version 25 | image_tensors = torch.tensor(CVImage(img_p).bgr).unsqueeze(0).cuda() 26 | # print(image_tensors) 27 | print(image_tensors.size()) 
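# CV-CUDA variant: the NHWC torch tensor is wrapped as a cvcuda tensor and warp_affine_into
# fills the preallocated (1, 256, 256, 3) output on the GPU; note it warps with mat_rev
# (the inverted matrix), unlike the OpenCV call above which uses mat_.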
28 | cvcuda_input_tensor = cvcuda.as_tensor(image_tensors, "NHWC") 29 | cvcuda_output_tensor = cvcuda.Tensor([1, 256, 256, 3], np.uint8, "NHWC") 30 | print(cvcuda_input_tensor.shape) 31 | cvcuda_affine_tensor = cvcuda.warp_affine_into(src=cvcuda_input_tensor, dst=cvcuda_output_tensor, xform=mat_rev, 32 | flags=cvcuda.Interp.LINEAR, border_mode=cvcuda.Border.CONSTANT, 33 | border_value=[0]) 34 | print(cvcuda_affine_tensor.shape) 35 | print(type(cvcuda_affine_tensor)) 36 | print(np.array(cvcuda_affine_tensor)) 37 | 38 | # torch.tensor(cvcuda_output_tensor.cuda()).data_ptr() 39 | img_out = cvcuda.as_image(cvcuda_output_tensor.cuda(), format=cvcuda.Format.BGR8) 40 | img_out = img_out.cpu() 41 | print(img_out.shape) 42 | CVImage(img_out).show() 43 | -------------------------------------------------------------------------------- /gpt_lib/code_example/openai_azure_demo.py: -------------------------------------------------------------------------------- 1 | 2 | import openai # 需要pip安装 3 | import prompt_toolkit # 需要额外安装这个库,用于命令行交互 4 | 5 | openai.api_type = "azure" 6 | openai.api_base = "https://shanghai-free-test.openai.azure.com/" # 这里需要根据自己的资源进行更改 7 | # openai.api_version = "2022-12-01" 8 | openai.api_version = "2023-03-15-preview" 9 | 10 | # 配置OpenAI API密钥 11 | openai.api_key = '' # 这里根据自己的API KEY更改 12 | # 设定OpenAI的模型和引擎 13 | model_engine = "gpt-35-turbo" # 这里就是创建的模型名称更改 14 | prompt_prefix = "我: " 15 | response_prefix = "AI: " 16 | 17 | # import requests 18 | # url = openai.api_base + "/openai/deployments?api-version=2022-12-01" 19 | # r = requests.get(url, headers={"api-key": openai.api_key}) 20 | # print(r.text) 21 | 22 | # 定义一个函数,用于向OpenAI API发送请求并返回结果 23 | def generate_response(prompt): 24 | response = openai.Completion.create( 25 | engine=model_engine, 26 | prompt=prompt, 27 | max_tokens=1024, 28 | n=1, 29 | stop=["\n"], 30 | temperature=0, 31 | ) 32 | message = response.choices[0].text 33 | return message.strip() 34 | 35 | 36 | # 通过Prompt Toolkit库来实现命令行交互 37 | def prompt_user(): 38 | while True: 39 | try: 40 | # 读取用户输入的信息 41 | user_input = prompt_toolkit.prompt(prompt_prefix) 42 | # user_input = 'What\'s the difference between garbanzo beans and chickpeas? 
' 43 | print(user_input) 44 | # 将用户输入发送给OpenAI API,并返回结果 45 | response = generate_response(user_input) 46 | # 打印OpenAI API返回的结果 47 | print(response_prefix + response) 48 | except KeyboardInterrupt: 49 | # 如果用户按下Ctrl-C,则退出程序 50 | print("\n再见!") 51 | break 52 | 53 | 54 | # 运行程序 55 | if __name__ == "__main__": 56 | prompt_user() 57 | -------------------------------------------------------------------------------- /seg_lib/carvekit/carvekit_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/6/27 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | ref https://github.com/OPHoperHPO/image-background-remove-tool 7 | """ 8 | import os 9 | import numpy as np 10 | from PIL import Image, ImageOps 11 | from carvekit.web.schemas.config import MLConfig 12 | from carvekit.web.utils.init_utils import init_interface 13 | 14 | SHOW_FULLSIZE = False # param {type:"boolean"} 15 | PREPROCESSING_METHOD = "none" # param ["stub", "none"] 16 | SEGMENTATION_NETWORK = "tracer_b7" # param ["u2net", "deeplabv3", "basnet", "tracer_b7"] 17 | POSTPROCESSING_METHOD = "fba" # param ["fba", "none"] 18 | SEGMENTATION_MASK_SIZE = 640 # param ["640", "320"] {type:"raw", allow-input: true} 19 | TRIMAP_DILATION = 30 # param {type:"integer"} 20 | TRIMAP_EROSION = 5 # param {type:"integer"} 21 | DEVICE = 'cuda' # 'cuda' 'cpu' 22 | 23 | config = MLConfig(segmentation_network=SEGMENTATION_NETWORK, 24 | preprocessing_method=PREPROCESSING_METHOD, 25 | postprocessing_method=POSTPROCESSING_METHOD, 26 | seg_mask_size=SEGMENTATION_MASK_SIZE, 27 | trimap_dilation=TRIMAP_DILATION, 28 | trimap_erosion=TRIMAP_EROSION, 29 | device=DEVICE) 30 | 31 | interface = init_interface(config) 32 | 33 | imgs = [''] 34 | 35 | images = interface(imgs) 36 | for i, im in enumerate(images): 37 | img = np.array(im) 38 | img = img[..., :3] # no transparency 39 | idx = (img[..., 0] == 130) & (img[..., 1] == 130) & (img[..., 2] == 130) # background 0 or 130, just try it 40 | img = np.ones(idx.shape) * 255 41 | img[idx] = 0 42 | im = Image.fromarray(np.uint8(img), 'L') 43 | im.save(f'./{imgs[i].split("/")[-1].split(".")[0]}.jpg') 44 | -------------------------------------------------------------------------------- /gpt_lib/models/llm_base.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/5/12 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import torch 6 | from transformers import GenerationConfig 7 | 8 | 9 | class LLM: 10 | def __init__(self, model_info, load_in_8bit, device_map): 11 | self.model_path = model_info['model_path'] 12 | if 'self.config' not in locals(): 13 | self.config = \ 14 | model_info['config'].from_pretrained(self.model_path, return_unused_kwargs=True, 15 | trust_remote_code=True)[0] 16 | self.model = model_info['model'].from_pretrained(self.model_path, trust_remote_code=True, 17 | load_in_8bit=load_in_8bit, device_map=device_map()) 18 | self.tokenizer = model_info['tokenizer'].from_pretrained(self.model_path, trust_remote_code=True) 19 | self.prompt_template = model_info['prompt_template'] 20 | 21 | def generate(self, *args, **kwargs): 22 | pass 23 | 24 | def generate_prompt(self, prompt_in): 25 | return self.prompt_template.format(instruction=prompt_in) 26 | 27 | def generate_base(self, prompt, generation_config=None, **kwargs, ): 28 | prompt_format = self.generate_prompt(prompt) 29 | inputs = 
self.tokenizer(prompt_format, return_tensors="pt") 30 | 31 | with torch.no_grad(): 32 | generation_output = self.model.generate( 33 | input_ids=inputs["input_ids"].cuda(), 34 | generation_config=generation_config, 35 | return_dict_in_generate=True, 36 | output_scores=True, 37 | ) 38 | s = generation_output.sequences[0] 39 | output = self.tokenizer.decode(s) 40 | return output.split("### Response:")[1].strip() 41 | -------------------------------------------------------------------------------- /mocap_lib/bbox_tracking/bbox_tracking.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/8/15 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | from cv2box import CVBbox 7 | import numpy as np 8 | 9 | 10 | class BboxTracking: 11 | def __init__(self, image_shape_, batch=1): 12 | """ 13 | Args: 14 | image_shape_: (W,H) 15 | """ 16 | self.image_shape_ = image_shape_ 17 | self.batch_ = batch 18 | self.last_bbox_array = None 19 | 20 | def reset_condition(self, area_limit): 21 | """ 22 | judge bbox area by px^2 23 | """ 24 | if self.last_bbox_array is None: 25 | return self.last_bbox_array 26 | else: 27 | bbox_area = CVBbox(self.last_bbox_array).area() 28 | reset_index = np.where(bbox_area < area_limit) 29 | self.last_bbox_array[reset_index] = np.array([0, 0, self.image_shape_[0], self.image_shape_[1]]) 30 | return self.last_bbox_array 31 | 32 | def forward(self, keypoints_batch=None, margin=0.1, area_limit=1000): 33 | """ 34 | Args: 35 | keypoints_batch: N_view*N_kp*N_axis [N*3, N*3, ...] 36 | margin: 37 | area_limit: 38 | Returns: 39 | """ 40 | 41 | if not keypoints_batch: 42 | # part_w = int(self.image_shape_[0] * 1 / 3) 43 | # return [part_w, 0, 2 * part_w, self.image_shape_[1]] 44 | return np.array([[0, 0, self.image_shape_[0], self.image_shape_[1]]]).repeat(self.batch_, axis=0) 45 | else: 46 | self.last_bbox_array = np.array( 47 | [CVBbox(None).get_bbox_from_points(keypoints[:, :2], self.image_shape_, margin_ratio=margin) for 48 | keypoints in keypoints_batch]) 49 | return self.reset_condition(area_limit) 50 | -------------------------------------------------------------------------------- /gpt_lib/textsplitter/chinese_text_splitter.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/5/9 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | ref https://www.modelscope.cn/models/damo/nlp_bert_document-segmentation_chinese-base/summary 7 | """ 8 | from langchain.text_splitter import CharacterTextSplitter 9 | import re 10 | from typing import List 11 | from modelscope.pipelines import pipeline 12 | 13 | p = pipeline( 14 | task="document-segmentation", 15 | model='damo/nlp_bert_document-segmentation_chinese-base', 16 | device="cuda") 17 | 18 | 19 | class ChineseTextSplitter(CharacterTextSplitter): 20 | def __init__(self, pdf: bool = False, **kwargs): 21 | super().__init__(**kwargs) 22 | self.pdf = pdf 23 | 24 | def split_text(self, text: str, use_document_segmentation: bool = False) -> List[str]: 25 | # use_document_segmentation参数指定是否用语义切分文档,此处采取的文档语义分割模型为达摩院开源的nlp_bert_document-segmentation_chinese-base,论文见https://arxiv.org/abs/2107.09278 26 | # 如果使用模型进行文档语义切分,那么需要安装modelscope[nlp]:pip install "modelscope[nlp]" -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html 27 | if self.pdf: 28 | text = re.sub(r"\n{3,}", "\n", text) 29 | text = re.sub('\s', ' ', text) 30 | text = 
text.replace("\n\n", "") 31 | if use_document_segmentation: 32 | result = p(documents=text) 33 | sent_list = [i for i in result["text"].split("\n\t") if i] 34 | else: 35 | sent_sep_pattern = re.compile('([﹒﹔﹖﹗.。!?]["’”」』]{0,2}|(?=["‘“「『]{1,2}|$))') # del :; 36 | sent_list = [] 37 | for ele in sent_sep_pattern.split(text): 38 | if sent_sep_pattern.match(ele) and sent_list: 39 | sent_list[-1] += ele 40 | elif ele: 41 | sent_list.append(ele) 42 | return sent_list 43 | -------------------------------------------------------------------------------- /art_lib/inpainting/lama.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/7/28 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from cv2box import CVImage, MyFpsCounter 6 | from apstone import ModelBase 7 | import numpy as np 8 | 9 | MODEL_ZOO = { 10 | # ref https://github.com/Sanster/lama-cleaner/blob/main/lama_cleaner/model/lama.py 11 | # pytorch do not support ifft op: https://github.com/pytorch/pytorch/issues/81075 12 | # input : RGB 0-1 (1,3,H,W) (1,1,H,W) 13 | 'big_lama': { 14 | 'model_path': 'pretrain_models/art_lib/inpainting/big-lama.tjm' 15 | }, 16 | } 17 | 18 | 19 | class LAMA(ModelBase): 20 | def __init__(self, model_type='big_lama', provider='gpu'): 21 | super().__init__(MODEL_ZOO[model_type], provider) 22 | self.model_type = model_type 23 | 24 | def forward(self, image_, mask_): 25 | """ 26 | Args: 27 | image_: CVImage acceptable class (path BGR tensor byte PIL etc.) 28 | mask_: [H, W] 29 | Returns: [H, W, C] BGR 30 | """ 31 | 32 | image_in = CVImage(CVImage(image_).rgb()).tensor() 33 | mask_ = CVImage(CVImage(mask_).mask(rgb=True)).tensor() 34 | mask_in = (mask_ > 0) * 1 35 | inpainted_image_ = self.model.forward([image_in, mask_in]) 36 | inpainted_image_ = inpainted_image_[0].permute(1, 2, 0).detach().cpu().numpy() 37 | inpainted_image_ = np.clip(inpainted_image_ * 255, 0, 255).astype("uint8") 38 | inpainted_image_ = CVImage(inpainted_image_).rgb() # rgb2bgr 39 | return inpainted_image_ 40 | 41 | 42 | if __name__ == '__main__': 43 | image_p = 'resources/inpainting/dog_chair.png' 44 | mask_p = 'resources/inpainting/dog_chair_mask.png' 45 | 46 | fb_cur = LAMA(model_type='big_lama', provider='gpu') 47 | inpaint_result = fb_cur.forward(image_p, mask_p) 48 | print(inpaint_result.shape) 49 | CVImage(inpaint_result).show() 50 | -------------------------------------------------------------------------------- /hand_lib/hand_detect_and_estimate.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/3 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | from hand_lib.hand_detector import HandDetectorYolox # MediapipeHand, IKModel, ThirdViewDetector 7 | from cv2box import CVVideoLoader, CVImage, CVFile, MyFpsCounter 8 | from tqdm import tqdm 9 | import cv2 10 | import numpy as np 11 | 12 | if __name__ == '__main__': 13 | video_p = '' 14 | 15 | hdy = HandDetectorYolox(thres=0.3) 16 | # hdd2 = ThirdViewDetector() 17 | # mph = MediapipeHand() 18 | # ikm = IKModel() 19 | 20 | with CVVideoLoader(video_p) as cvvl: 21 | for i in tqdm(range(len(cvvl))): 22 | _, frame = cvvl.get() 23 | # CVImage(frame).show() 24 | bboxs, show_image = hdy.forward(frame, show=True) 25 | # hdd2_results, show_image = hdd2.forward(frame, show=True) 26 | # CVImage(frame).show() 27 | # bboxs = np.array(hdd2_results["instances"].pred_boxes.tensor.to('cpu')) 28 | 29 
| if len(bboxs) != 2: 30 | print(i) 31 | 32 | # frame_info = {'left': None, 'right': None} 33 | # for bbox_ in bboxs: 34 | # # print(bbox_) 35 | # frame_crop = crop_padding_and_resize(frame, bbox_) 36 | # # CVImage(frame_crop).show() 37 | # hand_xyz, side_label, side_label_score = mph.forward(frame_crop) 38 | # # print(hand_xyz, side_label) 39 | # if hand_xyz is None: 40 | # continue 41 | # theta = ikm.forward_np(np.array(hand_xyz)) 42 | # # print(theta) 43 | # 44 | # if side_label == 'Left': 45 | # frame_info['left'] = theta 46 | # elif side_label == 'Right': 47 | # frame_info['right'] = theta 48 | # CVFile('./yolox_mp_thres0_world_xyz/frame{}.pkl'.format(i)).pickle_write(frame_info) 49 | -------------------------------------------------------------------------------- /sd_lib/tagger/tagger_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/10/25 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | from apstone import ModelBase 7 | from cv2box import CVImage, CVFile 8 | import numpy as np 9 | 10 | MODEL_ZOO = { 11 | # input_name:['input_1:0'], shape:[[1, 448, 448, 3]] 12 | # output_name:['predictions_sigmoid'], shape:[[1, 9083]] 13 | 'moat': { 14 | 'model_path': 'sd_models/tagger/SmilingWolf_wd-v1-4-moat-tagger-v2.onnx', 15 | 'tag_path': 'sd_models/tagger/selected_tags.csv', 16 | }, 17 | } 18 | 19 | del_list = ['no_humans', 'english_text', 'monochrome', 'greyscale', 'blurry', 'solo', 'horse'] 20 | 21 | 22 | class Tagger(ModelBase): 23 | def __init__(self, model_name='moat', provider='gpu'): 24 | super().__init__(MODEL_ZOO[model_name], provider) 25 | self.input_size = (448, 448) 26 | self.tags = CVFile(MODEL_ZOO[model_name]['tag_path']).data 27 | # print(self.tag_data) 28 | 29 | def forward(self, image_in_): 30 | image_in_ = CVImage(image_in_).resize(self.input_size).bgr 31 | image_in_ = image_in_[None, :].astype(np.float32) 32 | outputs = self.model.forward(image_in_)[0] 33 | outputs = 1 / (1 + np.exp(-outputs)) 34 | tags = {tag: float(conf) for tag, conf in zip(self.tags['name'][4:], outputs.flatten()[4:]) if 35 | float(conf) > 0.6} 36 | tags = sorted(tags.items(), key=lambda x: x[1], reverse=True) 37 | tags = [tag[0] for tag in tags] 38 | 39 | # for tag in tags: 40 | # if tag in del_list or tag.find('background') > 0: 41 | # tags.remove(tag) 42 | 43 | return ','.join(tags) 44 | 45 | 46 | if __name__ == '__main__': 47 | image_p = 'resources/for_sd/An_astronaut_is_riding_a_horse_on_Mars_seed-444264997.png' 48 | image_in = CVImage(image_p).bgr 49 | 50 | tagger = Tagger(model_name='moat') 51 | 52 | output = tagger.forward(image_in) 53 | print(output) 54 | -------------------------------------------------------------------------------- /mocap_lib/calibration/calibration_w_human.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/9/5 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | using human keypoints instead of charuco boards to do calibration 7 | """ 8 | import numpy as np 9 | from cv2box import CVFile, CVCamera 10 | import cv2 11 | import aniposelib 12 | 13 | human_body_height = 1.82 - 0.25 14 | side_bias = 1 / 15 * human_body_height 15 | 16 | used_kps_3d = np.array( 17 | [[0, -human_body_height, 0], [-side_bias, -1 / 2 * human_body_height, 0], 18 | [side_bias, -1 / 2 * human_body_height, 0], [-side_bias, -1 / 4 * human_body_height, 0], 19 | [side_bias, -1 / 4 * 
human_body_height, 0], [-side_bias, 0, 0], [side_bias, 0, 0], ]) 20 | 21 | frame = 120 22 | all_kps = [] 23 | used_kps_index = [1, 9, 12, 10, 13, 11, 14] 24 | cameras = [] 25 | cvc = CVCamera( 26 | multical_pkl_path='./0809cal/front_4_0809_window_1080.pkl') 27 | for camera_name in ['268', '617', '728', '886']: 28 | kp_p = './0906_pm/stand1/{}_2dkp.pkl'.format(camera_name) 29 | kps = CVFile(kp_p).data[frame][used_kps_index, :2] 30 | 31 | kp_p_2 = './0906_pm/walk2/{}_2dkp.pkl'.format( 32 | camera_name) 33 | all_kps.append(CVFile(kp_p_2).data) 34 | 35 | results = cv2.solvePnP(used_kps_3d, kps, cvc.intri_matrix()[camera_name], cvc.dist()[camera_name]) 36 | 37 | camera = aniposelib.cameras.Camera(name=camera_name, 38 | size=cvc.image_size()[camera_name], 39 | matrix=cvc.intri_matrix()[camera_name], 40 | rvec=results[1], 41 | tvec=results[2], 42 | dist=cvc.dist()[camera_name]) 43 | cameras.append(camera) 44 | 45 | cvc_empty = CVCamera() 46 | cvc_empty.camera_group = aniposelib.cameras.CameraGroup(cameras) 47 | final_camera_group = cvc_empty.bundle_adjust_iter(np.array(all_kps)) 48 | CVFile('./0906_pm/cgroup_from_human.pkl').pickle_write(final_camera_group) 49 | -------------------------------------------------------------------------------- /data_lib/dataset_convert/eric_yolov3_2_coco-mmdet.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/9/1 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | https://gitcode.net/EricLee/yolo_v3 7 | coco hand 8 | some error may exists, use 'yolo2coco.py' instead 9 | """ 10 | from cv2box import get_path_by_ext, CVFile, CVImage 11 | from tqdm import tqdm 12 | 13 | image_p = '' 14 | labels_p = '' 15 | coco_temp = '' 16 | 17 | coco_out = CVFile(coco_temp).data 18 | 19 | coco_out['images'] = [] 20 | coco_out['annotations'] = [] 21 | count = 10010 22 | count2 = 100101 23 | for image_p in tqdm(get_path_by_ext(image_p)): 24 | file_name = str(image_p.stem + image_p.suffix) 25 | image_p = str(image_p) 26 | height, width = CVImage(image_p).bgr.shape[0:2] 27 | image_label_p = image_p.replace('/images', '/labels').replace('.jpg', '.txt') 28 | # print(image_label_p) 29 | labels = CVFile(image_label_p).data 30 | # print(labels) 31 | coco_out['images'].append({ 32 | 'id': count, 33 | 'path': image_p[68:], 34 | 'width': width, 35 | 'height': height, 36 | 'file_name': file_name, 37 | }) 38 | for i in range(len(labels)): 39 | coco_out['annotations'].append({ 40 | 'image_id': count, 41 | 'id': count2, 42 | 'category_id': 0, 43 | 'bbox': [ 44 | int(float(str(labels[i]).split(' ')[1]) * width - float(str(labels[i]).split(' ')[3]) * width * 0.5), 45 | int(float(str(labels[i]).split(' ')[2]) * height - float(str(labels[i]).split(' ')[4][:-5]) * height * 0.5), 46 | int(float(str(labels[i]).split(' ')[3]) * width), 47 | int(float(str(labels[i]).split(' ')[4][:-5]) * height)], 48 | 'iscrowd': False, 49 | 'isbbox': True, 50 | 'area': int(float(str(labels[i]).split(' ')[3]) * width) * int(float(str(labels[i]).split(' ')[4][:-5]) * height) 51 | }) 52 | count2 += 1 53 | 54 | count += 1 55 | 56 | CVFile(coco_temp.replace('.json', '_out.json')).json_write(coco_out) 57 | -------------------------------------------------------------------------------- /body_lib/body_kp_detector/kp_detector_mmpose.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/8/17 3 | # @Author : ykk648 4 | # @Project : 
https://github.com/ykk648/AI_power 5 | """ 6 | https://modelscope.cn/models/damo/cv_hrnetv2w32_body-2d-keypoints_image/summary 7 | """ 8 | from cv2box import CVImage, MyFpsCounter 9 | from apstone import KpDetectorBase 10 | import cv2 11 | import numpy as np 12 | 13 | MODEL_ZOO = { 14 | # input_name:['input_1'], shape:[[1, 3, 128, 128]] 15 | # output_name:['output1'], shape:[[1, 15, 32, 32]] 16 | 'hrnetv2w32': { 17 | 'model_path': 'pretrain_models/body_lib/body_kp_detector/modelscope_hrnetv2w32.onnx', 18 | 'model_input_size': (128, 128) 19 | }, # w h 20 | } 21 | 22 | class BodyDetectorModelScope(KpDetectorBase): 23 | def __init__(self, model_type='r50', provider='gpu'): 24 | super().__init__(MODEL_ZOO[model_type], provider) 25 | self.dark_flag = model_type.find('dark') > 0 26 | 27 | def forward(self, image_in_, bbox_, show=False, mirror_test=False): 28 | if len(bbox_) == 0: 29 | return [[0, 0, 0]] * 133 30 | 31 | model_results = self.model.forward(self.preprocess(image_in_, bbox_)) 32 | 33 | kp_results = self.post_process_default(model_results[0], self.ratio, self.left, self.top) 34 | 35 | if show: 36 | self.show(image_in_, kp_results) 37 | 38 | return kp_results 39 | 40 | 41 | if __name__ == '__main__': 42 | image_path = 'resources/for_pose/t_pose_1080p.jpeg' 43 | image_in = CVImage(image_path).bgr 44 | bbox = [493, 75, 1427, 1044] 45 | 46 | bwd = BodyDetectorModelScope(model_type='hrnetv2w32', provider='gpu') 47 | kps = bwd.forward(image_in, bbox, show=True, mirror_test=False) 48 | # print(kps) 49 | 50 | # with MyFpsCounter('model forward 10 times fps: ') as mfc: 51 | # for i in range(10): 52 | # kps = bwd.forward(image_in, bbox) 53 | 54 | # # for video 55 | # from cv2box import CVVideoLoader 56 | # from tqdm import tqdm 57 | # 58 | # with CVVideoLoader('') as cvvl: 59 | # for _ in tqdm(range(len(cvvl))): 60 | # _, frame = cvvl.get() 61 | # kps = bwd.forward(image_in, bbox, show=True, mirror_test=False) 62 | -------------------------------------------------------------------------------- /art_lib/optical_flow_estimate/raft/raft_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/8/21 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | import numpy as np 7 | from art_lib.optical_flow_estimate.raft.utils import flow_to_image 8 | from cv2box import CVImage 9 | from apstone import ModelBase 10 | 11 | MODEL_ZOO = { 12 | # https://github.com/ibaiGorordo/ONNX-RAFT-Optical-Flow-Estimation 13 | # 0-255 RGB 14 | # input_name:['0', '1'], shape:[[1, 3, 480, 640], [1, 3, 480, 640]] 15 | # output_name:['23437', '23436'], shape:[[1, 2, 60, 80], [1, 2, 480, 640]] 16 | 'raft_kitti_iter20_480x640': { 17 | 'model_path': 'pretrain_models/art_lib/optical_flow_estimate/raft/iter20/raft_kitti_iter20_480x640.onnx' 18 | }, 19 | } 20 | 21 | 22 | class Raft(ModelBase): 23 | 24 | def __init__(self, model_name='raft_kitti_iter20_480x640', provider='gpu'): 25 | super(Raft, self).__init__(MODEL_ZOO[model_name], provider) 26 | self.input_width = 640 27 | self.input_height = 480 28 | self.mean = 0 29 | self.std = 1 30 | 31 | def forward(self, img1_, img2_): 32 | img_width, img_height = CVImage(img1_).bgr.shape[:-1][::-1] 33 | img1_input = CVImage(img1_).blob((self.input_width, self.input_height), self.mean, self.std, rgb=True) 34 | img2_input = CVImage(img2_).blob((self.input_width, self.input_height), self.mean, self.std, rgb=True) 35 | outputs = self.model.forward([img1_input, img2_input]) 36 | outputs = 
outputs[1][0].transpose(1, 2, 0) 37 | # draw 38 | flow_img_ = flow_to_image(outputs) 39 | flow_img_ = CVImage(flow_img_).resize((img_width, img_height)).rgb() 40 | return flow_img_ 41 | 42 | 43 | if __name__ == '__main__': 44 | # Initialize model 45 | model_name_ = 'raft_kitti_iter20_480x640' 46 | raft = Raft(model_name_) 47 | 48 | # Read inference image 49 | img1 = CVImage("resources/for_optical_flow/frame_0016.png").rgb() 50 | img2 = CVImage("resources/for_optical_flow/frame_0025.png").rgb() 51 | 52 | # Estimate flow and colorize it 53 | flow_map = raft.forward(img1, img2) 54 | combined_img = np.hstack((img1, img2, flow_map)) 55 | 56 | CVImage(combined_img).show(0, "Estimated flow") 57 | 58 | -------------------------------------------------------------------------------- /gpt_lib/langchain/model_config.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/5/9 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import torch.cuda 6 | import torch.backends 7 | import os 8 | 9 | embedding_model_dict = { 10 | "ernie-tiny": "nghuyong/ernie-3.0-nano-zh", 11 | "ernie-base": "nghuyong/ernie-3.0-base-zh", 12 | "text2vec-base": "shibing624/text2vec-base-chinese", 13 | "text2vec": "GanymedeNil/text2vec-large-chinese", 14 | } 15 | 16 | # Embedding model name 17 | EMBEDDING_MODEL = "text2vec" 18 | 19 | # Embedding running device 20 | EMBEDDING_DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu" 21 | 22 | # supported LLM models 23 | llm_model_dict = { 24 | "chatyuan": "ClueAI/ChatYuan-large-v2", 25 | "chatglm-6b-int4-qe": "THUDM/chatglm-6b-int4-qe", 26 | "chatglm-6b-int4": "THUDM/chatglm-6b-int4", 27 | "chatglm-6b-int8": "THUDM/chatglm-6b-int8", 28 | "chatglm-6b": "/mnt/ljt/models/hugging_face/chatglm-6b", 29 | } 30 | 31 | # LLM model name 32 | LLM_MODEL = "chatglm-6b" 33 | 34 | # LLM lora path,默认为空,如果有请直接指定文件夹路径 35 | LLM_LORA_PATH = "" 36 | USE_LORA = True if LLM_LORA_PATH else False 37 | 38 | # LLM streaming reponse 39 | STREAMING = True 40 | 41 | # Use p-tuning-v2 PrefixEncoder 42 | USE_PTUNING_V2 = False 43 | 44 | # LLM running device 45 | LLM_DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu" 46 | 47 | VS_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "vector_store") 48 | 49 | UPLOAD_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "content") 50 | 51 | # 基于上下文的prompt模版,请务必保留"{question}"和"{context}" 52 | PROMPT_TEMPLATE = """已知信息: 53 | {context} 54 | 55 | 根据上述已知信息,简洁和专业的来回答用户的问题。如果无法从中得到答案,请说 “根据已知信息无法回答该问题” 或 “没有提供足够的相关信息”,不允许在答案中添加编造成分,答案请使用中文。 问题是:{question}""" 56 | 57 | # PROMPT_TEMPLATE = """你是刘润,以刘润的语气回答问题,刘润是一个成功学大师,善于商业分析,喜欢使用商业术语解释问题,以下是他说过的一些话,尽量模仿这个口吻和说话方式: 58 | # {context} 59 | # 问题是:{question}""" 60 | 61 | # 匹配后单段上下文长度 62 | CHUNK_SIZE = 250 63 | 64 | # LLM input history length 65 | LLM_HISTORY_LEN = 3 66 | 67 | # return top-k text chunk from vector store 68 | VECTOR_SEARCH_TOP_K = 5 69 | 70 | NLTK_DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "nltk_data") 71 | -------------------------------------------------------------------------------- /art_lib/style_transfer/dct_net/dct_net.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/1/17 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import cv2 6 | import numpy 
as np 7 | from apstone import ModelBase 8 | from cv2box import CVImage 9 | 10 | from art_lib.style_transfer.dct_net.utils import resize_size, padTo16x 11 | """ 12 | ref https://www.modelscope.cn/models/damo/cv_unet_person-image-cartoon_compound-models/summary 13 | """ 14 | 15 | MODEL_ZOO = { 16 | '3d': { 17 | 'model_path': 'pretrain_models/art_lib/style_transfer/dctnet/3d_h.onnx', 18 | 'input_dynamic_shape': (720, 720, 3), 19 | }, 20 | 'anime': { 21 | 'model_path': 'pretrain_models/art_lib/style_transfer/dctnet/anime_h.onnx', 22 | 'input_dynamic_shape': (720, 720, 3), 23 | }, 24 | 'artstyle': { 25 | 'model_path': 'pretrain_models/art_lib/style_transfer/dctnet/artstyle_h.onnx', 26 | 'input_dynamic_shape': (720, 720, 3), 27 | }, 28 | 'handdrawn': { 29 | 'model_path': 'pretrain_models/art_lib/style_transfer/dctnet/handdrawn_h.onnx', 30 | 'input_dynamic_shape': (720, 720, 3), 31 | }, 32 | 'sketch': { 33 | 'model_path': 'pretrain_models/art_lib/style_transfer/dctnet/sketch_h.onnx', 34 | 'input_dynamic_shape': (720, 720, 3), 35 | }, 36 | } 37 | 38 | 39 | class DCTNet(ModelBase): 40 | def __init__(self, model_type='anime', provider='cpu'): 41 | super().__init__(MODEL_ZOO[model_type], provider) 42 | self.model_type = model_type 43 | 44 | def forward(self, img_in): 45 | # img: BGR input 46 | img_bgr = CVImage(img_in).bgr 47 | ori_h, ori_w, _ = img_bgr.shape 48 | img_bgr = resize_size(img_bgr, size=720).astype(np.float32) 49 | pad_bg, pad_h, pad_w = padTo16x(img_bgr) 50 | pad_bg = pad_bg.astype(np.float32) 51 | bg_res = self.model.forward(pad_bg)[0] 52 | res = bg_res[:pad_h, :pad_w, :] 53 | 54 | res = cv2.resize(res, (ori_w, ori_h), interpolation=cv2.INTER_AREA) 55 | res = np.clip(res, 0, 255).astype(np.uint8) 56 | return res 57 | 58 | 59 | if __name__ == '__main__': 60 | image_p = 'resources/test3.jpg' 61 | dct = DCTNet(model_type='3d') 62 | out_img = dct.forward(image_p) 63 | CVImage(out_img).show() 64 | -------------------------------------------------------------------------------- /mocap_lib/get_body_bbox_kps.py: -------------------------------------------------------------------------------- 1 | from body_lib import BodyBboxDetector 2 | from mocap_lib import BodyWholebodyDetector 3 | from mocap_lib.skeleton_transfer.cocowholebody_2_openpose import cocowb_2_openpose 4 | from cv2box import CVImage, CVFile 5 | from cv2box.cv_gears import CVVideoThread, Consumer, Linker, Queue 6 | import numpy as np 7 | 8 | 9 | class BodyBboxThread(Linker): 10 | def __init__(self, queue_list: list, fps_counter): 11 | super().__init__(queue_list, fps_counter=fps_counter) 12 | 13 | self.bbd = BodyBboxDetector(model='yolox_tiny_trt16', threshold=0.5, provider='gpu') 14 | 15 | def forward_func(self, something_in): 16 | # do your work here. 17 | image_in = something_in 18 | something_out = [image_in, self.bbd.forward(image_in, max_bbox_num=1)[0]] 19 | return something_out 20 | 21 | 22 | class BodyKpThread(Consumer): 23 | def __init__(self, queue_list: list, out_pkl_path_, fps_counter): 24 | super().__init__(queue_list, fps_counter=fps_counter) 25 | # add init here 26 | self.bwd = BodyWholebodyDetector(model_type='hrnet_w48_384_dark', provider='gpu') 27 | self.out_pkl_path = out_pkl_path_ 28 | self.kp_list = [] 29 | 30 | def exit_func(self): 31 | super(BodyKpThread, self).exit_func() 32 | if self.out_pkl_path: 33 | CVFile(self.out_pkl_path).pickle_write(np.array(self.kp_list)) 34 | 35 | def forward_func(self, something_in): 36 | # do your work here. 
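        # something_in is the [frame, person_bbox] pair emitted by BodyBboxThread;
        # run the wholebody keypoint model on it, remap the coco-wholebody output
        # to openpose body-25 plus both hands, and buffer the stacked keypoints
        # so exit_func() can dump them to the optional pkl file.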
37 | something_out = self.bwd.forward(something_in[0], something_in[1], show=False, mirror_test=False) 38 | 39 | left_hand_kps, right_hand_kps, openpose_25_kps = cocowb_2_openpose(something_out) 40 | whole_kps = np.concatenate((openpose_25_kps, left_hand_kps, right_hand_kps), 0) 41 | if self.out_pkl_path: 42 | self.kp_list.append(whole_kps) 43 | # print(whole_kps.shape) 44 | # print(right_hand_kps) 45 | # print(openpose_25_kps) 46 | 47 | 48 | if __name__ == '__main__': 49 | # 4K 67 fps 50 | video_p = '' 51 | out_pkl_path = None 52 | 53 | q1 = Queue(5) 54 | q2 = Queue(5) 55 | c1 = CVVideoThread(video_p, [q1], fps_counter=False) 56 | b1 = BodyBboxThread([q1, q2], fps_counter=True) 57 | b2 = BodyKpThread([q2], out_pkl_path, fps_counter=True) 58 | c1.start() 59 | b1.start() 60 | b2.start() 61 | -------------------------------------------------------------------------------- /mocap_lib/triangulate/anipose_triangulate.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/7/15 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | from cv2box import CVFile, CVCamera 7 | import numpy as np 8 | 9 | from mocap_lib.skeleton_transfer.bone_links import BONE_CONFIG 10 | 11 | 12 | class AniposeTriangulate: 13 | def __init__(self, pkl_path_, pkl_mode_='anipose'): 14 | if pkl_mode_ == 'multical': 15 | self.c_group = CVCamera(pkl_path_).load_camera_group() 16 | elif pkl_mode_ == 'anipose': 17 | self.c_group = CVFile(pkl_path_).data 18 | 19 | def triangulate(self, multi_view_kps): 20 | """ 21 | 22 | Args: 23 | multi_view_kps: N_view * N_kps * 3 24 | 25 | Returns: 26 | kps_3d: N_kps * 3 27 | 28 | """ 29 | # multi_view_kps = multi_view_kps[:, :, 0:2] 30 | 31 | # # default total 60fps 32 | # kps_3d = self.c_group.triangulate(multi_view_kps, undistort=True, progress=False) 33 | 34 | # # ransac slow total 10fps 35 | # kps_3d = self.c_group.triangulate_ransac(multi_view_kps, progress=False)[0] 36 | 37 | # # offline shape CxNxJx2 38 | nframes = multi_view_kps.shape[0] 39 | kps_3d = self.c_group.triangulate_optim(multi_view_kps, 40 | constraints=BONE_CONFIG['openpose_bodyhand67']['kintree'], 41 | verbose=True, init_progress=True) 42 | 43 | return kps_3d 44 | 45 | def project(self, kps_3d_): 46 | """ 47 | 48 | Args: 49 | kps_3d_: N_kps * 3 50 | 51 | Returns: 52 | kps_2d: N_view * N_kps * 2 53 | 54 | """ 55 | kps_2d = self.c_group.project(kps_3d_) 56 | return kps_2d 57 | 58 | 59 | if __name__ == '__main__': 60 | from cv2box.utils import get_path_by_ext 61 | 62 | # triangulate 63 | at = AniposeTriangulate('cgroup.pkl') 64 | kp2ds = [] 65 | for file in get_path_by_ext('', ['.pkl']): 66 | # 坐标变换 67 | kp2ds.append(np.load(file, allow_pickle=True)) 68 | # kp3ds.append(np.load(file, allow_pickle=True)[None]) 69 | kp2ds = np.array(kp2ds).transpose((1, 0, 2, 3))[:, 600:900, :, :2] 70 | results = at.triangulate(kp2ds) 71 | CVFile('3dkp_optim.pkl').pickle_write(results) 72 | 73 | -------------------------------------------------------------------------------- /body_lib/body_bbox_detector/body_bbox_detector_mmdet.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/6/27 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | from cv2box import CVImage, MyFpsCounter 7 | from apstone.wrappers.mmlab_wrapper import BboxDetectorBase 8 | 9 | MODEL_ZOO = { 10 | # gpu 55fps 11 | 'yolox_tiny': { 12 | 'model_path': 
'pretrain_models/body_lib/body_bbox_detector/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906_dynamic.onnx', 13 | 'model_input_size': (416, 416), 14 | }, 15 | # 207fps 16 | 'yolox_tiny_trt16': { 17 | 'model_path': 'pretrain_models/body_lib/body_bbox_detector/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906_static.engine', 18 | 'model_input_size': (416, 416), 19 | }, 20 | # gpu 49fps 21 | 'yolox_s': { 22 | 'model_path': 'pretrain_models/body_lib/body_bbox_detector/yolox_s_8x8_300e_coco_20211121_095711-4592a793_dynamic.onnx', 23 | 'model_input_size': (640, 640), 24 | }, 25 | } 26 | 27 | 28 | class BodyBboxDetector(BboxDetectorBase): 29 | def __init__(self, model='yolox_tiny', threshold=0.5, provider='gpu'): 30 | self.threshold = threshold 31 | super().__init__(MODEL_ZOO[model], provider) 32 | 33 | def forward(self, image_in_, show=False, max_bbox_num=1): 34 | """ 35 | Args: 36 | image_in_: 37 | show: 38 | max_bbox_num: 39 | Returns: N*4 40 | """ 41 | model_results = self.model.forward(self.preprocess(image_in_)) 42 | results_after = self.postprocess(model_results, self.threshold, max_bbox_num=max_bbox_num) 43 | if show: 44 | self.show(image_in_, results_after) 45 | return results_after 46 | 47 | 48 | if __name__ == '__main__': 49 | 50 | image_path = 'resources/for_pose/t_pose_1080p.jpeg' 51 | image_in = CVImage(image_path).bgr 52 | 53 | # yolox_tiny yolox_s yolox_tiny_static_trt 54 | bbd = BodyBboxDetector(model='yolox_s', provider='gpu') 55 | 56 | bboxes = bbd.forward(image_in, show=True, max_bbox_num=3) 57 | 58 | from cv2box import CVBbox 59 | 60 | bboxes = CVBbox(bboxes).area_center_filter(image_in.shape) 61 | print(bboxes) 62 | 63 | with MyFpsCounter('model forward 10 times fps:') as mfc: 64 | for i in range(10): 65 | bboxes = bbd.forward(image_in, max_bbox_num=3) 66 | -------------------------------------------------------------------------------- /seg_lib/segformer_b2_clothes/segformer_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/7/19 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from apstone import ModelBase 6 | import numpy as np 7 | from cv2box import CVImage 8 | from PIL import Image 9 | 10 | """ 11 | 0-1 RGB 12 | input_name:['pixel_values'], shape:[['batch', 'num_channels', 'height', 'width']] 13 | output_name:['last_hidden_state'], shape:[['batch', 'sequence', 'Transposelast_hidden_state_dim_2', 'Transposelast_hidden_state_dim_3']] 14 | 15 | background 0 16 | hat 1 17 | hair 2 18 | sunglass 3 19 | upper-clothes 4 20 | skirt 5 21 | pants 6 22 | dress 7 23 | belt 8 24 | left-shoe 9 25 | right-shoe 10 26 | face 11 27 | left-leg 12 28 | right-leg 13 29 | left-arm 14 30 | right-arm 15 31 | bag 16 32 | scarf 17 33 | """ 34 | 35 | MODEL_ZOO = { 36 | # https://huggingface.co/mattmdjaga/segformer_b2_clothes 37 | 'segformer_b2_clothes': { 38 | 'model_path': 'pretrain_models/seg_lib/segformer_clothes/segformer_b2_clothes.onnx' 39 | }, 40 | } 41 | 42 | 43 | class SegFormer(ModelBase): 44 | def __init__(self, model_name='segformer_b2_clothes', provider='gpu'): 45 | super(SegFormer, self).__init__(MODEL_ZOO[model_name], provider) 46 | self.mean = [0.485, 0.456, 0.406] 47 | self.std = [0.229, 0.224, 0.225] 48 | self.input_size = (512, 512) 49 | 50 | def forward(self, image_in): 51 | """ 52 | Args: 53 | image_in: CVImage class H*W*C 54 | Returns: h*w*1 55 | """ 56 | input_size_ = CVImage(image_in).bgr.shape[:2] 57 | input_image = 
CVImage(image_in).blob_innormal(self.input_size, input_mean=self.mean, input_std=self.std, 58 | rgb=True) 59 | pred_mask = self.model.forward(input_image)[0] 60 | pred_mask = np.transpose(pred_mask[0], (1, 2, 0)) 61 | pred_mask = CVImage(pred_mask).resize(input_size_[::-1]).bgr 62 | pred_mask = pred_mask.argmax(axis=2)[..., np.newaxis] 63 | return pred_mask.astype(np.int8) 64 | 65 | 66 | if __name__ == '__main__': 67 | img_p = 'resources/for_pose/yoga2.jpg' 68 | 69 | sf = SegFormer(model_name='segformer_b2_clothes') 70 | mask_img = sf.forward(img_p) 71 | CVImage(mask_img).show() 72 | print(mask_img.shape) 73 | -------------------------------------------------------------------------------- /gpt_lib/models/llama.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/5/12 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from transformers import LlamaForCausalLM, LlamaTokenizer 6 | import torch 7 | import os 8 | import platform 9 | from accelerate import init_empty_weights 10 | from accelerate.utils import get_balanced_memory, infer_auto_device_map 11 | from transformers import AutoConfig, GenerationConfig 12 | from transformers.dynamic_module_utils import get_class_from_dynamic_module 13 | from transformers.modeling_utils import no_init_weights 14 | from transformers.utils import ContextManagers 15 | 16 | from gpt_lib.models.llm_base import LLM 17 | 18 | MODEL_ZOO = { 19 | 'llama-7b': { 20 | 'model_path': '/mnt/ljt/models/hugging_face/llama-7b-hf', 21 | 'model': LlamaForCausalLM, 22 | 'tokenizer': LlamaTokenizer, 23 | 'config': AutoConfig, 24 | 'prompt_template': """Below is an instruction that describes a task. Write a response that appropriately completes the request. 
25 | ### Instruction: 26 | {instruction} 27 | ### Response:""", 28 | }, 29 | } 30 | 31 | 32 | class LLAMA(LLM): 33 | def __init__(self, model_name, load_in_8bit): 34 | self.model_path = MODEL_ZOO[model_name]['model_path'] 35 | self.config = MODEL_ZOO[model_name]['config'].from_pretrained(self.model_path, return_unused_kwargs=True, 36 | trust_remote_code=True)[0] 37 | super().__init__(MODEL_ZOO[model_name], load_in_8bit, self.get_device_map) 38 | 39 | def get_device_map(self): 40 | return 'auto' 41 | 42 | def generate(self, prompt): 43 | generation_config = GenerationConfig(temperature=0.1, 44 | top_p=0.75, 45 | top_k=40, 46 | num_beams=4, 47 | max_new_tokens=512, 48 | do_sample=True, 49 | no_repeat_ngram_size=6, 50 | repetition_penalty=1.8, 51 | ) 52 | result = self.generate_base(prompt, generation_config) 53 | return result 54 | 55 | 56 | if __name__ == '__main__': 57 | llama = LLAMA('llama-7b', load_in_8bit=True) 58 | print(llama.generate('你好')) 59 | -------------------------------------------------------------------------------- /seg_lib/ppmattingv2/ppmattingv2_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/2/3 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from apstone import ModelBase 6 | import cv2 7 | import numpy as np 8 | import copy 9 | from cv2box import CVImage 10 | 11 | MODEL_ZOO = { 12 | # https://github.com/jiachen0212/pp_mattingv2 13 | '384x480': { 14 | 'model_path': 'pretrain_models/seg_lib/ppmattingv2/ppmattingv2_stdc1_human_384x480.onnx' 15 | }, 16 | } 17 | 18 | 19 | class PPMattingV2(ModelBase): 20 | def __init__(self, model_name='384x480', provider='gpu'): 21 | super(PPMattingV2, self).__init__(MODEL_ZOO[model_name], provider) 22 | self.conf_threshold = 0.65 23 | 24 | def prepare_input(self, image): 25 | input_image = cv2.resize(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), dsize=(self.input_width, self.input_height)) 26 | input_image = input_image.astype(np.float32) / 255.0 27 | input_image = input_image.transpose(2, 0, 1) 28 | input_image = np.expand_dims(input_image, axis=0) 29 | return input_image 30 | 31 | def forward(self, image): 32 | input_image = self.prepare_input(image) 33 | 34 | # Perform inference on the image 35 | result = self.forward(input_image) 36 | 37 | # Post process:squeeze 38 | segmentation_map = result[0] 39 | segmentation_map = np.squeeze(segmentation_map) 40 | 41 | image_width, image_height = image.shape[1], image.shape[0] 42 | dst_image = copy.deepcopy(image) 43 | segmentation_map = cv2.resize( 44 | segmentation_map, 45 | dsize=(image_width, image_height), 46 | interpolation=cv2.INTER_LINEAR, 47 | ) 48 | 49 | # color list 50 | color_image_list = [] 51 | # ID 0:BackGround 52 | bg_image = np.zeros(image.shape, dtype=np.uint8) 53 | bg_image[:] = (0, 0, 0) 54 | color_image_list.append(bg_image) 55 | # ID 1:Human 56 | bg_image = np.zeros(image.shape, dtype=np.uint8) 57 | bg_image[:] = (0, 255, 0) 58 | color_image_list.append(bg_image) 59 | 60 | mask = np.where(segmentation_map > self.conf_threshold, 0, 1) 61 | mask = np.stack((mask,) * 3, axis=-1).astype('uint8') 62 | mask_image = np.where(mask, dst_image, color_image_list[1]) 63 | dst_image = cv2.addWeighted(dst_image, 0.5, mask_image, 0.5, 1.0) 64 | return dst_image 65 | 66 | 67 | if __name__ == '__main__': 68 | ppm = PPMattingV2() 69 | img_p = '' 70 | ppm.forward(CVImage(img_p).bgr) 71 | -------------------------------------------------------------------------------- 
/hand_lib/hand_detector/hand_detector_d2/hand_detector_d2_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/2/23 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from detectron2.config import get_cfg 6 | from detectron2.engine import DefaultPredictor 7 | from detectron2.data import DatasetCatalog, MetadataCatalog 8 | from detectron2.utils.visualizer import Visualizer 9 | from cv2box import CVImage 10 | import numpy as np 11 | 12 | class ThirdViewDetector: 13 | """ 14 | Hand Detector for third-view input.(https://github.com/ddshan/hand_detector.d2) 15 | """ 16 | 17 | def __init__(self): 18 | print("Loading Third View Hand Detector") 19 | self.__load_hand_detector() 20 | self.cfg = None 21 | 22 | def __load_hand_detector(self): 23 | # load cfg and model 24 | self.cfg = get_cfg() 25 | self.cfg.merge_from_file("pretrain_models/digital_human/hand_detector_d2/faster_rcnn_X_101_32x8d_FPN_3x_100DOH.yaml") 26 | self.cfg.MODEL.WEIGHTS = 'pretrain_models/digital_human/hand_detector_d2/model_0529999.pth' # add model weight here 27 | self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3 # 0.5 , use low thresh to increase recall 28 | self.hand_detector = DefaultPredictor(self.cfg) 29 | 30 | def get_cfg(self): 31 | return self.cfg 32 | 33 | def forward(self, img, show=False): 34 | results = self.hand_detector(img) 35 | final_image = None 36 | if show: 37 | v = Visualizer(img[:, :, ::-1], MetadataCatalog.get("100DOH_hand_trainval"), scale=1.2) 38 | v = v.draw_instance_predictions(results["instances"].to("cpu")) 39 | final_image = v.get_image()[:, :, ::-1] 40 | CVImage(final_image).show(1) 41 | return results, final_image 42 | 43 | def get_hand_bbox(self, img): 44 | bbox_tensor = self.hand_detector(img)['instances'].pred_boxes 45 | bboxes = bbox_tensor.tensor.cpu().numpy() 46 | return bboxes 47 | 48 | 49 | if __name__ == '__main__': 50 | # data path 51 | test_img = 'test_img/test1.jpg' 52 | im = CVImage(test_img).bgr 53 | 54 | tvd = ThirdViewDetector() 55 | outputs = tvd.forward(im) 56 | 57 | v = Visualizer(im[:, :, ::-1], MetadataCatalog.get("100DOH_hand_trainval"), scale=1.2) 58 | v = v.draw_instance_predictions(outputs["instances"].to("cpu")) 59 | CVImage(v.get_image()[:, :, ::-1]).show() 60 | 61 | # print 62 | print(outputs["instances"].pred_classes) 63 | bboxs = np.array(outputs["instances"].pred_boxes.tensor.to('cpu')) 64 | print(bboxs) 65 | -------------------------------------------------------------------------------- /mocap_lib/skeleton_transfer/keypoints_map.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/6/15 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | 7 | # Just informative (from, e.g., https://www.immersivelimit.com/tutorials/create-coco-annotations-from-scratch) 8 | COCO_JOINTS = { 9 | 0: "Nose", 10 | 1: "LEye", 11 | 2: "REye", 12 | 3: "LEar", 13 | 4: "REar", 14 | 5: "LShoulder", 15 | 6: "RShoulder", 16 | 7: "LElbow", 17 | 8: "RElbow", 18 | 9: "LWrist", 19 | 10: "RWrist", 20 | 11: "LHip", 21 | 12: "RHip", 22 | 13: "LKnee", 23 | 14: "RKnee", 24 | 15: "LAnkle", 25 | 16: "RAnkle" 26 | # It has no neck, you can add it (pos 17) for drawing or for converting to openpose 27 | } 28 | 29 | COCO_WHOLE_BODY_JOINTS = { 30 | 0: "Nose", 31 | 1: "LEye", 32 | 2: "REye", 33 | 3: "LEar", 34 | 4: "REar", 35 | 5: "LShoulder", 36 | 6: "RShoulder", 37 | 7: "LElbow", 38 | 8: "RElbow", 39 
| 9: "LWrist", 40 | 10: "RWrist", 41 | 11: "LHip", 42 | 12: "RHip", 43 | 13: "LKnee", 44 | 14: "RKnee", 45 | 15: "LAnkle", 46 | 16: "RAnkle", 47 | 17: "LBigToe", 48 | 18: "LSmallToe", 49 | 19: "LHeel", 50 | 20: "RBigToe", 51 | 21: "RSmallToe", 52 | 22: "RHeel", 53 | # It has no neck, you can add it (pos 17) for drawing or for converting to openpose 54 | } 55 | 56 | HALPE_JOINTS = { 57 | 0: "Nose", 58 | 1: "LEye", 59 | 2: "REye", 60 | 3: "LEar", 61 | 4: "REar", 62 | 5: "LShoulder", 63 | 6: "RShoulder", 64 | 7: "LElbow", 65 | 8: "RElbow", 66 | 9: "LWrist", 67 | 10: "RWrist", 68 | 11: "LHip", 69 | 12: "RHip", 70 | 13: "LKnee", 71 | 14: "Rknee", 72 | 15: "LAnkle", 73 | 16: "RAnkle", 74 | 17: "Head", 75 | 18: "Neck", 76 | 19: "Hip", 77 | 20: "LBigToe", 78 | 21: "RBigToe", 79 | 22: "LSmallToe", 80 | 23: "RSmallToe", 81 | 24: "LHeel", 82 | 25: "RHeel", 83 | } 84 | 85 | POSE_BODY_25_BODY_PARTS = [ 86 | "Nose", 87 | "Neck", 88 | "RShoulder", 89 | "RElbow", 90 | "RWrist", 91 | "LShoulder", 92 | "LElbow", 93 | "LWrist", 94 | "MidHip", 95 | "RHip", 96 | "RKnee", 97 | "RAnkle", 98 | "LHip", 99 | "LKnee", 100 | "LAnkle", 101 | "REye", 102 | "LEye", 103 | "REar", 104 | "LEar", 105 | "LBigToe", 106 | "LSmallToe", 107 | "LHeel", 108 | "RBigToe", 109 | "RSmallToe", 110 | "RHeel", 111 | "Background" 112 | ] 113 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # my ignore 132 | cache/ 133 | pretrain_models/ 134 | private_models/ 135 | sd_models/ 136 | skeleton_imgs/ 137 | resources/ 138 | -------------------------------------------------------------------------------- /SPEEDTABLE.md: -------------------------------------------------------------------------------- 1 | ### Notes 2 | 3 | - '-' means use last result from top 4 | - empty means no test results 5 | - onnx convert based on [mmdeploy](https://github.com/open-mmlab/mmdeploy) 6 | - model infer fps does not contain pre/post-process time cost 7 | - trt & trt16 based on onnxruntime tensorrt EP 8 | - input array located on cpu (io-binding done by onnxruntime itself) 9 | - more info check [onnx test codes]() 10 | 11 | 12 | 13 | ### Environment 14 | 15 | | Name | Attr | 16 | | ---- | ----------------------------------------- | 17 | | Sys | Ubuntu 20.04 | 18 | | GPU | NVIDIA GeForce RTX 3080 10G | 19 | | CPU | Intel® Core™ i9-10850K CPU @ 3.60GHz × 20 | 20 | | MEM | 32G | 21 | | Libs | onnxruntime-gpu=1.13 | 22 | 23 | 24 | 25 | ### Pose Detect 26 | 27 | 28 | 29 | | MMPose | input shape | size | cpu infer fps | gpu infer fps | trt infer fps | trt16 infer fps | 30 | | ------------------------------------------------------------ | ---------------- | ------ | ------------- | ------------- | ------------- | --------------- | 31 | | [pvtv2-b2_static_coco](https://github.com/open-mmlab/mmpose/tree/master/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/pvtv2-b2_coco_256x192.py) | [1, 3, 256, 192] | 116.3m | 4.9 | 73 | 184 | 257 | 32 | | | [4, 3, 256, 192] | - | 2.5 | 47 | 106 | 178 | 33 | | | | | | | | | 34 | | [hrnet_w48_dark+_dynamic_cocowholebody](https://github.com/open-mmlab/mmpose/tree/master/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark_plus.py) | [4, 3, 384, 288] | 254m | 2.9 | 31 | 39 | 83 | 35 | | | | | | | | | 36 | | **MMPose Post-process** | | | | | | | 37 | | gaussian_blur_k17 | [4, 133, 96, 72] | 0.19m | 7.9 | 119 | 147 | 142 | 38 | | | | | | | | | 39 | 40 | -------------------------------------------------------------------------------- /sd_lib/ip_adapter/ip_adapter_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/8/23 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import torch 6 | from PIL import Image 7 | from cv2box import CVImage 8 | from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, StableDiffusionInpaintPipelineLegacy, \ 9 | DDIMScheduler, AutoencoderKL 10 | 11 | from sd_lib.ip_adapter.models import IPAdapter, IPAdapterPlus 12 | 13 | SD_PRETRAIN = './sd_models/stable-diffusion-v1-5' 14 | VAE_PRETRAIN = './sd_models/stabilityai_sd-vae-ft-mse' 15 | CLIP_IMAGE_PRETRAIN = './sd_models/clip_image_encoder' 16 | IMAGE_PROJ_PRETRAIN = './sd_models/ip_adapter_image_proj/ip-adapter_sd15.bin' 17 | 
IMAGE_PROJ_PLUS_PRETRAIN = './sd_models/ip_adapter_image_proj/ip-adapter-plus_sd15.bin' 18 | 19 | 20 | def image_grid(imgs, rows, cols): 21 | assert len(imgs) == rows * cols 22 | 23 | w, h = imgs[0].size 24 | grid = Image.new('RGB', size=(cols * w, rows * h)) 25 | grid_w, grid_h = grid.size 26 | 27 | for i, img in enumerate(imgs): 28 | grid.paste(img, box=(i % cols * w, i // cols * h)) 29 | return grid 30 | 31 | 32 | class IpAdapterAPI: 33 | def __init__(self, device="cuda"): 34 | # load SD pipeline 35 | noise_scheduler = DDIMScheduler( 36 | num_train_timesteps=1000, 37 | beta_start=0.00085, 38 | beta_end=0.012, 39 | beta_schedule="scaled_linear", 40 | clip_sample=False, 41 | set_alpha_to_one=False, 42 | steps_offset=1, 43 | ) 44 | vae = AutoencoderKL.from_pretrained(VAE_PRETRAIN).to(dtype=torch.float16) 45 | 46 | sd_pipe = StableDiffusionPipeline.from_pretrained( 47 | SD_PRETRAIN, 48 | torch_dtype=torch.float16, 49 | scheduler=noise_scheduler, 50 | vae=vae, 51 | feature_extractor=None, 52 | safety_checker=None 53 | ) 54 | 55 | # load ip-adapter 56 | # self.ip_model = IPAdapter(sd_pipe, CLIP_IMAGE_PRETRAIN, IMAGE_PROJ_PRETRAIN, device) 57 | self.ip_model = IPAdapterPlus(sd_pipe, CLIP_IMAGE_PRETRAIN, IMAGE_PROJ_PLUS_PRETRAIN, device, num_tokens=16) 58 | 59 | def forward(self, image_pil): 60 | # generate image variations 61 | images = self.ip_model.generate(pil_image=image_pil, num_samples=4, num_inference_steps=50, seed=42) 62 | grid = image_grid(images, 1, 4) 63 | grid.show() 64 | return grid 65 | 66 | 67 | if __name__ == '__main__': 68 | # read image prompt 69 | ia = IpAdapterAPI() 70 | image = Image.open('resources/for_sd/girl_reading_512_crop.png') 71 | image.resize((256, 256)) 72 | out_image = ia.forward(image) 73 | -------------------------------------------------------------------------------- /data_lib/dataset_convert/coco-annotator_2_coco-mmdet.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/8/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | for hand detect 7 | """ 8 | from cv2box import CVFile 9 | from tqdm import tqdm 10 | 11 | 12 | def del_other_category(json_in_, json_out_): 13 | json_data = CVFile(json_in_).data 14 | out_data = json_data.copy() 15 | out_data['annotations'] = [] 16 | out_data['categories'] = json_data['categories'][:1] 17 | out_data['categories'][0]['name'] = 'hand' 18 | out_data['categories'][0]['id'] = 1 19 | print(len(json_data['annotations'])) 20 | for i in tqdm(range(len(json_data['annotations']))): 21 | dummy = json_data['annotations'][i] 22 | if dummy['category_id'] != 1 and 'bbox' in dummy.keys(): 23 | dummy['category_id'] = 1 24 | out_data['annotations'].append(dummy) 25 | 26 | print(len(out_data['annotations'])) 27 | CVFile(json_out_).json_write(out_data) 28 | 29 | 30 | def del_some_name(json_in_, json_out_): 31 | json_data = CVFile(json_in_).data 32 | out_data = json_data.copy() 33 | out_data['annotations'] = [] 34 | out_data['images'] = [] 35 | print(len(json_data['images'])) 36 | print(len(json_data['annotations'])) 37 | del_image_id_list = [] 38 | 39 | for i in tqdm(range(len(json_data['images']))): 40 | dummy = json_data['images'][i] 41 | if '0707_4_' in dummy['file_name']: 42 | del_image_id_list.append(dummy['id']) 43 | else: 44 | out_data['images'].append(dummy) 45 | 46 | for i in tqdm(range(len(json_data['annotations']))): 47 | dummy = json_data['annotations'][i] 48 | if dummy['image_id'] not in del_image_id_list: 49 | 
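            # keep only the annotations whose parent image survived the
            # '0707_4_' filename filter above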
out_data['annotations'].append(dummy) 50 | 51 | print(len(out_data['images'])) 52 | print(len(out_data['annotations'])) 53 | CVFile(json_out_).json_write(out_data) 54 | 55 | 56 | def concat_2_json(json_in_1, json_in_2, json_out_): 57 | json_data_1 = CVFile(json_in_1).data 58 | json_data_2 = CVFile(json_in_2).data 59 | out_data = json_data_1.copy() 60 | out_data['annotations'] += json_data_2['annotations'] 61 | out_data['images'] += json_data_2['images'] 62 | print(len(out_data['annotations'])) 63 | print(len(out_data['images'])) 64 | CVFile(json_out_).json_write(out_data) 65 | 66 | 67 | if __name__ == '__main__': 68 | json_in = '/datasets_TVCOCO_hand_train/annotations/train.json' 69 | json_out = '/datasets_TVCOCO_hand_train/annotations/train_out.json' 70 | del_other_category(json_in, json_out) 71 | 72 | # json_in = '' 73 | # json_out = '' 74 | # del_some_name(json_in, json_out) 75 | 76 | # json_in_1 = '' 77 | # json_in_2 = '' 78 | # json_out = '' 79 | # concat_2_json(json_in_1, json_in_2, json_out) 80 | -------------------------------------------------------------------------------- /mocap_lib/smooth_filter/smooth_filter.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/7/6 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from .smoothnet_api import SmoothNetFilter 6 | from .one_euro_api import OneEuroFilter 7 | import numpy as np 8 | 9 | SMOOTH_NET_8 = 'pretrain_models/smooth_filter/smoothnet_ws8_h36m.pth' 10 | SMOOTH_NET_16 = 'pretrain_models/smooth_filter/smoothnet_ws16_h36m.pth' 11 | SMOOTH_NET_32 = 'pretrain_models/smooth_filter/smoothnet_ws32_h36m.pth' 12 | SMOOTH_NET_64 = 'pretrain_models/smooth_filter/smoothnet_ws64_h36m.pth' 13 | 14 | 15 | class SmoothFilter: 16 | def __init__(self, filter_type, **kwargs): 17 | 18 | if filter_type == 'one_euro': 19 | self.filter = OneEuroFilter() 20 | # can not use ! 
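        # Only the one-euro path above is usable for now; the SmoothNetFilter
        # variants below (window sizes 8/16/32/64, H36M checkpoints) are kept
        # commented out for reference.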
21 | # if filter_type == 'smooth_net_8': 22 | # self.window = 8 23 | # self.filter = SmoothNetFilter(8, SMOOTH_NET_8, root_index=kwargs['root_index']) 24 | # elif filter_type == 'smooth_net_16': 25 | # self.window = 16 26 | # self.filter = SmoothNetFilter(16, SMOOTH_NET_16, root_index=kwargs['root_index']) 27 | # elif filter_type == 'smooth_net_32': 28 | # self.window = 32 29 | # self.filter = SmoothNetFilter(32, SMOOTH_NET_32, root_index=kwargs['root_index']) 30 | # elif filter_type == 'smooth_net_64': 31 | # self.window = 64 32 | # self.filter = SmoothNetFilter(64, SMOOTH_NET_64, root_index=kwargs['root_index']) 33 | 34 | # self.history_list = [[], [], []] 35 | # self.thres_list = [[], [], []] 36 | 37 | def forward(self, x): 38 | """ 39 | 40 | Args: 41 | x: [N, 2] or [N, 3] 42 | 43 | Returns: 44 | 45 | """ 46 | # history_now = self.history_list[id] 47 | # thres_list_now = self.thres_list[id] 48 | 49 | # if x.shape[1] == 3: 50 | # x_new = x[:, :2] 51 | # thres = x[:, 2] 52 | # else: 53 | # x_new = x 54 | # thres = 0 55 | 56 | results = self.filter.forward(np.array([x.copy()])) 57 | return results[0] 58 | # return np.concatenate((results[0], thres.reshape(-1, 1)), 1) 59 | 60 | # if len(self.history_list[id]) < self.window: 61 | # self.history_list[id].append(x_new) 62 | # self.thres_list[id].append(thres) 63 | # return x 64 | # else: 65 | # self.history_list[id].append(x_new) 66 | # self.thres_list[id].append(thres) 67 | # self.history_list[id] = self.history_list[id][-self.window:] 68 | # self.thres_list[id] = self.thres_list[id][-self.window:] 69 | # results = self.filter.forward(np.array(self.history_list[id].copy())) 70 | # 71 | # return np.concatenate((results[-1], self.thres_list[id][-1].reshape(-1,1)), 1) 72 | -------------------------------------------------------------------------------- /gpt_lib/chatglm6b_finetune/tokenize_dataset_rows.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/4/24 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | ref https://github.com/mymusise/ChatGLM-Tuning/blob/master/tokenize_dataset_rows.py 7 | """ 8 | import argparse 9 | import json 10 | from tqdm import tqdm 11 | import datasets 12 | import transformers 13 | from cv2box import CVFile 14 | 15 | # init chatglm-6b model 16 | # model_name = 'THUDM/chatglm-6b' 17 | model_name = '/mnt/ljt/models/hugging_face/chatglm-6b' 18 | tokenizer = transformers.AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) 19 | config = transformers.AutoConfig.from_pretrained(model_name, trust_remote_code=True, device_map='auto') 20 | 21 | # global init 22 | prompt_row_name = 'instruction' 23 | target_row_name = 'output' 24 | 25 | 26 | def preprocess(example: dict, max_seq_length): 27 | prompt = example[prompt_row_name] 28 | target = example[target_row_name] 29 | prompt_ids = tokenizer.encode(prompt, max_length=max_seq_length, truncation=True) 30 | target_ids = tokenizer.encode( 31 | target, 32 | max_length=max_seq_length, 33 | truncation=True, 34 | add_special_tokens=False) 35 | input_ids = prompt_ids + target_ids + [config.eos_token_id] 36 | return {"input_ids": input_ids, "seq_len": len(prompt_ids)} 37 | 38 | 39 | # def read_jsonl(path, max_seq_length, skip_overlength=False): 40 | # with open(path, "r") as f: 41 | # for line in tqdm(f.readlines()): 42 | # example = json.loads(line) 43 | # feature = preprocess(example, max_seq_length) 44 | # if skip_overlength and len(feature["input_ids"]) > 
max_seq_length: 45 | # continue 46 | # feature["input_ids"] = feature["input_ids"][:max_seq_length] 47 | # yield feature 48 | 49 | 50 | def read_json(path, max_seq_length, skip_overlength=False): 51 | """ 52 | for alpaca-COT(https://github.com/PhoebusSi/Alpaca-CoT) format datasets 53 | """ 54 | json_data = CVFile(path).data 55 | for example in tqdm(json_data): 56 | feature = preprocess(example, max_seq_length) 57 | if skip_overlength and len(feature["input_ids"]) > max_seq_length: 58 | continue 59 | feature["input_ids"] = feature["input_ids"][:max_seq_length] 60 | yield feature 61 | 62 | 63 | 64 | def main(): 65 | parser = argparse.ArgumentParser() 66 | parser.add_argument("--data_path", type=str, default="/mnt/ljt/dataset/NLP/liurun_99.json") 67 | parser.add_argument("--save_path", type=str, default="/mnt/ljt/dataset/NLP/liurun_99") 68 | parser.add_argument("--max_seq_length", type=int, default=384) 69 | parser.add_argument("--skip_overlength", type=bool, default=False) 70 | args = parser.parse_args() 71 | 72 | dataset = datasets.Dataset.from_generator( 73 | lambda: read_json(args.data_path, args.max_seq_length, args.skip_overlength) 74 | ) 75 | dataset.save_to_disk(args.save_path) 76 | 77 | 78 | if __name__ == "__main__": 79 | main() 80 | -------------------------------------------------------------------------------- /gpt_lib/models/chatglm_6b.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/5/9 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from transformers import AutoModel, AutoTokenizer 6 | from transformers import AutoConfig 7 | from transformers.dynamic_module_utils import get_class_from_dynamic_module 8 | from transformers.modeling_utils import no_init_weights 9 | from transformers.utils import ContextManagers 10 | from accelerate import init_empty_weights 11 | from accelerate.utils import get_balanced_memory, infer_auto_device_map 12 | 13 | import torch 14 | import os 15 | import platform 16 | import signal 17 | 18 | MODEL_ZOO = { 19 | 'moss': { 20 | 'model_path': '/mnt/ljt/models/hugging_face/moss-moon-003-sft', 21 | 'model': AutoModel, 22 | 'tokenizer': AutoTokenizer, 23 | 'config': AutoConfig, 24 | }, 25 | } 26 | 27 | os_name = platform.system() 28 | clear_command = 'cls' if os_name == 'Windows' else 'clear' 29 | stop_stream = False 30 | 31 | 32 | def build_prompt(history): 33 | prompt = "欢迎使用 ChatGLM-6B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序" 34 | for query, response in history: 35 | prompt += f"\n\n用户:{query}" 36 | prompt += f"\n\nChatGLM-6B:{response}" 37 | return prompt 38 | 39 | 40 | def signal_handler(signal, frame): 41 | global stop_stream 42 | stop_stream = True 43 | 44 | 45 | class ChatGLM(LLM): 46 | def __init__(self, model_name, load_in_8bit=False): 47 | self.model_path = MODEL_ZOO[model_name]['model_path'] 48 | self.config = MODEL_ZOO[model_name]['config'].from_pretrained(self.model_path, return_unused_kwargs=True, 49 | trust_remote_code=True)[0] 50 | super().__init__(MODEL_ZOO[model_name], load_in_8bit, self.get_device_map) 51 | self.model = self.model.half.cuda() 52 | 53 | def get_device_map(self): 54 | return 'auto' 55 | 56 | def stream_chat(self): 57 | self.model = self.model.eval() 58 | history = [] 59 | global stop_stream 60 | print("欢迎使用 ChatGLM-6B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序") 61 | while True: 62 | query = input("\n用户:") 63 | if query.strip() == "stop": 64 | break 65 | if query.strip() == "clear": 66 | history = [] 67 | 
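                # wipe the console and reprint the welcome banner before
                # waiting for the next query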
os.system(clear_command) 68 | print("欢迎使用 ChatGLM-6B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序") 69 | continue 70 | count = 0 71 | for response, history in self.model.stream_chat(self.tokenizer, query, history=history): 72 | if stop_stream: 73 | stop_stream = False 74 | break 75 | else: 76 | count += 1 77 | if count % 8 == 0: 78 | os.system(clear_command) 79 | print(build_prompt(history), flush=True) 80 | signal.signal(signal.SIGINT, signal_handler) 81 | os.system(clear_command) 82 | print(build_prompt(history), flush=True) 83 | -------------------------------------------------------------------------------- /sr_lab/realesrgan/realesrgan_onnx_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/11/11 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import cv2 6 | from cv2box import CVImage, MyFpsCounter 7 | from apstone import ModelBase 8 | 9 | MODEL_ZOO = { 10 | # https://github.com/xinntao/Real-ESRGAN 11 | # input_name: ['input_1'], shape: [[1, 3, w, h]] 12 | # output_name: ['output_1'], shape: [[1, 3, w*4, h*4]] 13 | 'realesr-general-x4v3': { 14 | 'model_path': 'pretrain_models/sr_lib/realesr-general-x4v3-dynamic.onnx' 15 | }, 16 | # onnx will raise alloc memory error when big image input 17 | 'RealESRGAN_x4plus-dynamic': { 18 | 'model_path': 'pretrain_models/sr_lib/RealESRGAN_x4plus-dynamic.onnx' 19 | }, 20 | 'RealESRGAN_x2plus-dynamic': { 21 | 'model_path': 'pretrain_models/sr_lib/RealESRGAN_x2plus-dynamic.onnx' 22 | }, 23 | } 24 | 25 | 26 | class GFPGAN(ModelBase): 27 | def __init__(self, model_type='realesr-general-x4v3', provider='gpu'): 28 | super().__init__(MODEL_ZOO[model_type], provider) 29 | self.model_type = model_type 30 | self.input_std = 255 31 | self.input_mean = 0 32 | self.mod_pad_h = 0 33 | self.mod_pad_w = 0 34 | self.scale = 4 35 | 36 | def pad_for_scale_2(self, image_in_): 37 | self.scale = 2 38 | h, w, _ = image_in_.shape 39 | if h % self.scale != 0: 40 | self.mod_pad_h = (self.scale - h % self.scale) 41 | if w % self.scale != 0: 42 | self.mod_pad_w = (self.scale - w % self.scale) 43 | image_out_ = cv2.copyMakeBorder(image_in_, 0, self.mod_pad_h, 0, self.mod_pad_w, cv2.BORDER_REPLICATE) 44 | return image_out_ 45 | 46 | def forward(self, input_image): 47 | """ 48 | Args: 49 | input_image: cv2 image 0-255 BGR 50 | Returns: 51 | BGR 512x512x3 0-1 52 | """ 53 | if self.model_type == 'RealESRGAN_x2plus-dynamic': 54 | input_image = self.pad_for_scale_2(CVImage(input_image).bgr) 55 | ori_size = CVImage(input_image).bgr.shape[:2][::-1] 56 | # print(ori_size) 57 | image_in = CVImage(input_image).blob(ori_size, self.input_mean, self.input_std, rgb=True) 58 | image_out = self.model.forward(image_in) 59 | output_image = (image_out[0][0])[::-1].transpose(1, 2, 0).clip(0, 1) 60 | if self.model_type == 'RealESRGAN_x2plus-dynamic': 61 | output_h, output_w, _ = output_image.shape 62 | output_image = output_image[0:output_h - self.mod_pad_h * self.scale, 0:output_w - self.mod_pad_w * self.scale, :] 63 | # https://docs.opencv.org/4.x/da/d54/group__imgproc__transform.html 64 | output_image = CVImage(output_image).resize(ori_size, interpolation=cv2.INTER_LANCZOS4).bgr 65 | return output_image 66 | 67 | 68 | if __name__ == '__main__': 69 | face_img_p = 'resources/test1.jpg' 70 | fa = GFPGAN(model_type='RealESRGAN_x2plus-dynamic', provider='gpu') 71 | face = fa.forward(face_img_p) 72 | # CVImage(face, image_format='cv2').save('./gfpgan.jpg') 73 | CVImage(face, 
image_format='cv2').show() 74 | -------------------------------------------------------------------------------- /mocap_lib/middleware/VMCApi.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/2/14 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from osc4py3.as_eventloop import * # osc module 6 | from osc4py3 import oscbuildparse 7 | 8 | # import time 9 | 10 | LEFT_MPII_HAND_LABELS = [ 11 | 'LEFT_WRIST', # 0 12 | 'LEFT_THUMB_CMC', 'LEFT_THUMB_MCP', 'LEFT_THUMB_IP', 'LEFT_THUMB_TIP', 13 | 'LEFT_INDEX_FINGER_MCP', 'LEFT_INDEX_FINGER_PIP', 'LEFT_INDEX_FINGER_DIP', 'LEFT_INDEX_FINGER_TIP', 14 | 'LEFT_MIDDLE_FINGER_MCP', 'LEFT_MIDDLE_FINGER_PIP', 'LEFT_MIDDLE_FINGER_DIP', 'LEFT_MIDDLE_FINGER_TIP', 15 | 'LEFT_RING_FINGER_MCP', 'LEFT_RING_FINGER_PIP', 'LEFT_RING_FINGER_DIP', 'LEFT_RING_FINGER_TIP', 16 | 'LEFT_PINKY_MCP', 'LEFT_PINKY_PIP', 'LEFT_PINKY_DIP', 'LEFT_PINKY_TIP', 17 | ] 18 | 19 | RIGHT_MPII_HAND_LABELS = [ 20 | 'RIGHT_WRIST', # 0 21 | 'RIGHT_THUMB_CMC', 'RIGHT_THUMB_MCP', 'RIGHT_THUMB_IP', 'RIGHT_THUMB_TIP', 22 | 'RIGHT_INDEX_FINGER_MCP', 'RIGHT_INDEX_FINGER_PIP', 'RIGHT_INDEX_FINGER_DIP', 'RIGHT_INDEX_FINGER_TIP', 23 | 'RIGHT_MIDDLE_FINGER_MCP', 'RIGHT_MIDDLE_FINGER_PIP', 'RIGHT_MIDDLE_FINGER_DIP', 'RIGHT_MIDDLE_FINGER_TIP', 24 | 'RIGHT_RING_FINGER_MCP', 'RIGHT_RING_FINGER_PIP', 'RIGHT_RING_FINGER_DIP', 'RIGHT_RING_FINGER_TIP', 25 | 'RIGHT_PINKY_MCP', 'RIGHT_PINKY_PIP', 'RIGHT_PINKY_DIP', 'RIGHT_PINKY_TIP', 26 | ] 27 | 28 | LEFT_UNI_HAND_LABELS = [ 29 | 'LEFT_WRIST', # 0 30 | 'LEFT_THUMB_CMC', 'LeftThumbProximal', 'LeftThumbIntermediate', 'LeftThumbDistal', 31 | 'LEFT_INDEX_FINGER_MCP', 'LeftIndexProximal', 'LeftIndexIntermediate', 'LeftIndexDistal', 32 | 'LEFT_MIDDLE_FINGER_MCP', 'LeftMiddleProximal', 'LeftMiddleIntermediate', 'LeftMiddleDistal', 33 | 'LEFT_RING_FINGER_MCP', 'LeftRingProximal', 'LeftRingIntermediate', 'LeftRingDistal', 34 | 'LEFT_PINKY_MCP', 'LeftLittleProximal', 'LeftLittleIntermediate', 'LeftLittleDistal', 35 | ] 36 | 37 | RIGHT_UNI_HAND_LABELS = [ 38 | 'Right_WRIST', # 0 39 | 'Right_THUMB_CMC', 'RightThumbProximal', 'RightThumbIntermediate', 'RightThumbDistal', 40 | 'Right_INDEX_FINGER_MCP', 'RightIndexProximal', 'RightIndexIntermediate', 'RightIndexDistal', 41 | 'Right_MIDDLE_FINGER_MCP', 'RightMiddleProximal', 'RightMiddleIntermediate', 'RightMiddleDistal', 42 | 'Right_RING_FINGER_MCP', 'RightRingProximal', 'RightRingIntermediate', 'RightRingDistal', 43 | 'Right_PINKY_MCP', 'RightLittleProximal', 'RightLittleIntermediate', 'RightLittleDistal', 44 | ] 45 | 46 | 47 | class VMCApi: 48 | def __init__(self, ip_address, ip_port): 49 | # ip = '192.168.4.13' # ip address 50 | # port = 39539 # port number 51 | 52 | osc_startup() # starts osc protocol 53 | osc_udp_client(ip_address, ip_port, "VroidPoser") # initializes osc client 54 | 55 | def sendosc(self, bone, x, y, z, w): # condensed OSC message function 56 | 57 | msg = oscbuildparse.OSCMessage("/VMC/Ext/Bone/Pos", None, 58 | [bone, float(0), float(0), float(0), float(x), 59 | float(y), float(z), float(w)]) 60 | # print(msg) 61 | osc_send(msg, "VroidPoser") 62 | osc_process() 63 | -------------------------------------------------------------------------------- /data_lib/dataset_preprocess/gen_dataset_thumbnail.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/1/7 3 | # @Author : ykk648 4 | # @Project : 
https://github.com/ykk648/AI_power 5 | 6 | import os 7 | 8 | import PIL.Image as Image 9 | from cv2box.utils.util import get_path_by_ext 10 | 11 | 12 | def resize_by_width(infile, image_size): 13 | """按照宽度进行所需比例缩放""" 14 | im = Image.open(infile) 15 | (x, y) = im.size 16 | lv = round(x / image_size, 2) + 0.01 17 | x_s = int(x // lv) 18 | y_s = int(y // lv) 19 | print("x_s", x_s, y_s) 20 | out = im.resize((x_s, y_s), Image.ANTIALIAS) 21 | return out 22 | 23 | 24 | def get_new_img_xy(infile, image_size): 25 | """返回一个图片的宽、高像素""" 26 | im = Image.open(infile) 27 | (x, y) = im.size 28 | lv = round(x / image_size, 2) + 0.01 29 | x_s = x // lv 30 | y_s = y // lv 31 | # print("x_s", x_s, y_s) 32 | # out = im.resize((x_s, y_s), Image.ANTIALIAS) 33 | return x_s, y_s 34 | 35 | 36 | # 定义图像拼接函数 37 | def image_compose(image_colnum, image_size, image_rownum, image_names, image_save_path, x_new, y_new): 38 | to_image = Image.new('RGB', (image_colnum * x_new, image_rownum * y_new)) # 创建一个新图 39 | # 循环遍历,把每张图片按顺序粘贴到对应位置上 40 | total_num = 0 41 | for y in range(1, image_rownum + 1): 42 | for x in range(1, image_colnum + 1): 43 | from_image = resize_by_width(image_names[image_colnum * (y - 1) + x - 1], image_size) 44 | # from_image = Image.open(image_names[image_colnum * (y - 1) + x - 1]).resize((image_size,image_size ), Image.ANTIALIAS) 45 | to_image.paste(from_image, ((x - 1) * x_new, (y - 1) * y_new)) 46 | total_num += 1 47 | if total_num == len(image_names): 48 | break 49 | return to_image.save(image_save_path) # 保存新图 50 | 51 | 52 | def merge_images(image_dir_path,image_size,image_colnum): 53 | # 获取图片集地址下的所有图片名称 54 | image_fullpath_list = get_path_by_ext(image_dir_path)[:100] 55 | print("image_fullpath_list", len(image_fullpath_list), image_fullpath_list) 56 | 57 | image_save_path = r'{}_thumbnail.jpg'.format(image_dir_path) # 图片转换后的地址 58 | # image_rownum = 4 # 图片间隔,也就是合并成一张图后,一共有几行 59 | image_rownum_yu = len(image_fullpath_list) % image_colnum 60 | if image_rownum_yu == 0: 61 | image_rownum = len(image_fullpath_list) // image_colnum 62 | else: 63 | image_rownum = len(image_fullpath_list) // image_colnum + 1 64 | 65 | x_list = [] 66 | y_list = [] 67 | for img_file in image_fullpath_list: 68 | img_x, img_y = get_new_img_xy(str(img_file), image_size) 69 | x_list.append(img_x) 70 | y_list.append(img_y) 71 | 72 | print("x_list", sorted(x_list)) 73 | print("y_list", sorted(y_list)) 74 | x_new = int(x_list[len(x_list) // 5 * 4]) 75 | y_new = int(x_list[len(y_list) // 5 * 4]) 76 | image_compose(image_colnum, image_size, image_rownum, image_fullpath_list, image_save_path, x_new, y_new) # 调用函数 77 | 78 | 79 | if __name__ == '__main__': 80 | image_dir_path = '' # 图片集地址 81 | image_size = 128 # 每张小图片的大小 82 | image_colnum = 10 # 合并成一张图后,一行有几个小图 83 | merge_images(image_dir_path, image_size, image_colnum) 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /seg_lib/cihp_pgn/cihp_pgn_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/6/28 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from apstone import ModelBase 6 | import numpy as np 7 | from cv2box import CVImage 8 | from PIL import Image 9 | 10 | """ 11 | input_name:['create_inputs/sub:0'], shape:[['unk__40886', 'unk__40887', 3]] 12 | output_name:['ExpandDims_1:0', 'Max:0', 'Sigmoid:0'], shape:[[1, 'unk__40888', 'unk__40889', 1], [1, 'unk__40890', 'unk__40891'], [1, 'unk__40892', 'unk__40893', 1]] 13 | """ 14 
| MODEL_ZOO = { 15 | # https://github.com/Engineering-Course/CIHP_PGN 16 | 'cihp_pgn': { 17 | 'model_path': 'pretrain_models/seg_lib/cihp_pgn/cihp_pgn.onnx' 18 | }, 19 | } 20 | 21 | label_colours = [(0, 0, 0), (128, 0, 0), (255, 0, 0), (0, 85, 0), (170, 0, 51), (255, 85, 0), (0, 0, 85), (0, 119, 221), 22 | (85, 85, 0), (0, 85, 85), (85, 51, 0), (52, 86, 128), (0, 128, 0), (0, 0, 255), (51, 170, 221), 23 | (0, 255, 255), (85, 255, 170), (170, 255, 85), (255, 255, 0), (255, 170, 0)] 24 | N_CLASSES = 20 25 | 26 | 27 | def decode_labels(mask, num_images=1, num_classes=21): 28 | """Decode batch of segmentation masks. 29 | 30 | Args: 31 | mask: result of inference after taking argmax. 32 | num_images: number of images to decode from the batch. 33 | num_classes: number of classes to predict (including background). 34 | 35 | Returns: 36 | A batch with num_images RGB images of the same size as the input. 37 | """ 38 | n, h, w, c = mask.shape 39 | assert (n >= num_images), 'Batch size %d should be greater or equal than number of images to save %d.' % ( 40 | n, num_images) 41 | outputs = np.zeros((num_images, h, w, 3), dtype=np.uint8) 42 | for i in range(num_images): 43 | img = Image.new('RGB', (len(mask[i, 0]), len(mask[i]))) 44 | pixels = img.load() 45 | for j_, j in enumerate(mask[i, :, :, 0]): 46 | for k_, k in enumerate(j): 47 | if k < num_classes: 48 | pixels[k_, j_] = label_colours[k] 49 | outputs[i] = np.array(img) 50 | return outputs 51 | 52 | 53 | class CIHPPGN(ModelBase): 54 | def __init__(self, model_name='cihp_pgn', provider='gpu'): 55 | super(CIHPPGN, self).__init__(MODEL_ZOO[model_name], provider) 56 | self.mean = [125.0, 114.4, 107.9] 57 | self.std = [1, 1, 1] 58 | 59 | def forward(self, image_in): 60 | input_size_ = CVImage(image_in).bgr.shape[:2] 61 | input_image = CVImage(image_in).blob_innormal(input_size_, input_mean=self.mean, input_std=self.std) 62 | # h,w,3 63 | input_image = input_image[0].transpose(1, 2, 0) 64 | parsing_, scores, edge_ = self.model.forward(input_image) 65 | mask_ = decode_labels(parsing_, num_classes=N_CLASSES) 66 | return mask_[0], parsing_[0].astype(np.uint8), (edge_[0] * 255).astype(np.uint8) 67 | 68 | 69 | if __name__ == '__main__': 70 | cihp = CIHPPGN(model_name='cihp_pgn', provider='gpu') 71 | 72 | img_p = 'resources/for_pose/girl_640x480.jpg' 73 | # decrease size to reduce GPU mem 74 | img_p = CVImage(img_p).resize((320, 180)).bgr 75 | mask, parsing, edge = cihp.forward(img_p) 76 | print(mask.shape) 77 | print(parsing.shape) 78 | print(edge.shape) 79 | CVImage(mask).show() 80 | CVImage(parsing).show() 81 | CVImage(edge).show() 82 | -------------------------------------------------------------------------------- /hand_lib/hand_detector/hand_detector_mediapipe/hand_detector_mediapipe.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/2/23 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | import mediapipe as mp 7 | import numpy as np 8 | from cv2box import CVImage, MyFpsCounter, mfc 9 | 10 | 11 | class MediapipeHand: 12 | def __init__(self): 13 | # mp_drawing = mp.solutions.drawing_utils 14 | # mp_drawing_styles = mp.solutions.drawing_styles 15 | self.mp_hands = mp.solutions.hands.Hands( 16 | static_image_mode=True, 17 | model_complexity=1, 18 | max_num_hands=1, 19 | min_detection_confidence=0) 20 | 21 | # @mfc('mediapipe') 22 | def forward(self, image): 23 | # height, width = image.shape[0], image.shape[1] 24 | 25 | image.flags.writeable = 
False 26 | 27 | results = self.mp_hands.process(image) 28 | 29 | try: 30 | hand = results.multi_hand_world_landmarks[0] 31 | multi_handedness = results.multi_handedness 32 | except TypeError: 33 | return None, None, None 34 | # print(len(hand)) 35 | # print(multi_handedness[0].classification[0].label) 36 | hand_np = [] 37 | for i in range(21): 38 | hand_np.append( 39 | [hand.landmark[i].x, hand.landmark[i].y, hand.landmark[i].z]) 40 | return hand_np, multi_handedness[0].classification[0].label, multi_handedness[0].classification[0].score 41 | 42 | 43 | class MediapipeHolistic: 44 | def __init__(self): 45 | # mp_drawing = mp.solutions.drawing_utils 46 | # mp_drawing_styles = mp.solutions.drawing_styles 47 | self.mp_holistic = mp.solutions.holistic 48 | 49 | def forward(self, image): 50 | 51 | height, width = image.shape[0], image.shape[1] 52 | 53 | with self.mp_holistic.Holistic( 54 | model_complexity=1, 55 | min_detection_confidence=0.5, 56 | min_tracking_confidence=0.5) as holistic: 57 | # To improve performance, optionally mark the image as not writeable to 58 | # pass by reference. 59 | image.flags.writeable = False 60 | 61 | results = holistic.process(image) 62 | 63 | left_hand = results.left_hand_landmarks 64 | right_hand = results.right_hand_landmarks 65 | results = [] 66 | for hand in [right_hand, left_hand]: 67 | if hand is not None: 68 | hand_np = [] 69 | for i in range(21): 70 | hand_np.append( 71 | [hand.landmark[i].x * width, hand.landmark[i].y * height, hand.landmark[i].z * width]) 72 | box_left_top_x = np.min(hand_np, axis=0)[0] 73 | box_left_top_y = np.min(hand_np, axis=0)[1] 74 | box_right_bottle_x = np.max(hand_np, axis=0)[0] 75 | box_right_bottle_y = np.max(hand_np, axis=0)[1] 76 | results.append([box_left_top_x, box_left_top_y, box_right_bottle_x, box_right_bottle_y]) 77 | return np.array(results) 78 | 79 | 80 | if __name__ == '__main__': 81 | image_in = CVImage('').rgb 82 | # [[1113.7602996826172, 539.147379398346, 1374.1822814941406, 850.5021500587463]] 83 | # CVImage('').show() 84 | hdm = MediapipeHand() 85 | print(hdm.forward(image_in)) 86 | -------------------------------------------------------------------------------- /body_lib/body_kp_detector/blazepose_mediapipe/pose_landmark_origin.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/7/21 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import numpy as np 6 | from cv2box import CVImage, MyFpsCounter 7 | 8 | from apstone import ONNXModel 9 | from body_lib.body_kp_detector.blazepose_mediapipe.blaze_utils import postprocess, denormalize_landmarks, detection2roi, \ 10 | extract_roi 11 | from body_lib.body_kp_detector.blazepose_mediapipe.body_bbox_detector import BodyDetector 12 | 13 | # input 1*256*256*3 output , 1*1 , , , 14 | LITE_MODEL = 'pretrain_models/body_lib/body_kp_detector/blazepose_mediapipe/pose_landmark_lite.onnx' 15 | FULL_MODEL = 'pretrain_models/body_lib/body_kp_detector/blazepose_mediapipe/pose_landmark_full.onnx' 16 | HEAVY_MODEL = 'pretrain_models/body_lib/body_kp_detector/blazepose_mediapipe/pose_landmark_heavy.onnx' 17 | 18 | 19 | class LandmarkDetectorOrigin: 20 | def __init__(self, model_complexity=0, provider='gpu'): 21 | self.bd = BodyDetector(provider=provider) 22 | 23 | model_path_list = [LITE_MODEL, FULL_MODEL, HEAVY_MODEL] 24 | self.model = ONNXModel(model_path_list[model_complexity], provider=provider) 25 | 26 | def forward(self, image_in_, show=False): 27 | """ 28 | 29 | Args: 30 
| image_in_: 31 | 32 | Returns: 33 | landmarks: 33*4 34 | 35 | """ 36 | filtered_detections = self.bd.forward(image_in_) 37 | if show: 38 | print(filtered_detections) 39 | if filtered_detections.shape == (0, 13): 40 | return np.zeros((33, 4)) 41 | elif len(filtered_detections) > 1: 42 | filtered_detections = filtered_detections[0].reshape(1, 13) 43 | 44 | xc, yc, scale, theta = detection2roi(filtered_detections, detection2roi_method='alignment') 45 | img, affine, box = extract_roi(CVImage(image_in_).bgr, xc, yc, theta, scale) 46 | if show: 47 | CVImage(img[0]).show(0, 'img_in') 48 | normalized_landmarks, f, _, _, _ = self.model.forward(img.astype(np.float32)) 49 | normalized_landmarks = postprocess(normalized_landmarks) 50 | landmarks_ = denormalize_landmarks(normalized_landmarks, affine)[0] 51 | 52 | # CVImage(img[0].cpu().numpy().transpose(2, 1, 0)).show() 53 | # print(normalized_landmarks) 54 | if show: 55 | show_img = CVImage(image_in_).draw_landmarks(landmarks_) 56 | CVImage(show_img).show(0, 'results') 57 | 58 | return landmarks_ 59 | 60 | 61 | if __name__ == '__main__': 62 | image_path = 'resources/for_pose/girl_640x480.jpg' 63 | image_in = CVImage(image_path).bgr 64 | 65 | """ 66 | model 0 67 | gpu 70fps trt 133-196fps trt16 235-278fps t_pose_1500x 68 | gpu trt 221fps trt16 269fps t_pose_1080p 69 | model 1 282fps 70 | """ 71 | ld = LandmarkDetectorOrigin(model_complexity=2, provider='gpu') 72 | 73 | # landmarks = ld.forward(image_in, show=True) 74 | # print(landmarks) 75 | 76 | # with MyFpsCounter('model forward 10 times fps: ') as mfc: 77 | # for i in range(10): 78 | # filtered_detections = ld.forward(image_in) 79 | 80 | # video tracking test 81 | from cv2box import CVVideoLoader 82 | with CVVideoLoader('') as cvvl: 83 | for _ in range(len(cvvl)): 84 | _, frame = cvvl.get() 85 | landmarks = ld.forward(frame, show=True) 86 | -------------------------------------------------------------------------------- /body_lib/body_kp_detector/body_kp_detector_kapao/body_kp_detector_kapao.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/7/29 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | model from https://github.com/wmcnally/kapao 7 | """ 8 | import torch 9 | from cv2box import CVImage, MyFpsCounter, mfc 10 | import numpy as np 11 | from torchvision import transforms 12 | 13 | from apstone.wrappers.mmlab_wrapper import KpDetectorBase 14 | from body_lib.body_kp_detector.body_kp_detector_kapao.utils import non_max_suppression_kp, post_process_batch, letterbox 15 | 16 | MODEL_ZOO = { 17 | # gpu 30fps trt 39fps trt16 43fps 18 | # input_name:['actual_input_1'], shape:[[1, 3, 768, 1280]] 19 | # output_name:['output1'], shape:[[1, 61200, 57]] 20 | 'kapao_s_coco_1080': { 21 | 'model_path': 'pretrain_models/body_lib/body_kp_detector/kapao/kapao_s_coco_static_1280x768.onnx', 22 | 'model_input_size': (1280, 768) 23 | }, 24 | } 25 | 26 | 27 | class KaPao(KpDetectorBase): 28 | def __init__(self, model, provider): 29 | super().__init__(MODEL_ZOO[model], provider) 30 | self.origin_shape = None 31 | 32 | def preprocess(self, image_in_, bbox_, mirror=False): 33 | image_in_ = CVImage(image_in_).bgr 34 | # Padded resize 35 | image_in_ = letterbox(image_in_, self.model_input_size[::-1], stride=64)[0] 36 | 37 | if self.model_type == 'trt': 38 | transform = transforms.Compose([ 39 | transforms.ToTensor(), 40 | ]) 41 | image_in_ = 
dict(input=CVImage(image_in_.astype(np.float32)).tensor(transform).cuda()) 42 | else: 43 | # HWC -> CHW BGR -> RGB 44 | image_in_ = image_in_.astype(np.float32).transpose(2, 0, 1)[::-1][np.newaxis, :]/255 45 | 46 | return image_in_ 47 | 48 | @mfc('postprocess') 49 | def postprocess(self, model_results): 50 | kp_flip = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] 51 | data = {'num_coords': 34, 'use_kp_dets': True, 'conf_thres_kp_person': 0.3, 52 | 'overwrite_tol': 50, 'count_fused': False} 53 | # lazy to rewrite torch nms to numpy 54 | model_results = torch.Tensor(model_results[0]) 55 | person_dets = non_max_suppression_kp(model_results, conf_thres=0.7, iou_thres=0.45, 56 | classes=[0], 57 | num_coords=34) 58 | kp_dets = non_max_suppression_kp(model_results, conf_thres=0.5, iou_thres=0.45, 59 | classes=list(range(1, 1 + len(kp_flip))), 60 | num_coords=34) 61 | _, poses, _, _, _ = post_process_batch(data, self.model_input_size, self.origin_shape, person_dets, kp_dets) 62 | 63 | return poses[0] 64 | 65 | def forward(self, image_in_, show=False, max_bbox_num=1): 66 | self.origin_shape = image_in_.shape 67 | model_results = self.model.forward(self.preprocess(image_in_, None)) 68 | results_after = self.postprocess(model_results) 69 | if show: 70 | self.show(image_in_, results_after) 71 | return results_after 72 | 73 | 74 | if __name__ == '__main__': 75 | 76 | image_path = 'resources/for_pose/t_pose_1080p.jpeg' 77 | image_in = CVImage(image_path).bgr 78 | kp = KaPao(model='kapao_s_coco_1080', provider='gpu') 79 | 80 | kps = kp.forward(image_in, show=True, max_bbox_num=3) 81 | 82 | -------------------------------------------------------------------------------- /mocap_lib/body_wholebody/mediapipe_holistic.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/6/6 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | import numpy as np 7 | import mediapipe as mp 8 | from cv2box import CVImage, CVVideoLoader 9 | import cv2 10 | from tqdm import tqdm 11 | 12 | 13 | class MediapipeHolistic: 14 | def __init__(self): 15 | self.mp_drawing = mp.solutions.drawing_utils 16 | self.mp_drawing_styles = mp.solutions.drawing_styles 17 | self.mp_holistic = mp.solutions.holistic 18 | self.holistic = self.mp_holistic.Holistic( 19 | # model_complexity=2, 20 | smooth_landmarks=True, 21 | # refine_face_landmarks=True, 22 | min_detection_confidence=0.5, 23 | min_tracking_confidence=0.5) 24 | 25 | def draw_show(self, image_in_, results): 26 | self.mp_drawing.draw_landmarks( 27 | image_in_, 28 | results.face_landmarks, 29 | self.mp_holistic.FACEMESH_CONTOURS, 30 | landmark_drawing_spec=None, 31 | connection_drawing_spec=self.mp_drawing_styles.get_default_face_mesh_contours_style()) 32 | self.mp_drawing.draw_landmarks( 33 | image_in_, 34 | results.pose_landmarks, 35 | self.mp_holistic.POSE_CONNECTIONS, 36 | landmark_drawing_spec=self.mp_drawing_styles.get_default_pose_landmarks_style()) 37 | self.mp_drawing.draw_landmarks( 38 | image_in_, 39 | results.left_hand_landmarks, 40 | self.mp_holistic.HAND_CONNECTIONS, 41 | landmark_drawing_spec=self.mp_drawing_styles.get_default_hand_landmarks_style()) 42 | self.mp_drawing.draw_landmarks( 43 | image_in_, 44 | results.right_hand_landmarks, 45 | self.mp_holistic.HAND_CONNECTIONS, 46 | landmark_drawing_spec=self.mp_drawing_styles.get_default_hand_landmarks_style()) 47 | CVImage(image_in_).show(wait_time=1) 48 | # CVImage(image).save( 49 | # '/{}.jpg'.format( 50 | # 
i), create_path=True) 51 | 52 | @staticmethod 53 | def result_convert(results_in, image_in_shape): 54 | results_out = [] 55 | for i in range(len(results_in)): 56 | # print(results_in[i].x) 57 | results_out.append([results_in[i].x * image_in_shape[1], results_in[i].y * image_in_shape[0], 1.]) 58 | return results_out 59 | 60 | def forward(self, image_in_, draw_show=False): 61 | image_in_ = CVImage(image_in_).rgb 62 | image_in_.flags.writeable = False 63 | results = self.holistic.process(image_in_) 64 | image_in_.flags.writeable = True 65 | image_in_ = cv2.cvtColor(image_in_, cv2.COLOR_RGB2BGR) 66 | 67 | if draw_show: 68 | self.draw_show(image_in_, results) 69 | 70 | body_kp = self.result_convert(results.pose_landmarks.landmark, image_in_.shape) 71 | left_hd_kp = self.result_convert(results.left_hand_landmarks.landmark, image_in_.shape) 72 | right_hd_kp = self.result_convert(results.right_hand_landmarks.landmark, image_in_.shape) 73 | 74 | return body_kp, left_hd_kp, right_hd_kp 75 | 76 | 77 | if __name__ == '__main__': 78 | mh = MediapipeHolistic() 79 | 80 | with CVVideoLoader( 81 | '') as cvvl: 82 | for i in tqdm(range(len(cvvl))): 83 | _, image = cvvl.get() 84 | body_kp, left_h, right_h = mh.forward(image) 85 | print(len(left_h)) 86 | -------------------------------------------------------------------------------- /body_lib/body_kp_detector/body_detector_movenet/movenet_api_onnx.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/16 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | from cv2box import CVImage, CVVideoLoader 7 | import numpy as np 8 | from tqdm import tqdm 9 | from body_lib.body_kp_detector.body_detector_movenet.movenet_utils import crop_and_resize, init_crop_region, \ 10 | determine_crop_region, \ 11 | draw_prediction_on_image 12 | from apstone import ONNXModel 13 | 14 | # https://tfhub.dev/s?q=movenet 15 | 16 | class MoveNet: 17 | def __init__(self, image_height, image_width): 18 | self.crop_region = init_crop_region(image_height, image_width) 19 | 20 | # ONNX 21 | self.movenet = ONNXModel( 22 | 'pretrain_models/digital_human/body_detector_movenet/movenet_singlepose_thunder_4.onnx') 23 | 24 | def forward(self, image): 25 | image_height, image_width, _ = image.shape 26 | 27 | image = crop_and_resize( 28 | np.expand_dims(image, axis=0), self.crop_region, crop_size=(256, 256)) 29 | 30 | input_image = np.array(image, dtype=np.int32) 31 | 32 | outputs = self.movenet.forward(input_image) 33 | # Output is a [1, 1, 17, 3] tensor. 
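        # (editor note, hedged) For MoveNet the last axis is (y, x, score), with y/x
        # normalized to [0, 1] relative to the cropped input; the loop below maps them
        # back to full-frame normalized coordinates using the current crop_region.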
34 | # keypoints_with_scores = np.array(outputs['output_0']) 35 | keypoints_with_scores = np.array(outputs[0]) 36 | 37 | # print(keypoints_with_scores) 38 | 39 | for idx in range(17): 40 | keypoints_with_scores[0, 0, idx, 0] = ( 41 | self.crop_region['y_min'] * image_height + 42 | self.crop_region['height'] * image_height * 43 | keypoints_with_scores[0, 0, idx, 0]) / image_height 44 | keypoints_with_scores[0, 0, idx, 1] = ( 45 | self.crop_region['x_min'] * image_width + 46 | self.crop_region['width'] * image_width * 47 | keypoints_with_scores[0, 0, idx, 1]) / image_width 48 | self.crop_region = determine_crop_region( 49 | keypoints_with_scores, image_height, image_width) 50 | return keypoints_with_scores, self.crop_region 51 | 52 | 53 | if __name__ == '__main__': 54 | 55 | mn = MoveNet(image_height=1920, image_width=1080) 56 | 57 | with CVVideoLoader('') as cvvl: 58 | for i in tqdm(range(len(cvvl))): 59 | _, image_bgr = cvvl.get() 60 | 61 | keypoints_with_scores, crop_region = mn.forward(image_bgr) 62 | 63 | # Visualize the predictions with image. 64 | display_image = np.expand_dims(image_bgr, axis=0) 65 | # display_image = tf.cast(tf.image.resize_with_pad( 66 | # display_image, 1080, 1080), dtype=tf.int32) 67 | output_overlay = draw_prediction_on_image( 68 | np.squeeze(display_image, axis=0), keypoints_with_scores, crop_region=crop_region) 69 | 70 | CVImage(output_overlay).save( 71 | ''.format(i), create_path=True) 72 | 73 | # kp_list = [] 74 | # for kp in kps: 75 | # kp_list.append([kp[1] * 1080, kp[0] * 1920]) 76 | # 77 | # image_bgr = cv2.drawKeypoints(image_bgr, cv2.KeyPoint_convert(kp_list), None, color=(0, 0, 255), flags=0) 78 | # 79 | # CVImage(output_overlay).show(1) 80 | -------------------------------------------------------------------------------- /audio_lib/svc/sovits_infer.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/5/25 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | https://github.com/voicepaw/so-vits-svc-fork 7 | svc infer -c logs/44k/config.json -m logs/44k/G_2400.pth "*.wav" 8 | """ 9 | 10 | import json 11 | import os 12 | import subprocess 13 | from pathlib import Path 14 | 15 | # import gradio as gr 16 | import librosa 17 | import numpy as np 18 | import torch 19 | from demucs.apply import apply_model 20 | from demucs.pretrained import DEFAULT_MODEL, get_model 21 | from huggingface_hub import hf_hub_download, list_repo_files 22 | import soundfile as sf 23 | 24 | from so_vits_svc_fork.hparams import HParams 25 | from so_vits_svc_fork.inference.core import Svc 26 | 27 | # Limit on duration of audio at inference time. 
increase if you can 28 | # In this parent app, we set the limit with an env var to 30 seconds 29 | # If you didnt set env var + you go OOM try changing 9e9 to <=300ish 30 | duration_limit = int(os.environ.get("MAX_DURATION_SECONDS", 9e9)) 31 | 32 | 33 | class SoVits: 34 | def __init__(self, generator_path, config_path, cluster_model_path): 35 | hparams = HParams(**json.loads(Path(config_path).read_text())) 36 | self.speaker = list(hparams.spk.keys())[0] 37 | device = "cuda" if torch.cuda.is_available() else "cpu" 38 | self.model = Svc(net_g_path=generator_path, config_path=config_path, device=device, 39 | cluster_model_path=cluster_model_path) 40 | 41 | def forward(self, 42 | audio, 43 | output_path, 44 | transpose: int = 0, 45 | auto_predict_f0: bool = False, 46 | cluster_infer_ratio: float = 0, 47 | noise_scale: float = 0.4, 48 | f0_method: str = "crepe", 49 | db_thresh: int = -40, 50 | pad_seconds: float = 0.5, 51 | chunk_seconds: float = 0.5, 52 | absolute_thresh: bool = False, 53 | ): 54 | audio, _ = librosa.load(audio, sr=self.model.target_sample, duration=duration_limit) 55 | audio = self.model.infer_silence( 56 | audio.astype(np.float32), 57 | speaker=self.speaker, 58 | transpose=transpose, 59 | auto_predict_f0=auto_predict_f0, 60 | cluster_infer_ratio=cluster_infer_ratio, 61 | noise_scale=noise_scale, 62 | f0_method=f0_method, 63 | db_thresh=db_thresh, 64 | pad_seconds=pad_seconds, 65 | chunk_seconds=chunk_seconds, 66 | absolute_thresh=absolute_thresh, 67 | ) 68 | 69 | sf.write(output_path, audio, self.model.target_sample, 'PCM_24') 70 | return audio 71 | 72 | 73 | if __name__ == "__main__": 74 | generator_path = './G_329600.pth' 75 | config_path = "./config.json" 76 | cluster_model_path = None 77 | sv = SoVits(generator_path, config_path, cluster_model_path) 78 | 79 | input_path = 'test.wav' 80 | 81 | # output_path = input_path.replace('.wav', '_swap.wav') 82 | # _ = sv.forward(input_path, output_path, auto_predict_f0=False) 83 | 84 | for f0_predict_method in ['crepe', 'parselmouth', 'dio', 'harvest']: 85 | output_path = input_path.replace('.wav', f'_swap_auto_predict_{f0_predict_method}.wav') 86 | _ = sv.forward(input_path, output_path, auto_predict_f0=True, f0_method=f0_predict_method) 87 | 88 | for i in range(-2, 2, 4): 89 | output_path = input_path.replace('.wav', f'_swap_t_{i}.wav') 90 | _ = sv.forward(input_path, output_path, auto_predict_f0=False, transpose=i) 91 | -------------------------------------------------------------------------------- /gpt_lib/lora_finetune/chatglm6b_lora_deepspeed.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/4/20 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import os 6 | import tqdm 7 | import json 8 | import torch 9 | import loralib as lora 10 | # import lora_utils.insert_lora 11 | # import dataset.GLM as GLM_Data 12 | from torch.utils.data import DataLoader 13 | from transformers import AutoTokenizer, AutoModel 14 | from accelerate import Accelerator, DeepSpeedPlugin 15 | from transformers import get_linear_schedule_with_warmup 16 | 17 | checkpoint = "THUDM/chatglm-6b" 18 | mixed_precision = 'bf16' 19 | lora_config = { 20 | 'r': 32, 21 | 'lora_alpha': 32, 22 | 'lora_dropout': 0.1, 23 | 'enable_lora': [True, True, True], 24 | } 25 | max_length = 256 26 | LR = 2e-5 27 | NUM_EPOCHS = 2 28 | batch = 1 29 | accumulate_step = 8 30 | warm_up_ratio = 0.1 31 | 32 | tokenizer = AutoTokenizer.from_pretrained(checkpoint, 
trust_remote_code=True, revision='main') 33 | model = AutoModel.from_pretrained(checkpoint, trust_remote_code=True, revision='main') 34 | model = lora_utils.insert_lora.get_lora_model(model, lora_config) 35 | 36 | deepspeed_plugin = DeepSpeedPlugin(zero_stage=2, gradient_accumulation_steps=accumulate_step) 37 | accelerator = Accelerator(mixed_precision=mixed_precision, gradient_accumulation_steps=accumulate_step, 38 | deepspeed_plugin=deepspeed_plugin) 39 | device = accelerator.device 40 | GLM_Data.device = device 41 | 42 | import dataset.Alpaca as Alpaca_Data 43 | 44 | pairs = Alpaca_Data.load('./data/alpaca_data.json') 45 | pairs_encoded = GLM_Data.encode_pairs(pairs, tokenizer) 46 | pairs_encoded = list(filter(lambda pair: len(pair['prompt']) + len(pair['completion']) <= max_length, pairs_encoded)) 47 | train_dataset = GLM_Data.GLMDataset(pairs_encoded) 48 | train_dataloader = DataLoader(dataset=train_dataset, collate_fn=GLM_Data.collate_fn, shuffle=True, batch_size=batch) 49 | 50 | optimizer = torch.optim.AdamW(model.parameters(), lr=LR) 51 | 52 | lr_scheduler = get_linear_schedule_with_warmup( 53 | optimizer=optimizer, 54 | num_warmup_steps=int(len(train_dataloader) / accumulate_step * warm_up_ratio), 55 | num_training_steps=(int(len(train_dataloader) / accumulate_step) * NUM_EPOCHS), 56 | ) 57 | 58 | model, optimizer, train_dataloader = accelerator.prepare(model, optimizer, train_dataloader) 59 | model.to(device).train() 60 | 61 | total_step = 0 62 | effective_step = 0 63 | 64 | for epoch in range(NUM_EPOCHS): 65 | epoch_loss_local = 0 66 | for step, batch in enumerate(t := tqdm.tqdm(train_dataloader)): 67 | outputs = model(**batch) 68 | loss_d = outputs.loss.detach() 69 | epoch_loss_local += loss_d 70 | t.set_description(f"loss: {epoch_loss_local.cpu().float() / step}") 71 | loss = outputs.loss / accumulate_step 72 | accelerator.backward(loss) 73 | if (step + 1) % accumulate_step == 0: 74 | optimizer.step() 75 | lr_scheduler.step() 76 | optimizer.zero_grad() 77 | 78 | accelerator.wait_for_everyone() 79 | all_epoch_loss, all_step = accelerator.gather((epoch_loss_local, torch.tensor(step, device=device))) 80 | 81 | if accelerator.is_main_process: 82 | model_id = f"finetune_{epoch}" 83 | accelerator.save(lora.lora_state_dict(accelerator.unwrap_model(model)), '/saved/' + model_id + '.pt') 84 | 85 | epoch_loss = all_epoch_loss.float().sum() / (all_step + 1).sum() 86 | total_step += (all_step + 1).sum() 87 | effective_step += ((all_step + 1) // accumulate_step).sum() 88 | print(f'epoch: {epoch}, step {effective_step.cpu().numpy()}, training_loss: {epoch_loss.cpu().numpy()}') 89 | 90 | accelerator.wait_for_everyone() -------------------------------------------------------------------------------- /mocap_lib/visualize/poseviz_demo/holistic_demo.py: -------------------------------------------------------------------------------- 1 | import poseviz 2 | import numpy as np 3 | import mediapipe as mp 4 | from cv2box import CVImage 5 | 6 | mp_drawing = mp.solutions.drawing_utils 7 | mp_drawing_styles = mp.solutions.drawing_styles 8 | mp_holistic = mp.solutions.holistic 9 | 10 | 11 | def main(): 12 | joint_names = ['nose', 13 | 'left_eye_inner', 'left_eye', 'left_eye_outer', 14 | "right_eye_inner", "right_eye", "right_eye_outer", 15 | "left_ear", "right_ear", 16 | "mouth_left", "mouth_right", 17 | "left_shoulder", "right_shoulder", 18 | "left_elbow", "right_elbow", 19 | "left_wrist", "right_wrist", 20 | "left_pinky", "right_pinky", 21 | "left_index", "right_index", 22 | "left_thumb", 
"right_thumb", 23 | "left_hip", "right_hip", 24 | "left_knee", "right_knee", 25 | "left_ankle", "right_ankle", 26 | "left_heel", "right_heel", 27 | "left_foot_index", "right_foot_index"] 28 | # joint_edges = [[0, 1], [0, 4], [1, 2], [2, 3], [3, 7], [4, 5], [5, 6], [6, 8], [9, 10], [18, 20], [20, 16], 29 | # [18, 16], [16, 22], [16, 14], [14, 12], [12, 11], [11, 13], [13, 15], [15, 21], [15, 17], [17, 19], 30 | # [12, 24], [11, 23], [23, 24], [24, 26], [23, 25], [26, 28], [25, 27], [28, 32], [28, 30], [30, 32], 31 | # [27, 29], [27, 31], [29, 31]] 32 | 33 | viz = poseviz.PoseViz(joint_names, mp_holistic.HAND_CONNECTIONS, world_up=(0, -1, 0)) 34 | 35 | with mp_holistic.Holistic( 36 | model_complexity=1, 37 | min_detection_confidence=0.5, 38 | min_tracking_confidence=0.5) as holistic: 39 | image = CVImage('').rgb 40 | height = image.shape[0] 41 | width = image.shape[1] 42 | print(height, width) 43 | results = holistic.process(image) 44 | # pose_33 = results.pose_world_landmarks.landmark 45 | right_hand_21 = results.right_hand_landmarks.landmark 46 | # pose_33 = results.pose_world_landmarks.landmark 47 | # pose_33_np = [] 48 | right_hand_21_np = [] 49 | # for i in range(33): 50 | # pose_33_np.append([pose_33[i].x*1000, pose_33[i].y*1000, pose_33[i].z*1000+3000]) 51 | 52 | for i in range(21): 53 | right_hand_21_np.append( 54 | [right_hand_21[i].x * 5 * width - (width // 2), right_hand_21[i].y * 5 * height - (height // 2), 55 | right_hand_21[i].z * width * 5]) 56 | 57 | print(right_hand_21_np) 58 | # center_x = pose_33_np[23][0] - pose_33_np[24][0] 59 | # center_y = pose_33_np[23][1] - pose_33_np[24][1] 60 | # center_z = pose_33_np[24][2] + (pose_33_np[23][2] - pose_33_np[24][2]) 61 | # print(center_x, center_y, center_z) 62 | 63 | # mp_drawing.plot_landmarks( 64 | # results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) 65 | 66 | # # Iterate over the frames of e.g. a video 67 | for i in range(1): 68 | # # Get the current frame 69 | # frame = np.zeros([512, 512, 3], np.uint8) 70 | frame = image 71 | 72 | # Make predictions here 73 | # ... 
74 | 75 | # Update the visualization 76 | viz.update( 77 | frame=frame, 78 | boxes=np.array([[10, 20, 100, 100]], np.float32), 79 | poses=[right_hand_21_np], 80 | camera=poseviz.Camera.from_fov(55, frame.shape[:2])) 81 | 82 | 83 | if __name__ == '__main__': 84 | main() 85 | -------------------------------------------------------------------------------- /sd_lib/inversion_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/9/4 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import torch 6 | from cv2box import CVImage 7 | 8 | from diffusers import StableDiffusionPipeline, AutoencoderKL, DDPMScheduler, DDIMScheduler 9 | from diffusers.image_processor import VaeImageProcessor 10 | 11 | from sd_lib.prompt2prompt import ddim_inversion, null_optimization, EmptyControl 12 | 13 | SD_PRETRAIN = './sd_models/stable-diffusion-v1-5' 14 | 15 | 16 | class DDIMInversion: 17 | def __init__(self, device='cuda', num_inv_steps=50): 18 | self.device = device 19 | self.num_inv_steps = num_inv_steps 20 | noise_scheduler = DDIMScheduler.from_pretrained(SD_PRETRAIN, subfolder='scheduler') 21 | noise_scheduler.set_timesteps(self.num_inv_steps) 22 | # noise_scheduler = DDIMScheduler( 23 | # num_train_timesteps=1000, 24 | # beta_start=0.00085, 25 | # beta_end=0.012, 26 | # beta_schedule="linear", 27 | # clip_sample=False, 28 | # set_alpha_to_one=False, 29 | # steps_offset=1, 30 | # ) 31 | self.sd_pipe = StableDiffusionPipeline.from_pretrained( 32 | SD_PRETRAIN, 33 | torch_dtype=torch.float16, 34 | scheduler=noise_scheduler, 35 | # unet=unet, 36 | feature_extractor=None, 37 | safety_checker=None 38 | ).to(self.device) 39 | 40 | # self.sd_pipe.enable_model_cpu_offload() 41 | 42 | self.vae_scale_factor = 2 ** (len(self.sd_pipe.vae.config.block_out_channels) - 1) 43 | self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) 44 | 45 | def forward(self, image_in, prompt, null_optim=True): 46 | image_in_pil = CVImage(image_in).pillow() 47 | image_in_pt = self.image_processor.preprocess(image_in_pil) 48 | 49 | latents = self.sd_pipe.vae.encode( 50 | image_in_pt.to(self.device, dtype=self.sd_pipe.vae.dtype)).latent_dist.sample() 51 | latents = latents * 0.18215 52 | 53 | ddim_inv_latents = ddim_inversion( 54 | self.sd_pipe, self.sd_pipe.scheduler, latents=latents, 55 | num_inv_steps=self.num_inv_steps, prompt=prompt) 56 | 57 | if null_optim: 58 | num_inner_steps = 10 59 | uncond_embeddings = null_optimization(self.sd_pipe, self.sd_pipe.scheduler, ddim_inv_latents, self.num_inv_steps, num_inner_steps, prompt) 60 | # null_text_rec, _ = ptp_utils.text2image_ldm_stable(StableDiffuser, [prompt], EmptyControl(), latent=x_t, 61 | # uncond_embeddings=uncond_embeddings) 62 | # ptp_utils.view_images(null_text_rec) 63 | return ddim_inv_latents[-1], uncond_embeddings 64 | else: 65 | return ddim_inv_latents[-1], None 66 | 67 | 68 | if __name__ == '__main__': 69 | image_p = 'resources/for_sd/girl_reading_512_crop.png' 70 | blip_prompt = 'a woman reading a book' 71 | ddimi = DDIMInversion(device='cuda', num_inv_steps=25) 72 | latent_out, uncond_embedding = ddimi.forward(image_p, blip_prompt, null_optim=False) 73 | 74 | print(latent_out.shape) 75 | print(uncond_embedding.shape) 76 | 77 | regenerate_image = ddimi.sd_pipe( 78 | height=512, 79 | width=512, 80 | prompt=blip_prompt, 81 | num_inference_steps=25, 82 | guidance_scale=7.5, 83 | generator=EmptyControl(), 84 | negative_prompt_embeds=uncond_embedding, 
85 | latents=latent_out, 86 | return_dict=False, 87 | )[0][0] 88 | print(regenerate_image.size) # pillow 89 | CVImage(regenerate_image, 'pillow').show() 90 | -------------------------------------------------------------------------------- /data_lib/dataset_vis/coco_detect_vis.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/9/21 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | https://github.com/wukaishuns/Coco-datasets-Visualization-and-change-tools/blob/main/viscoco.py 7 | """ 8 | import os 9 | import sys 10 | 11 | # if "/opt/ros/kinetic/lib/python2.7/dist-packages" in sys.path: 12 | # sys.path.remove("/opt/ros/kinetic/lib/python2.7/dist-packages") 13 | import cv2 14 | import numpy as np 15 | from skimage import io 16 | import matplotlib 17 | import matplotlib.pyplot as plt 18 | from matplotlib import patches, lines 19 | from matplotlib.patches import Polygon, Rectangle 20 | from matplotlib.collections import PatchCollection 21 | 22 | from pycocotools.coco import COCO 23 | 24 | matplotlib.use('TkAgg') 25 | annfile = '/annotations/annotations.json' 26 | imgroot = '/images' 27 | 28 | 29 | def showAnns(anns): 30 | if len(anns) == 0: 31 | return 0 32 | ax = plt.gca() 33 | ax.set_autoscale_on(False) 34 | captions = [] 35 | polygons = [] 36 | rectangles = [] 37 | color = [] 38 | for ann in anns: 39 | c = (np.random.random((1, 3)) * 0.6 + 0.4).tolist()[0] 40 | if 'segmentation' in ann: 41 | if type(ann['segmentation']) == list: 42 | # polygon 43 | for seg in ann['segmentation']: 44 | # print(132131,ann['category_id']) 45 | # print(cat_names[0]) 46 | captions.append(cat_names[ann['category_id'] - 1]) 47 | poly = np.array(seg).reshape((int(len(seg) / 2), 2)) 48 | l_corner, w, h = (ann['bbox'][0], ann['bbox'][1]), ann['bbox'][2], ann['bbox'][3] 49 | rectangles.append(Rectangle(l_corner, w, h)) 50 | polygons.append(Polygon(poly)) 51 | color.append(c) 52 | 53 | p = PatchCollection(rectangles, facecolor='none', edgecolors=color, alpha=1, linestyle='--', linewidths=2) 54 | ax.add_collection(p) 55 | 56 | for i in range(len(captions)): 57 | x = rectangles[i].xy[0] 58 | y = rectangles[i].xy[1] 59 | ax.text(x, y, captions[i], size=10, verticalalignment='top', color='w', backgroundcolor="none") 60 | 61 | p = PatchCollection(polygons, facecolor=color, linewidths=0, alpha=0.6) 62 | ax.add_collection(p) 63 | # p = PatchCollection(polygons, facecolor='none', edgecolors=color, linewidths=2) 64 | p = PatchCollection(polygons, facecolor='none', edgecolors='b', linewidths=0.5) 65 | ax.add_collection(p) 66 | print('Ok!') 67 | 68 | 69 | import random 70 | 71 | coco = COCO(annfile) 72 | cats = coco.loadCats(coco.getCatIds()) 73 | cat_names = [cat['name'] for cat in cats] 74 | print(cat_names) 75 | catids = coco.getCatIds(catNms=random.randint(0, len(cat_names) - 1)) 76 | imgids = coco.getImgIds(catIds=catids) 77 | 78 | 79 | def draw(m, n, i): 80 | img = coco.loadImgs(imgids[np.random.randint(0, len(imgids))])[0] 81 | I = io.imread(os.path.join(imgroot, img['file_name'])) 82 | plt.subplot(m, n, i) 83 | plt.axis('off') 84 | plt.title(img['file_name'], fontsize=8, color='blue') 85 | plt.imshow(I, aspect='equal') 86 | annids = coco.getAnnIds(imgIds=img['id']) 87 | anns = coco.loadAnns(annids) 88 | showAnns(anns) 89 | 90 | 91 | if 1: 92 | m = 4 93 | n = 4 94 | plt.figure(figsize=(m * 6, n * 4)) 95 | plt.subplots_adjust(top=1, bottom=0, left=0, right=1, hspace=0, wspace=0) 96 | plt.margins(0, 0) 97 | # fig 
= plt.figure(figsize=(18*m,12*n)) 98 | for i in range(1, m * n + 1): 99 | draw(m, n, i) 100 | plt.savefig('detect_example.png') 101 | plt.show() 102 | -------------------------------------------------------------------------------- /math_lib/gaussian_filter.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/8/12 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from scipy import ndimage 10 | import numpy as np 11 | 12 | 13 | def make_gaussian_kernel(sigma, kernel_size): 14 | """ 15 | Args: 16 | sigma: 17 | kernel_size: 18 | Returns: torch tensor 1*kernel_size 19 | """ 20 | ts = torch.linspace(-kernel_size // 2, kernel_size // 2 + 1, kernel_size) 21 | gauss = torch.exp((-(ts / sigma) ** 2 / 2)) 22 | kernel = gauss / gauss.sum() 23 | return kernel 24 | 25 | 26 | def init_model(num_keypoints, kernel): 27 | seq = nn.Sequential( 28 | nn.ReflectionPad2d(kernel // 2), 29 | nn.Conv2d(num_keypoints, num_keypoints, kernel, stride=1, padding=0, bias=None, 30 | groups=num_keypoints)) 31 | return seq 32 | 33 | 34 | class GaussianLayer(nn.Module): 35 | def __init__(self, num_keypoints, kernel): 36 | """ 37 | batch gaussian layer 38 | Args: 39 | num_keypoints: cocowholebody 133 40 | kernel: 41 | """ 42 | super(GaussianLayer, self).__init__() 43 | self.kernel = kernel 44 | self.seq = nn.Sequential( 45 | nn.ReflectionPad2d(kernel // 2), 46 | nn.Conv2d(num_keypoints, num_keypoints, kernel, stride=1, padding=0, bias=None, 47 | groups=num_keypoints)) 48 | self.weights_init() 49 | 50 | def forward(self, x): 51 | """ 52 | Args: 53 | x: N keypoints number B*N*H*W 54 | Returns: 55 | """ 56 | return self.seq(x) 57 | 58 | def weights_init(self): 59 | # check mmpose /mmpose/mmpose/core/evaluation/top_down_eval.py 60 | sigma = 0.3 * ((self.kernel - 1) * 0.5 - 1) + 0.8 61 | n = np.zeros((self.kernel, self.kernel)) 62 | n[self.kernel // 2, self.kernel // 2] = 1 63 | k = ndimage.gaussian_filter(n, sigma=sigma) 64 | for name, f in self.named_parameters(): 65 | f.data.copy_(torch.from_numpy(k)) 66 | 67 | 68 | class GaussianLayerPicklable(nn.Module): 69 | def __init__(self, num_keypoints, kernel): 70 | super().__init__() 71 | self.num_keypoints = num_keypoints 72 | self.kernel = kernel 73 | # self.seq = init_model(self.num_keypoints, self.kernel).cuda() 74 | # self.weights_init() 75 | 76 | def forward(self, x): 77 | return self.seq(x) 78 | 79 | def weights_init(self): 80 | # check mmpose /mmpose/mmpose/core/evaluation/top_down_eval.py 81 | sigma = 0.3 * ((self.kernel - 1) * 0.5 - 1) + 0.8 82 | n = np.zeros((self.kernel, self.kernel)) 83 | n[self.kernel // 2, self.kernel // 2] = 1 84 | k = ndimage.gaussian_filter(n, sigma=sigma) 85 | for name, f in self.named_parameters(): 86 | # f.data.copy_(k) 87 | f.data.copy_(torch.from_numpy(k).cuda()) 88 | 89 | def __getstate__(self): 90 | return { 91 | 'num_keypoints': self.num_keypoints, 92 | 'kernel': self.kernel, 93 | } 94 | 95 | def __setstate__(self, values): 96 | super().__init__() 97 | self.num_keypoints = values['num_keypoints'] 98 | self.kernel = values['kernel'] 99 | self.seq = init_model(self.num_keypoints, self.kernel).cuda() 100 | self.weights_init() 101 | 102 | 103 | if __name__ == '__main__': 104 | sigma = 2.9 105 | kernel = 17 106 | n = np.zeros((kernel, kernel)) 107 | n[kernel // 2, kernel // 2] = 1 108 | k = ndimage.gaussian_filter(n, sigma=sigma) 109 | print(k) 110 | 
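# Hedged usage sketch added by the editor (not in the original file): shows the
# intended call pattern for GaussianLayer on a batch of keypoint heatmaps
# (B x N x H x W). The 133 channels (COCO-WholeBody) and the 64x48 heatmap size
# are illustrative assumptions; only the shapes matter here.
def _gaussian_layer_demo():
    heatmaps = torch.rand(2, 133, 64, 48)                 # B x N x H x W
    smoother = GaussianLayer(num_keypoints=133, kernel=17)
    with torch.no_grad():
        smoothed = smoother(heatmaps)                     # depthwise Gaussian blur, shape preserved
    print(smoothed.shape)                                 # torch.Size([2, 133, 64, 48])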
-------------------------------------------------------------------------------- /seg_lib/u2net/u2net_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/1/13 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | from cv2box import CVImage, MyFpsCounter 7 | from apstone import ModelBase 8 | import cv2 9 | import numpy as np 10 | 11 | MODEL_ZOO = { 12 | # https://github.com/danielgatis/rembg 13 | # input_name:['input.1'], shape:[[1, 3, 320, 320]] 14 | # output_name:['1959', '1960', '1961', '1962', '1963', '1964', '1965'], shape:[[1, 1, 320, 320], [1, 1, 320, 320], [1, 1, 320, 320], [1, 1, 320, 320], [1, 1, 320, 320], [1, 1, 320, 320], [1, 1, 320, 320]] 15 | 'u2net': { 16 | 'model_path': 'pretrain_models/seg_lib/u2net/u2net.onnx' 17 | }, 18 | 'u2net_human_seg': { 19 | 'model_path': 'pretrain_models/seg_lib/u2net/u2net_human_seg.onnx' 20 | }, 21 | # same as u2net, smaller 22 | 'u2netp': { 23 | 'model_path': 'pretrain_models/seg_lib/u2net/u2netp.onnx' 24 | }, 25 | # quantization from onnx-runtime 26 | # https://github.com/xuebinqin/U-2-Net/issues/295#issuecomment-1083041216 27 | 'silueta': { 28 | 'model_path': 'pretrain_models/seg_lib/u2net/silueta.onnx' 29 | }, 30 | # from https://www.modelscope.cn/models/damo/cv_u2net_salient-detection/summary 31 | 'u2net-salient-detection_damo': { 32 | 'model_path': 'pretrain_models/seg_lib/u2net/u2net-salient-detection_damo.onnx' 33 | }, 34 | } 35 | 36 | 37 | class U2netSeg(ModelBase): 38 | def __init__(self, model_type='u2net', provider='gpu'): 39 | super().__init__(MODEL_ZOO[model_type], provider) 40 | self.model_type = model_type 41 | 42 | self.input_mean = (0.485, 0.456, 0.406) 43 | self.input_std = (0.229, 0.224, 0.225) 44 | self.input_size = (320, 320) 45 | 46 | def forward(self, image_in, post_process=False): 47 | """ 48 | Args: 49 | image_in: CVImage access type 50 | post_process: Post Process the mask for a smooth boundary by applying Morphological Operations 51 | Research based on paper: https://www.sciencedirect.com/science/article/pii/S2352914821000757 52 | Returns: mask 0-1 53 | """ 54 | image_in_size = CVImage(image_in).bgr.shape 55 | image_in_pre = CVImage(image_in).blob_innormal(self.input_size, self.input_mean, self.input_std, rgb=True, 56 | interpolation=cv2.INTER_LANCZOS4) 57 | pred = self.model.forward(image_in_pre)[0][:, 0, :, :].transpose(1, 2, 0) 58 | ma = np.max(pred) 59 | mi = np.min(pred) 60 | pred = (pred - mi) / (ma - mi) 61 | pred = (pred*255).astype(np.uint8) 62 | if post_process: 63 | # 开操作 平滑mask边缘 64 | pred = cv2.morphologyEx(pred, cv2.MORPH_OPEN, 65 | cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))) 66 | pred = cv2.GaussianBlur(pred, (5, 5), sigmaX=2, sigmaY=2, borderType=cv2.BORDER_DEFAULT) 67 | pred = np.where(pred < 0.5, 0, 1)[..., np.newaxis].astype(np.float32) 68 | pred = CVImage(pred).resize(image_in_size[:-1][::-1], interpolation=cv2.INTER_LANCZOS4).bgr 69 | 70 | # First create the image with alpha channel 71 | rgba = cv2.cvtColor(CVImage(image_in).bgr, cv2.COLOR_RGB2RGBA) 72 | # Then assign the mask to the last channel of the image 73 | rgba[:, :, 3] = pred 74 | # CVImage(rgba).show() 75 | 76 | # rgb = cv2.bitwise_and(rgba, rgba, mask=mask) 77 | 78 | return pred, rgba 79 | 80 | 81 | if __name__ == '__main__': 82 | fb_cur = U2netSeg(model_type='u2net-salient-detection_damo', provider='gpu') 83 | mask, rgba = fb_cur.forward('resources/test1.jpg', post_process=False) 84 | CVImage(mask).show() 85 | 
CVImage(rgba).save('output.png') 86 | -------------------------------------------------------------------------------- /sd_lib/clip_encoder.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/8/19 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import numpy as np 6 | import torch 7 | from PIL import Image 8 | 9 | from transformers import CLIPTextModel, CLIPTokenizer 10 | from transformers import CLIPVisionModelWithProjection, CLIPImageProcessor 11 | 12 | """ 13 | CLIP from openai 14 | clip image encoder from: 15 | https://github.com/tencent-ailab/IP-Adapter/blob/00cbac222600928f68103c16ed9931074fca9edd/ip_adapter/ip_adapter.py#L45 16 | """ 17 | 18 | CLIP_TEXT_PRETRAIN = './sd_models/stable-diffusion-v1-5' 19 | CLIP_IMAGE_PRETRAIN = './sd_models/clip_image_encoder' 20 | IMAGE_PROJ_PRETRAIN = './sd_models/ip_adapter_image_proj/ip-adapter_sd15.bin' 21 | 22 | 23 | class ClipText: 24 | def __init__(self): 25 | self.tokenizer = CLIPTokenizer.from_pretrained(CLIP_TEXT_PRETRAIN, subfolder="tokenizer") 26 | self.text_encoder = CLIPTextModel.from_pretrained(CLIP_TEXT_PRETRAIN, subfolder="text_encoder").cuda() 27 | self.text_encoder.requires_grad_(False) 28 | 29 | def forward(self, prompt: list[str]): 30 | # (b,77) 31 | prompt_ids = self.tokenizer( 32 | prompt, max_length=self.tokenizer.model_max_length, padding="max_length", truncation=True, 33 | return_tensors="pt" 34 | ).input_ids 35 | # (b,77,768) 36 | encoder_hidden_states = self.text_encoder(prompt_ids[0][np.newaxis, :].cuda())[0] 37 | return encoder_hidden_states 38 | 39 | 40 | class ClipImage: 41 | def __init__(self): 42 | self.device = 'cuda' 43 | self.image_encoder = CLIPVisionModelWithProjection.from_pretrained(CLIP_IMAGE_PRETRAIN).to(self.device, 44 | dtype=torch.float16) 45 | self.clip_image_processor = CLIPImageProcessor() 46 | 47 | def forward(self, pil_image): 48 | """ 49 | Args: 50 | pil_image: RGB 51 | Returns: torch.Size([1, 1024]) 52 | """ 53 | if isinstance(pil_image, Image.Image): 54 | pil_image = [pil_image] 55 | clip_image = self.clip_image_processor(images=pil_image, return_tensors="pt").pixel_values 56 | clip_image_embeds = self.image_encoder(clip_image.to(self.device, dtype=torch.float16)).image_embeds 57 | return clip_image_embeds 58 | 59 | 60 | class ImageProj: 61 | def __init__(self, num_tokens=4): 62 | from sd_lib.models import ImageProjModel 63 | self.device = "cuda" 64 | self.image_proj_model = ImageProjModel( 65 | cross_attention_dim=768, # self.pipe.unet.config.cross_attention_dim 66 | clip_embeddings_dim=1024, # self.image_encoder.config.projection_dim 67 | clip_extra_context_tokens=num_tokens, 68 | ).to(self.device, dtype=torch.float16) 69 | state_dict = torch.load(IMAGE_PROJ_PRETRAIN, map_location="cpu") 70 | self.image_proj_model.load_state_dict(state_dict["image_proj"]) 71 | 72 | def forward(self, clip_image_embeds): 73 | """ 74 | Args: 75 | clip_image_embeds: torch.Size([1, 1024]) 76 | Returns: torch.Size([1, 4, 768]) 77 | """ 78 | image_prompt_embeds = self.image_proj_model(clip_image_embeds) 79 | uncond_image_prompt_embeds = self.image_proj_model(torch.zeros_like(clip_image_embeds)) 80 | return image_prompt_embeds, uncond_image_prompt_embeds 81 | 82 | 83 | if __name__ == '__main__': 84 | image_p = 'resources/for_sd/girl_reading_512_crop.png' 85 | clip_image = ClipImage() 86 | image_embedding = clip_image.forward(Image.open(image_p)) 87 | print(image_embedding.shape) 88 | 89 | ip = ImageProj() 90 | 
image_proj_embedding, _ = ip.forward(image_embedding) 91 | print(image_proj_embedding.shape) 92 | -------------------------------------------------------------------------------- /body_lib/body_kp_detector/body_detector_lightweight/body_detector_lightweight_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/10 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch 7 | """ 8 | import cv2 9 | import numpy as np 10 | from cv2box import CVImage 11 | import math 12 | from apstone import ONNXModel 13 | 14 | model_path = 'pretrain_models/digital_human/body_detector_lightweight/body_detector_dynamic.onnx' 15 | 16 | 17 | class BodyDetectorLightweight: 18 | def __init__(self, input_height_size=256, pad_value=(0, 0, 0), stride=8, upsample_ratio=4): 19 | self.input_height_size = input_height_size 20 | self.pad_value = pad_value 21 | self.stride = stride 22 | self.upsample_ratio = upsample_ratio 23 | self.input_std = 256 24 | self.input_mean = 128 25 | 26 | self.model = ONNXModel(model_path) 27 | 28 | @staticmethod 29 | def pad_width(img, stride, pad_value, min_dims): 30 | h, w, _ = img.shape 31 | h = min(min_dims[0], h) 32 | min_dims[0] = math.ceil(min_dims[0] / float(stride)) * stride 33 | min_dims[1] = max(min_dims[1], w) 34 | min_dims[1] = math.ceil(min_dims[1] / float(stride)) * stride 35 | pad = [] 36 | pad.append(int(math.floor((min_dims[0] - h) / 2.0))) 37 | pad.append(int(math.floor((min_dims[1] - w) / 2.0))) 38 | pad.append(int(min_dims[0] - h - pad[0])) 39 | pad.append(int(min_dims[1] - w - pad[1])) 40 | padded_img = cv2.copyMakeBorder(img, pad[0], pad[2], pad[1], pad[3], 41 | cv2.BORDER_CONSTANT, value=pad_value) 42 | return padded_img, pad 43 | 44 | # def post_process(self): 45 | # total_keypoints_num = 0 46 | # all_keypoints_by_type = [] 47 | # num_keypoints = 18 48 | # for kpt_idx in range(num_keypoints): # 19th for bg 49 | # total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type, 50 | # total_keypoints_num) 51 | # 52 | # pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs, demo=True) 53 | # for kpt_id in range(all_keypoints.shape[0]): 54 | # all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale 55 | # all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale 56 | 57 | def forward(self, img): 58 | height, width, _ = img.shape 59 | scale = self.input_height_size / height 60 | scaled_img = cv2.resize(img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC) 61 | 62 | min_dims = [self.input_height_size, max(scaled_img.shape[1], self.input_height_size)] 63 | padded_img, pad = self.pad_width(scaled_img, self.stride, self.pad_value, min_dims) 64 | 65 | stages_output = self.model.forward( 66 | CVImage(padded_img).set_blob(self.input_std, self.input_mean, input_size=None).blob_rgb) 67 | 68 | stage2_heatmaps = stages_output[-2] 69 | heatmaps = np.transpose(stage2_heatmaps[0], (1, 2, 0)) 70 | heatmaps = cv2.resize(heatmaps, (0, 0), fx=self.upsample_ratio, fy=self.upsample_ratio, 71 | interpolation=cv2.INTER_CUBIC) 72 | 73 | stage2_pafs = stages_output[-1] 74 | pafs = np.transpose(stage2_pafs.squeeze(), (1, 2, 0)) 75 | pafs = cv2.resize(pafs, (0, 0), fx=self.upsample_ratio, fy=self.upsample_ratio, interpolation=cv2.INTER_CUBIC) 76 | 77 | return heatmaps, pafs, scale, 
pad 78 | 79 | if __name__ == '__main__': 80 | img_p = 'test_img/t_pose.jpeg' 81 | bdl = BodyDetectorLightweight() 82 | results = bdl.forward(CVImage(img_p).bgr) 83 | print(results) 84 | -------------------------------------------------------------------------------- /data_lib/dataset_preprocess/gen_dataset_txt.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import sys 4 | import random 5 | from pathlib import Path 6 | from utils import get_path_by_ext 7 | from cv2box import CVFile 8 | 9 | 10 | def gen_txt_from_path(base_path, img_format='jpg', train_ratio=0.8): 11 | train_data_path = os.path.join(base_path, 'dataset') 12 | 13 | labels = os.listdir(train_data_path) 14 | 15 | for index, label in enumerate(labels): 16 | print('label: {}\t index: {}'.format(label, index)) 17 | # img_list = glob.glob(os.path.join(train_data_path, label, '*.{}'.format(img_format))) 18 | img_list = list(Path(os.path.join(train_data_path, label)).glob('*/*.{}'.format(img_format))) 19 | random.shuffle(img_list) 20 | print(len(img_list)) 21 | train_list = img_list[:int(train_ratio * len(img_list))] 22 | val_list = img_list[(int(train_ratio * len(img_list)) + 1):] 23 | with open(os.path.join(base_path, 'train.txt'), 'a') as f: 24 | for img in train_list: 25 | img = str(img).replace(base_path, '') 26 | # print(img) 27 | f.write(img + ' ' + str(index)) 28 | f.write('\n') 29 | 30 | with open(os.path.join(base_path, 'val.txt'), 'a') as f: 31 | for img in val_list: 32 | img = str(img).replace(base_path, '') 33 | # print(img + ' ' + str(index)) 34 | f.write(img + ' ' + str(index)) 35 | f.write('\n') 36 | 37 | # imglist = glob.glob(os.path.join(valdata_path, '*.jpg')) 38 | # with open(txtpath + 'test.txt', 'a') as f: 39 | # for img in imglist: 40 | # f.write(img) 41 | # f.write('\n') 42 | 43 | 44 | """ 45 | 'female','male', 46 | 'front', 'side', 47 | 'clean','occlusion', 48 | 'super_hq', 'hq', 'blur', 49 | 'nonhuman' 50 | """ 51 | 52 | 53 | def gen_txt_from_json(base_path, train_ratio=0.8): 54 | # multi label , labelme 55 | train_list = {} 56 | test_list = {} 57 | 58 | for img_path in get_path_by_ext(base_path): 59 | label = '' 60 | img_path_str = str(img_path)[70:] 61 | json_path = str(img_path.parent / (str(img_path.stem) + '.json')) 62 | # print(json_path) 63 | json_data = CVFile(json_path).data 64 | try: 65 | label += '01' if json_data['flags']['男'] else '10' 66 | label += '01' if json_data['flags']['侧脸'] else '10' 67 | label += '01' if json_data['flags']['遮挡'] else '10' 68 | if json_data['flags']['非常清晰']: 69 | label += '100' 70 | elif json_data['flags']['清晰']: 71 | label += '010' 72 | else: 73 | label += '001' 74 | label += '1' if json_data['flags']['非人脸'] else '0' 75 | except TypeError: 76 | print(json_path, json_data) 77 | continue 78 | 79 | if random.random() > train_ratio: 80 | test_list[img_path_str] = label 81 | else: 82 | train_list[img_path_str] = label 83 | 84 | with open(os.path.join(base_path, 'train.txt'), 'a') as f: 85 | for k, v in train_list.items(): 86 | f.write(k + ' ' + v) 87 | f.write('\n') 88 | 89 | with open(os.path.join(base_path, 'val.txt'), 'a') as f: 90 | for k, v in test_list.items(): 91 | f.write(k + ' ' + v) 92 | f.write('\n') 93 | 94 | 95 | if __name__ == '__main__': 96 | gen_txt_from_json('') 97 | 98 | # from tqdm import tqdm 99 | # for img_path in tqdm(get_path_by_ext('')): 100 | # json_p = str(img_path).replace('.jpg', '.json') 101 | # if not Path(json_p).exists(): 102 | # os.remove(str(img_path)) 103 | # # raise '11' 
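    # Note (descriptive): the commented-out loop above is a one-off cleanup pass
    # that deletes images with no paired labelme .json annotation (an example
    # filename is kept in the comment below).
    # Labelme flag keys used by gen_txt_from_json: 男 = male, 侧脸 = side face,
    # 遮挡 = occlusion, 非常清晰 = super clear, 清晰 = clear, 非人脸 = non-face.
    # Each line written to train.txt / val.txt is
    # "<relative img path> <10-char multi-hot label>", following the label order
    # in the docstring above, e.g. (illustrative) "some_dir/face_001.jpg 0110100100".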
104 | # # ef93c75f17c544ccbb436f5cfeb6e656.json -------------------------------------------------------------------------------- /hand_lib/hand_mesh/minimal_hands/kinematics.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/2/11 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import numpy as np 6 | 7 | 8 | class MANOHandJoints: 9 | n_joints = 21 10 | 11 | labels = [ 12 | 'W', # 0 13 | 'I0', 'I1', 'I2', # 3 14 | 'M0', 'M1', 'M2', # 6 15 | 'L0', 'L1', 'L2', # 9 16 | 'R0', 'R1', 'R2', # 12 17 | 'T0', 'T1', 'T2', # 15 18 | 'I3', 'M3', 'L3', 'R3', 'T3' # 20, tips are manually added (not in MANO) 19 | ] 20 | 21 | # finger tips are not joints in MANO, we label them on the mesh manually 22 | mesh_mapping = {16: 333, 17: 444, 18: 672, 19: 555, 20: 744} 23 | 24 | parents = [ 25 | None, 26 | 0, 1, 2, 27 | 0, 4, 5, 28 | 0, 7, 8, 29 | 0, 10, 11, 30 | 0, 13, 14, 31 | 3, 6, 9, 12, 15 32 | ] 33 | 34 | 35 | class MPIIHandJoints: 36 | n_joints = 21 37 | 38 | labels = [ 39 | 'W', # 0 40 | 'T0', 'T1', 'T2', 'T3', # 4 41 | 'I0', 'I1', 'I2', 'I3', # 8 42 | 'M0', 'M1', 'M2', 'M3', # 12 43 | 'R0', 'R1', 'R2', 'R3', # 16 44 | 'L0', 'L1', 'L2', 'L3', # 20 45 | ] 46 | 47 | parents = [ 48 | None, 49 | 0, 1, 2, 3, 50 | 0, 5, 6, 7, 51 | 0, 9, 10, 11, 52 | 0, 13, 14, 15, 53 | 0, 17, 18, 19 54 | ] 55 | 56 | 57 | def mpii_to_mano(mpii): 58 | """ 59 | Map data from MPIIHandJoints order to MANOHandJoints order. 60 | Parameters 61 | ---------- 62 | mpii : np.ndarray, [21, ...] 63 | Data in MPIIHandJoints order. Note that the joints are along axis 0. 64 | Returns 65 | ------- 66 | np.ndarray 67 | Data in MANOHandJoints order. 68 | """ 69 | mano = [] 70 | for j in range(MANOHandJoints.n_joints): 71 | mano.append( 72 | mpii[MPIIHandJoints.labels.index(MANOHandJoints.labels[j])] 73 | ) 74 | mano = np.stack(mano, 0) 75 | return mano 76 | 77 | 78 | def mano_to_mpii(mano): 79 | """ 80 | Map data from MANOHandJoints order to MPIIHandJoints order. 81 | Parameters 82 | ---------- 83 | mano : np.ndarray, [21, ...] 84 | Data in MANOHandJoints order. Note that the joints are along axis 0. 85 | Returns 86 | ------- 87 | np.ndarray 88 | Data in MPIIHandJoints order. 89 | """ 90 | mpii = [] 91 | for j in range(MPIIHandJoints.n_joints): 92 | mpii.append( 93 | mano[MANOHandJoints.labels.index(MPIIHandJoints.labels[j])] 94 | ) 95 | mpii = np.stack(mpii, 0) 96 | return mpii 97 | 98 | 99 | def xyz_to_delta(xyz, joints_def): 100 | """ 101 | Compute bone orientations from joint coordinates (child joint - parent joint). 102 | The returned vectors are normalized. 103 | For the root joint, it will be a zero vector. 104 | Parameters 105 | ---------- 106 | xyz : np.ndarray, shape [J, 3] 107 | Joint coordinates. 108 | joints_def : object 109 | An object that defines the kinematic skeleton, e.g. MPIIHandJoints. 110 | Returns 111 | ------- 112 | np.ndarray, shape [J, 3] 113 | The **unit** vectors from each child joint to its parent joint. 114 | For the root joint, it's are zero vector. 115 | np.ndarray, shape [J, 1] 116 | The length of each bone (from child joint to parent joint). 117 | For the root joint, it's zero. 
118 | """ 119 | delta = [] 120 | for j in range(joints_def.n_joints): 121 | p = joints_def.parents[j] 122 | if p is None: 123 | delta.append(np.zeros(3)) 124 | else: 125 | delta.append(xyz[j] - xyz[p]) 126 | delta = np.stack(delta, 0) 127 | lengths = np.linalg.norm(delta, axis=-1, keepdims=True) 128 | delta /= np.maximum(lengths, np.finfo(xyz.dtype).eps) 129 | return delta, lengths 130 | -------------------------------------------------------------------------------- /mocap_lib/smooth_filter/one_euro_api.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Adapted from https://github.com/HoBeom/OneEuroFilter-Numpy 3 | # Original licence: Copyright (c) HoBeom Jeon, under the MIT License. 4 | # ------------------------------------------------------------------------------ 5 | import math 6 | 7 | import numpy as np 8 | 9 | 10 | def smoothing_factor(t_e, cutoff): 11 | r = 2 * math.pi * cutoff * t_e 12 | return r / (r + 1) 13 | 14 | 15 | def exponential_smoothing(a, x, x_prev): 16 | return a * x + (1 - a) * x_prev 17 | 18 | 19 | class OneEuro: 20 | 21 | def __init__(self, t0, x0, dx0, min_cutoff, beta, d_cutoff=1.0): 22 | super(OneEuro, self).__init__() 23 | """Initialize the one euro filter.""" 24 | # The parameters. 25 | self.min_cutoff = float(min_cutoff) 26 | self.beta = float(beta) 27 | self.d_cutoff = float(d_cutoff) 28 | # Previous values. 29 | self.x_prev = x0 30 | self.dx_prev = dx0 31 | self.t_prev = t0 32 | 33 | def __call__(self, x, t=None): 34 | """Compute the filtered signal.""" 35 | 36 | if t is None: 37 | # Assume input is feed frame by frame if not specified 38 | t = self.t_prev + 1 39 | 40 | t_e = t - self.t_prev 41 | 42 | # The filtered derivative of the signal. 43 | a_d = smoothing_factor(t_e, self.d_cutoff) # [k, c] 44 | dx = (x - self.x_prev) / t_e 45 | dx_hat = exponential_smoothing(a_d, dx, self.dx_prev) 46 | 47 | # The filtered signal. 48 | cutoff = self.min_cutoff + self.beta * np.abs(dx_hat) 49 | a = smoothing_factor(t_e, cutoff) 50 | x_hat = exponential_smoothing(a, x, self.x_prev) 51 | # Memorize the previous values. 52 | self.x_prev = x_hat 53 | self.dx_prev = dx_hat 54 | self.t_prev = t 55 | return x_hat 56 | 57 | 58 | class OneEuroFilter: 59 | """Oneeuro filter, source code: https://github.com/mkocabas/VIBE/blob/c0 60 | c3f77d587351c806e901221a9dc05d1ffade4b/lib/utils/smooth_pose.py. 61 | 62 | Args: 63 | min_cutoff (float, optional): Decreasing the minimum cutoff frequency 64 | decreases slow speed jitter 65 | beta (float, optional): Increasing the speed coefficient(beta) 66 | decreases speed lag. 
67 | """ 68 | 69 | # # Not shareable because the filter holds status of a specific target 70 | # _shareable: bool = False 71 | 72 | def __init__(self, min_cutoff=0.004, beta=0.7): 73 | # OneEuroFilter has Markov Property and maintains status variables 74 | # within the class, thus has a windows_size of 1 75 | # super().__init__(window_size=1) 76 | self.min_cutoff = min_cutoff 77 | self.beta = beta 78 | self._one_euro = None 79 | 80 | def forward(self, x: np.ndarray): 81 | assert x.ndim == 3, ('Input should be an array with shape [T, K, C]' 82 | f', but got invalid shape {x.shape}') 83 | 84 | pred_pose_hat = x.copy() 85 | 86 | if self._one_euro is None: 87 | # The filter is invoked for the first time 88 | # Initialize the filter 89 | self._one_euro = OneEuro( 90 | np.zeros_like(x[0]), 91 | x[0], 92 | dx0=0.0, 93 | min_cutoff=self.min_cutoff, 94 | beta=self.beta, 95 | ) 96 | t0 = 1 97 | else: 98 | # The filter has been invoked 99 | t0 = 0 100 | 101 | for t, pose in enumerate(x): 102 | if t < t0: 103 | # If the filter is invoked for the first time 104 | # set pred_pose_hat[0] = x[0] 105 | continue 106 | pose = self._one_euro(pose) 107 | pred_pose_hat[t] = pose 108 | 109 | return pred_pose_hat 110 | -------------------------------------------------------------------------------- /mocap_lib/body_wholebody/wholebody_kp_detector_mmpose.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/8/17 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | from cv2box import CVImage, MyFpsCounter 7 | from apstone.wrappers.mmlab_wrapper import KpDetectorBase 8 | 9 | MODEL_ZOO = { 10 | # API 62fps trt16 154fps 11 | 'r50': { 12 | 'model_path': 'pretrain_models/mocap_lib/coco_whole_body/res50_coco_wholebody_256x192-9e37ed88_20201004_remove_initializer.onnx', 13 | 'model_input_size': (192, 256) 14 | }, # w h 15 | # 195fps 16 | 'r50_trt': { 17 | 'model_path': 'pretrain_models/mocap_lib/coco_whole_body/res50_coco_wholebody_256x192-9e37ed88_20201004.engine', 18 | 'model_input_size': (192, 256) 19 | }, 20 | # API 34fps 21 | 'vipnas_mbv3_dark': { 22 | 'model_path': 'pretrain_models/mocap_lib/coco_whole_body/vipnas_mbv3_coco_wholebody_256x192_dark-e2158108_20211205_remove_initializer.onnx', 23 | 'model_input_size': (192, 256) 24 | }, 25 | # API 38fps 26 | 'vipnas_r50_dark': { 27 | 'model_path': 'pretrain_models/mocap_lib/coco_whole_body/vipnas_res50_wholebody_256x192_dark-67c0ce35_20211112_remove_initializer.onnx', 28 | 'model_input_size': (192, 256) 29 | }, 30 | # trt16 50fps 31 | 'hrnet_w48_384_dark': { 32 | 'model_path': 'pretrain_models/mocap_lib/coco_whole_body/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918_remove_initializer.onnx', 33 | 'model_input_size': (288, 384), 34 | 'kernel': 17}, 35 | 'hrnet_w48_384_dark_dynamic': { 36 | 'model_path': 'pretrain_models/mocap_lib/coco_whole_body/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918_dynamic.onnx', 37 | 'model_input_size': (288, 384), 38 | 'input_dynamic_shape': (4, 3, 288, 384), 39 | 'kernel': 17}, 40 | # 48fps 41 | 'hrnet_w48_384_dark_trt': { 42 | 'model_path': 'pretrain_models/mocap_lib/coco_whole_body/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918.engine', 43 | 'model_input_size': (288, 384), 44 | 'kernel': 17}, 45 | } 46 | 47 | # 用于镜像翻转的pair对 48 | flip_pairs = [ 49 | [1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16], [17, 20], [18, 21], [19, 22] 50 | ] 51 | for i in range(91, 112): 52 | flip_pairs.append([i, i 
+ 21]) 53 | 54 | 55 | class BodyWholebodyDetector(KpDetectorBase): 56 | def __init__(self, model_type='r50', provider='gpu'): 57 | super().__init__(MODEL_ZOO[model_type], provider) 58 | self.dark_flag = model_type.find('dark') > 0 59 | 60 | def forward(self, image_in_, bbox_, show=False, mirror_test=False): 61 | if len(bbox_) == 0: 62 | return [[0, 0, 0]] * 133 63 | 64 | outputs = self.model.forward(self.preprocess(image_in_, bbox_)) 65 | 66 | if mirror_test: 67 | outputs_mirror = self.model.forward(self.preprocess(image_in_, bbox_, mirror=mirror_test)) 68 | kp_results = self.postprocess_mirror(outputs, outputs_mirror, flip_pairs) 69 | else: 70 | kp_results = self.postprocess(outputs) 71 | 72 | if show: 73 | self.show(image_in_, kp_results) 74 | 75 | return kp_results 76 | 77 | 78 | if __name__ == '__main__': 79 | image_path = 'resources/for_pose/t_pose_1080p.jpeg' 80 | image_in = CVImage(image_path).bgr 81 | bbox = [493, 75, 1427, 1044] 82 | 83 | bwd = BodyWholebodyDetector(model_type='hrnet_w48_384_dark_dynamic', provider='trt') 84 | kps = bwd.forward(image_in, bbox, show=True, mirror_test=False) 85 | # print(kps) 86 | 87 | with MyFpsCounter('model forward 10 times fps: ') as mfc: 88 | for i in range(10): 89 | kps = bwd.forward(image_in, bbox) 90 | 91 | # # for video 92 | # from cv2box import CVVideoLoader 93 | # from tqdm import tqdm 94 | # 95 | # with CVVideoLoader('') as cvvl: 96 | # for _ in tqdm(range(len(cvvl))): 97 | # _, frame = cvvl.get() 98 | # kps = bwd.forward(image_in, bbox, show=True, mirror_test=False) 99 | -------------------------------------------------------------------------------- /art_lib/optical_flow_estimate/raft/utils.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/8/21 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | # Ref: https://github.com/liruoteng/OpticalFlowToolkit/blob/5cf87b947a0032f58c922bbc22c0afb30b90c418/lib/flowlib.py#L249 6 | 7 | import numpy as np 8 | 9 | UNKNOWN_FLOW_THRESH = 1e7 10 | 11 | 12 | def make_color_wheel(): 13 | """ 14 | Generate color wheel according Middlebury color code 15 | :return: Color wheel 16 | """ 17 | RY = 15 18 | YG = 6 19 | GC = 4 20 | CB = 11 21 | BM = 13 22 | MR = 6 23 | 24 | ncols = RY + YG + GC + CB + BM + MR 25 | 26 | colorwheel = np.zeros([ncols, 3]) 27 | 28 | col = 0 29 | 30 | # RY 31 | colorwheel[0:RY, 0] = 255 32 | colorwheel[0:RY, 1] = np.transpose(np.floor(255 * np.arange(0, RY) / RY)) 33 | col += RY 34 | 35 | # YG 36 | colorwheel[col:col + YG, 0] = 255 - np.transpose(np.floor(255 * np.arange(0, YG) / YG)) 37 | colorwheel[col:col + YG, 1] = 255 38 | col += YG 39 | 40 | # GC 41 | colorwheel[col:col + GC, 1] = 255 42 | colorwheel[col:col + GC, 2] = np.transpose(np.floor(255 * np.arange(0, GC) / GC)) 43 | col += GC 44 | 45 | # CB 46 | colorwheel[col:col + CB, 1] = 255 - np.transpose(np.floor(255 * np.arange(0, CB) / CB)) 47 | colorwheel[col:col + CB, 2] = 255 48 | col += CB 49 | 50 | # BM 51 | colorwheel[col:col + BM, 2] = 255 52 | colorwheel[col:col + BM, 0] = np.transpose(np.floor(255 * np.arange(0, BM) / BM)) 53 | col += + BM 54 | 55 | # MR 56 | colorwheel[col:col + MR, 2] = 255 - np.transpose(np.floor(255 * np.arange(0, MR) / MR)) 57 | colorwheel[col:col + MR, 0] = 255 58 | 59 | return colorwheel 60 | 61 | 62 | colorwheel = make_color_wheel() 63 | 64 | 65 | def compute_color(u, v): 66 | """ 67 | compute optical flow color map 68 | :param u: optical flow horizontal map 69 | :param v: optical flow 
vertical map 70 | :return: optical flow in color code 71 | """ 72 | [h, w] = u.shape 73 | img = np.zeros([h, w, 3]) 74 | nanIdx = np.isnan(u) | np.isnan(v) 75 | u[nanIdx] = 0 76 | v[nanIdx] = 0 77 | 78 | ncols = np.size(colorwheel, 0) 79 | 80 | rad = np.sqrt(u ** 2 + v ** 2) 81 | 82 | a = np.arctan2(-v, -u) / np.pi 83 | 84 | fk = (a + 1) / 2 * (ncols - 1) + 1 85 | 86 | k0 = np.floor(fk).astype(int) 87 | 88 | k1 = k0 + 1 89 | k1[k1 == ncols + 1] = 1 90 | f = fk - k0 91 | 92 | for i in range(0, np.size(colorwheel, 1)): 93 | tmp = colorwheel[:, i] 94 | col0 = tmp[k0 - 1] / 255 95 | col1 = tmp[k1 - 1] / 255 96 | col = (1 - f) * col0 + f * col1 97 | 98 | idx = rad <= 1 99 | col[idx] = 1 - rad[idx] * (1 - col[idx]) 100 | notidx = np.logical_not(idx) 101 | 102 | col[notidx] *= 0.75 103 | img[:, :, i] = np.uint8(np.floor(255 * col * (1 - nanIdx))) 104 | 105 | return img 106 | 107 | 108 | def flow_to_image(flow): 109 | """ 110 | Convert flow into middlebury color code image 111 | :param flow: optical flow map 112 | :return: optical flow image in middlebury color 113 | """ 114 | u = flow[:, :, 0] 115 | v = flow[:, :, 1] 116 | 117 | # maxu = -999. 118 | # maxv = -999. 119 | # minu = 999. 120 | # minv = 999. 121 | 122 | idxUnknow = (abs(u) > UNKNOWN_FLOW_THRESH) | (abs(v) > UNKNOWN_FLOW_THRESH) 123 | u[idxUnknow] = 0 124 | v[idxUnknow] = 0 125 | 126 | # maxu = max(maxu, np.max(u)) 127 | # minu = min(minu, np.min(u)) 128 | # 129 | # maxv = max(maxv, np.max(v)) 130 | # minv = min(minv, np.min(v)) 131 | 132 | rad = np.sqrt(u ** 2 + v ** 2) 133 | maxrad = max(-1, np.max(rad)) 134 | 135 | u = u / (maxrad + np.finfo(float).eps) 136 | v = v / (maxrad + np.finfo(float).eps) 137 | 138 | img = compute_color(u, v) 139 | 140 | idx = np.repeat(idxUnknow[:, :, np.newaxis], 3, axis=2) 141 | img[idx] = 0 142 | 143 | return np.uint8(img) 144 | -------------------------------------------------------------------------------- /hand_lib/hand_detector/hand_detector_yolox/hand_detector_yolox.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/1/7 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | from cv2box import CVImage, MyFpsCounter, CVVideoLoader, CVVideoMaker, CVFile 6 | from apstone.mmlab_wrapper import BboxDetectorBase 7 | from tqdm import tqdm 8 | 9 | # input 1*3*640*640 output 1*N*5 1*N 10 | MODEL_ZOO = { 11 | # gpu 68fps 12 | 'yolox_s': { 13 | 'model_path': 'private_models/hand_lib/hand_detector_yolox/yolox_s_100DOH_epoch90_mmdeploy_dynamic.onnx', 14 | 'input_dynamic_shape': (1, 3, 640, 640), 15 | 'model_input_size': (640, 640), 16 | 'label': 1, 17 | }, 18 | 'yolox_s_local': { 19 | 'model_path': 'private_models/hand_lib/hand_detector_yolox/yolox_s_2dataset_epoch127_0922_dynamic.onnx', 20 | 'input_dynamic_shape': (1, 3, 640, 640), 21 | 'model_input_size': (640, 640), 22 | 'label': 0, 23 | }, 24 | # 260 fps 25 | 'yolox_s_trt16': { 26 | 'model_path': 'private_models/hand_lib/hand_detector_yolox/yolox_s_100DOH_epoch90_mmdeploy_static.engine', 27 | 'input_dynamic_shape': (1, 3, 640, 640), 28 | 'model_input_size': (640, 640), 29 | 'label': 1, 30 | }, 31 | } 32 | 33 | 34 | class HandDetectorYolox(BboxDetectorBase): 35 | def __init__(self, model='yolox_s', threshold=0.5, provider='gpu'): 36 | super().__init__(MODEL_ZOO[model], provider) 37 | self.threshold = threshold 38 | self.label = MODEL_ZOO[model]['label'] 39 | 40 | def forward(self, image_in_, show=False): 41 | model_results = 
self.model.forward(self.preprocess(image_in_)) 42 | results_after = self.postprocess(model_results, self.threshold, label=self.label, max_bbox_num=5) 43 | if show: 44 | _ = self.show(image_in_, results_after) 45 | return results_after 46 | 47 | 48 | if __name__ == '__main__': 49 | # image_p = 'resources/for_pose/t_pose_1080p.jpeg' 50 | # img_bgr = CVImage(image_p).bgr 51 | # hd = HandDetectorYolox(model='yolox_s_trt16', threshold=0.5, provider='gpu') # yolox_s_trt16 52 | # hd_result = hd.forward(img_bgr, show=True) 53 | # print(hd_result) 54 | # 55 | # with MyFpsCounter('model forward 10 times fps:') as mfc: 56 | # for i in range(10): 57 | # bboxes = hd.forward(img_bgr) 58 | 59 | # # video detect and show 60 | # hd = HandDetectorYolox(model='yolox_s_local', threshold=0.5) 61 | # with CVVideoLoader('') as cvvl: 62 | # for _ in tqdm(range(len(cvvl))): 63 | # _, img = cvvl.get() 64 | # hd_result = hd.forward(img, show=True) 65 | 66 | # video detect and show to video 67 | hd = HandDetectorYolox(model='yolox_s_local', threshold=0.5) 68 | count = 0 69 | with CVVideoLoader('') as cvvl: 70 | for _ in tqdm(range(len(cvvl))): 71 | _, img = cvvl.get() 72 | hd_result = hd.forward(img, show=False) 73 | out_img = hd.show(img, hd_result) 74 | CVImage(out_img).save(f'./cache/hand_out/{count}.jpg', create_path=True) 75 | count += 1 76 | 77 | # # video 2 pkl 78 | # from cv2box import CVFile 79 | # 80 | # for video_name in ['268', '617', '728', '886']: 81 | # result_list = [] 82 | # video_p = '/{}.mp4'.format( 83 | # video_name) 84 | # cap = cv2.VideoCapture(video_p) 85 | # hd = HandDetectorYolox(0.5) 86 | # while True: 87 | # success, img = cap.read() 88 | # if not success: 89 | # break 90 | # hd_result = hd.forward(img, show=True) 91 | # 92 | # person_results = [] 93 | # for bbox in hd_result[0]: 94 | # person = {'bbox': np.concatenate([bbox, [1]])} 95 | # person_results.append(person) 96 | # result_list.append(person_results) 97 | # 98 | # # result_list.append(hd_result[0]) 99 | # 100 | # CVFile(video_p.replace('.mp4', '_hand_bbox_out.pkl')).pickle_write(result_list) 101 | -------------------------------------------------------------------------------- /sd_lib/ip_adapter/models/resampler.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/8/23 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | # modified from https://github.com/mlfoundations/open_flamingo/blob/main/open_flamingo/src/helpers.py 6 | import math 7 | 8 | import torch 9 | import torch.nn as nn 10 | 11 | 12 | # FFN 13 | def FeedForward(dim, mult=4): 14 | inner_dim = int(dim * mult) 15 | return nn.Sequential( 16 | nn.LayerNorm(dim), 17 | nn.Linear(dim, inner_dim, bias=False), 18 | nn.GELU(), 19 | nn.Linear(inner_dim, dim, bias=False), 20 | ) 21 | 22 | 23 | def reshape_tensor(x, heads): 24 | bs, length, width = x.shape 25 | # (bs, length, width) --> (bs, length, n_heads, dim_per_head) 26 | x = x.view(bs, length, heads, -1) 27 | # (bs, length, n_heads, dim_per_head) --> (bs, n_heads, length, dim_per_head) 28 | x = x.transpose(1, 2) 29 | # (bs, n_heads, length, dim_per_head) --> (bs*n_heads, length, dim_per_head) 30 | x = x.reshape(bs, heads, length, -1) 31 | return x 32 | 33 | 34 | class PerceiverAttention(nn.Module): 35 | def __init__(self, *, dim, dim_head=64, heads=8): 36 | super().__init__() 37 | self.scale = dim_head ** -0.5 38 | self.dim_head = dim_head 39 | self.heads = heads 40 | inner_dim = dim_head * heads 41 | 42 | self.norm1 = 
nn.LayerNorm(dim) 43 | self.norm2 = nn.LayerNorm(dim) 44 | 45 | self.to_q = nn.Linear(dim, inner_dim, bias=False) 46 | self.to_kv = nn.Linear(dim, inner_dim * 2, bias=False) 47 | self.to_out = nn.Linear(inner_dim, dim, bias=False) 48 | 49 | def forward(self, x, latents): 50 | """ 51 | Args: 52 | x (torch.Tensor): image features 53 | shape (b, n1, D) 54 | latent (torch.Tensor): latent features 55 | shape (b, n2, D) 56 | """ 57 | x = self.norm1(x) 58 | latents = self.norm2(latents) 59 | 60 | b, l, _ = latents.shape 61 | 62 | q = self.to_q(latents) 63 | kv_input = torch.cat((x, latents), dim=-2) 64 | k, v = self.to_kv(kv_input).chunk(2, dim=-1) 65 | 66 | q = reshape_tensor(q, self.heads) 67 | k = reshape_tensor(k, self.heads) 68 | v = reshape_tensor(v, self.heads) 69 | 70 | # attention 71 | scale = 1 / math.sqrt(math.sqrt(self.dim_head)) 72 | weight = (q * scale) @ (k * scale).transpose(-2, -1) # More stable with f16 than dividing afterwards 73 | weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype) 74 | out = weight @ v 75 | 76 | out = out.permute(0, 2, 1, 3).reshape(b, l, -1) 77 | 78 | return self.to_out(out) 79 | 80 | 81 | class Resampler(nn.Module): 82 | def __init__( 83 | self, 84 | dim=1024, 85 | depth=8, 86 | dim_head=64, 87 | heads=16, 88 | num_queries=8, 89 | embedding_dim=768, 90 | output_dim=1024, 91 | ff_mult=4, 92 | ): 93 | super().__init__() 94 | 95 | self.latents = nn.Parameter(torch.randn(1, num_queries, dim) / dim ** 0.5) 96 | 97 | self.proj_in = nn.Linear(embedding_dim, dim) 98 | 99 | self.proj_out = nn.Linear(dim, output_dim) 100 | self.norm_out = nn.LayerNorm(output_dim) 101 | 102 | self.layers = nn.ModuleList([]) 103 | for _ in range(depth): 104 | self.layers.append( 105 | nn.ModuleList( 106 | [ 107 | PerceiverAttention(dim=dim, dim_head=dim_head, heads=heads), 108 | FeedForward(dim=dim, mult=ff_mult), 109 | ] 110 | ) 111 | ) 112 | 113 | def forward(self, x): 114 | 115 | latents = self.latents.repeat(x.size(0), 1, 1) 116 | 117 | x = self.proj_in(x) 118 | 119 | for attn, ff in self.layers: 120 | latents = attn(x, latents) + latents 121 | latents = ff(latents) + latents 122 | 123 | latents = self.proj_out(latents) 124 | return self.norm_out(latents) 125 | -------------------------------------------------------------------------------- /body_lib/body_kp_detector/blazepose_mediapipe/body_bbox_detector.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/7/21 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | """ 6 | based on 7 | https://github.com/PINTO0309/PINTO_model_zoo/tree/main/053_BlazePose 8 | https://github.com/positive666/mediapipe_PoseEstimation_pytorch/blob/main/blazebase.py 9 | https://github.com/Azzallon/teste/tree/DPR/pose_estimation_3d/blazepose-fullbody 10 | """ 11 | 12 | import numpy as np 13 | from apstone import ONNXModel 14 | 15 | from cv2box import CVImage, MyFpsCounter 16 | from body_lib.body_kp_detector.blazepose_mediapipe.blaze_utils import denormalize_detections, \ 17 | resize_pad, raw_output_to_detections, weighted_non_max_suppression 18 | 19 | # from body_lib.body_kp_detector.blazepose_mediapipe.utils.blazepose_utils_numpy import raw_output_to_detections, \ 20 | # weighted_non_max_suppression 21 | 22 | # ANCHORS_128 = 'pretrain_models/body_lib/body_kp_detector/blazepose_mediapipe/anchors/anchors_896_128.npy' 23 | ANCHORS_224 = 'pretrain_models/body_lib/body_kp_detector/blazepose_mediapipe/anchors/anchors_2254_224.npy' 24 | 25 | # 
input: 1*3*224*224 output: score 1*2254*1 box 1*2254*12 26 | LITE_BLAZEPOSE_MODEL = 'pretrain_models/body_lib/body_kp_detector/blazepose_mediapipe/densify_full_body_detector.onnx' 27 | 28 | 29 | class BodyDetector: 30 | def __init__(self, provider='gpu'): 31 | super().__init__() 32 | self.anchors = np.load(ANCHORS_224) 33 | self.model = ONNXModel(LITE_BLAZEPOSE_MODEL, provider=provider) 34 | 35 | # self.input_std = 127.5 36 | # self.input_mean = 127.5 37 | # self.input_size = (224, 224) 38 | # 39 | # self.x_scale = self.y_scale = 224 40 | # self.w_scale = self.h_scale = 224 41 | # self.num_keypoints = 4 42 | # self.score_clipping_thresh = 100.0 43 | self.min_score_thresh = 0.5 44 | # self.min_suppression_threshold = 0.3 45 | # self.num_coords = 12 46 | 47 | # # These settings are for converting detections to ROIs which can then 48 | # # be extracted and feed into the landmark network 49 | # # use mediapipe/calculators/util/alignment_points_to_rects_calculator.cc 50 | # self.detection2roi_method = 'alignment' 51 | # self.kp1 = 2 52 | # self.kp2 = 3 53 | # self.theta0 = 90 * np.pi / 180 54 | # self.dscale = 1.5 55 | # self.dy = 0. 56 | 57 | def forward(self, img_in_, show=False): 58 | img_crop, scale, pad = resize_pad(CVImage(img_in_).bgr) 59 | image_blob = img_crop.astype(np.float32) / 255 60 | 61 | out = self.model.forward(image_blob.transpose((2, 1, 0))[np.newaxis, :]) 62 | 63 | detections = raw_output_to_detections(out[1], out[0], self.anchors, self.min_score_thresh) 64 | 65 | filtered_detections = [] 66 | for i in range(len(detections)): 67 | # faces = self._weighted_non_max_suppression(detections[i]) 68 | faces = weighted_non_max_suppression(detections[i]) 69 | faces = np.stack(faces) if len(faces) > 0 else np.zeros((0, 13)) 70 | filtered_detections.append(faces) 71 | 72 | filtered_detections = denormalize_detections(filtered_detections[0], scale, pad) 73 | 74 | if show and len(filtered_detections) > 0: 75 | print(filtered_detections) 76 | # kps 77 | image_show = CVImage(img_in_).draw_landmarks(filtered_detections[0, 4:12].reshape((4, 2))[:, ::-1]) 78 | # box 79 | image_show = CVImage(image_show).draw_landmarks(filtered_detections[0, 0:4].reshape((2, 2)), 80 | color=(0, 255, 255)) 81 | CVImage(image_show).show(0) 82 | return filtered_detections 83 | 84 | 85 | if __name__ == '__main__': 86 | image_path = 'resources/yoga2.webp' 87 | image_in = CVImage(image_path).bgr 88 | pd = BodyDetector(provider='gpu') 89 | filtered_detections = pd.forward(image_in, show=True) 90 | 91 | # with MyFpsCounter('model forward 10 times fps: ') as mfc: 92 | # for i in range(10): 93 | # filtered_detections = pd.forward(image_in) 94 | 95 | # img_in, ratio, pad_w, pad_h = CVImage('resources/t_pose.jpeg').resize_keep_ratio((128, 128)) 96 | # CVImage(img_in).show(0) 97 | -------------------------------------------------------------------------------- /gpt_lib/chatglm6b_finetune/finetune.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/4/25 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | from transformers.integrations import TensorBoardCallback 7 | from torch.utils.tensorboard import SummaryWriter 8 | from transformers import TrainingArguments 9 | from transformers import Trainer, HfArgumentParser 10 | from transformers import AutoTokenizer, AutoModel 11 | import torch 12 | import torch.nn as nn 13 | from peft import get_peft_model, LoraConfig, TaskType 14 | from dataclasses import dataclass, field 
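# Note (descriptive): this script LoRA-finetunes THUDM/chatglm-6b. The base model
# is loaded in 8-bit (load_in_8bit=True), wrapped with a peft LoraConfig
# (task_type=CAUSAL_LM, r=lora_rank), and trained with the HF Trainer plus a
# TensorBoard callback. data_collator pads each batch to its longest sequence and
# sets the prompt and padding positions of the labels to -100, so the loss is
# only computed on the response tokens.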
15 | import datasets 16 | import os 17 | 18 | 19 | tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) 20 | 21 | 22 | @dataclass 23 | class FinetuneArguments: 24 | dataset_path: str = field(default="data/alpaca") 25 | model_path: str = field(default="output") 26 | lora_rank: int = field(default=8) 27 | 28 | 29 | class CastOutputToFloat(nn.Sequential): 30 | def forward(self, x): 31 | return super().forward(x).to(torch.float32) 32 | 33 | 34 | def data_collator(features: list) -> dict: 35 | len_ids = [len(feature["input_ids"]) for feature in features] 36 | longest = max(len_ids) 37 | input_ids = [] 38 | labels_list = [] 39 | for ids_l, feature in sorted(zip(len_ids, features), key=lambda x: -x[0]): 40 | ids = feature["input_ids"] 41 | seq_len = feature["seq_len"] 42 | labels = ( 43 | [-100] * (seq_len - 1) + ids[(seq_len - 1) :] + [-100] * (longest - ids_l) 44 | ) 45 | ids = ids + [tokenizer.pad_token_id] * (longest - ids_l) 46 | _ids = torch.LongTensor(ids) 47 | labels_list.append(torch.LongTensor(labels)) 48 | input_ids.append(_ids) 49 | input_ids = torch.stack(input_ids) 50 | labels = torch.stack(labels_list) 51 | return { 52 | "input_ids": input_ids, 53 | "labels": labels, 54 | } 55 | 56 | 57 | # class ModifiedTrainer(Trainer): 58 | # def compute_loss(self, model, inputs, return_outputs=False): 59 | # return model( 60 | # input_ids=inputs["input_ids"], 61 | # labels=inputs["labels"], 62 | # ).loss 63 | # 64 | # def save_model(self, output_dir=None, _internal_call=False): 65 | # from transformers.trainer import TRAINING_ARGS_NAME 66 | # 67 | # os.makedirs(output_dir, exist_ok=True) 68 | # torch.save(self.args, os.path.join(output_dir, TRAINING_ARGS_NAME)) 69 | # saved_params = { 70 | # k: v.to("cpu") for k, v in self.model.named_parameters() if v.requires_grad 71 | # } 72 | # torch.save(saved_params, os.path.join(output_dir, "adapter_model.bin")) 73 | 74 | 75 | def main(): 76 | writer = SummaryWriter() 77 | finetune_args, training_args = HfArgumentParser( 78 | (FinetuneArguments, TrainingArguments) 79 | ).parse_args_into_dataclasses() 80 | 81 | # init model 82 | model = AutoModel.from_pretrained( 83 | "THUDM/chatglm-6b", load_in_8bit=True, trust_remote_code=True, device_map="auto" 84 | ) 85 | model.gradient_checkpointing_enable() 86 | model.enable_input_require_grads() 87 | model.is_parallelizable = True 88 | model.model_parallel = True 89 | model.lm_head = CastOutputToFloat(model.lm_head) 90 | model.config.use_cache = ( 91 | False # silence the warnings. Please re-enable for inference! 
92 | ) 93 | 94 | # setup peft 95 | peft_config = LoraConfig( 96 | task_type=TaskType.CAUSAL_LM, 97 | inference_mode=False, 98 | r=finetune_args.lora_rank, 99 | lora_alpha=32, 100 | lora_dropout=0.1, 101 | ) 102 | model = get_peft_model(model, peft_config) 103 | 104 | # load dataset 105 | dataset = datasets.load_from_disk(finetune_args.dataset_path) 106 | print(f"\n{len(dataset)=}\n") 107 | 108 | # start train 109 | trainer = Trainer( 110 | model=model, 111 | train_dataset=dataset, 112 | args=training_args, 113 | callbacks=[TensorBoardCallback(writer)], 114 | data_collator=data_collator, 115 | ) 116 | trainer.train() 117 | writer.close() 118 | # save model 119 | model.save_pretrained(training_args.output_dir) 120 | 121 | 122 | if __name__ == "__main__": 123 | main() -------------------------------------------------------------------------------- /seg_lib/u2net/u2net_cloth_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/1/13 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | from cv2box import CVImage, MyFpsCounter 7 | from apstone import ModelBase 8 | import cv2 9 | import numpy as np 10 | 11 | MODEL_ZOO = { 12 | # https://github.com/danielgatis/rembg/blob/main/rembg/session_cloth.py 13 | # input_name:['input'], shape:[['batch_size', 3, 768, 768]] 14 | # output_name:['output', 'd1', 'onnx::Concat_1876', 'onnx::Concat_1896', 'onnx::Concat_1916', 'onnx::Concat_1936', 'onnx::Concat_1956'], shape:[['batch_size', 4, 768, 768], ['Convd1_dim_0', 4, 768, 768], ['Resizeonnx::Concat_1876_dim_0', 'Resizeonnx::Concat_1876_dim_1', 'Resizeonnx::Concat_1876_dim_2', 'Resizeonnx::Concat_1876_dim_3'], ['Resizeonnx::Concat_1896_dim_0', 'Resizeonnx::Concat_1896_dim_1', 'Resizeonnx::Concat_1896_dim_2', 'Resizeonnx::Concat_1896_dim_3'], ['Resizeonnx::Concat_1916_dim_0', 'Resizeonnx::Concat_1916_dim_1', 'Resizeonnx::Concat_1916_dim_2', 'Resizeonnx::Concat_1916_dim_3'], ['Resizeonnx::Concat_1936_dim_0', 'Resizeonnx::Concat_1936_dim_1', 'Resizeonnx::Concat_1936_dim_2', 'Resizeonnx::Concat_1936_dim_3'], ['Resizeonnx::Concat_1956_dim_0', 'Resizeonnx::Concat_1956_dim_1', 'Resizeonnx::Concat_1956_dim_2', 'Resizeonnx::Concat_1956_dim_3']] 15 | 'u2net_cloth_seg': { 16 | 'model_path': 'pretrain_models/seg_lib/u2net/u2net_cloth_seg.onnx', 17 | 'input_dynamic_shape': (1, 3, 768, 768), 18 | }, 19 | } 20 | 21 | 22 | class U2netClothSeg(ModelBase): 23 | def __init__(self, model_type='u2net_cloth_seg', provider='gpu'): 24 | super().__init__(MODEL_ZOO[model_type], provider) 25 | self.model_type = model_type 26 | 27 | self.input_mean = (0.485, 0.456, 0.406) 28 | self.input_std = (0.229, 0.224, 0.225) 29 | self.input_size = (768, 768) 30 | 31 | def forward(self, image_in, **kwargs): 32 | """ 33 | Args: 34 | image_in: CVImage access type 35 | post_process: Post Process the mask for a smooth boundary by applying Morphological Operations 36 | Research based on paper: https://www.sciencedirect.com/science/article/pii/S2352914821000757 37 | Returns: mask 0-1 38 | """ 39 | image_in_size = CVImage(image_in).bgr.shape 40 | image_in_pre = CVImage(image_in).blob_innormal(self.input_size, self.input_mean, self.input_std, rgb=True, 41 | interpolation=cv2.INTER_LANCZOS4) 42 | pred_mask = self.model.forward(image_in_pre) 43 | from scipy.special import log_softmax 44 | pred_mask = log_softmax(pred_mask[0], 1) 45 | pred_mask = np.argmax(pred_mask, axis=1, keepdims=True) 46 | pred_mask = np.squeeze(pred_mask, 0) 47 | pred_mask = 
np.squeeze(pred_mask, 0) 48 | pred_mask = pred_mask.astype(np.uint8) 49 | 50 | pred_mask = CVImage(pred_mask).resize(image_in_size[:-1][::-1], interpolation=cv2.INTER_LANCZOS4).bgr 51 | 52 | # First create the image with alpha channel 53 | rgba = CVImage(image_in).bgr 54 | rgba = cv2.cvtColor(rgba, cv2.COLOR_BGR2RGBA) 55 | # Then assign the mask to the last channel of the image 56 | rgba[:, :, 3] = pred_mask 57 | 58 | upper_body_mask = pred_mask.copy() 59 | upper_body_mask[np.where(upper_body_mask != 1)] = 0 60 | upper_body_mask[np.where(upper_body_mask == 1)] = 255 61 | 62 | lower_body_mask = pred_mask.copy() 63 | lower_body_mask[np.where(lower_body_mask != 2)] = 0 64 | lower_body_mask[np.where(lower_body_mask == 2)] = 255 65 | 66 | full_body_mask = pred_mask.copy() 67 | full_body_mask[np.where(full_body_mask != 3)] = 0 68 | full_body_mask[np.where(full_body_mask == 3)] = 255 69 | 70 | return [upper_body_mask, lower_body_mask, full_body_mask] 71 | 72 | 73 | if __name__ == '__main__': 74 | fb_cur = U2netClothSeg(model_type='u2net_cloth_seg', provider='gpu') 75 | mask = fb_cur.forward('', post_process=False) 76 | CVImage(mask[0]).show() 77 | CVImage(mask[1]).show() 78 | CVImage(mask[2]).show() 79 | 80 | combined_mask = np.maximum(mask[0], mask[1]) 81 | # # reverse 82 | # combined_mask[np.where(combined_mask == 255)] = 233 83 | # combined_mask[np.where(combined_mask == 0)] = 255 84 | # combined_mask[np.where(combined_mask == 233)] = 0 85 | CVImage(combined_mask).save('') 86 | -------------------------------------------------------------------------------- /body_lib/body_kp_detector/blazepose_mediapipe/pose_landmark_yolox.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/7/21 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | import numpy as np 6 | from cv2box import CVImage, MyFpsCounter, CVBbox 7 | 8 | from apstone import ONNXModel 9 | from body_lib.body_kp_detector.blazepose_mediapipe.blaze_utils import postprocess, denormalize_landmarks, detection2roi, \ 10 | extract_roi 11 | from body_lib.body_bbox_detector import BodyBboxDetector 12 | 13 | # input 1*256*256*3 output , 1*1 , , , 14 | LITE_MODEL = 'pretrain_models/body_lib/body_kp_detector/blazepose_mediapipe/pose_landmark_lite.onnx' 15 | FULL_MODEL = 'pretrain_models/body_lib/body_kp_detector/blazepose_mediapipe/pose_landmark_full.onnx' 16 | HEAVY_MODEL = 'pretrain_models/body_lib/body_kp_detector/blazepose_mediapipe/pose_landmark_heavy.onnx' 17 | 18 | 19 | class LandmarkDetectorYolox: 20 | def __init__(self, model_complexity=0, provider='gpu'): 21 | self.bbd = BodyBboxDetector(model='yolox_tiny_trt16', threshold=0.5) 22 | 23 | model_path_list = [LITE_MODEL, FULL_MODEL, HEAVY_MODEL] 24 | self.model = ONNXModel(model_path_list[model_complexity], provider=provider) 25 | 26 | self.need_bbox_flag = True 27 | self.history = [] 28 | 29 | def forward(self, image_in_, show=False): 30 | """ 31 | 32 | Args: 33 | image_in_: 34 | show: 35 | Returns: 36 | landmarks: 33*4 37 | 38 | """ 39 | bbox_result = self.bbd.forward(image_in_, show=False, max_bbox_num=1)[0] 40 | img, ratio, left, top = CVImage(image_in_).crop_keep_ratio(bbox_result, (256, 256), padding_ratio=1.) 
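        # crop_keep_ratio crops the detected body bbox to the 256x256 landmark-model
        # input while keeping the aspect ratio; the returned (ratio, left, top) are
        # what recover_from_crop uses below to map the normalized landmarks back to
        # the original image coordinates.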
41 | 42 | if show: 43 | CVImage(img).show(0, 'img_crop') 44 | 45 | blob = (img / 256).astype(np.float32)[np.newaxis, :] 46 | normalized_landmarks, f, ee, rr, tt = self.model.forward(blob) 47 | normalized_landmarks = postprocess(normalized_landmarks)[0] 48 | landmarks_ = CVImage(None).recover_from_crop(normalized_landmarks, ratio, left, top, (256, 256)) 49 | 50 | if show: 51 | show_img = CVImage(image_in_).draw_landmarks(landmarks_) 52 | CVImage(show_img).show(0, 'results') 53 | return landmarks_ 54 | 55 | def forward_w_tracking(self, image_in_, show=False): 56 | if self.need_bbox_flag: 57 | bbox_result = self.bbd.forward(image_in_, show=False, max_bbox_num=1)[0] 58 | else: 59 | reserve_points = [0, 7, 8, 11, 12, 23, 24, 25, 26, 27, 28] 60 | bbox_result = CVBbox(None).get_bbox_from_points(self.history[-1][reserve_points], image_in_.shape, 61 | margin_ratio=0.2) 62 | 63 | img, ratio, left, top = CVImage(image_in_).crop_keep_ratio(bbox_result, (256, 256), padding_ratio=1.) 64 | 65 | if show: 66 | CVImage(img).show(0, 'img_crop') 67 | 68 | blob = (img / 256).astype(np.float32)[np.newaxis, :] 69 | normalized_landmarks, f, _, _, _ = self.model.forward(blob) 70 | normalized_landmarks = postprocess(normalized_landmarks)[0] 71 | landmarks_ = CVImage(None).recover_from_crop(normalized_landmarks, ratio, left, top, (256, 256)) 72 | 73 | self.need_bbox_flag = False 74 | self.history.append(landmarks_) 75 | self.history = self.history[-2:] 76 | 77 | if show: 78 | show_img = CVImage(image_in_).draw_landmarks(landmarks_) 79 | CVImage(show_img).show(0, 'results') 80 | return landmarks_ 81 | 82 | 83 | if __name__ == '__main__': 84 | # image_path = 'resources/for_pose/t_pose_1080p.jpeg' 85 | # image_in = CVImage(image_path).bgr 86 | 87 | """ 88 | model 1 82fps trt16 trt 109fps 89 | model 2 67fps trt16 output Nan trt 97fps 90 | """ 91 | ld = LandmarkDetectorYolox(model_complexity=2, provider='trt') 92 | 93 | # landmarks = ld.forward(image_in, show=True) 94 | # print(landmarks) 95 | # 96 | # with MyFpsCounter('model forward 10 times fps: ') as mfc: 97 | # for i in range(10): 98 | # filtered_detections = ld.forward(image_in) 99 | 100 | # video tracking test 101 | from cv2box import CVVideoLoader 102 | from tqdm import tqdm 103 | 104 | with CVVideoLoader('') as cvvl: 105 | for _ in tqdm(range(len(cvvl))): 106 | _, frame = cvvl.get() 107 | landmarks = ld.forward_w_tracking(frame, show=False) 108 | -------------------------------------------------------------------------------- /sd_lib/controlnet/controlnet_api.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2023/10/20 3 | # @Author : ykk648 4 | # @Project : https://github.com/ykk648/AI_power 5 | 6 | from diffusers import ControlNetModel 7 | from diffusers.image_processor import VaeImageProcessor 8 | from cv2box import CVImage 9 | import numpy as np 10 | import torch 11 | 12 | MODEL_ZOO = { 13 | 'control_v11p_sd15_canny': { 14 | 'model_path': 'sd_models/controlnets/control_v11p_sd15_canny/', 15 | 'use_safetensors': False, 16 | }, 17 | 'control_v11p_sd15_normalbae': { 18 | 'model_path': 'sd_models/controlnets/control_v11p_sd15_normalbae/', 19 | 'use_safetensors': False, 20 | }, 21 | 'control_v11f1e_sd15_tile': { 22 | 'model_path': 'sd_models/controlnets/control_v11f1e_sd15_tile/', 23 | 'use_safetensors': False, 24 | }, 25 | 'control_v11e_sd15_ip2p': { 26 | 'model_path': 'sd_models/controlnets/control_v11e_sd15_ip2p/', 27 | 'use_safetensors': False, 28 | }, 29 | 'control_v11p_sd15_inpaint': 
{ 30 | 'model_path': 'sd_models/controlnets/control_v11p_sd15_inpaint/', 31 | 'use_safetensors': False, 32 | }, 33 | 34 | } 35 | 36 | 37 | class ControlNet: 38 | def __init__(self, model_name='control_v11p_sd15_canny', cond_scale=1, vae_scale_factor=8, height=512, width=512, 39 | device='cuda', dtype=torch.float32): 40 | self.cond_scale = cond_scale 41 | self.vae_scale_factor = vae_scale_factor 42 | self.height = height 43 | self.width = width 44 | self.device = device 45 | self.dtype = dtype 46 | self.condition_image = None 47 | 48 | self.model = ControlNetModel.from_pretrained(MODEL_ZOO[model_name]['model_path'], 49 | torch_dtype=dtype, 50 | use_safetensors=MODEL_ZOO[model_name]['use_safetensors'] 51 | ).to(self.device) 52 | self.control_image_processor = VaeImageProcessor( 53 | vae_scale_factor=vae_scale_factor, do_convert_rgb=True, do_normalize=False 54 | ) 55 | 56 | def preprocess(self, condition_image, do_classifier_free_guidance, guess_mode): 57 | if self.condition_image is None: 58 | if isinstance(condition_image, list): 59 | # inpainting 60 | from .utils import make_inpaint_condition 61 | condition_image = make_inpaint_condition(condition_image[0], condition_image[1]) 62 | else: 63 | condition_image = CVImage(condition_image).pillow() 64 | self.condition_image = self.control_image_processor.preprocess(condition_image, height=self.height, 65 | width=self.width).to(self.device, 66 | dtype=self.dtype) 67 | if do_classifier_free_guidance and not guess_mode: 68 | self.condition_image = torch.cat([self.condition_image] * 2) 69 | 70 | def forward(self, latent, t, condition_image, encoder_hidden_states, conditioning_scale=1, do_classifier_free_guidance=True, 71 | guess_mode=False): 72 | """ 73 | Args: 74 | latent: ([2,4,64,64]) 75 | t: int 76 | condition_image: str or list(inpainting img+mask 77 | encoder_hidden_states: ([2, 77, 768]) 78 | conditioning_scale: 79 | do_classifier_free_guidance: 80 | guess_mode: 81 | Returns: 82 | 83 | """ 84 | self.preprocess(condition_image, do_classifier_free_guidance, guess_mode) 85 | 86 | down_block_res_samples, mid_block_res_sample = self.model( 87 | latent.to(self.dtype), 88 | t, 89 | encoder_hidden_states=encoder_hidden_states, 90 | controlnet_cond=self.condition_image, 91 | conditioning_scale=conditioning_scale, 92 | guess_mode=guess_mode, 93 | return_dict=False, 94 | ) 95 | return down_block_res_samples, mid_block_res_sample 96 | 97 | 98 | if __name__ == '__main__': 99 | condition_image_p = 'resources/for_sd/controlnet/astronaut_canny.png' 100 | 101 | cn = ControlNet(model_name='control_v11p_sd15_inpaint', cond_scale=1, vae_scale_factor=8, height=512, width=512, 102 | dtype=torch.float32) 103 | 104 | down_block_res_samples_, mid_block_res_sample_ = cn.forward(torch.rand((2, 4, 64, 64)), 105 | 20, 106 | condition_image_p, 107 | torch.rand((2, 77, 768)) 108 | ) 109 | print(down_block_res_samples_[0].shape) 110 | print(mid_block_res_sample_.shape) 111 | --------------------------------------------------------------------------------
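# Usage note for sd_lib/controlnet/controlnet_api.py (illustrative sketch, with
# assumed names): the residuals returned by ControlNet.forward are meant to be
# injected into the SD UNet at each denoising step. Assuming a diffusers
# UNet2DConditionModel loaded as `unet` and text embeddings `text_embeds`
# (e.g. from sd_lib/clip_encoder.py ClipText), a step would look roughly like:
#
#     noise_pred = unet(
#         latent,                                   # (2, 4, 64, 64), uncond + cond
#         t,
#         encoder_hidden_states=text_embeds,        # (2, 77, 768)
#         down_block_additional_residuals=down_block_res_samples_,
#         mid_block_additional_residual=mid_block_res_sample_,
#     ).sample
#
# followed by the usual classifier-free-guidance split of noise_pred.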