├── .gitignore
├── .gitmodules
├── LICENSE
├── README.md
├── app.py
├── dwpose-l_384x288.py
├── main.py
├── requirements.txt
├── sample_videos
│   ├── input_video.mp4
│   ├── output_video.mp4
│   ├── side_by_side.gif
│   └── side_by_side.mp4
├── video2openpose2.py
└── yolox_l_8xb8-300e_coco.py
/.gitignore:
--------------------------------------------------------------------------------
1 | detectron2/
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "detectron2"]
2 | path = detectron2
3 | url = https://github.com/facebookresearch/detectron2
4 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Flode Labs
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Vid2DensePose
2 |
3 |
4 |
5 |
6 | 
7 |
8 | ## Overview
9 |
10 | Vid2DensePose is a tool for applying the DensePose model to videos, generating detailed "Part Index" visualizations for each frame. It is especially useful for enhancing animations, particularly in combination with MagicAnimate for temporally consistent human image animation.
11 |
12 | ## Key Features
13 |
14 |
15 | - **Enhanced Output**: Produces video files showcasing DensePose data in a vivid, color-coded format.
16 | - **MagicAnimate Integration**: Works seamlessly with MagicAnimate for advanced human animation projects.
17 |
18 | ## Prerequisites
19 |
20 | To use this tool, make sure the following are installed (a quick import check follows the list):
21 | - Python 3.8 or later
22 | - PyTorch (preferably with CUDA for GPU support)
23 | - Detectron2
24 |
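A quick way to confirm the prerequisites are importable before going further (a minimal check, not part of the original setup; it only assumes PyTorch and Detectron2 are on your Python path):

```python
# Sanity check for the prerequisites used by main.py and app.py.
import torch
import detectron2

print("torch:", torch.__version__, "| CUDA available:", torch.cuda.is_available())
print("detectron2:", detectron2.__version__)
```
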
25 | ## Installation Steps
26 |
27 | 1. Clone the repository:
28 | ```bash
29 | git clone https://github.com/Flode-Labs/vid2densepose.git
30 | cd vid2densepose
31 | ```
32 |
33 | 2. Install necessary Python packages:
34 | ```bash
35 | pip install -r requirements.txt
36 | ```
37 |
38 | 3. Clone the Detectron2 repository:
39 | ```bash
40 | git clone https://github.com/facebookresearch/detectron2.git
41 | ```
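
Note: `main.py` and `app.py` import the `densepose` package, which is provided by the DensePose project bundled with Detectron2. If `import densepose` fails after the steps above, uncommenting the first line of `requirements.txt` and re-running `pip install -r requirements.txt` is one way to install it.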
42 |
43 | ## Usage Guide
44 |
45 | Run the script:
46 | ```bash
47 | python main.py -i input_video.mp4 -o output_video.mp4
48 | ```
49 |
50 | The script processes the input video and writes an output video in the DensePose visualization format.
51 |
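If you prefer to call the converter from Python instead of the command line, the same `main()` function can be imported from `main.py` (a minimal sketch; the paths below are placeholders):

```python
from main import main

# Render the DensePose visualization of a clip; adjust the paths as needed.
main(input_video_path="./input_video.mp4",
     output_video_path="./output_video.mp4")
```
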
52 | #### Gradio version
53 | You can also run the script through a Gradio interface. To do so, run the following command:
54 | ```bash
55 | python app.py
56 | ```
57 |
58 | ## Integration with MagicAnimate
59 |
60 | For integration with MagicAnimate:
61 |
62 | 1. Create the DensePose video using the steps outlined above.
63 | 2. Use this output as an input to MagicAnimate for generating temporally consistent animations.
64 |
65 |
66 | ## Acknowledgments
67 |
68 | Special thanks to:
69 | - Facebook AI Research (FAIR) for the development of DensePose.
70 | - The contributors of the Detectron2 project.
71 | - [Gonzalo Vidal](https://www.tiktok.com/@_gonzavidal) for the sample videos.
72 | - [Sylvain Filoni](https://twitter.com/fffiloni) for the deployment of the Gradio Space in [Hugging Face](https://huggingface.co/spaces/fffiloni/video2densepose).
73 |
74 | ## Support
75 |
76 | For any inquiries or support, please file an issue in our GitHub repository's issue tracker.
77 |
78 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | import gradio as gr
2 | from detectron2.config import get_cfg
3 | import torch
4 | import cv2
5 | import numpy as np
6 | from detectron2.engine import DefaultPredictor
7 | from densepose import add_densepose_config
8 | from densepose.vis.extractor import DensePoseResultExtractor
9 | from densepose.vis.densepose_results import DensePoseResultsFineSegmentationVisualizer as Visualizer
10 | import tempfile
11 | import shutil
12 |
13 | # Function to process video
14 | def process_video(input_video_path):
15 | # Temporary path for output video
16 | output_video_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
17 |
18 | # Initialize Detectron2 configuration for DensePose
19 | cfg = get_cfg()
20 | add_densepose_config(cfg)
21 | cfg.merge_from_file("detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x.yaml")
22 | cfg.MODEL.WEIGHTS = "https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl"
23 | predictor = DefaultPredictor(cfg)
24 |
25 | # Open the input video
26 | cap = cv2.VideoCapture(input_video_path)
27 | fps = cap.get(cv2.CAP_PROP_FPS)
28 | width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
29 | height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
30 |
31 | # Initialize video writer
32 | fourcc = cv2.VideoWriter_fourcc(*'mp4v')
33 | out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
34 |
35 | # Process each frame
36 | while cap.isOpened():
37 | ret, frame = cap.read()
38 | if not ret:
39 | break
40 |
41 | with torch.no_grad():
42 | outputs = predictor(frame)['instances']
43 |
44 | results = DensePoseResultExtractor()(outputs)
45 | cmap = cv2.COLORMAP_VIRIDIS
46 | # Visualizer outputs black for background, but we want the 0 value of
47 | # the colormap, so we initialize the array with that value
48 | arr = cv2.applyColorMap(np.zeros((height, width), dtype=np.uint8), cmap)
49 | out_frame = Visualizer(alpha=1, cmap=cmap).visualize(arr, results)
50 | out.write(out_frame)
51 |
52 | # Release resources
53 | cap.release()
54 | out.release()
55 |
56 | # Return processed video
57 | return output_video_path
58 |
59 | # Gradio interface
60 | iface = gr.Interface(
61 | fn=process_video,
62 | inputs=gr.Video(label="Input Video"),
63 | outputs=gr.Video(label="Output DensePose Video"),
64 | title="Video 2 DensePose"
65 | )
66 |
67 | # Run the app
68 | iface.launch()
69 |
--------------------------------------------------------------------------------
/dwpose-l_384x288.py:
--------------------------------------------------------------------------------
1 | # runtime
2 | max_epochs = 270
3 | stage2_num_epochs = 30
4 | base_lr = 4e-3
5 |
6 | train_cfg = dict(max_epochs=max_epochs, val_interval=10)
7 | randomness = dict(seed=21)
8 |
9 | # optimizer
10 | optim_wrapper = dict(
11 | type='OptimWrapper',
12 | optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
13 | paramwise_cfg=dict(
14 | norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
15 |
16 | # learning rate
17 | param_scheduler = [
18 | dict(
19 | type='LinearLR',
20 | start_factor=1.0e-5,
21 | by_epoch=False,
22 | begin=0,
23 | end=1000),
24 | dict(
25 | # use cosine lr from max_epochs // 2 (epoch 135) to max_epochs (270)
26 | type='CosineAnnealingLR',
27 | eta_min=base_lr * 0.05,
28 | begin=max_epochs // 2,
29 | end=max_epochs,
30 | T_max=max_epochs // 2,
31 | by_epoch=True,
32 | convert_to_iter_based=True),
33 | ]
34 |
35 | # automatically scaling LR based on the actual training batch size
36 | auto_scale_lr = dict(base_batch_size=512)
37 |
38 | # codec settings
39 | codec = dict(
40 | type='SimCCLabel',
41 | input_size=(288, 384),
42 | sigma=(6., 6.93),
43 | simcc_split_ratio=2.0,
44 | normalize=False,
45 | use_dark=False)
46 |
47 | # model settings
48 | model = dict(
49 | type='TopdownPoseEstimator',
50 | data_preprocessor=dict(
51 | type='PoseDataPreprocessor',
52 | mean=[123.675, 116.28, 103.53],
53 | std=[58.395, 57.12, 57.375],
54 | bgr_to_rgb=True),
55 | backbone=dict(
56 | _scope_='mmdet',
57 | type='CSPNeXt',
58 | arch='P5',
59 | expand_ratio=0.5,
60 | deepen_factor=1.,
61 | widen_factor=1.,
62 | out_indices=(4, ),
63 | channel_attention=True,
64 | norm_cfg=dict(type='SyncBN'),
65 | act_cfg=dict(type='SiLU'),
66 | init_cfg=dict(
67 | type='Pretrained',
68 | prefix='backbone.',
69 | checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
70 | 'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth' # noqa
71 | )),
72 | head=dict(
73 | type='RTMCCHead',
74 | in_channels=1024,
75 | out_channels=133,
76 | input_size=codec['input_size'],
77 | in_featuremap_size=(9, 12),
78 | simcc_split_ratio=codec['simcc_split_ratio'],
79 | final_layer_kernel_size=7,
80 | gau_cfg=dict(
81 | hidden_dims=256,
82 | s=128,
83 | expansion_factor=2,
84 | dropout_rate=0.,
85 | drop_path=0.,
86 | act_fn='SiLU',
87 | use_rel_bias=False,
88 | pos_enc=False),
89 | loss=dict(
90 | type='KLDiscretLoss',
91 | use_target_weight=True,
92 | beta=10.,
93 | label_softmax=True),
94 | decoder=codec),
95 | test_cfg=dict(flip_test=True, ))
96 |
97 | # base dataset settings
98 | dataset_type = 'CocoWholeBodyDataset'
99 | data_mode = 'topdown'
100 | data_root = '/data/'
101 |
102 | backend_args = dict(backend='local')
103 | # backend_args = dict(
104 | # backend='petrel',
105 | # path_mapping=dict({
106 | # f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
107 | # f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
108 | # }))
109 |
110 | # pipelines
111 | train_pipeline = [
112 | dict(type='LoadImage', backend_args=backend_args),
113 | dict(type='GetBBoxCenterScale'),
114 | dict(type='RandomFlip', direction='horizontal'),
115 | dict(type='RandomHalfBody'),
116 | dict(
117 | type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
118 | dict(type='TopdownAffine', input_size=codec['input_size']),
119 | dict(type='mmdet.YOLOXHSVRandomAug'),
120 | dict(
121 | type='Albumentation',
122 | transforms=[
123 | dict(type='Blur', p=0.1),
124 | dict(type='MedianBlur', p=0.1),
125 | dict(
126 | type='CoarseDropout',
127 | max_holes=1,
128 | max_height=0.4,
129 | max_width=0.4,
130 | min_holes=1,
131 | min_height=0.2,
132 | min_width=0.2,
133 | p=1.0),
134 | ]),
135 | dict(type='GenerateTarget', encoder=codec),
136 | dict(type='PackPoseInputs')
137 | ]
138 | val_pipeline = [
139 | dict(type='LoadImage', backend_args=backend_args),
140 | dict(type='GetBBoxCenterScale'),
141 | dict(type='TopdownAffine', input_size=codec['input_size']),
142 | dict(type='PackPoseInputs')
143 | ]
144 |
145 | train_pipeline_stage2 = [
146 | dict(type='LoadImage', backend_args=backend_args),
147 | dict(type='GetBBoxCenterScale'),
148 | dict(type='RandomFlip', direction='horizontal'),
149 | dict(type='RandomHalfBody'),
150 | dict(
151 | type='RandomBBoxTransform',
152 | shift_factor=0.,
153 | scale_factor=[0.75, 1.25],
154 | rotate_factor=60),
155 | dict(type='TopdownAffine', input_size=codec['input_size']),
156 | dict(type='mmdet.YOLOXHSVRandomAug'),
157 | dict(
158 | type='Albumentation',
159 | transforms=[
160 | dict(type='Blur', p=0.1),
161 | dict(type='MedianBlur', p=0.1),
162 | dict(
163 | type='CoarseDropout',
164 | max_holes=1,
165 | max_height=0.4,
166 | max_width=0.4,
167 | min_holes=1,
168 | min_height=0.2,
169 | min_width=0.2,
170 | p=0.5),
171 | ]),
172 | dict(type='GenerateTarget', encoder=codec),
173 | dict(type='PackPoseInputs')
174 | ]
175 |
176 | datasets = []
177 | dataset_coco=dict(
178 | type=dataset_type,
179 | data_root=data_root,
180 | data_mode=data_mode,
181 | ann_file='coco/annotations/coco_wholebody_train_v1.0.json',
182 | data_prefix=dict(img='coco/train2017/'),
183 | pipeline=[],
184 | )
185 | datasets.append(dataset_coco)
186 |
187 | scene = ['Magic_show', 'Entertainment', 'ConductMusic', 'Online_class',
188 | 'TalkShow', 'Speech', 'Fitness', 'Interview', 'Olympic', 'TVShow',
189 | 'Singing', 'SignLanguage', 'Movie', 'LiveVlog', 'VideoConference']
190 |
191 | for i in range(len(scene)):
192 | datasets.append(
193 | dict(
194 | type=dataset_type,
195 | data_root=data_root,
196 | data_mode=data_mode,
197 | ann_file='UBody/annotations/'+scene[i]+'/keypoint_annotation.json',
198 | data_prefix=dict(img='UBody/images/'+scene[i]+'/'),
199 | pipeline=[],
200 | )
201 | )
202 |
203 | # data loaders
204 | train_dataloader = dict(
205 | batch_size=32,
206 | num_workers=10,
207 | persistent_workers=True,
208 | sampler=dict(type='DefaultSampler', shuffle=True),
209 | dataset=dict(
210 | type='CombinedDataset',
211 | metainfo=dict(from_file='configs/_base_/datasets/coco_wholebody.py'),
212 | datasets=datasets,
213 | pipeline=train_pipeline,
214 | test_mode=False,
215 | ))
216 | val_dataloader = dict(
217 | batch_size=32,
218 | num_workers=10,
219 | persistent_workers=True,
220 | drop_last=False,
221 | sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
222 | dataset=dict(
223 | type=dataset_type,
224 | data_root=data_root,
225 | data_mode=data_mode,
226 | ann_file='coco/annotations/coco_wholebody_val_v1.0.json',
227 | bbox_file=f'{data_root}coco/person_detection_results/'
228 | 'COCO_val2017_detections_AP_H_56_person.json',
229 | data_prefix=dict(img='coco/val2017/'),
230 | test_mode=True,
231 | pipeline=val_pipeline,
232 | ))
233 | test_dataloader = val_dataloader
234 |
235 | # hooks
236 | default_hooks = dict(
237 | checkpoint=dict(
238 | save_best='coco-wholebody/AP', rule='greater', max_keep_ckpts=1))
239 |
240 | custom_hooks = [
241 | dict(
242 | type='EMAHook',
243 | ema_type='ExpMomentumEMA',
244 | momentum=0.0002,
245 | update_buffers=True,
246 | priority=49),
247 | dict(
248 | type='mmdet.PipelineSwitchHook',
249 | switch_epoch=max_epochs - stage2_num_epochs,
250 | switch_pipeline=train_pipeline_stage2)
251 | ]
252 |
253 | # evaluators
254 | val_evaluator = dict(
255 | type='CocoWholeBodyMetric',
256 | ann_file=data_root + 'coco/annotations/coco_wholebody_val_v1.0.json')
257 | test_evaluator = val_evaluator
258 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | import cv2
4 | import numpy as np
5 | import torch
6 | from densepose import add_densepose_config
7 | from densepose.vis.densepose_results import (
8 | DensePoseResultsFineSegmentationVisualizer as Visualizer,
9 | )
10 | from densepose.vis.extractor import DensePoseResultExtractor
11 |
12 | from detectron2.config import get_cfg
13 | from detectron2.engine import DefaultPredictor
14 |
15 |
16 | def main(input_video_path="./input_video.mp4", output_video_path="./output_video.mp4"):
17 | # Initialize Detectron2 configuration for DensePose
18 | cfg = get_cfg()
19 | add_densepose_config(cfg)
20 | cfg.merge_from_file(
21 | "detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x.yaml"
22 | )
23 | cfg.MODEL.WEIGHTS = "https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl"
24 | predictor = DefaultPredictor(cfg)
25 |
26 | # Open the input video
27 | cap = cv2.VideoCapture(input_video_path)
28 | fps = cap.get(cv2.CAP_PROP_FPS)
29 | width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
30 | height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
31 |
32 | # Initialize video writer
33 | fourcc = cv2.VideoWriter_fourcc(*"mp4v")
34 | out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
35 |
36 | # Process each frame
37 | while cap.isOpened():
38 | ret, frame = cap.read()
39 | if not ret:
40 | break
41 |
42 | with torch.no_grad():
43 | outputs = predictor(frame)["instances"]
44 |
45 | results = DensePoseResultExtractor()(outputs)
46 |
47 | # MagicAnimate uses the Viridis colormap for their training data
48 | cmap = cv2.COLORMAP_VIRIDIS
49 | # Visualizer outputs black for background, but we want the 0 value of
50 | # the colormap, so we initialize the array with that value
51 | arr = cv2.applyColorMap(np.zeros((height, width), dtype=np.uint8), cmap)
52 | out_frame = Visualizer(alpha=1, cmap=cmap).visualize(arr, results)
53 | out.write(out_frame)
54 |
55 | # Release resources
56 | cap.release()
57 | out.release()
58 |
59 |
60 | if __name__ == "__main__":
61 | parser = argparse.ArgumentParser()
62 | parser.add_argument(
63 | "-i", "--input_video_path", type=str, default="./input_video.mp4"
64 | )
65 | parser.add_argument(
66 | "-o", "--output_video_path", type=str, default="./output_video.mp4"
67 | )
68 | args = parser.parse_args()
69 |
70 | main(args.input_video_path, args.output_video_path)
71 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | #git+https://github.com/facebookresearch/detectron2@main#subdirectory=projects/DensePose
2 | moviepy
3 | controlnet_aux
4 | mediapipe
5 | openmim
6 |
--------------------------------------------------------------------------------
/sample_videos/input_video.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sdbds/vid2pose/97957ca92b70fde93754fa861f2fec8df48e6c68/sample_videos/input_video.mp4
--------------------------------------------------------------------------------
/sample_videos/output_video.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sdbds/vid2pose/97957ca92b70fde93754fa861f2fec8df48e6c68/sample_videos/output_video.mp4
--------------------------------------------------------------------------------
/sample_videos/side_by_side.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sdbds/vid2pose/97957ca92b70fde93754fa861f2fec8df48e6c68/sample_videos/side_by_side.gif
--------------------------------------------------------------------------------
/sample_videos/side_by_side.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sdbds/vid2pose/97957ca92b70fde93754fa861f2fec8df48e6c68/sample_videos/side_by_side.mp4
--------------------------------------------------------------------------------
/video2openpose2.py:
--------------------------------------------------------------------------------
1 | import gradio as gr
2 | from controlnet_aux import OpenposeDetector, DWposeDetector
3 | import os
4 | import cv2
5 | import numpy as np
6 | from PIL import Image
7 | from moviepy.editor import *
8 | import argparse
9 | import torch
10 | import re
11 |
12 |
13 | def main(
14 | input_path="vid2pose/sample_videos/input_video.mp4",
15 | output_path="./outputs/",
16 | pose_model="dwpose",
17 | ):
18 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
19 | if pose_model.__contains__("openpose"):
20 | openpose = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
21 | else:
22 | dwpose = DWposeDetector(
23 | det_config=os.path.dirname(__file__)
24 | + "/yolox_l_8xb8-300e_coco.py",
25 | pose_config=os.path.dirname(__file__)
26 | + "/dwpose-l_384x288.py",
27 | device=device,
28 | )
29 |
30 | def regex(string):
31 | return re.findall(r"\d+", str(string))[-1]
32 |
33 | def get_frames(video_in):
34 | frames = []
35 | # resize the video
36 | clip = VideoFileClip(video_in)
37 |
38 | # check fps
39 | video_path = os.path.join(output_path, "video_resized.mp4")
40 | if clip.fps > 30:
41 | print("vide rate is over 30, resetting to 30")
42 | clip_resized = clip.resize(height=512)
43 | clip_resized.write_videofile(video_path, fps=30)
44 | else:
45 | print("video rate is OK")
46 | clip_resized = clip.resize(height=512)
47 | clip_resized.write_videofile(video_path, fps=clip.fps)
48 |
49 | print("video resized to 512 height")
50 |
51 | # Opens the Video file with CV2
52 | cap = cv2.VideoCapture(video_path)
53 |
54 | fps = cap.get(cv2.CAP_PROP_FPS)
55 | print("video fps: " + str(fps))
56 | i = 0
57 | while cap.isOpened():
58 | ret, frame = cap.read()
59 | if ret == False:
60 | break
61 | path = os.path.join(output_path, "raw" + str(i) + ".jpg")
62 | cv2.imwrite(path, frame)
63 | frames.append(path)
64 | i += 1
65 |
66 | cap.release()
67 | cv2.destroyAllWindows()
68 | print("broke the video into frames")
69 |
70 | return frames, fps
71 |
72 | def get_openpose_filter(i):
73 | image = Image.open(i)
74 |
75 | # image = np.array(image)
76 | openpose.to(device)
77 |
78 | if pose_model.__contains__("full"):
79 | image = openpose(image, include_hand=True, include_face=True)
80 | elif pose_model.__contains__("hand"):
81 | image = openpose(image, include_hand=True)
82 | elif pose_model.__contains__("face"):
83 | image = openpose(image, include_face=True)
84 | elif pose_model.__contains__("openpose"):
85 | image = openpose(image)
86 | else:
87 | image = dwpose(image)
88 | # image = Image.fromarray(image)
89 | path = os.path.join(output_path, "openpose_frame_" + regex(i) + ".jpeg")
90 | image.save(path)
91 | return path
92 |
93 | def create_video(frames, fps, type):
94 | print("building video result")
95 | clip = ImageSequenceClip(frames, fps=fps)
96 | path = os.path.join(output_path, type + "_result.mp4")
97 | clip.write_videofile(path, fps=fps)
98 |
99 | return path
100 |
101 | def convertG2V(imported_gif):
102 | clip = VideoFileClip(imported_gif.name)
103 | path = os.path.join(output_path, "my_gif_video.mp4")
104 | clip.write_videofile(path)
105 | return path
106 |
107 | def infer(video_in):
108 | # 1. break video into frames and get FPS
109 | break_vid = get_frames(video_in)
110 | frames_list = break_vid[0]
111 | fps = break_vid[1]
112 | # n_frame = int(trim_value*fps)
113 | n_frame = len(frames_list)
114 |
115 | if n_frame >= len(frames_list):
116 | print("video is shorter than the cut value")
117 | n_frame = len(frames_list)
118 |
119 | # 2. prepare frames result arrays
120 | result_frames = []
121 | print("set stop frames to: " + str(n_frame))
122 |
123 | for i in frames_list[0 : int(n_frame)]:
124 | openpose_frame = get_openpose_filter(i)
125 | result_frames.append(openpose_frame)
126 | print("frame " + i + "/" + str(n_frame) + ": done;")
127 |
128 | final_vid = create_video(result_frames, fps, "openpose")
129 |
130 | files = [final_vid]
131 |
132 | return final_vid, files
133 |
134 | title = """
135 |