├── AudioCraft
│   └── install.bat
├── AudioSep
│   ├── audioset_textmap.npy
│   └── bpe_simple_vocab_16e6.txt.gz
├── Disco
│   └── DisCo_Demo.ipynb
├── PiperUI
│   ├── convert_mp3_to_wav.bat
│   └── transcript.py
├── README.md
├── Wav2Lip-ESRGAN
│   ├── 1-prerequisite.txt
│   ├── 2-wav2lip-hd.txt
│   ├── 3-Real-ESRGAN.txt
│   └── 4-run-commands.txt
└── melotts
    └── download.py

/AudioCraft/install.bat:
--------------------------------------------------------------------------------
1 | cd c:\ai
2 | 
3 | git clone https://github.com/facebookresearch/audiocraft.git
4 | 
5 | cd audiocraft
6 | 
7 | echo y | conda create -n audiocraft python=3.9
8 | 
9 | call activate audiocraft
10 | 
11 | echo y | conda install -c conda-forge "ffmpeg<5"
12 | 
13 | echo y | pip install -r requirements.txt
14 | 
15 | echo y | pip install -U git+https://git@github.com/facebookresearch/audiocraft#egg=audiocraft
16 | 
17 | echo y | conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia
18 | 
19 | echo y | pip install numpy==1.24
20 | 
21 | echo y | pip install chardet
22 | 
23 | echo y | conda install -c conda-forge ipywidgets
24 | 
--------------------------------------------------------------------------------
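Not part of the original script, but a quick sanity check in the spirit of the CUDA checks in the Wav2Lip-ESRGAN notes further down: with the audiocraft env active, confirm that the conda-installed PyTorch actually sees the GPU before running AudioCraft.

import torch
print(torch.cuda.is_available())  # expect True on a working CUDA 11.8 setup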
/AudioSep/audioset_textmap.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/natlamir/ProjectFiles/6cac36b40e71b4a57b18ccd3e1e26c12379f97dd/AudioSep/audioset_textmap.npy
--------------------------------------------------------------------------------
/AudioSep/bpe_simple_vocab_16e6.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/natlamir/ProjectFiles/6cac36b40e71b4a57b18ccd3e1e26c12379f97dd/AudioSep/bpe_simple_vocab_16e6.txt.gz
--------------------------------------------------------------------------------
/Disco/DisCo_Demo.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "gpuType": "T4",
8 | "include_colab_link": true
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | },
14 | "language_info": {
15 | "name": "python"
16 | },
17 | "accelerator": "GPU"
18 | },
19 | "cells": [
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {
23 | "id": "view-in-github",
24 | "colab_type": "text"
25 | },
26 | "source": [
27 | "Open In Colab"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": null,
33 | "metadata": {
34 | "id": "1Qu-pX4GDlgs"
35 | },
36 | "outputs": [],
37 | "source": [
38 | "!nvidia-smi"
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "source": [
44 | "### 1. Clone the GitHub repo"
45 | ],
46 | "metadata": {
47 | "id": "N7Som16t69XO"
48 | }
49 | },
50 | {
51 | "cell_type": "code",
52 | "source": [
53 | "!git clone https://github.com/Wangt-CN/DisCo"
54 | ],
55 | "metadata": {
56 | "id": "kKByNFtrfV7M"
57 | },
58 | "execution_count": null,
59 | "outputs": []
60 | },
61 | {
62 | "cell_type": "markdown",
63 | "source": [
64 | "\n",
65 | "### 2. Install the package\n",
66 | "\n",
67 | "PS: Most errors are due to unsuccessful package installation; please check the installation carefully.\n"
68 | ],
69 | "metadata": {
70 | "id": "mpXDRYzO6rEY"
71 | }
72 | },
73 | {
74 | "cell_type": "code",
75 | "source": [
76 | "!pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 torchtext==0.14.1 torchaudio==0.13.1 torchdata==0.5.1 --extra-index-url https://download.pytorch.org/whl/cu117\n",
77 | "!pip install --user progressbar psutil pymongo simplejson yacs boto3 pyyaml ete3 easydict deprecated future django orderedset python-magic datasets h5py omegaconf einops ipdb\n",
78 | "!pip install --user --exists-action w -r DisCo/requirements.txt\n",
79 | "!pip install git+https://github.com/microsoft/azfuse.git\n",
80 | "\n",
81 | "## for acceleration\n",
82 | "!pip install --user deepspeed==0.6.3"
83 | ],
84 | "metadata": {
85 | "id": "37UDwQVxfp4T"
86 | },
87 | "execution_count": null,
88 | "outputs": []
89 | },
90 | {
91 | "cell_type": "code",
92 | "source": [
93 | "!pip install -U xformers"
94 | ],
95 | "metadata": {
96 | "id": "ZMfiYfxAyLmx"
97 | },
98 | "execution_count": null,
99 | "outputs": []
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "source": [
104 | "### 3. Download the pretrained model\n",
105 | "Feel free to use our other [checkpoints](https://github.com/Wangt-CN/DisCo#model-checkpoint-google-cloud-tiktok-training-data-fid-fvd-188--more-tiktok-style-training-data-fid-fvd-157) or change to your own model."
106 | ],
107 | "metadata": {
108 | "id": "YkfPye5C7FDV"
109 | }
110 | },
111 | {
112 | "cell_type": "code",
113 | "source": [
114 | "!git clone https://huggingface.co/lambdalabs/sd-image-variations-diffusers\n",
115 | "!wget https://storage.googleapis.com/disco-checkpoint-share/checkpoint_ft/moretiktok_nocfg/mp_rank_00_model_states.pt"
116 | ],
117 | "metadata": {
118 | "id": "v5FjMXqkh827"
119 | },
120 | "execution_count": null,
121 | "outputs": []
122 | },
123 | {
124 | "cell_type": "markdown",
125 | "source": [
126 | "### 4. 
Start Running" 127 | ], 128 | "metadata": { 129 | "id": "5-u3ohQt7o2c" 130 | } 131 | }, 132 | { 133 | "cell_type": "code", 134 | "source": [ 135 | "import os\n", 136 | "os.chdir('/content/DisCo')\n", 137 | "os.getcwd()" 138 | ], 139 | "metadata": { 140 | "colab": { 141 | "base_uri": "https://localhost:8080/", 142 | "height": 35 143 | }, 144 | "id": "n2knZKbPsxsj", 145 | "outputId": "5cef10ee-ecf2-4120-fbc6-0ac5f224cd82" 146 | }, 147 | "execution_count": null, 148 | "outputs": [ 149 | { 150 | "output_type": "execute_result", 151 | "data": { 152 | "text/plain": [ 153 | "'/content/DisCo'" 154 | ], 155 | "application/vnd.google.colaboratory.intrinsic+json": { 156 | "type": "string" 157 | } 158 | }, 159 | "metadata": {}, 160 | "execution_count": 6 161 | } 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "source": [ 167 | "!pip uninstall colorlog -y\n", 168 | "!pip uninstall deepdish -y\n", 169 | "!pip uninstall configobj -y\n", 170 | "!pip uninstall json_lines -y\n", 171 | "!pip install colorlog deepdish configobj json_lines" 172 | ], 173 | "metadata": { 174 | "id": "tK1pUheJbWr0" 175 | }, 176 | "execution_count": null, 177 | "outputs": [] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "source": [ 182 | "!pip uninstall einops -y" 183 | ], 184 | "metadata": { 185 | "id": "T-uP6jcJcJR6" 186 | }, 187 | "execution_count": null, 188 | "outputs": [] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "source": [ 193 | "!pip install einops" 194 | ], 195 | "metadata": { 196 | "id": "1EYe-cm8cgoC" 197 | }, 198 | "execution_count": null, 199 | "outputs": [] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "source": [ 204 | "!pip install transformers" 205 | ], 206 | "metadata": { 207 | "id": "v4U3pLmYcyJ3" 208 | }, 209 | "execution_count": null, 210 | "outputs": [] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "source": [ 215 | "!pip uninstall huggingface_hub -y" 216 | ], 217 | "metadata": { 218 | "id": "2SJt4AqJdHNI" 219 | }, 220 | "execution_count": null, 221 | "outputs": [] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "source": [ 226 | "!pip install huggingface_hub" 227 | ], 228 | "metadata": { 229 | "id": "AH-y9q7jdO0K" 230 | }, 231 | "execution_count": null, 232 | "outputs": [] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "source": [ 237 | "!pip install tensorboardX" 238 | ], 239 | "metadata": { 240 | "id": "hGNAwm3pdYsC" 241 | }, 242 | "execution_count": null, 243 | "outputs": [] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "source": [ 248 | "!pip uninstall deepspeed -y" 249 | ], 250 | "metadata": { 251 | "id": "YQyLaOy5drzA" 252 | }, 253 | "execution_count": null, 254 | "outputs": [] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "source": [ 259 | "!pip install deepspeed" 260 | ], 261 | "metadata": { 262 | "id": "zfA9AdNPdyym" 263 | }, 264 | "execution_count": null, 265 | "outputs": [] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "source": [ 270 | "!pip uninstall hjson -y\n", 271 | "!pip install hjson" 272 | ], 273 | "metadata": { 274 | "id": "eNYAx9_neRLn" 275 | }, 276 | "execution_count": null, 277 | "outputs": [] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "source": [ 282 | "!pip install wandb" 283 | ], 284 | "metadata": { 285 | "id": "gcDVdMoNei1A" 286 | }, 287 | "execution_count": null, 288 | "outputs": [] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "source": [ 293 | "!pip uninstall deprecated -y\n", 294 | "!pip install deprecated" 295 | ], 296 | "metadata": { 297 | "id": "gJwkx7EqezqO" 298 | }, 299 | "execution_count": null, 300 | "outputs": 
[] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "source": [ 305 | "import os\n", 306 | "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n", 307 | "os.environ[\"WANDB_ENABLE\"] = \"0\"\n", 308 | "\n", 309 | "from utils.wutils_ldm import *\n", 310 | "from agent import Agent_LDM, WarmupLinearLR, WarmupLinearConstantLR\n", 311 | "import torch\n", 312 | "from config import BasicArgs\n", 313 | "from utils.lib import *\n", 314 | "# from utils.args import parse_with_cf\n", 315 | "from utils.dist import dist_init\n", 316 | "from dataset.tsv_dataset import make_data_sampler, make_batch_data_sampler\n", 317 | "from finetune_sdm_yaml import get_loader_info, make_data_loader\n", 318 | "torch.multiprocessing.set_sharing_strategy('file_system')" 319 | ], 320 | "metadata": { 321 | "id": "513HsIP_sHMW" 322 | }, 323 | "execution_count": null, 324 | "outputs": [] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "source": [ 329 | "!pip uninstall diffusers -y" 330 | ], 331 | "metadata": { 332 | "id": "Nbv67O8Rft8J" 333 | }, 334 | "execution_count": null, 335 | "outputs": [] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "source": [ 340 | "!pip install diffusers==0.14.0" 341 | ], 342 | "metadata": { 343 | "id": "quRzdae_ikYv" 344 | }, 345 | "execution_count": null, 346 | "outputs": [] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "source": [ 351 | "from utils.args import sharedArgs\n", 352 | "manual_args = ['--cf', 'config/ref_attn_clip_combine_controlnet/app_demo_image_edit.py', '--eval_visu', 'True', '--root_dir', '/content/run_test', '--local_train_batch_size', '32', '--local_eval_batch_size', '32', '--log_dir', 'exp/tiktok_ft', '--epochs', '20', '--deepspeed', '--eval_step', '500',\n", 353 | " '--save_step', '500', '--gradient_accumulate_steps', '1', '--learning_rate', '2e-4', '--fix_dist_seed', 'True', '--loss_target',\n", 354 | " 'noise', '--unet_unfreeze_type', 'all', '--guidance_scale', '3', '--refer_sdvae', 'True', '--ref_null_caption', 'False', '--combine_clip_local', 'True', '--combine_use_mask', 'True', '--conds', 'poses','masks', '--pretrained_model', '/content/mp_rank_00_model_states.pt', '--pretrained_model_path', '/content/sd-image-variations-diffusers', '--eval_save_filename', 'try']\n", 355 | "parsed_args = sharedArgs.parser.parse_args(args=manual_args)\n", 356 | "\n", 357 | "###### process the args #######\n", 358 | "if parsed_args.root_dir:\n", 359 | " BasicArgs.root_dir = parsed_args.root_dir\n", 360 | "else:\n", 361 | " parsed_args.root_dir = BasicArgs.root_dir\n", 362 | "parsed_args.pretrained_model_path = os.path.join(parsed_args.root_dir, parsed_args.pretrained_model_path)\n", 363 | "\n", 364 | "def parse_with_cf(parsed_args):\n", 365 | " \"\"\"This function will set args based on the input config file.\n", 366 | " (1) it only overwrites unset parameters,\n", 367 | " i.e., these parameters not set from user command line input\n", 368 | " (2) it also sets configs in the config file but declared in the parser\n", 369 | " \"\"\"\n", 370 | " # convert to EasyDict object,\n", 371 | " # enabling access from attributes even for nested config\n", 372 | " # e.g., args.train_datasets[0].name\n", 373 | " args = edict(vars(parsed_args))\n", 374 | " if os.path.exists(parsed_args.cf):\n", 375 | " cf = import_filename(parsed_args.cf)\n", 376 | " config_args = edict(vars(cf.Args))\n", 377 | " override_keys = {arg[2:].split(\"=\")[0] for arg in manual_args\n", 378 | " if arg.startswith(\"--\")}\n", 379 | " # import pdb;pdb.set_trace()\n", 380 | " for k, v in config_args.items():\n", 381 | " if 
k not in override_keys:\n", 382 | " setattr(args, k, v)\n", 383 | " else:\n", 384 | " raise NotImplementedError('Config filename %s does not exist.' % args.cf)\n", 385 | " return args\n", 386 | "\n", 387 | "args = parse_with_cf(parsed_args)\n", 388 | "\n", 389 | "args.n_gpu = T.cuda.device_count() # local size\n", 390 | "args.local_size = args.n_gpu\n", 391 | "if args.root_dir not in args.log_dir:\n", 392 | " args.log_dir = os.path.join(args.root_dir, args.log_dir)\n", 393 | "if args.stepwise_sample_depth == -1:\n", 394 | " args.interpolation = None\n", 395 | " args.interpolate_mode = None\n", 396 | "if args.interpolation != \"interpolate\":\n", 397 | " args.interpolate_mode = None\n", 398 | "\n", 399 | "assert args.eval_step > 0, \"eval_step must be positive\"\n", 400 | "assert args.save_step > 0, \"save_step must be positive\"\n", 401 | "\n", 402 | "dist_init(args)\n", 403 | "args.dist = args.distributed\n", 404 | "args.nodes = args.num_nodes\n", 405 | "args.world_size = args.num_gpus\n", 406 | "args.train_batch_size = args.local_train_batch_size * args.world_size\n", 407 | "args.eval_batch_size = args.local_eval_batch_size * args.world_size\n", 408 | "#############################################\n", 409 | "\n", 410 | "cf = import_filename(args.cf)\n", 411 | "Net, inner_collect_fn = cf.Net, cf.inner_collect_fn\n", 412 | "\n", 413 | "dataset_cf = import_filename(args.dataset_cf)\n", 414 | "BaseDataset = dataset_cf.BaseDataset\n", 415 | "\n", 416 | "# args = update_args(parsed_args, args)\n", 417 | "\n", 418 | "# init models\n", 419 | "logger.info('Building models...')\n", 420 | "model = Net(args)\n", 421 | "print(f\"Args: {edict(vars(args))}\")" 422 | ], 423 | "metadata": { 424 | "id": "pCuG7qZ3zjYi" 425 | }, 426 | "execution_count": null, 427 | "outputs": [] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "source": [ 432 | "logger.warning(\"Do eval_visu...\")\n", 433 | "if getattr(args, 'refer_clip_preprocess', None):\n", 434 | " eval_dataset = BaseDataset(args, args.val_yaml, split='val', preprocesser=model.feature_extractor)\n", 435 | "else:\n", 436 | " eval_dataset = BaseDataset(args, args.val_yaml, split='val')\n", 437 | "eval_dataloader, eval_info = make_data_loader(\n", 438 | " args, args.local_eval_batch_size,\n", 439 | " eval_dataset)\n", 440 | "\n", 441 | "\n", 442 | "trainer = Agent_LDM(args=args, model=model)\n", 443 | "trainer.eval_demo_pre()" 444 | ], 445 | "metadata": { 446 | "id": "OmhxcD304rY-" 447 | }, 448 | "execution_count": null, 449 | "outputs": [] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "source": [ 454 | "def load_image(image):\n", 455 | " if not image.mode == \"RGB\":\n", 456 | " image = image.convert(\"RGB\")\n", 457 | " return image\n", 458 | "\n", 459 | "@torch.no_grad()\n", 460 | "def inference(reference_fg, fg_mask, ref_bg_image, bg_mask, skeleton_img, *args, **kwargs):\n", 461 | " reference_fg = load_image(reference_fg)\n", 462 | " fg_mask = load_image(fg_mask)\n", 463 | " ref_bg_image = load_image(ref_bg_image)\n", 464 | " bg_mask = load_image(bg_mask)\n", 465 | " skeleton_img = load_image(skeleton_img)\n", 466 | "\n", 467 | " input_data = [reference_fg, fg_mask, ref_bg_image, bg_mask, skeleton_img]\n", 468 | " output_image = trainer.eval_demo_run(input_data, eval_dataset=eval_dataset)\n", 469 | " return output_image\n", 470 | "\n", 471 | "@torch.no_grad()\n", 472 | "def inference_masked(reference_fg, ref_bg_image, skeleton_img, *args, **kwargs):\n", 473 | " reference_fg = load_image(reference_fg)\n", 474 | " ref_bg_image = 
load_image(ref_bg_image)\n",
475 | " skeleton_img = load_image(skeleton_img)\n",
476 | "\n",
477 | " input_data = [reference_fg, ref_bg_image, skeleton_img]\n",
478 | " output_image = trainer.eval_demo_run_masked(input_data, eval_dataset=eval_dataset)\n",
479 | " return output_image"
480 | ],
481 | "metadata": {
482 | "id": "fF-xqrj95ekN"
483 | },
484 | "execution_count": null,
485 | "outputs": []
486 | },
487 | {
488 | "cell_type": "markdown",
489 | "source": [
490 | "### 5. Launch the gradio demo"
491 | ],
492 | "metadata": {
493 | "id": "wv2ZhLq_77Ik"
494 | }
495 | },
496 | {
497 | "cell_type": "code",
498 | "source": [
499 | "!pip install gradio"
500 | ],
501 | "metadata": {
502 | "id": "oe4-FQCfmYqg"
503 | },
504 | "execution_count": null,
505 | "outputs": []
506 | },
507 | {
508 | "cell_type": "code",
509 | "source": [
510 | "\n",
511 | "\n",
512 | "import gradio as gr\n",
513 | "'''\n",
514 | "launch app\n",
515 | "'''\n",
516 | "title = \"DisCo Demo (Video Demo Coming Soon!)\"\n",
517 | "description = \"\"\"Project Page | Paper | Github Repo | Video\n",
518 | "Skip the queue by duplicating this space and upgrading to GPU in settings\n",
519 | "Duplicate Space\n",
520 | "\"\"\"\n",
521 | "\n",
522 | "\n",
523 | "\n",
524 | "with gr.Blocks() as demo:\n",
525 | "    gr.Markdown(\n",
526 | "        \"\"\"\n",
527 | "        # DisCo Demo (Video Demo Coming Soon!)\n",
528 | "        Start editing the human image with the provided human foreground, background, and pose.\n",
529 | "\n",
530 | "        Note that for self-uploaded images, TikTok-style human images are preferred.\n",
531 | "\n",
532 | "        [Project Page](https://disco-dance.github.io/) | [Github](https://github.com/Wangt-CN/DisCo)\n",
533 | "        \"\"\")\n",
534 | "\n",
535 | "    with gr.Row().style(equal_height=False):\n",
536 | "        with gr.Column(min_width=400, scale=2):\n",
537 | "            input_fg = gr.Image(type='pil',label=\"Foreground Image\")\n",
538 | "            gr.Examples(examples=[\"./demo_data/fg/masked_images/00035.png\", \"./demo_data/fg/masked_images/00335.png\", \"./demo_data/fg/masked_images/00147.png\", \"./demo_data/fg/masked_images/00072.png\", \"./demo_data/fg/masked_images/00115.png\"], inputs=input_fg)\n",
539 | "\n",
540 | "            input_bg = gr.Image(type='pil',label=\"Background Image\")\n",
541 | "            gr.Examples(examples=[\"./demo_data/bg/masked_images/00035.png\", \"./demo_data/bg/masked_images/00335.png\", \"./demo_data/bg/masked_images/00147.png\", \"./demo_data/bg/masked_images/00072.png\", \"./demo_data/bg/masked_images/00115.png\"], inputs=input_bg)\n",
542 | "\n",
543 | "            input_pose = gr.Image(type='pil',label=\"Target Pose\",scale=1)\n",
544 | "            gr.Examples(examples=[\"./demo_data/pose_img/0049.png\",\"./demo_data/pose_img/0198.png\",\"./demo_data/pose_img/0213.png\",\"./demo_data/pose_img/0264.png\",\"./demo_data/pose_img/0144.png\",\"./demo_data/pose_img/0054.png\"], inputs=input_pose)\n",
545 | "\n",
546 | "            btn = gr.Button(\"Generate\")\n",
547 | "\n",
548 | "\n",
549 | "        with gr.Column(min_width=150):\n",
550 | "            output_img = gr.Image(type='pil',label=\"Edited Human Image\")\n",
551 | "\n",
552 | "    btn.click(inference_masked, inputs=[input_fg, input_bg, input_pose], outputs=[output_img])\n",
553 | "\n",
554 | "demo.queue(concurrency_count=2)\n",
555 | "demo.launch()"
556 | ],
557 | "metadata": {
558 | "id": "yvs61CCg5iZV"
559 | },
560 | "execution_count": null,
561 | "outputs": []
562 | }
563 | ]
564 | }
--------------------------------------------------------------------------------
/PiperUI/convert_mp3_to_wav.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 | setlocal enabledelayedexpansion
3 | 
4 | set "input_folder=%CD%"
5 | 
6 | :: Check if ffmpeg is available in the system path
7 | where ffmpeg >nul 2>nul
8 | if %errorlevel% neq 0 (
9 |     echo Error: ffmpeg not found in the system path.
10 |     exit /b 1
11 | )
12 | 
13 | :: Iterate through each MP3 file in the current folder
14 | for %%F in ("%input_folder%\*.mp3") do (
15 |     set /a count+=1
16 |     set "output_file=!count!.wav"
17 | 
18 |     :: Execute ffmpeg command for each MP3 file
19 |     ffmpeg -i "%%F" -acodec pcm_s16le -ar 22050 "!output_file!"
20 | )
21 | 
22 | echo Conversion completed.
23 | exit /b 0
24 | 
--------------------------------------------------------------------------------
/PiperUI/transcript.py:
--------------------------------------------------------------------------------
1 | import os
2 | import whisper
3 | 
4 | # Load the whisper model
5 | model = whisper.load_model("base")
6 | 
7 | # Get the list of WAV files in the current directory
8 | wav_files = [file for file in os.listdir() if file.endswith(".wav")]
9 | 
10 | # Sort the WAV files in numeric order
11 | wav_files = sorted(wav_files, key=lambda x: int(os.path.splitext(x)[0]))
12 | 
13 | # Open a text file for writing the transcripts
14 | with open("transcript.txt", "w") as transcript_file:
15 |     # Iterate through each WAV file
16 |     for wav_file in wav_files:
17 |         print(wav_file)
18 |         # Transcribe the current WAV file
19 |         result = model.transcribe(wav_file)
20 | 
21 |         # Remove leading and trailing spaces from the transcribed text
22 |         transcribed_text = result['text'].strip()
23 | 
24 |         # Write the result to the transcript file without space after '|'
25 |         transcript_file.write(f"wavs/{wav_file}|{transcribed_text}\n")
26 | 
27 | print("Transcription complete. Check 'transcript.txt' for results.")
28 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ProjectFiles
2 | Where I will be storing misc files with details and links used during the installation process, etc.
--------------------------------------------------------------------------------
/Wav2Lip-ESRGAN/1-prerequisite.txt:
--------------------------------------------------------------------------------
1 | 1. enable long paths with a registry edit
2 | 2. install vs 2022 / visual c++ 2015-2022 redistributable
3 | 3. ffmpeg
4 | 4. 
git -------------------------------------------------------------------------------- /Wav2Lip-ESRGAN/2-wav2lip-hd.txt: -------------------------------------------------------------------------------- 1 | cd c:\ai 2 | 3 | conda create -n wav2lip-hd python=3.6 4 | 5 | conda activate wav2lip-hd 6 | 7 | git clone https://github.com/saifhassan/Wav2Lip-HD.git 8 | 9 | cd wav2lip-hd 10 | 11 | delete from requirements: 12 | opencv-contrib-python>=4.2.0.34 13 | opencv-python 14 | torch>=1.3 15 | torchvision 16 | 17 | conda install -c conda-forge opencv 18 | 19 | pip install -r requirements.txt 20 | 21 | pip install ffmpeg-python 22 | 23 | conda install -c conda-forge ffmpeg 24 | 25 | Install with conda from pytorch website: 26 | conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia 27 | 28 | To check Cuda: 29 | python 30 | import torch 31 | torch.cuda.is_available() 32 | 33 | download models/weights -------------------------------------------------------------------------------- /Wav2Lip-ESRGAN/3-Real-ESRGAN.txt: -------------------------------------------------------------------------------- 1 | cd c:\ai 2 | 3 | conda create -n Real-ESRGAN python=3.10 4 | 5 | conda activate Real-ESRGAN 6 | 7 | git clone https://github.com/xinntao/Real-ESRGAN.git 8 | 9 | cd Real-ESRGAN 10 | 11 | delete from requirements: 12 | opencv-python 13 | torch>=1.7 14 | torchvision 15 | 16 | conda install -c conda-forge opencv 17 | 18 | pip install -r requirements.txt 19 | 20 | python setup.py develop 21 | 22 | pip install chardet 23 | 24 | pip install ffmpeg-python 25 | 26 | conda install -c conda-forge ffmpeg 27 | 28 | Install with conda from pytorch website: 29 | conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia 30 | 31 | To check Cuda: 32 | python 33 | import torch 34 | torch.cuda.is_available() 35 | 36 | download models/weights -------------------------------------------------------------------------------- /Wav2Lip-ESRGAN/4-run-commands.txt: -------------------------------------------------------------------------------- 1 | python inference_realesrgan.py -n RealESRGAN_x4plus -i inputs --face_enhance --outscale 3.5 2 | 3 | ffmpeg -r 30 -i frame_%05d_out.jpg -i kekw.mp3 -vcodec libx264 -crf 25 -preset veryslow -acodec copy hd.mkv -------------------------------------------------------------------------------- /melotts/download.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import shutil 3 | import zipfile 4 | import os 5 | import sys 6 | from wasabi import msg 7 | from urllib.request import urlretrieve 8 | from tqdm import tqdm 9 | 10 | # This is used to show progress when downloading. 11 | # see here: https://github.com/tqdm/tqdm#hooks-and-callbacks 12 | class TqdmUpTo(tqdm): 13 | """Provides `update_to(n)` which uses `tqdm.update(delta_n)`.""" 14 | def update_to(self, b=1, bsize=1, tsize=None): 15 | """ 16 | b : int, optional 17 | Number of blocks transferred so far [default: 1]. 18 | bsize : int, optional 19 | Size of each block (in tqdm units) [default: 1]. 20 | tsize : int, optional 21 | Total size (in tqdm units). If [default: None] remains unchanged. 
22 | """ 23 | if tsize is not None: 24 | self.total = tsize 25 | self.update(b * bsize - self.n) # will also set self.n = b * bsize 26 | 27 | def download_file(url, fname): 28 | with requests.get(url, stream=True) as r: 29 | with open(fname, 'wb') as f: 30 | shutil.copyfileobj(r.raw, f) 31 | 32 | return fname 33 | 34 | def download_progress(url, fname): 35 | """Download a file and show a progress bar.""" 36 | with TqdmUpTo(unit='B', unit_scale=True, miniters=1, 37 | desc=url.split('/')[-1]) as t: # all optional kwargs 38 | urlretrieve(url, filename=fname, reporthook=t.update_to, data=None) 39 | t.total = t.n 40 | return fname 41 | 42 | def get_json(url, desc): 43 | r = requests.get(url) 44 | if r.status_code != 200: 45 | msg.fail( 46 | "Server error ({})".format(r.status_code), 47 | "Couldn't fetch {}. If this error persists please open an issue." 48 | " http://github.com/polm/unidic-py/issues/".format(desc), 49 | exits=1, 50 | ) 51 | return r.json() 52 | 53 | def download_and_clean(version, url, dirname='unidic', delfiles=[]): 54 | """Download unidic and prep the dicdir. 55 | 56 | This downloads the zip file from the source, extracts it, renames the 57 | resulting directory, and removes large files not used at runtime. 58 | """ 59 | cdir = os.path.dirname(os.path.abspath(__file__)) 60 | fname = os.path.join(cdir, 'unidic.zip') 61 | print("Downloading UniDic v{}...".format(version), file=sys.stderr) 62 | #download_progress(url, fname) 63 | print("Finished download.") 64 | 65 | with zipfile.ZipFile(fname, 'r') as zf: 66 | zf.extractall(cdir) 67 | os.remove(fname) 68 | 69 | dicdir = os.path.join(cdir, 'dicdir') 70 | if os.path.isdir(dicdir): 71 | shutil.rmtree(dicdir) 72 | 73 | outdir = os.path.join(cdir, dirname) 74 | shutil.move(outdir, dicdir) 75 | 76 | for dfile in delfiles: 77 | os.remove(os.path.join(dicdir, dfile)) 78 | 79 | # save a version file so we can tell what it is 80 | vpath = os.path.join(dicdir, 'version') 81 | with open(vpath, 'w') as vfile: 82 | vfile.write('unidic-{}'.format(version)) 83 | 84 | # Write a dummy mecabrc 85 | with open(os.path.join(dicdir, 'mecabrc'), 'w') as mecabrc: 86 | mecabrc.write('# This is a dummy file.') 87 | 88 | print("Downloaded UniDic v{} to {}".format(version, dicdir), file=sys.stderr) 89 | 90 | DICT_INFO = "https://raw.githubusercontent.com/polm/unidic-py/master/dicts.json" 91 | 92 | def download_version(ver="latest"): 93 | # res = get_json(DICT_INFO, "dictionary info") 94 | # try: 95 | # dictinfo = res[ver] 96 | # except KeyError: 97 | # print('Unknown version "{}".'.format(ver)) 98 | # print("Known versions:") 99 | # for key, val in res.items(): 100 | # print("\t", key, "({})".format(val['version'])) 101 | 102 | # print("download url:", dictinfo['url']) 103 | # print("Dictionary version:", dictinfo['version']) 104 | download_and_clean('1', '1') 105 | 106 | --------------------------------------------------------------------------------