├── AudioCraft
│   └── install.bat
├── AudioSep
│   ├── audioset_textmap.npy
│   └── bpe_simple_vocab_16e6.txt.gz
├── Disco
│   └── DisCo_Demo.ipynb
├── PiperUI
│   ├── convert_mp3_to_wav.bat
│   └── transcript.py
├── README.md
├── Wav2Lip-ESRGAN
│   ├── 1-prerequisite.txt
│   ├── 2-wav2lip-hd.txt
│   ├── 3-Real-ESRGAN.txt
│   └── 4-run-commands.txt
└── melotts
    └── download.py

/AudioCraft/install.bat:
--------------------------------------------------------------------------------
1 | cd c:\ai
2 | 
3 | git clone https://github.com/facebookresearch/audiocraft.git
4 | 
5 | cd audiocraft
6 | 
7 | echo y | conda create -n audiocraft python=3.9
8 | 
9 | call activate audiocraft
10 | 
11 | echo y | conda install -c conda-forge "ffmpeg<5"
12 | 
13 | echo y | pip install -r requirements.txt
14 | 
15 | echo y | pip install -U git+https://git@github.com/facebookresearch/audiocraft#egg=audiocraft
16 | 
17 | echo y | conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia
18 | 
19 | echo y | pip install numpy==1.24
20 | 
21 | echo y | pip install chardet
22 | 
23 | echo y | conda install -c conda-forge ipywidgets
24 | 
--------------------------------------------------------------------------------
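Not part of the original script, but a quick sanity check in the spirit of the CUDA checks in the Wav2Lip-ESRGAN notes further down: with the audiocraft env active, confirm that the conda-installed PyTorch actually sees the GPU before running AudioCraft.

import torch
print(torch.cuda.is_available())  # expect True on a working CUDA 11.8 setup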
/AudioSep/audioset_textmap.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/natlamir/ProjectFiles/6cac36b40e71b4a57b18ccd3e1e26c12379f97dd/AudioSep/audioset_textmap.npy
--------------------------------------------------------------------------------
/AudioSep/bpe_simple_vocab_16e6.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/natlamir/ProjectFiles/6cac36b40e71b4a57b18ccd3e1e26c12379f97dd/AudioSep/bpe_simple_vocab_16e6.txt.gz
--------------------------------------------------------------------------------
/Disco/DisCo_Demo.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "gpuType": "T4",
8 | "include_colab_link": true
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | },
14 | "language_info": {
15 | "name": "python"
16 | },
17 | "accelerator": "GPU"
18 | },
19 | "cells": [
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {
23 | "id": "view-in-github",
24 | "colab_type": "text"
25 | },
26 | "source": [
27 | "Open In Colab"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": null,
33 | "metadata": {
34 | "id": "1Qu-pX4GDlgs"
35 | },
36 | "outputs": [],
37 | "source": [
38 | "!nvidia-smi"
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "source": [
44 | "### 1. Clone the GitHub repo"
45 | ],
46 | "metadata": {
47 | "id": "N7Som16t69XO"
48 | }
49 | },
50 | {
51 | "cell_type": "code",
52 | "source": [
53 | "!git clone https://github.com/Wangt-CN/DisCo"
54 | ],
55 | "metadata": {
56 | "id": "kKByNFtrfV7M"
57 | },
58 | "execution_count": null,
59 | "outputs": []
60 | },
61 | {
62 | "cell_type": "markdown",
63 | "source": [
64 | "\n",
65 | "### 2. Install the package\n",
66 | "\n",
67 | "PS: Most errors are due to unsuccessful package installation; please check the installation carefully.\n"
68 | ],
69 | "metadata": {
70 | "id": "mpXDRYzO6rEY"
71 | }
72 | },
73 | {
74 | "cell_type": "code",
75 | "source": [
76 | "!pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 torchtext==0.14.1 torchaudio==0.13.1 torchdata==0.5.1 --extra-index-url https://download.pytorch.org/whl/cu117\n",
77 | "!pip install --user progressbar psutil pymongo simplejson yacs boto3 pyyaml ete3 easydict deprecated future django orderedset python-magic datasets h5py omegaconf einops ipdb\n",
78 | "!pip install --user --exists-action w -r DisCo/requirements.txt\n",
79 | "!pip install git+https://github.com/microsoft/azfuse.git\n",
80 | "\n",
81 | "## for acceleration\n",
82 | "!pip install --user deepspeed==0.6.3"
83 | ],
84 | "metadata": {
85 | "id": "37UDwQVxfp4T"
86 | },
87 | "execution_count": null,
88 | "outputs": []
89 | },
90 | {
91 | "cell_type": "code",
92 | "source": [
93 | "!pip install -U xformers"
94 | ],
95 | "metadata": {
96 | "id": "ZMfiYfxAyLmx"
97 | },
98 | "execution_count": null,
99 | "outputs": []
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "source": [
104 | "### 3. Download the pretrained model\n",
105 | "Feel free to use our other [checkpoints](https://github.com/Wangt-CN/DisCo#model-checkpoint-google-cloud-tiktok-training-data-fid-fvd-188--more-tiktok-style-training-data-fid-fvd-157) or change to your own model."
106 | ],
107 | "metadata": {
108 | "id": "YkfPye5C7FDV"
109 | }
110 | },
111 | {
112 | "cell_type": "code",
113 | "source": [
114 | "!git clone https://huggingface.co/lambdalabs/sd-image-variations-diffusers\n",
115 | "!wget https://storage.googleapis.com/disco-checkpoint-share/checkpoint_ft/moretiktok_nocfg/mp_rank_00_model_states.pt"
116 | ],
117 | "metadata": {
118 | "id": "v5FjMXqkh827"
119 | },
120 | "execution_count": null,
121 | "outputs": []
122 | },
123 | {
124 | "cell_type": "markdown",
125 | "source": [
126 | "### 4. 
Start Running" 127 | ], 128 | "metadata": { 129 | "id": "5-u3ohQt7o2c" 130 | } 131 | }, 132 | { 133 | "cell_type": "code", 134 | "source": [ 135 | "import os\n", 136 | "os.chdir('/content/DisCo')\n", 137 | "os.getcwd()" 138 | ], 139 | "metadata": { 140 | "colab": { 141 | "base_uri": "https://localhost:8080/", 142 | "height": 35 143 | }, 144 | "id": "n2knZKbPsxsj", 145 | "outputId": "5cef10ee-ecf2-4120-fbc6-0ac5f224cd82" 146 | }, 147 | "execution_count": null, 148 | "outputs": [ 149 | { 150 | "output_type": "execute_result", 151 | "data": { 152 | "text/plain": [ 153 | "'/content/DisCo'" 154 | ], 155 | "application/vnd.google.colaboratory.intrinsic+json": { 156 | "type": "string" 157 | } 158 | }, 159 | "metadata": {}, 160 | "execution_count": 6 161 | } 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "source": [ 167 | "!pip uninstall colorlog -y\n", 168 | "!pip uninstall deepdish -y\n", 169 | "!pip uninstall configobj -y\n", 170 | "!pip uninstall json_lines -y\n", 171 | "!pip install colorlog deepdish configobj json_lines" 172 | ], 173 | "metadata": { 174 | "id": "tK1pUheJbWr0" 175 | }, 176 | "execution_count": null, 177 | "outputs": [] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "source": [ 182 | "!pip uninstall einops -y" 183 | ], 184 | "metadata": { 185 | "id": "T-uP6jcJcJR6" 186 | }, 187 | "execution_count": null, 188 | "outputs": [] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "source": [ 193 | "!pip install einops" 194 | ], 195 | "metadata": { 196 | "id": "1EYe-cm8cgoC" 197 | }, 198 | "execution_count": null, 199 | "outputs": [] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "source": [ 204 | "!pip install transformers" 205 | ], 206 | "metadata": { 207 | "id": "v4U3pLmYcyJ3" 208 | }, 209 | "execution_count": null, 210 | "outputs": [] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "source": [ 215 | "!pip uninstall huggingface_hub -y" 216 | ], 217 | "metadata": { 218 | "id": "2SJt4AqJdHNI" 219 | }, 220 | "execution_count": null, 221 | "outputs": [] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "source": [ 226 | "!pip install huggingface_hub" 227 | ], 228 | "metadata": { 229 | "id": "AH-y9q7jdO0K" 230 | }, 231 | "execution_count": null, 232 | "outputs": [] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "source": [ 237 | "!pip install tensorboardX" 238 | ], 239 | "metadata": { 240 | "id": "hGNAwm3pdYsC" 241 | }, 242 | "execution_count": null, 243 | "outputs": [] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "source": [ 248 | "!pip uninstall deepspeed -y" 249 | ], 250 | "metadata": { 251 | "id": "YQyLaOy5drzA" 252 | }, 253 | "execution_count": null, 254 | "outputs": [] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "source": [ 259 | "!pip install deepspeed" 260 | ], 261 | "metadata": { 262 | "id": "zfA9AdNPdyym" 263 | }, 264 | "execution_count": null, 265 | "outputs": [] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "source": [ 270 | "!pip uninstall hjson -y\n", 271 | "!pip install hjson" 272 | ], 273 | "metadata": { 274 | "id": "eNYAx9_neRLn" 275 | }, 276 | "execution_count": null, 277 | "outputs": [] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "source": [ 282 | "!pip install wandb" 283 | ], 284 | "metadata": { 285 | "id": "gcDVdMoNei1A" 286 | }, 287 | "execution_count": null, 288 | "outputs": [] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "source": [ 293 | "!pip uninstall deprecated -y\n", 294 | "!pip install deprecated" 295 | ], 296 | "metadata": { 297 | "id": "gJwkx7EqezqO" 298 | }, 299 | "execution_count": null, 300 | "outputs": 
[] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "source": [ 305 | "import os\n", 306 | "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n", 307 | "os.environ[\"WANDB_ENABLE\"] = \"0\"\n", 308 | "\n", 309 | "from utils.wutils_ldm import *\n", 310 | "from agent import Agent_LDM, WarmupLinearLR, WarmupLinearConstantLR\n", 311 | "import torch\n", 312 | "from config import BasicArgs\n", 313 | "from utils.lib import *\n", 314 | "# from utils.args import parse_with_cf\n", 315 | "from utils.dist import dist_init\n", 316 | "from dataset.tsv_dataset import make_data_sampler, make_batch_data_sampler\n", 317 | "from finetune_sdm_yaml import get_loader_info, make_data_loader\n", 318 | "torch.multiprocessing.set_sharing_strategy('file_system')" 319 | ], 320 | "metadata": { 321 | "id": "513HsIP_sHMW" 322 | }, 323 | "execution_count": null, 324 | "outputs": [] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "source": [ 329 | "!pip uninstall diffusers -y" 330 | ], 331 | "metadata": { 332 | "id": "Nbv67O8Rft8J" 333 | }, 334 | "execution_count": null, 335 | "outputs": [] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "source": [ 340 | "!pip install diffusers==0.14.0" 341 | ], 342 | "metadata": { 343 | "id": "quRzdae_ikYv" 344 | }, 345 | "execution_count": null, 346 | "outputs": [] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "source": [ 351 | "from utils.args import sharedArgs\n", 352 | "manual_args = ['--cf', 'config/ref_attn_clip_combine_controlnet/app_demo_image_edit.py', '--eval_visu', 'True', '--root_dir', '/content/run_test', '--local_train_batch_size', '32', '--local_eval_batch_size', '32', '--log_dir', 'exp/tiktok_ft', '--epochs', '20', '--deepspeed', '--eval_step', '500',\n", 353 | " '--save_step', '500', '--gradient_accumulate_steps', '1', '--learning_rate', '2e-4', '--fix_dist_seed', 'True', '--loss_target',\n", 354 | " 'noise', '--unet_unfreeze_type', 'all', '--guidance_scale', '3', '--refer_sdvae', 'True', '--ref_null_caption', 'False', '--combine_clip_local', 'True', '--combine_use_mask', 'True', '--conds', 'poses','masks', '--pretrained_model', '/content/mp_rank_00_model_states.pt', '--pretrained_model_path', '/content/sd-image-variations-diffusers', '--eval_save_filename', 'try']\n", 355 | "parsed_args = sharedArgs.parser.parse_args(args=manual_args)\n", 356 | "\n", 357 | "###### process the args #######\n", 358 | "if parsed_args.root_dir:\n", 359 | " BasicArgs.root_dir = parsed_args.root_dir\n", 360 | "else:\n", 361 | " parsed_args.root_dir = BasicArgs.root_dir\n", 362 | "parsed_args.pretrained_model_path = os.path.join(parsed_args.root_dir, parsed_args.pretrained_model_path)\n", 363 | "\n", 364 | "def parse_with_cf(parsed_args):\n", 365 | " \"\"\"This function will set args based on the input config file.\n", 366 | " (1) it only overwrites unset parameters,\n", 367 | " i.e., these parameters not set from user command line input\n", 368 | " (2) it also sets configs in the config file but declared in the parser\n", 369 | " \"\"\"\n", 370 | " # convert to EasyDict object,\n", 371 | " # enabling access from attributes even for nested config\n", 372 | " # e.g., args.train_datasets[0].name\n", 373 | " args = edict(vars(parsed_args))\n", 374 | " if os.path.exists(parsed_args.cf):\n", 375 | " cf = import_filename(parsed_args.cf)\n", 376 | " config_args = edict(vars(cf.Args))\n", 377 | " override_keys = {arg[2:].split(\"=\")[0] for arg in manual_args\n", 378 | " if arg.startswith(\"--\")}\n", 379 | " # import pdb;pdb.set_trace()\n", 380 | " for k, v in config_args.items():\n", 381 | " if 
k not in override_keys:\n", 382 | " setattr(args, k, v)\n", 383 | " else:\n", 384 | " raise NotImplementedError('Config filename %s does not exist.' % args.cf)\n", 385 | " return args\n", 386 | "\n", 387 | "args = parse_with_cf(parsed_args)\n", 388 | "\n", 389 | "args.n_gpu = T.cuda.device_count() # local size\n", 390 | "args.local_size = args.n_gpu\n", 391 | "if args.root_dir not in args.log_dir:\n", 392 | " args.log_dir = os.path.join(args.root_dir, args.log_dir)\n", 393 | "if args.stepwise_sample_depth == -1:\n", 394 | " args.interpolation = None\n", 395 | " args.interpolate_mode = None\n", 396 | "if args.interpolation != \"interpolate\":\n", 397 | " args.interpolate_mode = None\n", 398 | "\n", 399 | "assert args.eval_step > 0, \"eval_step must be positive\"\n", 400 | "assert args.save_step > 0, \"save_step must be positive\"\n", 401 | "\n", 402 | "dist_init(args)\n", 403 | "args.dist = args.distributed\n", 404 | "args.nodes = args.num_nodes\n", 405 | "args.world_size = args.num_gpus\n", 406 | "args.train_batch_size = args.local_train_batch_size * args.world_size\n", 407 | "args.eval_batch_size = args.local_eval_batch_size * args.world_size\n", 408 | "#############################################\n", 409 | "\n", 410 | "cf = import_filename(args.cf)\n", 411 | "Net, inner_collect_fn = cf.Net, cf.inner_collect_fn\n", 412 | "\n", 413 | "dataset_cf = import_filename(args.dataset_cf)\n", 414 | "BaseDataset = dataset_cf.BaseDataset\n", 415 | "\n", 416 | "# args = update_args(parsed_args, args)\n", 417 | "\n", 418 | "# init models\n", 419 | "logger.info('Building models...')\n", 420 | "model = Net(args)\n", 421 | "print(f\"Args: {edict(vars(args))}\")" 422 | ], 423 | "metadata": { 424 | "id": "pCuG7qZ3zjYi" 425 | }, 426 | "execution_count": null, 427 | "outputs": [] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "source": [ 432 | "logger.warning(\"Do eval_visu...\")\n", 433 | "if getattr(args, 'refer_clip_preprocess', None):\n", 434 | " eval_dataset = BaseDataset(args, args.val_yaml, split='val', preprocesser=model.feature_extractor)\n", 435 | "else:\n", 436 | " eval_dataset = BaseDataset(args, args.val_yaml, split='val')\n", 437 | "eval_dataloader, eval_info = make_data_loader(\n", 438 | " args, args.local_eval_batch_size,\n", 439 | " eval_dataset)\n", 440 | "\n", 441 | "\n", 442 | "trainer = Agent_LDM(args=args, model=model)\n", 443 | "trainer.eval_demo_pre()" 444 | ], 445 | "metadata": { 446 | "id": "OmhxcD304rY-" 447 | }, 448 | "execution_count": null, 449 | "outputs": [] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "source": [ 454 | "def load_image(image):\n", 455 | " if not image.mode == \"RGB\":\n", 456 | " image = image.convert(\"RGB\")\n", 457 | " return image\n", 458 | "\n", 459 | "@torch.no_grad()\n", 460 | "def inference(reference_fg, fg_mask, ref_bg_image, bg_mask, skeleton_img, *args, **kwargs):\n", 461 | " reference_fg = load_image(reference_fg)\n", 462 | " fg_mask = load_image(fg_mask)\n", 463 | " ref_bg_image = load_image(ref_bg_image)\n", 464 | " bg_mask = load_image(bg_mask)\n", 465 | " skeleton_img = load_image(skeleton_img)\n", 466 | "\n", 467 | " input_data = [reference_fg, fg_mask, ref_bg_image, bg_mask, skeleton_img]\n", 468 | " output_image = trainer.eval_demo_run(input_data, eval_dataset=eval_dataset)\n", 469 | " return output_image\n", 470 | "\n", 471 | "@torch.no_grad()\n", 472 | "def inference_masked(reference_fg, ref_bg_image, skeleton_img, *args, **kwargs):\n", 473 | " reference_fg = load_image(reference_fg)\n", 474 | " ref_bg_image = 
load_image(ref_bg_image)\n",
475 | " skeleton_img = load_image(skeleton_img)\n",
476 | "\n",
477 | " input_data = [reference_fg, ref_bg_image, skeleton_img]\n",
478 | " output_image = trainer.eval_demo_run_masked(input_data, eval_dataset=eval_dataset)\n",
479 | " return output_image"
480 | ],
481 | "metadata": {
482 | "id": "fF-xqrj95ekN"
483 | },
484 | "execution_count": null,
485 | "outputs": []
486 | },
487 | {
488 | "cell_type": "markdown",
489 | "source": [
490 | "### 5. Launch the gradio demo"
491 | ],
492 | "metadata": {
493 | "id": "wv2ZhLq_77Ik"
494 | }
495 | },
496 | {
497 | "cell_type": "code",
498 | "source": [
499 | "!pip install gradio"
500 | ],
501 | "metadata": {
502 | "id": "oe4-FQCfmYqg"
503 | },
504 | "execution_count": null,
505 | "outputs": []
506 | },
507 | {
508 | "cell_type": "code",
509 | "source": [
510 | "\n",
511 | "\n",
512 | "import gradio as gr\n",
513 | "'''\n",
514 | "launch app\n",
515 | "'''\n",
516 | "title = \"DisCo Demo (Video Demo Coming Soon!)\"\n",
517 | "description = \"\"\"Project Page | Paper | Github Repo | Video\n",
518 | "Skip the queue by duplicating this space and upgrading to GPU in settings\n",
519 | "Duplicate Space\n",
520 | "\"\"\"\n",
521 | "\n",
522 | "\n",
523 | "\n",
524 | "with gr.Blocks() as demo:\n",
525 | "    gr.Markdown(\n",
526 | "        \"\"\"\n",
527 | "        # DisCo Demo (Video Demo Coming Soon!)\n",
528 | "        Start editing the human image with the provided human foreground, background, and pose.\n",
529 | "\n",
530 | "        Note that for self-uploaded images, TikTok-style human images are preferred.\n",
531 | "\n",
532 | "        [Project Page](https://disco-dance.github.io/) | [Github](https://github.com/Wangt-CN/DisCo)\n",
533 | "        \"\"\")\n",
534 | "\n",
535 | "    with gr.Row().style(equal_height=False):\n",
536 | "        with gr.Column(min_width=400, scale=2):\n",
537 | "            input_fg = gr.Image(type='pil',label=\"Foreground Image\")\n",
538 | "            gr.Examples(examples=[\"./demo_data/fg/masked_images/00035.png\", \"./demo_data/fg/masked_images/00335.png\", \"./demo_data/fg/masked_images/00147.png\", \"./demo_data/fg/masked_images/00072.png\", \"./demo_data/fg/masked_images/00115.png\"], inputs=input_fg)\n",
539 | "\n",
540 | "            input_bg = gr.Image(type='pil',label=\"Background Image\")\n",
541 | "            gr.Examples(examples=[\"./demo_data/bg/masked_images/00035.png\", \"./demo_data/bg/masked_images/00335.png\", \"./demo_data/bg/masked_images/00147.png\", \"./demo_data/bg/masked_images/00072.png\", \"./demo_data/bg/masked_images/00115.png\"], inputs=input_bg)\n",
542 | "\n",
543 | "            input_pose = gr.Image(type='pil',label=\"Target Pose\",scale=1)\n",
544 | "            gr.Examples(examples=[\"./demo_data/pose_img/0049.png\",\"./demo_data/pose_img/0198.png\",\"./demo_data/pose_img/0213.png\",\"./demo_data/pose_img/0264.png\",\"./demo_data/pose_img/0144.png\",\"./demo_data/pose_img/0054.png\"], inputs=input_pose)\n",
545 | "\n",
546 | "            btn = gr.Button(\"Generate\")\n",
547 | "\n",
548 | "\n",
549 | "        with gr.Column(min_width=150):\n",
550 | "            output_img = gr.Image(type='pil',label=\"Edited Human Image\")\n",
551 | "\n",
552 | "    btn.click(inference_masked, inputs=[input_fg, input_bg, input_pose], outputs=[output_img])\n",
553 | "\n",
554 | "demo.queue(concurrency_count=2)\n",
555 | "demo.launch()"
556 | ],
557 | "metadata": {
558 | "id": "yvs61CCg5iZV"
559 | },
560 | "execution_count": null,
561 | "outputs": []
562 | }
563 | ]
564 | }
--------------------------------------------------------------------------------
/PiperUI/convert_mp3_to_wav.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 | setlocal enabledelayedexpansion
3 | 
4 | set "input_folder=%CD%"
5 | 
6 | :: Check if ffmpeg is available in the system path
7 | where ffmpeg >nul 2>nul
8 | if %errorlevel% neq 0 (
9 |     echo Error: ffmpeg not found in the system path.
10 |     exit /b 1
11 | )
12 | 
13 | :: Iterate through each MP3 file in the current folder
14 | for %%F in ("%input_folder%\*.mp3") do (
15 |     set /a count+=1
16 |     set "output_file=!count!.wav"
17 | 
18 |     :: Execute ffmpeg command for each MP3 file
19 |     ffmpeg -i "%%F" -acodec pcm_s16le -ar 22050 "!output_file!"
20 | )
21 | 
22 | echo Conversion completed.
23 | exit /b 0
24 | 
--------------------------------------------------------------------------------
/PiperUI/transcript.py:
--------------------------------------------------------------------------------
1 | import os
2 | import whisper
3 | 
4 | # Load the whisper model
5 | model = whisper.load_model("base")
6 | 
7 | # Get the list of WAV files in the current directory
8 | wav_files = [file for file in os.listdir() if file.endswith(".wav")]
9 | 
10 | # Sort the WAV files in numeric order
11 | wav_files = sorted(wav_files, key=lambda x: int(os.path.splitext(x)[0]))
12 | 
13 | # Open a text file for writing the transcripts
14 | with open("transcript.txt", "w") as transcript_file:
15 |     # Iterate through each WAV file
16 |     for wav_file in wav_files:
17 |         print(wav_file)
18 |         # Transcribe the current WAV file
19 |         result = model.transcribe(wav_file)
20 | 
21 |         # Remove leading and trailing spaces from the transcribed text
22 |         transcribed_text = result['text'].strip()
23 | 
24 |         # Write the result to the transcript file without space after '|'
25 |         transcript_file.write(f"wavs/{wav_file}|{transcribed_text}\n")
26 | 
27 | print("Transcription complete. Check 'transcript.txt' for results.")
28 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ProjectFiles
2 | Where I will be storing misc files with details and links used during the installation process, etc.
--------------------------------------------------------------------------------
/Wav2Lip-ESRGAN/1-prerequisite.txt:
--------------------------------------------------------------------------------
1 | 1. enable long paths with a registry edit
2 | 2. install vs 2022 / visual c++ 2015-2022 redistributable
3 | 3. ffmpeg
4 | 4. 
git -------------------------------------------------------------------------------- /Wav2Lip-ESRGAN/2-wav2lip-hd.txt: -------------------------------------------------------------------------------- 1 | cd c:\ai 2 | 3 | conda create -n wav2lip-hd python=3.6 4 | 5 | conda activate wav2lip-hd 6 | 7 | git clone https://github.com/saifhassan/Wav2Lip-HD.git 8 | 9 | cd wav2lip-hd 10 | 11 | delete from requirements: 12 | opencv-contrib-python>=4.2.0.34 13 | opencv-python 14 | torch>=1.3 15 | torchvision 16 | 17 | conda install -c conda-forge opencv 18 | 19 | pip install -r requirements.txt 20 | 21 | pip install ffmpeg-python 22 | 23 | conda install -c conda-forge ffmpeg 24 | 25 | Install with conda from pytorch website: 26 | conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia 27 | 28 | To check Cuda: 29 | python 30 | import torch 31 | torch.cuda.is_available() 32 | 33 | download models/weights -------------------------------------------------------------------------------- /Wav2Lip-ESRGAN/3-Real-ESRGAN.txt: -------------------------------------------------------------------------------- 1 | cd c:\ai 2 | 3 | conda create -n Real-ESRGAN python=3.10 4 | 5 | conda activate Real-ESRGAN 6 | 7 | git clone https://github.com/xinntao/Real-ESRGAN.git 8 | 9 | cd Real-ESRGAN 10 | 11 | delete from requirements: 12 | opencv-python 13 | torch>=1.7 14 | torchvision 15 | 16 | conda install -c conda-forge opencv 17 | 18 | pip install -r requirements.txt 19 | 20 | python setup.py develop 21 | 22 | pip install chardet 23 | 24 | pip install ffmpeg-python 25 | 26 | conda install -c conda-forge ffmpeg 27 | 28 | Install with conda from pytorch website: 29 | conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia 30 | 31 | To check Cuda: 32 | python 33 | import torch 34 | torch.cuda.is_available() 35 | 36 | download models/weights -------------------------------------------------------------------------------- /Wav2Lip-ESRGAN/4-run-commands.txt: -------------------------------------------------------------------------------- 1 | python inference_realesrgan.py -n RealESRGAN_x4plus -i inputs --face_enhance --outscale 3.5 2 | 3 | ffmpeg -r 30 -i frame_%05d_out.jpg -i kekw.mp3 -vcodec libx264 -crf 25 -preset veryslow -acodec copy hd.mkv -------------------------------------------------------------------------------- /melotts/download.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import shutil 3 | import zipfile 4 | import os 5 | import sys 6 | from wasabi import msg 7 | from urllib.request import urlretrieve 8 | from tqdm import tqdm 9 | 10 | # This is used to show progress when downloading. 11 | # see here: https://github.com/tqdm/tqdm#hooks-and-callbacks 12 | class TqdmUpTo(tqdm): 13 | """Provides `update_to(n)` which uses `tqdm.update(delta_n)`.""" 14 | def update_to(self, b=1, bsize=1, tsize=None): 15 | """ 16 | b : int, optional 17 | Number of blocks transferred so far [default: 1]. 18 | bsize : int, optional 19 | Size of each block (in tqdm units) [default: 1]. 20 | tsize : int, optional 21 | Total size (in tqdm units). If [default: None] remains unchanged. 
22 | """ 23 | if tsize is not None: 24 | self.total = tsize 25 | self.update(b * bsize - self.n) # will also set self.n = b * bsize 26 | 27 | def download_file(url, fname): 28 | with requests.get(url, stream=True) as r: 29 | with open(fname, 'wb') as f: 30 | shutil.copyfileobj(r.raw, f) 31 | 32 | return fname 33 | 34 | def download_progress(url, fname): 35 | """Download a file and show a progress bar.""" 36 | with TqdmUpTo(unit='B', unit_scale=True, miniters=1, 37 | desc=url.split('/')[-1]) as t: # all optional kwargs 38 | urlretrieve(url, filename=fname, reporthook=t.update_to, data=None) 39 | t.total = t.n 40 | return fname 41 | 42 | def get_json(url, desc): 43 | r = requests.get(url) 44 | if r.status_code != 200: 45 | msg.fail( 46 | "Server error ({})".format(r.status_code), 47 | "Couldn't fetch {}. If this error persists please open an issue." 48 | " http://github.com/polm/unidic-py/issues/".format(desc), 49 | exits=1, 50 | ) 51 | return r.json() 52 | 53 | def download_and_clean(version, url, dirname='unidic', delfiles=[]): 54 | """Download unidic and prep the dicdir. 55 | 56 | This downloads the zip file from the source, extracts it, renames the 57 | resulting directory, and removes large files not used at runtime. 58 | """ 59 | cdir = os.path.dirname(os.path.abspath(__file__)) 60 | fname = os.path.join(cdir, 'unidic.zip') 61 | print("Downloading UniDic v{}...".format(version), file=sys.stderr) 62 | #download_progress(url, fname) 63 | print("Finished download.") 64 | 65 | with zipfile.ZipFile(fname, 'r') as zf: 66 | zf.extractall(cdir) 67 | os.remove(fname) 68 | 69 | dicdir = os.path.join(cdir, 'dicdir') 70 | if os.path.isdir(dicdir): 71 | shutil.rmtree(dicdir) 72 | 73 | outdir = os.path.join(cdir, dirname) 74 | shutil.move(outdir, dicdir) 75 | 76 | for dfile in delfiles: 77 | os.remove(os.path.join(dicdir, dfile)) 78 | 79 | # save a version file so we can tell what it is 80 | vpath = os.path.join(dicdir, 'version') 81 | with open(vpath, 'w') as vfile: 82 | vfile.write('unidic-{}'.format(version)) 83 | 84 | # Write a dummy mecabrc 85 | with open(os.path.join(dicdir, 'mecabrc'), 'w') as mecabrc: 86 | mecabrc.write('# This is a dummy file.') 87 | 88 | print("Downloaded UniDic v{} to {}".format(version, dicdir), file=sys.stderr) 89 | 90 | DICT_INFO = "https://raw.githubusercontent.com/polm/unidic-py/master/dicts.json" 91 | 92 | def download_version(ver="latest"): 93 | # res = get_json(DICT_INFO, "dictionary info") 94 | # try: 95 | # dictinfo = res[ver] 96 | # except KeyError: 97 | # print('Unknown version "{}".'.format(ver)) 98 | # print("Known versions:") 99 | # for key, val in res.items(): 100 | # print("\t", key, "({})".format(val['version'])) 101 | 102 | # print("download url:", dictinfo['url']) 103 | # print("Dictionary version:", dictinfo['version']) 104 | download_and_clean('1', '1') 105 | 106 | --------------------------------------------------------------------------------