├── AudioCraft
│   └── install.bat
├── AudioSep
│   ├── audioset_textmap.npy
│   └── bpe_simple_vocab_16e6.txt.gz
├── Disco
│   └── DisCo_Demo.ipynb
├── PiperUI
│   ├── convert_mp3_to_wav.bat
│   └── transcript.py
├── README.md
├── Wav2Lip-ESRGAN
│   ├── 1-prerequisite.txt
│   ├── 2-wav2lip-hd.txt
│   ├── 3-Real-ESRGAN.txt
│   └── 4-run-commands.txt
└── melotts
    └── download.py
/AudioCraft/install.bat:
--------------------------------------------------------------------------------
1 | rem Clones AudioCraft into c:\ai and installs it into a dedicated "audiocraft" conda environment.
2 | cd /d c:\ai
3 | git clone https://github.com/facebookresearch/audiocraft.git
4 |
5 | cd audiocraft
6 | rem "call" keeps this script running if conda resolves to a batch wrapper; -y answers the prompts.
7 | call conda create -y -n audiocraft python=3.9
8 |
9 | call conda activate audiocraft
10 | rem The quotes stop cmd from treating "<" as input redirection.
11 | call conda install -y -c conda-forge "ffmpeg<5"
12 |
13 | pip install -r requirements.txt
14 |
15 | pip install -U git+https://git@github.com/facebookresearch/audiocraft#egg=audiocraft
16 | rem The CUDA 11.8 PyTorch build is installed after the pip steps, same order as before.
17 | call conda install -y pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia
18 |
19 | pip install numpy==1.24
20 |
21 | pip install chardet
22 |
23 | pip install notebook
24 |
25 | call conda install -y -c conda-forge ipywidgets
26 |
--------------------------------------------------------------------------------
/AudioSep/audioset_textmap.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/natlamir/ProjectFiles/6cac36b40e71b4a57b18ccd3e1e26c12379f97dd/AudioSep/audioset_textmap.npy
--------------------------------------------------------------------------------
/AudioSep/bpe_simple_vocab_16e6.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/natlamir/ProjectFiles/6cac36b40e71b4a57b18ccd3e1e26c12379f97dd/AudioSep/bpe_simple_vocab_16e6.txt.gz
--------------------------------------------------------------------------------
/Disco/DisCo_Demo.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "gpuType": "T4",
8 | "include_colab_link": true
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | },
14 | "language_info": {
15 | "name": "python"
16 | },
17 | "accelerator": "GPU"
18 | },
19 | "cells": [
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {
23 | "id": "view-in-github",
24 | "colab_type": "text"
25 | },
26 | "source": [
27 | ""
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": null,
33 | "metadata": {
34 | "id": "1Qu-pX4GDlgs"
35 | },
36 | "outputs": [],
37 | "source": [
38 | "!nvidia-smi"
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "source": [
44 | "### 1. Clone the GitHub repo"
45 | ],
46 | "metadata": {
47 | "id": "N7Som16t69XO"
48 | }
49 | },
50 | {
51 | "cell_type": "code",
52 | "source": [
53 | "!git clone https://github.com/Wangt-CN/DisCo"
54 | ],
55 | "metadata": {
56 | "id": "kKByNFtrfV7M"
57 | },
58 | "execution_count": null,
59 | "outputs": []
60 | },
61 | {
62 | "cell_type": "markdown",
63 | "source": [
64 | "\n",
65 | "### 2. Install the package\n",
66 | "\n",
67 | "P.S. Most errors are caused by an unsuccessful package installation, so please check the installation output carefully.\n"
68 | ],
69 | "metadata": {
70 | "id": "mpXDRYzO6rEY"
71 | }
72 | },
73 | {
74 | "cell_type": "code",
75 | "source": [
76 | "!pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 torchtext==0.14.1 torchaudio==0.13.1 torchdata==0.5.1 --extra-index-url https://download.pytorch.org/whl/cu117\n",
77 | "!pip install --user progressbar psutil pymongo simplejson yacs boto3 pyyaml ete3 easydict deprecated future django orderedset python-magic datasets h5py omegaconf einops ipdb\n",
78 | "!pip install --user --exists-action w -r DisCo/requirements.txt\n",
79 | "!pip install git+https://github.com/microsoft/azfuse.git\n",
80 | "\n",
81 | "## for acceleration\n",
82 | "!pip install --user deepspeed==0.6.3"
83 | ],
84 | "metadata": {
85 | "id": "37UDwQVxfp4T"
86 | },
87 | "execution_count": null,
88 | "outputs": []
89 | },
90 | {
91 | "cell_type": "code",
92 | "source": [
93 | "!pip install -U xformers"
94 | ],
95 | "metadata": {
96 | "id": "ZMfiYfxAyLmx"
97 | },
98 | "execution_count": null,
99 | "outputs": []
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "source": [
104 | "### 3. Download the pretrained model\n",
105 | "Feel free to use our other [checkpoints](https://github.com/Wangt-CN/DisCo#model-checkpoint-google-cloud-tiktok-training-data-fid-fvd-188--more-tiktok-style-training-data-fid-fvd-157) or change to your own model"
106 | ],
107 | "metadata": {
108 | "id": "YkfPye5C7FDV"
109 | }
110 | },
111 | {
112 | "cell_type": "code",
113 | "source": [
114 | "!git clone https://huggingface.co/lambdalabs/sd-image-variations-diffusers\n",
115 | "!wget https://storage.googleapis.com/disco-checkpoint-share/checkpoint_ft/moretiktok_nocfg/mp_rank_00_model_states.pt"
116 | ],
117 | "metadata": {
118 | "id": "v5FjMXqkh827"
119 | },
120 | "execution_count": null,
121 | "outputs": []
122 | },
123 | {
124 | "cell_type": "markdown",
125 | "source": [
126 | "### 4. Start Running"
127 | ],
128 | "metadata": {
129 | "id": "5-u3ohQt7o2c"
130 | }
131 | },
132 | {
133 | "cell_type": "code",
134 | "source": [
135 | "import os\n",
136 | "os.chdir('/content/DisCo')\n",
137 | "os.getcwd()"
138 | ],
139 | "metadata": {
140 | "colab": {
141 | "base_uri": "https://localhost:8080/",
142 | "height": 35
143 | },
144 | "id": "n2knZKbPsxsj",
145 | "outputId": "5cef10ee-ecf2-4120-fbc6-0ac5f224cd82"
146 | },
147 | "execution_count": null,
148 | "outputs": [
149 | {
150 | "output_type": "execute_result",
151 | "data": {
152 | "text/plain": [
153 | "'/content/DisCo'"
154 | ],
155 | "application/vnd.google.colaboratory.intrinsic+json": {
156 | "type": "string"
157 | }
158 | },
159 | "metadata": {},
160 | "execution_count": 6
161 | }
162 | ]
163 | },
164 | {
165 | "cell_type": "code",
166 | "source": [
167 | "!pip uninstall colorlog -y\n",
168 | "!pip uninstall deepdish -y\n",
169 | "!pip uninstall configobj -y\n",
170 | "!pip uninstall json_lines -y\n",
171 | "!pip install colorlog deepdish configobj json_lines"
172 | ],
173 | "metadata": {
174 | "id": "tK1pUheJbWr0"
175 | },
176 | "execution_count": null,
177 | "outputs": []
178 | },
179 | {
180 | "cell_type": "code",
181 | "source": [
182 | "!pip uninstall einops -y"
183 | ],
184 | "metadata": {
185 | "id": "T-uP6jcJcJR6"
186 | },
187 | "execution_count": null,
188 | "outputs": []
189 | },
190 | {
191 | "cell_type": "code",
192 | "source": [
193 | "!pip install einops"
194 | ],
195 | "metadata": {
196 | "id": "1EYe-cm8cgoC"
197 | },
198 | "execution_count": null,
199 | "outputs": []
200 | },
201 | {
202 | "cell_type": "code",
203 | "source": [
204 | "!pip install transformers"
205 | ],
206 | "metadata": {
207 | "id": "v4U3pLmYcyJ3"
208 | },
209 | "execution_count": null,
210 | "outputs": []
211 | },
212 | {
213 | "cell_type": "code",
214 | "source": [
215 | "!pip uninstall huggingface_hub -y"
216 | ],
217 | "metadata": {
218 | "id": "2SJt4AqJdHNI"
219 | },
220 | "execution_count": null,
221 | "outputs": []
222 | },
223 | {
224 | "cell_type": "code",
225 | "source": [
226 | "!pip install huggingface_hub"
227 | ],
228 | "metadata": {
229 | "id": "AH-y9q7jdO0K"
230 | },
231 | "execution_count": null,
232 | "outputs": []
233 | },
234 | {
235 | "cell_type": "code",
236 | "source": [
237 | "!pip install tensorboardX"
238 | ],
239 | "metadata": {
240 | "id": "hGNAwm3pdYsC"
241 | },
242 | "execution_count": null,
243 | "outputs": []
244 | },
245 | {
246 | "cell_type": "code",
247 | "source": [
248 | "!pip uninstall deepspeed -y"
249 | ],
250 | "metadata": {
251 | "id": "YQyLaOy5drzA"
252 | },
253 | "execution_count": null,
254 | "outputs": []
255 | },
256 | {
257 | "cell_type": "code",
258 | "source": [
259 | "!pip install deepspeed"
260 | ],
261 | "metadata": {
262 | "id": "zfA9AdNPdyym"
263 | },
264 | "execution_count": null,
265 | "outputs": []
266 | },
267 | {
268 | "cell_type": "code",
269 | "source": [
270 | "!pip uninstall hjson -y\n",
271 | "!pip install hjson"
272 | ],
273 | "metadata": {
274 | "id": "eNYAx9_neRLn"
275 | },
276 | "execution_count": null,
277 | "outputs": []
278 | },
279 | {
280 | "cell_type": "code",
281 | "source": [
282 | "!pip install wandb"
283 | ],
284 | "metadata": {
285 | "id": "gcDVdMoNei1A"
286 | },
287 | "execution_count": null,
288 | "outputs": []
289 | },
290 | {
291 | "cell_type": "code",
292 | "source": [
293 | "!pip uninstall deprecated -y\n",
294 | "!pip install deprecated"
295 | ],
296 | "metadata": {
297 | "id": "gJwkx7EqezqO"
298 | },
299 | "execution_count": null,
300 | "outputs": []
301 | },
302 | {
303 | "cell_type": "code",
304 | "source": [
305 | "import os\n",
306 | "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n",
307 | "os.environ[\"WANDB_ENABLE\"] = \"0\"\n",
308 | "\n",
309 | "from utils.wutils_ldm import *\n",
310 | "from agent import Agent_LDM, WarmupLinearLR, WarmupLinearConstantLR\n",
311 | "import torch\n",
312 | "from config import BasicArgs\n",
313 | "from utils.lib import *\n",
314 | "# from utils.args import parse_with_cf\n",
315 | "from utils.dist import dist_init\n",
316 | "from dataset.tsv_dataset import make_data_sampler, make_batch_data_sampler\n",
317 | "from finetune_sdm_yaml import get_loader_info, make_data_loader\n",
318 | "torch.multiprocessing.set_sharing_strategy('file_system')"
319 | ],
320 | "metadata": {
321 | "id": "513HsIP_sHMW"
322 | },
323 | "execution_count": null,
324 | "outputs": []
325 | },
326 | {
327 | "cell_type": "code",
328 | "source": [
329 | "!pip uninstall diffusers -y"
330 | ],
331 | "metadata": {
332 | "id": "Nbv67O8Rft8J"
333 | },
334 | "execution_count": null,
335 | "outputs": []
336 | },
337 | {
338 | "cell_type": "code",
339 | "source": [
340 | "!pip install diffusers==0.14.0"
341 | ],
342 | "metadata": {
343 | "id": "quRzdae_ikYv"
344 | },
345 | "execution_count": null,
346 | "outputs": []
347 | },
348 | {
349 | "cell_type": "code",
350 | "source": [
351 | "from utils.args import sharedArgs\n",
352 | "manual_args = ['--cf', 'config/ref_attn_clip_combine_controlnet/app_demo_image_edit.py', '--eval_visu', 'True', '--root_dir', '/content/run_test', '--local_train_batch_size', '32', '--local_eval_batch_size', '32', '--log_dir', 'exp/tiktok_ft', '--epochs', '20', '--deepspeed', '--eval_step', '500',\n",
353 | " '--save_step', '500', '--gradient_accumulate_steps', '1', '--learning_rate', '2e-4', '--fix_dist_seed', 'True', '--loss_target',\n",
354 | " 'noise', '--unet_unfreeze_type', 'all', '--guidance_scale', '3', '--refer_sdvae', 'True', '--ref_null_caption', 'False', '--combine_clip_local', 'True', '--combine_use_mask', 'True', '--conds', 'poses','masks', '--pretrained_model', '/content/mp_rank_00_model_states.pt', '--pretrained_model_path', '/content/sd-image-variations-diffusers', '--eval_save_filename', 'try']\n",
355 | "parsed_args = sharedArgs.parser.parse_args(args=manual_args)\n",
356 | "\n",
357 | "###### process the args #######\n",
358 | "if parsed_args.root_dir:\n",
359 | " BasicArgs.root_dir = parsed_args.root_dir\n",
360 | "else:\n",
361 | " parsed_args.root_dir = BasicArgs.root_dir\n",
362 | "parsed_args.pretrained_model_path = os.path.join(parsed_args.root_dir, parsed_args.pretrained_model_path)\n",
363 | "\n",
364 | "def parse_with_cf(parsed_args):\n",
365 | " \"\"\"This function will set args based on the input config file.\n",
366 | " (1) it only overwrites unset parameters,\n",
367 | " i.e., those parameters not set by the user's command-line input\n",
368 | " (2) it also sets configs that appear in the config file but are not declared in the parser\n",
369 | " \"\"\"\n",
370 | " # convert to EasyDict object,\n",
371 | " # enabling access from attributes even for nested config\n",
372 | " # e.g., args.train_datasets[0].name\n",
373 | " args = edict(vars(parsed_args))\n",
374 | " if os.path.exists(parsed_args.cf):\n",
375 | " cf = import_filename(parsed_args.cf)\n",
376 | " config_args = edict(vars(cf.Args))\n",
377 | " override_keys = {arg[2:].split(\"=\")[0] for arg in manual_args\n",
378 | " if arg.startswith(\"--\")}\n",
379 | " # import pdb;pdb.set_trace()\n",
380 | " for k, v in config_args.items():\n",
381 | " if k not in override_keys:\n",
382 | " setattr(args, k, v)\n",
383 | " else:\n",
384 | " raise NotImplementedError('Config filename %s does not exist.' % args.cf)\n",
385 | " return args\n",
386 | "\n",
387 | "args = parse_with_cf(parsed_args)\n",
388 | "\n",
389 | "args.n_gpu = T.cuda.device_count() # local size\n",
390 | "args.local_size = args.n_gpu\n",
391 | "if args.root_dir not in args.log_dir:\n",
392 | " args.log_dir = os.path.join(args.root_dir, args.log_dir)\n",
393 | "if args.stepwise_sample_depth == -1:\n",
394 | " args.interpolation = None\n",
395 | " args.interpolate_mode = None\n",
396 | "if args.interpolation != \"interpolate\":\n",
397 | " args.interpolate_mode = None\n",
398 | "\n",
399 | "assert args.eval_step > 0, \"eval_step must be positive\"\n",
400 | "assert args.save_step > 0, \"save_step must be positive\"\n",
401 | "\n",
402 | "dist_init(args)\n",
403 | "args.dist = args.distributed\n",
404 | "args.nodes = args.num_nodes\n",
405 | "args.world_size = args.num_gpus\n",
406 | "args.train_batch_size = args.local_train_batch_size * args.world_size\n",
407 | "args.eval_batch_size = args.local_eval_batch_size * args.world_size\n",
408 | "#############################################\n",
409 | "\n",
410 | "cf = import_filename(args.cf)\n",
411 | "Net, inner_collect_fn = cf.Net, cf.inner_collect_fn\n",
412 | "\n",
413 | "dataset_cf = import_filename(args.dataset_cf)\n",
414 | "BaseDataset = dataset_cf.BaseDataset\n",
415 | "\n",
416 | "# args = update_args(parsed_args, args)\n",
417 | "\n",
418 | "# init models\n",
419 | "logger.info('Building models...')\n",
420 | "model = Net(args)\n",
421 | "print(f\"Args: {edict(vars(args))}\")"
422 | ],
423 | "metadata": {
424 | "id": "pCuG7qZ3zjYi"
425 | },
426 | "execution_count": null,
427 | "outputs": []
428 | },
429 | {
430 | "cell_type": "code",
431 | "source": [
432 | "logger.warning(\"Do eval_visu...\")\n",
433 | "if getattr(args, 'refer_clip_preprocess', None):\n",
434 | " eval_dataset = BaseDataset(args, args.val_yaml, split='val', preprocesser=model.feature_extractor)\n",
435 | "else:\n",
436 | " eval_dataset = BaseDataset(args, args.val_yaml, split='val')\n",
437 | "eval_dataloader, eval_info = make_data_loader(\n",
438 | " args, args.local_eval_batch_size,\n",
439 | " eval_dataset)\n",
440 | "\n",
441 | "\n",
442 | "trainer = Agent_LDM(args=args, model=model)\n",
443 | "trainer.eval_demo_pre()"
444 | ],
445 | "metadata": {
446 | "id": "OmhxcD304rY-"
447 | },
448 | "execution_count": null,
449 | "outputs": []
450 | },
451 | {
452 | "cell_type": "code",
453 | "source": [
454 | "def load_image(image):\n",
455 | " if not image.mode == \"RGB\":\n",
456 | " image = image.convert(\"RGB\")\n",
457 | " return image\n",
458 | "\n",
459 | "@torch.no_grad()\n",
460 | "def inference(reference_fg, fg_mask, ref_bg_image, bg_mask, skeleton_img, *args, **kwargs):\n",
461 | " reference_fg = load_image(reference_fg)\n",
462 | " fg_mask = load_image(fg_mask)\n",
463 | " ref_bg_image = load_image(ref_bg_image)\n",
464 | " bg_mask = load_image(bg_mask)\n",
465 | " skeleton_img = load_image(skeleton_img)\n",
466 | "\n",
467 | " input_data = [reference_fg, fg_mask, ref_bg_image, bg_mask, skeleton_img]\n",
468 | " output_image = trainer.eval_demo_run(input_data, eval_dataset=eval_dataset)\n",
469 | " return output_image\n",
470 | "\n",
471 | "@torch.no_grad()\n",
472 | "def inference_masked(reference_fg, ref_bg_image, skeleton_img, *args, **kwargs):\n",
473 | " reference_fg = load_image(reference_fg)\n",
474 | " ref_bg_image = load_image(ref_bg_image)\n",
475 | " skeleton_img = load_image(skeleton_img)\n",
476 | "\n",
477 | " input_data = [reference_fg, ref_bg_image, skeleton_img]\n",
478 | " output_image = trainer.eval_demo_run_masked(input_data, eval_dataset=eval_dataset)\n",
479 | " return output_image"
480 | ],
481 | "metadata": {
482 | "id": "fF-xqrj95ekN"
483 | },
484 | "execution_count": null,
485 | "outputs": []
486 | },
487 | {
488 | "cell_type": "markdown",
489 | "source": [
490 | "### 5. Launch the Gradio demo"
491 | ],
492 | "metadata": {
493 | "id": "wv2ZhLq_77Ik"
494 | }
495 | },
496 | {
497 | "cell_type": "code",
498 | "source": [
499 | "!pip install gradio"
500 | ],
501 | "metadata": {
502 | "id": "oe4-FQCfmYqg"
503 | },
504 | "execution_count": null,
505 | "outputs": []
506 | },
507 | {
508 | "cell_type": "code",
509 | "source": [
510 | "\n",
511 | "\n",
512 | "import gradio as gr\n",
513 | "'''\n",
514 | "launch app\n",
515 | "'''\n",
516 | "title = \"DisCo Demo (Video Demo Comming Soon!)\"\n",
517 | "description = \"\"\"
Project Page | Paper | Github Repo | Video
\n", 518 | "Skip the queue by duplicating this space and upgrading to GPU in settings
\n", 519 | "