├── SD_infinity_06feb.ipynb └── one_shot_talking_face.ipynb /SD_infinity_06feb.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "include_colab_link": true 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | }, 13 | "language_info": { 14 | "name": "python" 15 | }, 16 | "accelerator": "GPU" 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "view-in-github", 23 | "colab_type": "text" 24 | }, 25 | "source": [ 26 | "\"Open" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "source": [ 32 | "#@title **Setup**\n", 33 | "\n", 34 | "# install library\n", 35 | "!nvidia-smi -L\n", 36 | "!pip install -qq -U diffusers==0.11.1 transformers ftfy accelerate\n", 37 | "!pip install -q gradio==3.11.0\n", 38 | "!pip install -q fpie timm\n", 39 | "!pip uninstall taichi -y\n", 40 | "\n", 41 | "# git clone\n", 42 | "!git clone --recurse-submodules https://github.com/lkwq007/stablediffusion-infinity\n", 43 | "%cd stablediffusion-infinity\n", 44 | "!cp -r PyPatchMatch/csrc .\n", 45 | "!cp PyPatchMatch/Makefile .\n", 46 | "!cp PyPatchMatch/Makefile_fallback .\n", 47 | "!cp PyPatchMatch/travis.sh .\n", 48 | "!cp PyPatchMatch/patch_match.py . 
\n", 49 | "\n", 50 | "# program start\n", 51 | "!python app.py --share" 52 | ], 53 | "metadata": { 54 | "id": "MDjNh7svvSl3" 55 | }, 56 | "execution_count": null, 57 | "outputs": [] 58 | } 59 | ] 60 | } -------------------------------------------------------------------------------- /one_shot_talking_face.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "view-in-github", 7 | "colab_type": "text" 8 | }, 9 | "source": [ 10 | "\"Open" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": { 17 | "id": "OHPfXjK_opA4" 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "#@title #**Setup**\n", 22 | "!git lfs install\n", 23 | "%cd /content\n", 24 | "!git clone https://huggingface.co/camenduru/pocketsphinx-20.04-t4 pocketsphinx\n", 25 | "%cd /content/pocketsphinx\n", 26 | "!sudo cmake --build build --target install\n", 27 | "%cd /content\n", 28 | "!git clone https://huggingface.co/camenduru/one-shot-talking-face-20.04-t4 one-shot-talking-face\n", 29 | "%cd /content/one-shot-talking-face\n", 30 | "!pip install -r /content/one-shot-talking-face/requirements.txt\n", 31 | "!chmod 755 /content/one-shot-talking-face/OpenFace/FeatureExtraction\n", 32 | "!mkdir -p /content/out\n", 33 | "!apt install -qq libgtk2.0-0 jq -y\n", 34 | "!pip install -q imageio-ffmpeg" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": { 41 | "id": "XdoRAI2OopA6" 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "#@title #**make movie**\n", 46 | "import os, random, torchaudio\n", 47 | "from IPython.display import HTML\n", 48 | "from base64 import b64encode\n", 49 | "from IPython.display import clear_output\n", 50 | "\n", 51 | "def show_video(video_path, video_width = 256):  # return an inline HTML5 player with the MP4 embedded as a base64 data URI\n", 52 | " with open(video_path, \"rb\") as video_fh: video_file = video_fh.read()  # read-only; handle is closed as soon as the bytes are loaded\n", 53 | " video_url = 
f\"data:video/mp4;base64,{b64encode(video_file).decode()}\"\n", 54 | " return HTML(f\"\"\"<video width={video_width} controls><source src=\"{video_url}\"></video>\"\"\")\n", 55 | "\n", 56 | "#@markdown -Select wav_file_name from the one-shot-talking-face/samples/audios folder.\\\n", 57 | "#@markdown -Select image_file_name from the one-shot-talking-face/samples/imgs folder.\n", 58 | "\n", 59 | "wav_file_name = 'obama2.wav' #@param {type:\"string\"}\n", 60 | "image_file_name = 'paint.jpg' #@param {type:\"string\"}\n", 61 | "\n", 62 | "wav_file = \"/content/one-shot-talking-face/samples/audios/\" + wav_file_name\n", 63 | "image_file = \"/content/one-shot-talking-face/samples/imgs/\" + image_file_name \n", 64 | "\n", 65 | "waveform, sample_rate = torchaudio.load(wav_file)\n", 66 | "torchaudio.save(wav_file, waveform, sample_rate, encoding=\"PCM_S\", bits_per_sample=16)  # rewrites the selected sample in place as signed 16-bit PCM\n", 67 | "\n", 68 | "os.environ['wav_file'] = wav_file\n", 69 | "os.environ['image_file'] = image_file\n", 70 | "random_int = str(random.randint(1, 1000000))\n", 71 | "\n", 72 | "!mkdir -p /content/train\n", 73 | "!cp $wav_file /content/train/audio.wav\n", 74 | "!cp $image_file /content/train/image.png\n", 75 | "\n", 76 | "!pocketsphinx -phone_align yes single /content/train/audio.wav $text | jq '[.w[]|{word: (.t | ascii_upcase | sub(\"<S>\"; \"sil\") | sub(\"<SIL>\"; \"sil\") | sub(\"\\\\(2\\\\)\"; \"\") | sub(\"\\\\(3\\\\)\"; \"\") | sub(\"\\\\(4\\\\)\"; \"\") | sub(\"\\\\[SPEECH\\\\]\"; \"SIL\") | sub(\"\\\\[NOISE\\\\]\"; \"SIL\")), phones: [.w[]|{ph: .t | sub(\"\\\\+SPN\\\\+\"; \"SIL\") | sub(\"\\\\+NSN\\\\+\"; \"SIL\"), bg: (.b*100)|floor, ed: (.b*100+.d*100)|floor}]}]' > /content/test.json\n", 77 | "%cd /content/one-shot-talking-face\n", 78 | "!python -B test_script.py --img_path /content/train/image.png --audio_path /content/train/audio.wav --phoneme_path /content/test.json --save_dir /content/train\n", 79 | "\n", 80 | "os.environ['ran_num'] = random_int\n", 81 | "!cp /content/train/image_audio.mp4 \"/content/out/${ran_num}.mp4\"\n", 82 | "\n", 83 | "clear_output()\n", 84 | 
"\n", 85 | "show_video(f\"/content/out/{random_int}.mp4\")" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "source": [ 91 | "#@title #**Download movie** ( for google chrome)\n", 92 | "from google.colab import files\n", 93 | "file_path =f\"/content/out/{random_int}.mp4\"\n", 94 | "files.download(file_path)" 95 | ], 96 | "metadata": { 97 | "id": "PYUvVPPd_dk5" 98 | }, 99 | "execution_count": null, 100 | "outputs": [] 101 | } 102 | ], 103 | "metadata": { 104 | "accelerator": "GPU", 105 | "colab": { 106 | "provenance": [], 107 | "include_colab_link": true 108 | }, 109 | "gpuClass": "standard", 110 | "kernelspec": { 111 | "display_name": "Python 3", 112 | "name": "python3" 113 | }, 114 | "language_info": { 115 | "name": "python" 116 | } 117 | }, 118 | "nbformat": 4, 119 | "nbformat_minor": 0 120 | } --------------------------------------------------------------------------------