├── Big_Sleep.ipynb ├── pymaf_demo.ipynb ├── LSE_OpenPose.ipynb ├── CLIP_search.ipynb ├── StyleCLIP_global.ipynb ├── PIFuHD_Demo.ipynb ├── DALL_E_demo.ipynb ├── ArtLine_make_gif.ipynb ├── CLIP_demo.ipynb ├── DALL_E.ipynb ├── DALL_e_sample.ipynb ├── SwapAE.ipynb ├── VideoPose3D.ipynb ├── DeepDream.ipynb ├── infinite_nature_demo.ipynb └── PIFuHD_movie.ipynb /Big_Sleep.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Big Sleep", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "toc_visible": true, 10 | "include_colab_link": true 11 | }, 12 | "kernelspec": { 13 | "name": "python3", 14 | "display_name": "Python 3" 15 | }, 16 | "accelerator": "GPU" 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "view-in-github", 23 | "colab_type": "text" 24 | }, 25 | "source": [ 26 | "\"Open" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": { 32 | "id": "w5HPIGUSA9jf" 33 | }, 34 | "source": [ 35 | "# セットアップ" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "metadata": { 41 | "id": "9c-dAOUylyYt" 42 | }, 43 | "source": [ 44 | "# 接続GPUのチェック\n", 45 | "! nvidia-smi -L" 46 | ], 47 | "execution_count": null, 48 | "outputs": [] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "metadata": { 53 | "id": "D2jUsCZXaqcw" 54 | }, 55 | "source": [ 56 | "# big-sleep インストール\n", 57 | "!pip install big-sleep --upgrade" 58 | ], 59 | "execution_count": null, 60 | "outputs": [] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": { 65 | "id": "E232NSiQBi6X" 66 | }, 67 | "source": [ 68 | "# テキストから画像生成" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "metadata": { 74 | "id": "2_JetNvHCP6l" 75 | }, 76 | "source": [ 77 | "TEXT = 'an armchair in the shape of an avocado' " 78 | ], 79 | "execution_count": null, 80 | "outputs": [] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "metadata": { 85 | "id": "NfJ0RMCAauV3" 86 | }, 87 | "source": [ 88 | "from tqdm.notebook import trange\n", 89 | "from IPython.display import Image, display\n", 90 | "from big_sleep import Imagine\n", 91 | "import random\n", 92 | "\n", 93 | "# パラメータ設定\n", 94 | "SAVE_EVERY = 100 \n", 95 | "SAVE_PROGRESS = False \n", 96 | "LEARNING_RATE = 8e-2 \n", 97 | "ITERATIONS = 1050 \n", 98 | "SEED = random.randint(0, 10000) \n", 99 | "\n", 100 | "# モデルセッティング\n", 101 | "model = Imagine(\n", 102 | " text = TEXT,\n", 103 | " save_every = SAVE_EVERY,\n", 104 | " lr = LEARNING_RATE,\n", 105 | " iterations = ITERATIONS,\n", 106 | " save_progress = SAVE_PROGRESS,\n", 107 | " seed = SEED\n", 108 | ")\n", 109 | "\n", 110 | "# 探索ループ\n", 111 | "for epoch in range(1):\n", 112 | " for i in trange(1000, desc = 'iteration'):\n", 113 | " model.train_step(epoch, i)\n", 114 | "\n", 115 | " if i == 0 or i % model.save_every != 0:\n", 116 | " continue\n", 117 | "\n", 118 | " filename = TEXT.replace(' ', '_')\n", 119 | " image = Image(f'./{filename}.png')\n", 120 | " display(image)" 121 | ], 122 | "execution_count": null, 123 | "outputs": [] 124 | } 125 | ] 126 | } -------------------------------------------------------------------------------- /pymaf_demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "accelerator": "GPU", 6 | "colab": { 7 | "name": "pymaf_demo", 8 | "provenance": [], 9 | "collapsed_sections": [], 10 | "toc_visible": true, 11 | "include_colab_link": true 12 
| }, 13 | "kernelspec": { 14 | "display_name": "Python 3", 15 | "name": "python3" 16 | }, 17 | "language_info": { 18 | "name": "python" 19 | } 20 | }, 21 | "cells": [ 22 | { 23 | "cell_type": "markdown", 24 | "metadata": { 25 | "id": "view-in-github", 26 | "colab_type": "text" 27 | }, 28 | "source": [ 29 | "\"Open" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": { 35 | "id": "mryOv98_qlod" 36 | }, 37 | "source": [ 38 | "# セットアップ" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "metadata": { 44 | "id": "TzS8Y_oIWzNt" 45 | }, 46 | "source": [ 47 | "# GPUチェッック\n", 48 | "!nvidia-smi -L" 49 | ], 50 | "execution_count": null, 51 | "outputs": [] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "metadata": { 56 | "id": "Plo2YZZmaDtY", 57 | "collapsed": true 58 | }, 59 | "source": [ 60 | "# githubからコードを取得\n", 61 | "! git clone https://github.com/HongwenZhang/PyMAF.git\n", 62 | "%cd PyMAF\n", 63 | "\n", 64 | "# 必要なファイルをダウンロード\n", 65 | "! pip install --upgrade gdown\n", 66 | "import gdown\n", 67 | "gdown.download('https://drive.google.com/u/1/uc?id=1XvE73SWbwYMoPTZncmHGwipbsrE4-zyq', 'pymaf.zip', quiet=False)\n", 68 | "! unzip pymaf.zip\n", 69 | "! rm pymaf.zip\n", 70 | "\n", 71 | "# pytorchバージョン変更\n", 72 | "! pip install -U https://download.pytorch.org/whl/cu100/torch-1.1.0-cp37-cp37m-linux_x86_64.whl\n", 73 | "! pip install -U https://download.pytorch.org/whl/cu100/torchvision-0.3.0-cp37-cp37m-linux_x86_64.whl\n", 74 | "\n", 75 | "# ライブラリーインストール\n", 76 | "! pip install -r requirements.txt\n", 77 | "! pip install imageio==2.4.1\n", 78 | "! pip install pyglet==1.5.27" 79 | ], 80 | "execution_count": null, 81 | "outputs": [] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": { 86 | "id": "dXa9qrbwqwuA" 87 | }, 88 | "source": [ 89 | "# 3Dポーズ推定" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "metadata": { 95 | "id": "KmHCtKpHqt9V" 96 | }, 97 | "source": [ 98 | "! python3 demo.py --checkpoint=data/pretrained_model/PyMAF_model_checkpoint.pt\\\n", 99 | " --vid_file ./dance.mp4" 100 | ], 101 | "execution_count": null, 102 | "outputs": [] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "metadata": { 107 | "id": "mwlxnPuCxxqD" 108 | }, 109 | "source": [ 110 | "# Play the generated video\n", 111 | "from IPython.display import HTML\n", 112 | "from base64 import b64encode\n", 113 | "\n", 114 | "def video(path):\n", 115 | " mp4 = open(path,'rb').read()\n", 116 | " data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n", 117 | " return HTML('' % data_url)\n", 118 | "\n", 119 | "video('output/dance/dance_result.mp4')" 120 | ], 121 | "execution_count": null, 122 | "outputs": [] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": { 127 | "id": "2SnSeng37gpg" 128 | }, 129 | "source": [ 130 | "# フレームレート調整" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "metadata": { 136 | "id": "THTbfAFNutBn" 137 | }, 138 | "source": [ 139 | "! 
ffmpeg -r 30 -i output/dance/dance_mp4_output/%6d.png\\\n", 140 | " -vcodec libx264 -pix_fmt yuv420p out_dance.mp4" 141 | ], 142 | "execution_count": null, 143 | "outputs": [] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "metadata": { 148 | "id": "5VQv5qu2vsAc" 149 | }, 150 | "source": [ 151 | "# Play the generated video\n", 152 | "from IPython.display import HTML\n", 153 | "from base64 import b64encode\n", 154 | "\n", 155 | "def video(path):\n", 156 | " mp4 = open(path,'rb').read()\n", 157 | " data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n", 158 | " return HTML('' % data_url)\n", 159 | "\n", 160 | "video('out_dance.mp4')" 161 | ], 162 | "execution_count": null, 163 | "outputs": [] 164 | } 165 | ] 166 | } 167 | -------------------------------------------------------------------------------- /LSE_OpenPose.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "accelerator": "GPU", 6 | "colab": { 7 | "name": "LSE OpenPose", 8 | "provenance": [], 9 | "collapsed_sections": [], 10 | "include_colab_link": true 11 | }, 12 | "kernelspec": { 13 | "display_name": "Python 3", 14 | "language": "python", 15 | "name": "python3" 16 | }, 17 | "language_info": { 18 | "codemirror_mode": { 19 | "name": "ipython", 20 | "version": 3 21 | }, 22 | "file_extension": ".py", 23 | "mimetype": "text/x-python", 24 | "name": "python", 25 | "nbconvert_exporter": "python", 26 | "pygments_lexer": "ipython3", 27 | "version": "3.7.9" 28 | } 29 | }, 30 | "cells": [ 31 | { 32 | "cell_type": "markdown", 33 | "metadata": { 34 | "id": "view-in-github", 35 | "colab_type": "text" 36 | }, 37 | "source": [ 38 | "\"Open" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": { 44 | "id": "X38L6tanrnrB" 45 | }, 46 | "source": [ 47 | "## セットアップ" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "metadata": { 53 | "id": "FOdkDhb6ga6N" 54 | }, 55 | "source": [ 56 | "import os\n", 57 | "from os.path import exists, join, basename, splitext\n", 58 | "\n", 59 | "git_repo_url = 'https://github.com/CMU-Perceptual-Computing-Lab/openpose.git'\n", 60 | "project_name = splitext(basename(git_repo_url))[0]\n", 61 | "if not exists(project_name):\n", 62 | " # see: https://github.com/CMU-Perceptual-Computing-Lab/openpose/issues/949\n", 63 | " # install new CMake becaue of CUDA10\n", 64 | " !wget -q https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.tar.gz\n", 65 | " !tar xfz cmake-3.13.0-Linux-x86_64.tar.gz --strip-components=1 -C /usr/local\n", 66 | " # clone openpose\n", 67 | " !git clone -q --depth 1 $git_repo_url\n", 68 | " !sed -i 's/execute_process(COMMAND git checkout master WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}\\/3rdparty\\/caffe)/execute_process(COMMAND git checkout f019d0dfe86f49d1140961f8c7dec22130c83154 WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}\\/3rdparty\\/caffe)/g' openpose/CMakeLists.txt\n", 69 | " # install system dependencies\n", 70 | " !apt-get -qq install -y libatlas-base-dev libprotobuf-dev libleveldb-dev libsnappy-dev libhdf5-serial-dev protobuf-compiler libgflags-dev libgoogle-glog-dev liblmdb-dev opencl-headers ocl-icd-opencl-dev libviennacl-dev\n", 71 | " # install python dependencies\n", 72 | " !pip install -q youtube-dl\n", 73 | " # build openpose\n", 74 | " !cd openpose && rm -rf build || true && mkdir build && cd build && cmake .. 
&& make -j`nproc`" 75 | ], 76 | "execution_count": null, 77 | "outputs": [] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": { 82 | "id": "n5L3Z5YVrZ2R" 83 | }, 84 | "source": [ 85 | "## ダウンロードする Youtube ビデオの確認" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "metadata": { 91 | "id": "xIt-eyIDO6XG" 92 | }, 93 | "source": [ 94 | "from IPython.display import YouTubeVideo\n", 95 | "YOUTUBE_ID ='Sa3k_7ZtoCA'\n", 96 | "YouTubeVideo(YOUTUBE_ID)" 97 | ], 98 | "execution_count": null, 99 | "outputs": [] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": { 104 | "id": "5qq6g97DwMON" 105 | }, 106 | "source": [ 107 | "## Youtube ビデオのダウンロードと編集\n", 108 | "・ビデオをダウンロードし20秒に編集して、content/video.mp4に保存します。" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "metadata": { 114 | "id": "oNASdyyiO65I" 115 | }, 116 | "source": [ 117 | "# download the youtube with the given ID\n", 118 | "!youtube-dl -f 'bestvideo[ext=mp4]' --output \"youtube.%(ext)s\" https://www.youtube.com/watch?v=$YOUTUBE_ID\n", 119 | "# cut the seconds 20 from 03:13\n", 120 | "!ffmpeg -y -loglevel info -i youtube.mp4 -ss 00:03:13.0 -t 20 video.mp4" 121 | ], 122 | "execution_count": null, 123 | "outputs": [] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": { 128 | "id": "pPiFxw36wjsL" 129 | }, 130 | "source": [ 131 | "## 動画からポーズ推定\n", 132 | "・content/video.mp4からポーズ推定した動画を作成\\\n", 133 | "・自分の用意した動画を使う場合は、content/video.mp4を置き換えて下さい。\n" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "metadata": { 139 | "id": "knxPE2w9wXX6" 140 | }, 141 | "source": [ 142 | "# detect poses\n", 143 | "!cd openpose && ./build/examples/openpose/openpose.bin --video ../video.mp4 --write_json ./output/ --display 0 --write_video ../openpose.avi --face --hand\n", 144 | "# convert the result into MP4\n", 145 | "!ffmpeg -y -loglevel info -i openpose.avi output.mp4" 146 | ], 147 | "execution_count": null, 148 | "outputs": [] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": { 153 | "id": "kDDkgCCSrFTv" 154 | }, 155 | "source": [ 156 | "・動画の再生" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "metadata": { 162 | "id": "nZ3Ud9zLgOoQ" 163 | }, 164 | "source": [ 165 | "def show_local_mp4_video(file_name, width=640, height=480):\n", 166 | " import io\n", 167 | " import base64\n", 168 | " from IPython.display import HTML\n", 169 | " video_encoded = base64.b64encode(io.open(file_name, 'rb').read())\n", 170 | " return HTML(data=''''''.format(width, height, video_encoded.decode('ascii')))\n", 173 | "\n", 174 | "show_local_mp4_video('output.mp4', width=960, height=720)" 175 | ], 176 | "execution_count": null, 177 | "outputs": [] 178 | } 179 | ] 180 | } -------------------------------------------------------------------------------- /CLIP_search.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "accelerator": "GPU", 6 | "colab": { 7 | "name": "CLIP_search", 8 | "provenance": [], 9 | "collapsed_sections": [], 10 | "toc_visible": true, 11 | "include_colab_link": true 12 | }, 13 | "kernelspec": { 14 | "display_name": "Python 3", 15 | "language": "python", 16 | "name": "python3" 17 | }, 18 | "language_info": { 19 | "codemirror_mode": { 20 | "name": "ipython", 21 | "version": 3 22 | }, 23 | "file_extension": ".py", 24 | "mimetype": "text/x-python", 25 | "name": "python", 26 | "nbconvert_exporter": "python", 27 | "pygments_lexer": "ipython3", 28 | "version": "3.7.9" 29 | } 30 | }, 31 | 
"cells": [ 32 | { 33 | "cell_type": "markdown", 34 | "metadata": { 35 | "id": "view-in-github", 36 | "colab_type": "text" 37 | }, 38 | "source": [ 39 | "\"Open" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": { 45 | "id": "EXnkFjoZy9kd" 46 | }, 47 | "source": [ 48 | "# セットアップ" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "metadata": { 54 | "id": "itUEF1Ltb5r3" 55 | }, 56 | "source": [ 57 | "# Pytorchバージョン変更\n", 58 | "! pip install torch==1.7.1+cu110 torchvision==0.8.2+cu110 -f https://download.pytorch.org/whl/torch_stable.html \n", 59 | "\n", 60 | "# CLIP関連コードのコピー\n", 61 | "! git clone https://github.com/openai/CLIP.git\n", 62 | "%cd /content/CLIP/\n", 63 | "\n", 64 | "# CLIPのモデル化\n", 65 | "! pip install ftfy regex\n", 66 | "import clip\n", 67 | "model, preprocess = clip.load('ViT-B/32', jit=True) \n", 68 | "model = model.eval() \n", 69 | "\n", 70 | "# サンプル画像ダウンロード\n", 71 | "! pip install --upgrade gdown\n", 72 | "import gdown\n", 73 | "gdown.download('https://drive.google.com/uc?id=1xIYYYzw9aZhjhyjMM12nz4XjnWUzpp6v', 'img.zip', quiet=False)\n", 74 | "! unzip img.zip" 75 | ], 76 | "execution_count": null, 77 | "outputs": [] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": { 82 | "id": "6H6tX85TKA0n" 83 | }, 84 | "source": [ 85 | "# 検索する画像の読み込み\n" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "metadata": { 91 | "id": "d6cpiIFHp9N6" 92 | }, 93 | "source": [ 94 | "# --- 画像の前処理 ----\n", 95 | "import torch\n", 96 | "import numpy as np\n", 97 | "from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize\n", 98 | "from PIL import Image\n", 99 | "import glob\n", 100 | "from tqdm import tqdm\n", 101 | "\n", 102 | "# 前処理設定\n", 103 | "preprocess = Compose([\n", 104 | " Resize(224, interpolation=Image.BICUBIC),\n", 105 | " CenterCrop(224),\n", 106 | " ToTensor()\n", 107 | "])\n", 108 | "image_mean = torch.tensor([0.48145466, 0.4578275, 0.40821073]).cuda()\n", 109 | "image_std = torch.tensor([0.26862954, 0.26130258, 0.27577711]).cuda()\n", 110 | "\n", 111 | "\n", 112 | "# 画像の読み込み\n", 113 | "images =[]\n", 114 | "files = glob.glob('./img/*.png')\n", 115 | "files.sort()\n", 116 | "\n", 117 | "for i, file in enumerate(tqdm(files)):\n", 118 | " image = preprocess(Image.open(file).convert(\"RGB\"))\n", 119 | " images.append(image)\n", 120 | "\n", 121 | "image_input = torch.tensor(np.stack(images)).cuda()\n", 122 | "image_input -= image_mean[:, None, None]\n", 123 | "image_input /= image_std[:, None, None]\n", 124 | "\n", 125 | "print('image_input.shape = ', image_input.shape)" 126 | ], 127 | "execution_count": null, 128 | "outputs": [] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": { 133 | "id": "L_uKiB2nKQJX" 134 | }, 135 | "source": [ 136 | "# 検索テキストの入力\n" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "metadata": { 142 | "id": "C4S__zCGy2MT" 143 | }, 144 | "source": [ 145 | "text = 'She is a charming woman with blonde hair and blue eyes'\n", 146 | "text_input = clip.tokenize(text)\n", 147 | "text_input = text_input.cuda()\n", 148 | "\n", 149 | "print('text_input = ', text_input)\n", 150 | "print('text_input.shape = ', text_input.shape)" 151 | ], 152 | "execution_count": null, 153 | "outputs": [] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": { 158 | "id": "2u4oePsAKdJO" 159 | }, 160 | "source": [ 161 | "# 画像とテキストのcos類似度の計算\n", 162 | "\n" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "metadata": { 168 | "id": "iqjF6NbBCT0a" 169 | }, 170 | "source": [ 171 | "# --- 
画像とテキストのCOS類似度の計算 ----\n", 172 | "\n", 173 | "# 特徴ベクトルを抽出\n", 174 | "with torch.no_grad():\n", 175 | " image_features = model.encode_image(image_input).float()\n", 176 | " text_features = model.encode_text(text_input).float()\n", 177 | " text_features /= text_features.norm(dim=-1, keepdim=True) \n", 178 | "\n", 179 | "# COS類似度を計算\n", 180 | "text_probs = torch.cosine_similarity(image_features, text_features)\n", 181 | "\n", 182 | "print('image_features.shape = ', image_features.shape)\n", 183 | "print('text_features.shape = ', text_features.shape)\n", 184 | "print('text_probs.shape = ', text_probs.shape)" 185 | ], 186 | "execution_count": null, 187 | "outputs": [] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": { 192 | "id": "OIM5PWmSKlVm" 193 | }, 194 | "source": [ 195 | "# 検索結果の表示" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "metadata": { 201 | "id": "LxPbA0_ScWUk" 202 | }, 203 | "source": [ 204 | "# --- 検索結果の表示 ---\n", 205 | "\n", 206 | "import matplotlib.pyplot as plt\n", 207 | "\n", 208 | "# 検索テキスト表示\n", 209 | "print('text = ', text)\n", 210 | "print()\n", 211 | "\n", 212 | "# COS類似度の高い順にインデックスをソート\n", 213 | "x = np.argsort(-text_probs.cpu(), axis=0)\n", 214 | "\n", 215 | "# COS類似度TOP3を表示\n", 216 | "fig = plt.figure(figsize=(30, 40))\n", 217 | "for i in range(3):\n", 218 | " name = str(x[i].item()).zfill(6)+'.png'\n", 219 | " img = Image.open('./img/'+name) \n", 220 | " images = np.asarray(img)\n", 221 | " ax = fig.add_subplot(10, 10, i+1, xticks=[], yticks=[])\n", 222 | " image_plt = np.array(images)\n", 223 | " ax.imshow(image_plt)\n", 224 | " cos_value = round(text_probs[x[i].item()].item(), 3)\n", 225 | " ax.set_xlabel(cos_value, fontsize=12) \n", 226 | "plt.show()\n", 227 | "plt.close() " 228 | ], 229 | "execution_count": null, 230 | "outputs": [] 231 | } 232 | ] 233 | } 234 | -------------------------------------------------------------------------------- /StyleCLIP_global.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "StyleCLIP_global", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "language_info": { 16 | "name": "python" 17 | }, 18 | "accelerator": "GPU" 19 | }, 20 | "cells": [ 21 | { 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "id": "view-in-github", 25 | "colab_type": "text" 26 | }, 27 | "source": [ 28 | "\"Open" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": { 34 | "id": "cFh0PtHAw5ax" 35 | }, 36 | "source": [ 37 | "# セットアップ" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "metadata": { 43 | "id": "5hlml6ebZ9xa" 44 | }, 45 | "source": [ 46 | "%tensorflow_version 1.x\n", 47 | "! pip install torch==1.7.1+cu110 torchvision==0.8.2+cu110 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html\n", 48 | "! pip install ftfy regex tqdm\n", 49 | "!pip install git+https://github.com/openai/CLIP.git\n", 50 | "! 
git clone https://github.com/orpatashnik/StyleCLIP" 51 | ], 52 | "execution_count": null, 53 | "outputs": [] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "metadata": { 58 | "id": "WaZbI-6maJin" 59 | }, 60 | "source": [ 61 | "# input dataset name \n", 62 | "dataset_name='ffhq' # input dataset name, currently, only support ffhq\n", 63 | "\n", 64 | "% cd StyleCLIP/global/\n", 65 | "\n", 66 | "# input prepare data \n", 67 | "!python GetCode.py --dataset_name $dataset_name --code_type 'w'\n", 68 | "!python GetCode.py --dataset_name $dataset_name --code_type 's'\n", 69 | "!python GetCode.py --dataset_name $dataset_name --code_type 's_mean_std'\n", 70 | "\n", 71 | "import tensorflow as tf\n", 72 | "import numpy as np \n", 73 | "import torch\n", 74 | "import clip\n", 75 | "from PIL import Image\n", 76 | "import pickle\n", 77 | "import copy\n", 78 | "import matplotlib.pyplot as plt\n", 79 | "from MapTS import GetFs,GetBoundary,GetDt\n", 80 | "from manipulate import Manipulator\n", 81 | "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", 82 | "model, preprocess = clip.load(\"ViT-B/32\", device=device)\n", 83 | "\n", 84 | "M=Manipulator(dataset_name='ffhq')\n", 85 | "fs3=np.load('./npy/ffhq/fs3.npy')\n", 86 | "np.set_printoptions(suppress=True)" 87 | ], 88 | "execution_count": null, 89 | "outputs": [] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": { 94 | "id": "NQNEpDYfpup0" 95 | }, 96 | "source": [ 97 | "# GUIによる画像編集" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "metadata": { 103 | "id": "u6Y31y0hpXbF" 104 | }, 105 | "source": [ 106 | "# 画像選択\n", 107 | "img_indexs=[1259]\n", 108 | "dlatent_tmp=[tmp[img_indexs] for tmp in M.dlatents]\n", 109 | "M.num_images=len(img_indexs)" 110 | ], 111 | "execution_count": null, 112 | "outputs": [] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "metadata": { 117 | "id": "mlekymCvpXlo" 118 | }, 119 | "source": [ 120 | "#テキスト入力\n", 121 | "neutral='face'\n", 122 | "target='smile face'\n", 123 | "classnames=[target,neutral]\n", 124 | "dt=GetDt(classnames,model)" 125 | ], 126 | "execution_count": null, 127 | "outputs": [] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "metadata": { 132 | "id": "hjAf0M3ttM7x" 133 | }, 134 | "source": [ 135 | "#@markdown ###元画像表示\n", 136 | "beta = 0.1 \n", 137 | "alpha = 0 \n", 138 | "M.alpha=[alpha]\n", 139 | "boundary_tmp2,c=GetBoundary(fs3,dt,M,threshold=beta)\n", 140 | "codes=M.MSCode(dlatent_tmp,boundary_tmp2)\n", 141 | "out=M.GenerateImg(codes)\n", 142 | "Image.fromarray(out[0,0])" 143 | ], 144 | "execution_count": null, 145 | "outputs": [] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "metadata": { 150 | "id": "oXgtSUvVpX64" 151 | }, 152 | "source": [ 153 | "#@markdown ###編集画像表示\n", 154 | "beta = 0.1 #@param {type:\"slider\", min:0.08, max:0.3, step:0.01}\n", 155 | "alpha = 2 #@param {type:\"slider\", min:-10, max:10, step:0.1}\n", 156 | "M.alpha=[alpha]\n", 157 | "boundary_tmp2,c=GetBoundary(fs3,dt,M,threshold=beta)\n", 158 | "codes=M.MSCode(dlatent_tmp,boundary_tmp2)\n", 159 | "out=M.GenerateImg(codes)\n", 160 | "Image.fromarray(out[0,0])" 161 | ], 162 | "execution_count": null, 163 | "outputs": [] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": { 168 | "id": "VKF-2zrFqILy" 169 | }, 170 | "source": [ 171 | "# 画像編集ビデオ" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "metadata": { 177 | "id": "-fhnU7Lnq2Tj" 178 | }, 179 | "source": [ 180 | "# 画像の選択\n", 181 | "img_indexs=[1276]\n", 182 | "dlatent_tmp=[tmp[img_indexs] for tmp in M.dlatents]\n", 183 | 
"M.num_images=len(img_indexs)" 184 | ], 185 | "execution_count": null, 186 | "outputs": [] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "metadata": { 191 | "id": "DxUlRCeOqn-J" 192 | }, 193 | "source": [ 194 | "# テキスト入力\n", 195 | "neutral='face with hair'\n", 196 | "target='Curly Hair'\n", 197 | "classnames=[target,neutral]\n", 198 | "dt=GetDt(classnames,model)" 199 | ], 200 | "execution_count": null, 201 | "outputs": [] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "metadata": { 206 | "id": "07NGF7LFqoHD" 207 | }, 208 | "source": [ 209 | "# 段階的な編集画像の保存\n", 210 | "import os\n", 211 | "import shutil\n", 212 | "if os.path.isdir('pic'):\n", 213 | " shutil.rmtree('pic')\n", 214 | "os.makedirs('pic', exist_ok=True)\n", 215 | "cnt = 0\n", 216 | "for i in range(0,20,1):\n", 217 | " beta = 0.1 \n", 218 | " alpha = i/10 \n", 219 | " M.alpha=[alpha]\n", 220 | " boundary_tmp2,c=GetBoundary(fs3,dt,M,threshold=beta)\n", 221 | " codes=M.MSCode(dlatent_tmp,boundary_tmp2)\n", 222 | " out=M.GenerateImg(codes)\n", 223 | " pic = Image.fromarray(out[0,0])\n", 224 | " pic.save('./pic/'+str(cnt).zfill(6)+'.png') \n", 225 | " cnt +=1\n", 226 | "\n", 227 | "for i in range(20,0,-1):\n", 228 | " beta = 0.1 \n", 229 | " alpha = i/10 \n", 230 | " M.alpha=[alpha]\n", 231 | " boundary_tmp2,c=GetBoundary(fs3,dt,M,threshold=beta)\n", 232 | " codes=M.MSCode(dlatent_tmp,boundary_tmp2)\n", 233 | " out=M.GenerateImg(codes)\n", 234 | " pic = Image.fromarray(out[0,0])\n", 235 | " pic.save('./pic/'+str(cnt).zfill(6)+'.png') \n", 236 | " cnt +=1 " 237 | ], 238 | "execution_count": null, 239 | "outputs": [] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "metadata": { 244 | "id": "CKYEVW4PJXWk" 245 | }, 246 | "source": [ 247 | "# 段階的な編集画像を動画に変換\n", 248 | "! ffmpeg -r 10 -i pic/%6d.png\\\n", 249 | " -vcodec libx264 -pix_fmt yuv420p output.mp4" 250 | ], 251 | "execution_count": null, 252 | "outputs": [] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "metadata": { 257 | "id": "9caPRkYaFCiJ" 258 | }, 259 | "source": [ 260 | "# Play the generated video\n", 261 | "from IPython.display import HTML\n", 262 | "from base64 import b64encode\n", 263 | "\n", 264 | "def video(path):\n", 265 | " mp4 = open(path,'rb').read()\n", 266 | " data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n", 267 | " return HTML('' % data_url)\n", 268 | "\n", 269 | "video('output.mp4')" 270 | ], 271 | "execution_count": null, 272 | "outputs": [] 273 | } 274 | ] 275 | } -------------------------------------------------------------------------------- /PIFuHD_Demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "view-in-github" 8 | }, 9 | "source": [ 10 | "\"Open" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "id": "WYhlsDkg1Hwb" 17 | }, 18 | "source": [ 19 | "## セットアップ" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": { 26 | "id": "8B1jmr82DtjG" 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "# ライブラリー取得\n", 31 | "!pip install torch==1.6.0+cu101 torchvision==0.7.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html\n", 32 | "!pip install pytorch3d\n", 33 | "\n", 34 | "# githubからpifuhdのコードをコピー\n", 35 | "!git clone https://github.com/facebookresearch/pifuhd\n", 36 | "\n", 37 | "# githubからpose-estimationのコードをコピーし、学習済み重みをダウンロード\n", 38 | "!git clone 
https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch.git\n", 39 | "%cd /content/lightweight-human-pose-estimation.pytorch/\n", 40 | "!wget https://download.01.org/opencv/openvino_training_extensions/models/human_pose_estimation/checkpoint_iter_370000.pth\n", 41 | "\n", 42 | "# pifuhdの学習済み重みをダウンロード\n", 43 | "%cd /content/pifuhd/\n", 44 | "!sh ./scripts/download_trained_model.sh" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": { 50 | "id": "QvQm-A8ESKb2" 51 | }, 52 | "source": [ 53 | "## PIFuHDの実行" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": { 60 | "id": "jaV_7Yi8fM-B" 61 | }, 62 | "outputs": [], 63 | "source": [ 64 | "# 自分の画像のアップロード(このブロックを実行しなければテスト画像を使用します)\n", 65 | "# Google Chrome 推奨(Safariではエラーが出ます)\n", 66 | "%cd /content/pifuhd/sample_images\n", 67 | "from google.colab import files\n", 68 | "filename = list(files.upload().keys())[0]" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": { 75 | "id": "AEzmmB01SOZp" 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "# セッティング\n", 80 | "import os\n", 81 | "\n", 82 | "try:\n", 83 | " image_path = '/content/pifuhd/sample_images/%s' % filename\n", 84 | "except:\n", 85 | " image_path = '/content/pifuhd/sample_images/test.png' # example image\n", 86 | "image_dir = os.path.dirname(image_path)\n", 87 | "file_name = os.path.splitext(os.path.basename(image_path))[0]\n", 88 | "\n", 89 | "# output pathes\n", 90 | "obj_path = '/content/pifuhd/results/pifuhd_final/recon/result_%s_256.obj' % file_name\n", 91 | "out_img_path = '/content/pifuhd/results/pifuhd_final/recon/result_%s_256.png' % file_name\n", 92 | "video_path = '/content/pifuhd/results/pifuhd_final/recon/result_%s_256.mp4' % file_name\n", 93 | "video_display_path = '/content/pifuhd/results/pifuhd_final/result_%s_256_display.mp4' % file_name" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": { 100 | "id": "PdRcDXe38lHB" 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "# クロッピング\n", 105 | "%cd /content/lightweight-human-pose-estimation.pytorch/\n", 106 | "import torch\n", 107 | "import cv2\n", 108 | "import numpy as np\n", 109 | "from models.with_mobilenet import PoseEstimationWithMobileNet\n", 110 | "from modules.keypoints import extract_keypoints, group_keypoints\n", 111 | "from modules.load_state import load_state\n", 112 | "from modules.pose import Pose, track_poses\n", 113 | "import demo\n", 114 | "\n", 115 | "def get_rect(net, images, height_size):\n", 116 | " net = net.eval()\n", 117 | "\n", 118 | " stride = 8\n", 119 | " upsample_ratio = 4\n", 120 | " num_keypoints = Pose.num_kpts\n", 121 | " previous_poses = []\n", 122 | " delay = 33\n", 123 | " for image in images:\n", 124 | " rect_path = image.replace('.%s' % (image.split('.')[-1]), '_rect.txt')\n", 125 | " img = cv2.imread(image, cv2.IMREAD_COLOR)\n", 126 | " orig_img = img.copy()\n", 127 | " orig_img = img.copy()\n", 128 | " heatmaps, pafs, scale, pad = demo.infer_fast(net, img, height_size, stride, upsample_ratio, cpu=False)\n", 129 | "\n", 130 | " total_keypoints_num = 0\n", 131 | " all_keypoints_by_type = []\n", 132 | " for kpt_idx in range(num_keypoints): # 19th for bg\n", 133 | " total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type, total_keypoints_num)\n", 134 | "\n", 135 | " pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs)\n", 136 | " for kpt_id in range(all_keypoints.shape[0]):\n", 
137 | " all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale\n", 138 | " all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale\n", 139 | " current_poses = []\n", 140 | "\n", 141 | " rects = []\n", 142 | " for n in range(len(pose_entries)):\n", 143 | " if len(pose_entries[n]) == 0:\n", 144 | " continue\n", 145 | " pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1\n", 146 | " valid_keypoints = []\n", 147 | " for kpt_id in range(num_keypoints):\n", 148 | " if pose_entries[n][kpt_id] != -1.0: # keypoint was found\n", 149 | " pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0])\n", 150 | " pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1])\n", 151 | " valid_keypoints.append([pose_keypoints[kpt_id, 0], pose_keypoints[kpt_id, 1]])\n", 152 | " valid_keypoints = np.array(valid_keypoints)\n", 153 | " \n", 154 | " if pose_entries[n][10] != -1.0 or pose_entries[n][13] != -1.0:\n", 155 | " pmin = valid_keypoints.min(0)\n", 156 | " pmax = valid_keypoints.max(0)\n", 157 | "\n", 158 | " center = (0.5 * (pmax[:2] + pmin[:2])).astype(np.int)\n", 159 | " radius = int(0.65 * max(pmax[0]-pmin[0], pmax[1]-pmin[1]))\n", 160 | " elif pose_entries[n][10] == -1.0 and pose_entries[n][13] == -1.0 and pose_entries[n][8] != -1.0 and pose_entries[n][11] != -1.0:\n", 161 | " # if leg is missing, use pelvis to get cropping\n", 162 | " center = (0.5 * (pose_keypoints[8] + pose_keypoints[11])).astype(np.int)\n", 163 | " radius = int(1.45*np.sqrt(((center[None,:] - valid_keypoints)**2).sum(1)).max(0))\n", 164 | " center[1] += int(0.05*radius)\n", 165 | " else:\n", 166 | " center = np.array([img.shape[1]//2,img.shape[0]//2])\n", 167 | " radius = max(img.shape[1]//2,img.shape[0]//2)\n", 168 | "\n", 169 | " x1 = center[0] - radius\n", 170 | " y1 = center[1] - radius\n", 171 | "\n", 172 | " rects.append([x1, y1, 2*radius, 2*radius])\n", 173 | "\n", 174 | " np.savetxt(rect_path, np.array(rects), fmt='%d')\n", 175 | "\n", 176 | "net = PoseEstimationWithMobileNet()\n", 177 | "checkpoint = torch.load('checkpoint_iter_370000.pth', map_location='cpu')\n", 178 | "load_state(net, checkpoint)\n", 179 | "\n", 180 | "get_rect(net.cuda(), [image_path], 512)" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "metadata": { 187 | "id": "5995t2PnQTmG" 188 | }, 189 | "outputs": [], 190 | "source": [ 191 | "# レンダリング\n", 192 | "%cd /content/pifuhd/\n", 193 | "!python -m apps.simple_test -r 256 --use_rect -i $image_dir" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "metadata": { 200 | "id": "afwL_-ROCmDf" 201 | }, 202 | "outputs": [], 203 | "source": [ 204 | "# mp4の作成\n", 205 | "from lib.colab_util import generate_video_from_obj, set_renderer, video\n", 206 | "\n", 207 | "renderer = set_renderer()\n", 208 | "generate_video_from_obj(obj_path, out_img_path, video_path, renderer)\n", 209 | "\n", 210 | "# we cannot play a mp4 video generated by cv2\n", 211 | "!ffmpeg -i $video_path -vcodec libx264 $video_display_path -y -loglevel quiet\n", 212 | "video(video_display_path)" 213 | ] 214 | } 215 | ], 216 | "metadata": { 217 | "accelerator": "GPU", 218 | "colab": { 219 | "collapsed_sections": [], 220 | "include_colab_link": true, 221 | "name": "PIFuHD Demo", 222 | "provenance": [], 223 | "toc_visible": true 224 | }, 225 | "kernelspec": { 226 | "display_name": "Python 3", 227 | "language": "python", 228 | "name": 
"python3" 229 | }, 230 | "language_info": { 231 | "codemirror_mode": { 232 | "name": "ipython", 233 | "version": 3 234 | }, 235 | "file_extension": ".py", 236 | "mimetype": "text/x-python", 237 | "name": "python", 238 | "nbconvert_exporter": "python", 239 | "pygments_lexer": "ipython3", 240 | "version": "3.7.9" 241 | } 242 | }, 243 | "nbformat": 4, 244 | "nbformat_minor": 1 245 | } 246 | -------------------------------------------------------------------------------- /DALL_E_demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "accelerator": "GPU", 6 | "colab": { 7 | "name": "DALL_E_demo", 8 | "provenance": [], 9 | "collapsed_sections": [], 10 | "include_colab_link": true 11 | }, 12 | "kernelspec": { 13 | "display_name": "Python 3", 14 | "name": "python3" 15 | } 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "markdown", 20 | "metadata": { 21 | "id": "view-in-github", 22 | "colab_type": "text" 23 | }, 24 | "source": [ 25 | "\"Open" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": { 31 | "id": "2nD1n0xEBcko" 32 | }, 33 | "source": [ 34 | "# セットアップ" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "metadata": { 40 | "id": "F4iTie2EKrbb" 41 | }, 42 | "source": [ 43 | "# 1.Pytorchバージョン変更\n", 44 | "! pip install torch==1.7.1+cu110 torchvision==0.8.2+cu110 -f https://download.pytorch.org/whl/torch_stable.html \n", 45 | "\n", 46 | "# 2.Pytorch画像処理ライブラリー・インストール\n", 47 | "! pip install kornia==0.5.0\n", 48 | "\n", 49 | "# 3.CLIP関連コードのコピー\n", 50 | "! git clone https://github.com/openai/CLIP.git\n", 51 | "%cd /content/CLIP/\n", 52 | "\n", 53 | "# 4.CLIPのモデル化\n", 54 | "! pip install ftfy regex\n", 55 | "import clip\n", 56 | "model, preprocess = clip.load('ViT-B/32', jit=True) \n", 57 | "model = model.eval() \n", 58 | "\n", 59 | "# 5.DALL-Eのモデル化\n", 60 | "! 
pip install DALL-E\n", 61 | "from dall_e import map_pixels, unmap_pixels, load_model\n", 62 | "dec = load_model(\"https://cdn.openai.com/dall-e/decoder.pkl\", 'cuda') \n" 63 | ], 64 | "execution_count": null, 65 | "outputs": [] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": { 70 | "id": "KAcixx9Z3XYH" 71 | }, 72 | "source": [ 73 | "# ライブラリー・インポート&関数定義\n" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "metadata": { 79 | "id": "piJOg9MY7khd" 80 | }, 81 | "source": [ 82 | "import torch\n", 83 | "import numpy as np\n", 84 | "import torchvision\n", 85 | "import torchvision.transforms.functional as TF\n", 86 | "import torchvision.transforms as T\n", 87 | "import kornia\n", 88 | "import PIL\n", 89 | "import os, io, sys\n", 90 | "import random\n", 91 | "import imageio\n", 92 | "from IPython import display\n", 93 | "from IPython.core.interactiveshell import InteractiveShell\n", 94 | "InteractiveShell.ast_node_interactivity = \"all\"\n", 95 | "from google.colab import output\n", 96 | "import requests\n", 97 | "\n", 98 | "# 初期設定\n", 99 | "im_shape = [512, 512, 3]\n", 100 | "sideX, sideY, channels = im_shape\n", 101 | "target_image_size = sideX\n", 102 | "tau_value = 2.\n", 103 | "\n", 104 | "# 画像表示・保存\n", 105 | "def displ(img):\n", 106 | " img = np.array(img)[:,:,:]\n", 107 | " img = np.transpose(img, (1, 2, 0))\n", 108 | " imageio.imwrite('output.png', np.array(img))\n", 109 | " return display.Image('output.png')\n", 110 | "\n", 111 | "# 画像のランダム切り出し\n", 112 | "def augment(out, cutn=16):\n", 113 | " p_s = []\n", 114 | " for ch in range(cutn):\n", 115 | " sizey = int(torch.zeros(1,).uniform_(.5, .99)*sideY)\n", 116 | " sizex = int(torch.zeros(1,).uniform_(.5, .99)*sideX)\n", 117 | " offsetx = torch.randint(0, sideX - sizex, ())\n", 118 | " offsety = torch.randint(0, sideY - sizey, ())\n", 119 | " apper = out[:, :, offsetx:offsetx + sizex, offsety:offsety + sizey]\n", 120 | " apper = apper + .1*torch.rand(1,1,1,1).cuda()*torch.randn_like(apper, requires_grad=True)\n", 121 | " apper = torch.nn.functional.interpolate(apper, (224,224), mode='bilinear')\n", 122 | " p_s.append(apper)\n", 123 | " into = augs(torch.cat(p_s, 0))\n", 124 | " return into\n", 125 | "\n", 126 | "# 正規化と回転設定\n", 127 | "nom = torchvision.transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711))\n", 128 | "augs = kornia.augmentation.RandomRotation(30).cuda()\n" 129 | ], 130 | "execution_count": null, 131 | "outputs": [] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": { 136 | "id": "XaocGDQXz3Zx" 137 | }, 138 | "source": [ 139 | "# テキストから特徴ベクトルを抽出" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "metadata": { 145 | "id": "PGBTOiJqWgZ3" 146 | }, 147 | "source": [ 148 | "# テキスト入力\n", 149 | "text_input = 'a beautiful and mysterious house designed by Escher'\n", 150 | "\n", 151 | "# テキストを特徴ベクトルに変換\n", 152 | "token = clip.tokenize(text_input) \n", 153 | "text_v = model.encode_text(token.cuda()).detach().clone() " 154 | ], 155 | "execution_count": null, 156 | "outputs": [] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "metadata": { 161 | "id": "TSqoQrpGCUp0" 162 | }, 163 | "source": [ 164 | "# 【チェック】token, text_vのシェイプ\n", 165 | "print('token.shape = ', token.shape)\n", 166 | "print('token = ', token)\n", 167 | "print('text_v.shape = ', text_v.shape)" 168 | ], 169 | "execution_count": null, 170 | "outputs": [] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": { 175 | "id": "oiAD3aRNMC4l" 176 | }, 177 | "source": [ 178 | "# 
パラメータ・最適化手法の設定" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "metadata": { 184 | "id": "GdCh2D8Dt8Xd" 185 | }, 186 | "source": [ 187 | "# パラメータの設定\n", 188 | "class Pars(torch.nn.Module):\n", 189 | " def __init__(self):\n", 190 | " super(Pars, self).__init__()\n", 191 | " hots = torch.nn.functional.one_hot((torch.arange(0, 8192).to(torch.int64)), num_classes=8192)\n", 192 | " rng = torch.zeros(1, 64*64, 8192).uniform_()\n", 193 | " for i in range(64*64):\n", 194 | " rng[0,i] = hots[[np.random.randint(8191)]]\n", 195 | " rng = rng.permute(0, 2, 1)\n", 196 | " self.normu = torch.nn.Parameter(rng.cuda().view(1, 8192, 64*64))\n", 197 | " \n", 198 | " def forward(self): \n", 199 | " normu = torch.nn.functional.gumbel_softmax(self.normu.reshape(1,64*64,8192), dim=1, tau=tau_value).view(1, 8192, 64, 64)\n", 200 | " return normu \n", 201 | "\n", 202 | "# 最適化手法の設定\n", 203 | "latent = Pars().cuda() \n", 204 | "param = [latent.normu] \n", 205 | "optimizer = torch.optim.Adam([{'params': param, 'lr': .01}]) \n" 206 | ], 207 | "execution_count": null, 208 | "outputs": [] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "metadata": { 213 | "id": "3ZyFGu6IC5Rx" 214 | }, 215 | "source": [ 216 | "# 【チェック】パラメータから画像生成\n", 217 | "with torch.no_grad():\n", 218 | " out = unmap_pixels(torch.sigmoid(dec(latent())[:, :3].float()))\n", 219 | " displ(out.cpu()[0])\n", 220 | "\n", 221 | " print('latent().shape = ', latent().shape)\n", 222 | " print('dec(latent()).shape = ', dec(latent()).shape)\n", 223 | " print('out.shape = ', out.shape) " 224 | ], 225 | "execution_count": null, 226 | "outputs": [] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": { 231 | "id": "WztSrRF23Rqg" 232 | }, 233 | "source": [ 234 | "# 学習" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "metadata": { 240 | "id": "NwYNUzzovPEW" 241 | }, 242 | "source": [ 243 | "# 学習ループ\n", 244 | "for iteration in range(1001):\n", 245 | "\n", 246 | " # --- 順伝播 ---\n", 247 | " # パラメータから画像を生成\n", 248 | " out = unmap_pixels(torch.sigmoid(dec(latent())[:, :3].float()))\n", 249 | " # 画像をランダム切り出し・回転 \n", 250 | " into = augment(out)\n", 251 | " # 画像を正規化\n", 252 | " into = nom((into))\n", 253 | " # 画像から特徴ベクトルを取得\n", 254 | " image_v = model.encode_image(into)\n", 255 | " # テキストと画像の特徴ベクトルのCOS類似度を計算 \n", 256 | " loss = -torch.cosine_similarity(text_v, image_v).mean() \n", 257 | "\n", 258 | " # 逆伝播\n", 259 | " optimizer.zero_grad()\n", 260 | " loss.backward()\n", 261 | " optimizer.step() \n", 262 | "\n", 263 | " # 学習率の調整\n", 264 | " for g in optimizer.param_groups:\n", 265 | " g['lr'] = g['lr']*1.005\n", 266 | " g['lr'] = min(g['lr'], .12)\n", 267 | "\n", 268 | " # ログ表示 \n", 269 | " if iteration % 50 == 0:\n", 270 | " with torch.no_grad():\n", 271 | "\n", 272 | " # 生成画像の表示・保存\n", 273 | " out = unmap_pixels(torch.sigmoid(dec(latent())[:, :3]).float()) ###\n", 274 | " displ(out.cpu()[0]) ###\n", 275 | "\n", 276 | " # データ表示\n", 277 | " print('iter = ',iteration)\n", 278 | " for g in optimizer.param_groups:\n", 279 | " print('lr = ', g['lr'])\n", 280 | " print('tau_value = ', tau_value)\n", 281 | " print('loss = ',loss.item())\n", 282 | " print('\\n')\n" 283 | ], 284 | "execution_count": null, 285 | "outputs": [] 286 | } 287 | ] 288 | } 289 | -------------------------------------------------------------------------------- /ArtLine_make_gif.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": 
"ArtLine_make_gif", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "accelerator": "GPU" 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "markdown", 20 | "metadata": { 21 | "id": "view-in-github", 22 | "colab_type": "text" 23 | }, 24 | "source": [ 25 | "\"Open" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": { 31 | "id": "eOhPqC6fysD4" 32 | }, 33 | "source": [ 34 | "# **ArtLine_make_gif**\n", 35 | "**Create** **Amazing** **Line** **Art**." 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "metadata": { 41 | "id": "xzHW4dq4ys7_" 42 | }, 43 | "source": [ 44 | "# copy github\n", 45 | "!git clone https://github.com/vijishmadhavan/ArtLine.git ArtLine\n", 46 | "%cd ArtLine/\n", 47 | "\n", 48 | "# get libralies\n", 49 | "!pip install -r colab_requirements.txt\n", 50 | "!pip install -q youtube-dl" 51 | ], 52 | "execution_count": null, 53 | "outputs": [] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": { 58 | "id": "2cjGDScH86iU" 59 | }, 60 | "source": [ 61 | "# **Runtime**\n", 62 | "\n", 63 | "* Hardware Accelerator = GPU \n", 64 | "You have to click twice\n" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "metadata": { 70 | "id": "qnC6OObV3sNk" 71 | }, 72 | "source": [ 73 | "import fastai\n", 74 | "from fastai.vision import *\n", 75 | "from fastai.utils.mem import *\n", 76 | "from fastai.vision import open_image, load_learner, image, torch\n", 77 | "import numpy as np\n", 78 | "import urllib.request\n", 79 | "import PIL.Image\n", 80 | "from io import BytesIO\n", 81 | "import torchvision.transforms as T\n", 82 | "from PIL import Image\n", 83 | "import requests\n", 84 | "from io import BytesIO\n", 85 | "import fastai\n", 86 | "from fastai.vision import *\n", 87 | "from fastai.utils.mem import *\n", 88 | "from fastai.vision import open_image, load_learner, image, torch\n", 89 | "import numpy as np\n", 90 | "import urllib.request\n", 91 | "import PIL.Image\n", 92 | "from io import BytesIO\n", 93 | "import torchvision.transforms as T\n", 94 | "\n", 95 | "class FeatureLoss(nn.Module):\n", 96 | " def __init__(self, m_feat, layer_ids, layer_wgts):\n", 97 | " super().__init__()\n", 98 | " self.m_feat = m_feat\n", 99 | " self.loss_features = [self.m_feat[i] for i in layer_ids]\n", 100 | " self.hooks = hook_outputs(self.loss_features, detach=False)\n", 101 | " self.wgts = layer_wgts\n", 102 | " self.metric_names = ['pixel',] + [f'feat_{i}' for i in range(len(layer_ids))\n", 103 | " ] + [f'gram_{i}' for i in range(len(layer_ids))]\n", 104 | "\n", 105 | " def make_features(self, x, clone=False):\n", 106 | " self.m_feat(x)\n", 107 | " return [(o.clone() if clone else o) for o in self.hooks.stored]\n", 108 | " \n", 109 | " def forward(self, input, target):\n", 110 | " out_feat = self.make_features(target, clone=True)\n", 111 | " in_feat = self.make_features(input)\n", 112 | " self.feat_losses = [base_loss(input,target)]\n", 113 | " self.feat_losses += [base_loss(f_in, f_out)*w\n", 114 | " for f_in, f_out, w in zip(in_feat, out_feat, self.wgts)]\n", 115 | " self.feat_losses += [base_loss(gram_matrix(f_in), gram_matrix(f_out))*w**2 * 5e3\n", 116 | " for f_in, f_out, w in zip(in_feat, out_feat, self.wgts)]\n", 117 | " self.metrics = dict(zip(self.metric_names, self.feat_losses))\n", 118 | " return sum(self.feat_losses)\n", 119 | " \n", 120 | " def __del__(self): self.hooks.remove()" 121 | ], 122 | "execution_count": null, 123 | "outputs": [] 
124 | }, 125 | { 126 | "cell_type": "code", 127 | "metadata": { 128 | "id": "qmLIGUuu3vp5" 129 | }, 130 | "source": [ 131 | "MODEL_URL = \"https://www.dropbox.com/s/p9lynpwygjmeed2/ArtLine_500.pkl?dl=1 \"\n", 132 | "urllib.request.urlretrieve(MODEL_URL, \"ArtLine_500.pkl\")\n", 133 | "path = Path(\".\")\n", 134 | "learn=load_learner(path, 'ArtLine_500.pkl')" 135 | ], 136 | "execution_count": null, 137 | "outputs": [] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "metadata": { 142 | "id": "teAfEed9GOdX" 143 | }, 144 | "source": [ 145 | "# check YouTubeVideo\n", 146 | "from IPython.display import YouTubeVideo\n", 147 | "YOUTUBE_ID ='m0u0uAhoxq4'\n", 148 | "YouTubeVideo(YOUTUBE_ID)" 149 | ], 150 | "execution_count": null, 151 | "outputs": [] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "metadata": { 156 | "id": "dl9MwHi89M4C" 157 | }, 158 | "source": [ 159 | "# download YouTubeVideo\n", 160 | "!rm -rf youtube.mp4\n", 161 | "!youtube-dl -f 'bestvideo[ext=mp4]' --output \"youtube.%(ext)s\" https://www.youtube.com/watch?v=$YOUTUBE_ID" 162 | ], 163 | "execution_count": null, 164 | "outputs": [] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "metadata": { 169 | "id": "yoVhz1KEiWcV" 170 | }, 171 | "source": [ 172 | "# edit YouTubeVideo\n", 173 | "import os\n", 174 | "os.makedirs('video', exist_ok=True)\n", 175 | "!ffmpeg -i youtube.mp4 -filter:v 'crop=300:300:170:0' -ss 00:00:01 -t 00:00:02 -async 1 ./video/takikuri.mp4" 176 | ], 177 | "execution_count": null, 178 | "outputs": [] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "metadata": { 183 | "id": "_LUgHbTyMlJt" 184 | }, 185 | "source": [ 186 | "# video2frames\n", 187 | "import os\n", 188 | "import cv2\n", 189 | "\n", 190 | "os.makedirs('images', exist_ok=True)\n", 191 | " \n", 192 | "def video_2_frames(video_file='./video/takikuri.mp4', \n", 193 | " image_dir='./images/', \n", 194 | " image_file='img_%s.png'):\n", 195 | " \n", 196 | " # Initial setting\n", 197 | " i = 0\n", 198 | " interval = 3\n", 199 | " length = 30\n", 200 | " \n", 201 | " cap = cv2.VideoCapture(video_file)\n", 202 | " while(cap.isOpened()):\n", 203 | " flag, frame = cap.read() \n", 204 | " if flag == False: \n", 205 | " break\n", 206 | " if i == length*interval:\n", 207 | " break\n", 208 | " if i % interval == 0: \n", 209 | " cv2.imwrite(image_dir+image_file % str(i).zfill(6), frame)\n", 210 | " print('Save', image_dir+image_file % str(i).zfill(6))\n", 211 | " i += 1 \n", 212 | " cap.release() \n", 213 | " \n", 214 | "def main():\n", 215 | " video_2_frames()\n", 216 | " \n", 217 | "if __name__ == '__main__':\n", 218 | " main() \n" 219 | ], 220 | "execution_count": null, 221 | "outputs": [] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "metadata": { 226 | "id": "aeSLtsxqHqEV" 227 | }, 228 | "source": [ 229 | "# frames2ArtLines\n", 230 | "import os\n", 231 | "import torchvision.utils as vutils\n", 232 | "\n", 233 | "os.makedirs('out', exist_ok=True)\n", 234 | "\n", 235 | "input_path = './images' \n", 236 | "output_path = './out'\n", 237 | "files = os.listdir(input_path)\n", 238 | "files.sort()\n", 239 | "\n", 240 | "temp =[add_metrics]\n", 241 | "\n", 242 | "for file in files:\n", 243 | " print(file)\n", 244 | " if file == '.ipynb_checkpoints':\n", 245 | " continue\n", 246 | " img = PIL.Image.open(input_path+'/'+file).convert(\"RGB\")\n", 247 | " img_t = T.ToTensor()(img)\n", 248 | " img_fast = Image(img_t)\n", 249 | " p,img_hr,b = learn.predict(img_fast)\n", 250 | " vutils.save_image(img_hr,output_path+'/'+file)\n", 251 | " " 252 | ], 253 | 
"execution_count": null, 254 | "outputs": [] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "metadata": { 259 | "id": "NbmPUrXcQNRm" 260 | }, 261 | "source": [ 262 | "# ArtLines2GIF\n", 263 | "from PIL import Image\n", 264 | "import glob\n", 265 | " \n", 266 | "files = sorted(glob.glob('./out/*.png'))\n", 267 | "images = list(map(lambda file: Image.open(file), files))\n", 268 | "images[0].save('./takikuri.gif', save_all=True, \n", 269 | " append_images=images[1:], \n", 270 | " duration=100, loop=0)" 271 | ], 272 | "execution_count": null, 273 | "outputs": [] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "metadata": { 278 | "id": "Tu3MDH6Q1pzV" 279 | }, 280 | "source": [ 281 | "# display GIF\n", 282 | "from IPython.display import Image\n", 283 | "Image('./takikuri.gif', format='png')" 284 | ], 285 | "execution_count": null, 286 | "outputs": [] 287 | } 288 | ] 289 | } -------------------------------------------------------------------------------- /CLIP_demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "view-in-github" 8 | }, 9 | "source": [ 10 | "\"Open" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "id": "EXnkFjoZy9kd" 17 | }, 18 | "source": [ 19 | "# セットアップ" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": { 26 | "id": "0BpdJkdBssk9" 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "# --- セットアップ ---\n", 31 | "\n", 32 | "# 1.pytorchバージョン変更\n", 33 | "! pip install torch==1.7.1+cu101 torchvision==0.8.2+cu101 -f https://download.pytorch.org/whl/torch_stable.html #ftfy regex\n", 34 | "\n", 35 | "# 2.GithubからCLIPをコピー\n", 36 | "! git clone https://github.com/openai/CLIP.git\n", 37 | "%cd CLIP/clip\n", 38 | "\n", 39 | "# 3.CLIPモデルの重みをダウンロード\n", 40 | "MODELS = {\n", 41 | " \"RN50\": \"https://openaipublic.azureedge.net/clip/models/afeb0e10f9e5a86da6080e35cf09123aca3b358a0c3e3b6c78a7b63bc04b6762/RN50.pt\",\n", 42 | " \"RN101\": \"https://openaipublic.azureedge.net/clip/models/8fa8567bab74a42d41c5915025a8e4538c3bdbe8804a470a72f30b0d94fab599/RN101.pt\",\n", 43 | " \"RN50x4\": \"https://openaipublic.azureedge.net/clip/models/7e526bd135e493cef0776de27d5f42653e6b4c8bf9e0f653bb11773263205fdd/RN50x4.pt\",\n", 44 | " \"ViT-B/32\": \"https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt\", \n", 45 | "}\n", 46 | "! wget {MODELS[\"ViT-B/32\"]} -O model.pt\n", 47 | "\n", 48 | "# 4.simple_tokenizer インストール\n", 49 | "! pip install ftfy regex\n", 50 | "from simple_tokenizer import *\n", 51 | "tokenizer = SimpleTokenizer()\n", 52 | "\n", 53 | "# 5.サンプル画像ダウンロード\n", 54 | "! pip install --upgrade gdown\n", 55 | "import gdown\n", 56 | "gdown.download('https://drive.google.com/uc?id=1vcxH6JOtwh_-FoZ8SNXYlHF9qCi3YoDH', 'food_101.zip', quiet=False)\n", 57 | "! 
unzip food_101.zip" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": { 63 | "id": "qJCVYoXrK1ty" 64 | }, 65 | "source": [ 66 | "# CLIPモデルの仕様確認" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "id": "IBRVTY9lbGm8" 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "# --- CLIPモデルの仕様確認 ----\n", 78 | "\n", 79 | "import numpy as np\n", 80 | "import torch\n", 81 | "\n", 82 | "model = torch.jit.load(\"model.pt\").cuda().eval()\n", 83 | "input_resolution = model.input_resolution.item()\n", 84 | "context_length = model.context_length.item()\n", 85 | "vocab_size = model.vocab_size.item()\n", 86 | "\n", 87 | "print(\"Model parameters:\", f\"{np.sum([int(np.prod(p.shape)) for p in model.parameters()]):,}\")\n", 88 | "print(\"Input resolution:\", input_resolution)\n", 89 | "print(\"Context length:\", context_length)\n", 90 | "print(\"Vocab size:\", vocab_size)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": { 96 | "id": "kmQXf9wrK-6t" 97 | }, 98 | "source": [ 99 | "# simple_tokenizer の動作確認" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": { 106 | "id": "LFyjzC85LKrH" 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "# テキストをトークンへ変換1\n", 111 | "index = tokenizer.encode('I ate an apple')\n", 112 | "print(index)" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": { 119 | "id": "SgmKMg6mV5RA" 120 | }, 121 | "outputs": [], 122 | "source": [ 123 | "# テキストをトークンへ変換2\n", 124 | "index = tokenizer.encode('image segmentation')\n", 125 | "print(index)" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": { 131 | "id": "6H6tX85TKA0n" 132 | }, 133 | "source": [ 134 | "# 画像の前処理\n" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "id": "d6cpiIFHp9N6" 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "# --- 画像の前処理 ----\n", 146 | "\n", 147 | "from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize\n", 148 | "from PIL import Image\n", 149 | "import glob\n", 150 | "\n", 151 | "# 設定\n", 152 | "preprocess = Compose([\n", 153 | " Resize(input_resolution, interpolation=Image.BICUBIC),\n", 154 | " CenterCrop(input_resolution),\n", 155 | " ToTensor()\n", 156 | "])\n", 157 | "\n", 158 | "image_mean = torch.tensor([0.48145466, 0.4578275, 0.40821073]).cuda()\n", 159 | "image_std = torch.tensor([0.26862954, 0.26130258, 0.27577711]).cuda()\n", 160 | "\n", 161 | "# 前処理実行\n", 162 | "images =[]\n", 163 | "files = glob.glob('./food_101/*.jpg')\n", 164 | "files.sort()\n", 165 | "for file in files:\n", 166 | " image = preprocess(Image.open(file).convert(\"RGB\"))\n", 167 | " images.append(image)\n", 168 | "\n", 169 | "image_input = torch.tensor(np.stack(images)).cuda()\n", 170 | "image_input -= image_mean[:, None, None]\n", 171 | "image_input /= image_std[:, None, None]\n", 172 | "\n", 173 | "print('image_input.shape = ', image_input.shape)" 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": { 179 | "id": "L_uKiB2nKQJX" 180 | }, 181 | "source": [ 182 | "# テキストの前処理" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": { 189 | "id": "C4S__zCGy2MT" 190 | }, 191 | "outputs": [], 192 | "source": [ 193 | "# --- テキストの前処理 ----\n", 194 | "\n", 195 | "# 分類ラベルの設定\n", 196 | "labels = ['takoyaki', 'susi', 'spagetti', 'ramen', 'pizza', 'omelette', 'humburger', 'gyoza']\n", 
197 | "\n", 198 | "# ラベルを文の形のトークンへ変換\n", 199 | "text_descriptions = [f\"This is a photo of a {label}\" for label in labels] \n", 200 | "sot_token = tokenizer.encoder['<|startoftext|>']\n", 201 | "eot_token = tokenizer.encoder['<|endoftext|>']\n", 202 | "text_tokens = [[sot_token] + tokenizer.encode(desc) + [eot_token] for desc in text_descriptions]\n", 203 | "text_input = torch.zeros(len(text_tokens), model.context_length, dtype=torch.long)\n", 204 | "\n", 205 | "# トークンをテンソルに変換\n", 206 | "for i, tokens in enumerate(text_tokens):\n", 207 | " text_input[i, :len(tokens)] = torch.tensor(tokens)\n", 208 | "\n", 209 | "text_input = text_input.cuda()" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": { 216 | "id": "1tFp8PHOKLdE" 217 | }, 218 | "outputs": [], 219 | "source": [ 220 | "# 各データの先頭を表示\n", 221 | "print(text_descriptions[0]) \n", 222 | "print(text_tokens[0])\n", 223 | "print(text_input[0])\n", 224 | "print(text_input.shape)" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": { 230 | "id": "2u4oePsAKdJO" 231 | }, 232 | "source": [ 233 | "# 画像とテキストのcos類似度を計算" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": { 240 | "id": "iqjF6NbBCT0a" 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "# --- 画像とテキストのCOS類似度を計算 ----\n", 245 | "\n", 246 | "# CLIPモデルで画像とテキストの特徴を抽出\n", 247 | "with torch.no_grad():\n", 248 | " image_features = model.encode_image(image_input).float()\n", 249 | " text_features = model.encode_text(text_input).float()\n", 250 | " text_features /= text_features.norm(dim=-1, keepdim=True) \n", 251 | "\n", 252 | "# 画像の特徴とテキストの特徴からCOS類似度を計算\n", 253 | "text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)\n", 254 | "top_probs, top_labels = text_probs.cpu().topk(5, dim=-1)\n", 255 | "\n", 256 | "print(image_features.shape)\n", 257 | "print(text_features.shape)" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "metadata": { 264 | "id": "EYqMIccpabig" 265 | }, 266 | "outputs": [], 267 | "source": [ 268 | "# COS類似度の計算結果をそのまま表示\n", 269 | "print(text_probs)" 270 | ] 271 | }, 272 | { 273 | "cell_type": "markdown", 274 | "metadata": { 275 | "id": "OIM5PWmSKlVm" 276 | }, 277 | "source": [ 278 | "# 予測結果の表示" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": null, 284 | "metadata": { 285 | "id": "s5HrieUc34n_" 286 | }, 287 | "outputs": [], 288 | "source": [ 289 | "# --- 予測結果の表示 ---\n", 290 | "\n", 291 | "import matplotlib.pyplot as plt\n", 292 | "\n", 293 | "def pred_disp(i, image):\n", 294 | " plt.figure(figsize=(8, 4))\n", 295 | " plt.subplot(1, 2, 1)\n", 296 | " plt.imshow(image.permute(1, 2, 0))\n", 297 | " plt.axis(\"off\")\n", 298 | "\n", 299 | " plt.subplot(1, 2, 2)\n", 300 | " y = np.arange(top_probs.shape[-1])\n", 301 | " plt.grid()\n", 302 | " plt.barh(y, top_probs[i])\n", 303 | " plt.gca().invert_yaxis()\n", 304 | " plt.gca().set_axisbelow(True)\n", 305 | " plt.yticks(y, [labels[index] for index in top_labels[i].numpy()])\n", 306 | " plt.xlabel(\"probability\")\n", 307 | "\n", 308 | " plt.subplots_adjust(wspace=0.5)\n", 309 | " plt.show()\n", 310 | "\n", 311 | "for i, image in enumerate(images):\n", 312 | " pred_disp(i, image)" 313 | ] 314 | } 315 | ], 316 | "metadata": { 317 | "accelerator": "GPU", 318 | "colab": { 319 | "collapsed_sections": [], 320 | "include_colab_link": true, 321 | "name": "CLIP_demo", 322 | "provenance": [], 323 | "toc_visible": true 324 | }, 325 | 
"kernelspec": { 326 | "display_name": "Python 3", 327 | "language": "python", 328 | "name": "python3" 329 | }, 330 | "language_info": { 331 | "codemirror_mode": { 332 | "name": "ipython", 333 | "version": 3 334 | }, 335 | "file_extension": ".py", 336 | "mimetype": "text/x-python", 337 | "name": "python", 338 | "nbconvert_exporter": "python", 339 | "pygments_lexer": "ipython3", 340 | "version": "3.7.9" 341 | } 342 | }, 343 | "nbformat": 4, 344 | "nbformat_minor": 1 345 | } 346 | -------------------------------------------------------------------------------- /DALL_E.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "accelerator": "GPU", 6 | "colab": { 7 | "name": "DALL_E", 8 | "provenance": [], 9 | "collapsed_sections": [], 10 | "toc_visible": true, 11 | "include_colab_link": true 12 | }, 13 | "kernelspec": { 14 | "display_name": "Python 3", 15 | "name": "python3" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "view-in-github", 23 | "colab_type": "text" 24 | }, 25 | "source": [ 26 | "\"Open" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": { 32 | "id": "2nD1n0xEBcko" 33 | }, 34 | "source": [ 35 | "# セットアップ" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "metadata": { 41 | "id": "N65H8lL1cR1V" 42 | }, 43 | "source": [ 44 | "# GPUスペック確認\n", 45 | "!nvidia-smi -L" 46 | ], 47 | "execution_count": null, 48 | "outputs": [] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "metadata": { 53 | "id": "F4iTie2EKrbb" 54 | }, 55 | "source": [ 56 | "# Pytorchバージョン変更\n", 57 | "! pip install torch==1.7.1+cu110 torchvision==0.8.2+cu110 -f https://download.pytorch.org/whl/torch_stable.html \n", 58 | "\n", 59 | "# Pytorch画像処理ライブラリー・インストール\n", 60 | "! pip install kornia==0.5.0\n", 61 | "\n", 62 | "# CLIP関連コードのコピー\n", 63 | "! git clone https://github.com/openai/CLIP.git\n", 64 | "%cd /content/CLIP/\n", 65 | "\n", 66 | "# CLIPのモデル化\n", 67 | "! pip install ftfy regex\n", 68 | "import clip\n", 69 | "model, preprocess = clip.load('ViT-B/32', jit=True) \n", 70 | "model = model.eval() \n", 71 | "\n", 72 | "# DALL-Eのモデル化\n", 73 | "! 
pip install DALL-E\n", 74 | "from dall_e import map_pixels, unmap_pixels, load_model\n", 75 | "dec = load_model(\"https://cdn.openai.com/dall-e/decoder.pkl\", 'cuda') \n" 76 | ], 77 | "execution_count": null, 78 | "outputs": [] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": { 83 | "id": "KAcixx9Z3XYH" 84 | }, 85 | "source": [ 86 | "# ライブラリー・インポート&関数定義\n" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "metadata": { 92 | "id": "piJOg9MY7khd" 93 | }, 94 | "source": [ 95 | "import torch\n", 96 | "import numpy as np\n", 97 | "import torchvision\n", 98 | "import torchvision.transforms.functional as TF\n", 99 | "import torchvision.transforms as T\n", 100 | "import kornia\n", 101 | "import PIL\n", 102 | "import os, io, sys\n", 103 | "import random\n", 104 | "import imageio\n", 105 | "from IPython import display\n", 106 | "from IPython.core.interactiveshell import InteractiveShell\n", 107 | "InteractiveShell.ast_node_interactivity = \"all\"\n", 108 | "from google.colab import output\n", 109 | "import requests\n", 110 | "\n", 111 | "# 初期設定\n", 112 | "im_shape = [512, 512, 3]\n", 113 | "sideX, sideY, channels = im_shape\n", 114 | "target_image_size = sideX\n", 115 | "tau_value = 2.\n", 116 | "\n", 117 | "# 画像表示・保存\n", 118 | "def displ(img):\n", 119 | " img = np.array(img)[:,:,:]\n", 120 | " img = np.transpose(img, (1, 2, 0))\n", 121 | " imageio.imwrite('output.png', np.array(img))\n", 122 | " return display.Image('output.png')\n", 123 | "\n", 124 | "# 画像のランダム切り出し\n", 125 | "def augment(out, cutn=16):\n", 126 | " p_s = []\n", 127 | " for ch in range(cutn):\n", 128 | " sizey = int(torch.zeros(1,).uniform_(.5, .99)*sideY)\n", 129 | " sizex = int(torch.zeros(1,).uniform_(.5, .99)*sideX)\n", 130 | " offsetx = torch.randint(0, sideX - sizex, ())\n", 131 | " offsety = torch.randint(0, sideY - sizey, ())\n", 132 | " apper = out[:, :, offsetx:offsetx + sizex, offsety:offsety + sizey]\n", 133 | " apper = apper + .1*torch.rand(1,1,1,1).cuda()*torch.randn_like(apper, requires_grad=True)\n", 134 | " apper = torch.nn.functional.interpolate(apper, (224,224), mode='bilinear')\n", 135 | " p_s.append(apper)\n", 136 | " into = augs(torch.cat(p_s, 0))\n", 137 | " return into\n", 138 | "\n", 139 | "# 正規化と回転設定\n", 140 | "nom = torchvision.transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711))\n", 141 | "augs = kornia.augmentation.RandomRotation(30).cuda()\n", 142 | "\n", 143 | "# パラメータの設定\n", 144 | "class Pars(torch.nn.Module):\n", 145 | " def __init__(self):\n", 146 | " super(Pars, self).__init__()\n", 147 | " hots = torch.nn.functional.one_hot((torch.arange(0, 8192).to(torch.int64)), num_classes=8192)\n", 148 | " rng = torch.zeros(1, 64*64, 8192).uniform_()\n", 149 | " for i in range(64*64):\n", 150 | " rng[0,i] = hots[[np.random.randint(8191)]]\n", 151 | " rng = rng.permute(0, 2, 1)\n", 152 | " self.normu = torch.nn.Parameter(rng.cuda().view(1, 8192, 64*64))\n", 153 | " \n", 154 | " def forward(self): \n", 155 | " normu = torch.nn.functional.gumbel_softmax(self.normu.reshape(1,64*64,8192), dim=1, tau=tau_value).view(1, 8192, 64, 64)\n", 156 | " return normu \n" 157 | ], 158 | "execution_count": null, 159 | "outputs": [] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": { 164 | "id": "XaocGDQXz3Zx" 165 | }, 166 | "source": [ 167 | "# テキストから画像の生成" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": { 173 | "id": "mGWtvVB-arNH" 174 | }, 175 | "source": [ 176 | "**テキストから特徴ベクトルを抽出**" 177 | ] 178 | }, 179 | { 180 | 
"cell_type": "code", 181 | "metadata": { 182 | "id": "PGBTOiJqWgZ3" 183 | }, 184 | "source": [ 185 | "# テキスト入力\n", 186 | "text_input = 'an armchair in the shape of an avocado'\n", 187 | "\n", 188 | "# テキストを特徴ベクトルに変換\n", 189 | "token = clip.tokenize(text_input) \n", 190 | "text_v = model.encode_text(token.cuda()).detach().clone() " 191 | ], 192 | "execution_count": null, 193 | "outputs": [] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": { 198 | "id": "WztSrRF23Rqg" 199 | }, 200 | "source": [ 201 | "**学習**" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "metadata": { 207 | "id": "NwYNUzzovPEW" 208 | }, 209 | "source": [ 210 | "# パラメータリセット\n", 211 | "latent = Pars().cuda() \n", 212 | "param = [latent.normu] \n", 213 | "optimizer = torch.optim.Adam([{'params': param, 'lr': .01}]) \n", 214 | "\n", 215 | "# images フォルダーリセット\n", 216 | "import os\n", 217 | "import shutil\n", 218 | "if os.path.isdir('images'):\n", 219 | " shutil.rmtree('images')\n", 220 | "os.makedirs('images', exist_ok=True)\n", 221 | "\n", 222 | "# 学習ループ\n", 223 | "for iteration in range(1001):\n", 224 | "\n", 225 | " # --- 順伝播 ---\n", 226 | " # パラメータから画像を生成\n", 227 | " out = unmap_pixels(torch.sigmoid(dec(latent())[:, :3].float()))\n", 228 | " # 画像をランダム切り出し・回転 \n", 229 | " into = augment(out)\n", 230 | " # 画像を正規化\n", 231 | " into = nom((into))\n", 232 | " # 画像から特徴ベクトルを取得\n", 233 | " image_v = model.encode_image(into)\n", 234 | " # テキストと画像の特徴ベクトルのCOS類似度を計算 \n", 235 | " loss = -torch.cosine_similarity(text_v, image_v).mean() \n", 236 | "\n", 237 | " # 逆伝播\n", 238 | " optimizer.zero_grad()\n", 239 | " loss.backward()\n", 240 | " optimizer.step() \n", 241 | "\n", 242 | " # 学習率の調整\n", 243 | " for g in optimizer.param_groups:\n", 244 | " g['lr'] = g['lr']*1.005\n", 245 | " g['lr'] = min(g['lr'], .12)\n", 246 | "\n", 247 | " # ログ表示 \n", 248 | " if iteration % 50 == 0:\n", 249 | " with torch.no_grad():\n", 250 | "\n", 251 | " # 生成画像の表示・保存\n", 252 | " out = unmap_pixels(torch.sigmoid(dec(latent())[:, :3]).float()) \n", 253 | " displ(out.cpu()[0]) \n", 254 | " shutil.copy('output.png', './images/%s.png'%str(int(iteration/50)).zfill(6))\n", 255 | "\n", 256 | " # データ表示\n", 257 | " print('iter = ',iteration)\n", 258 | " for g in optimizer.param_groups:\n", 259 | " print('lr = ', g['lr'])\n", 260 | " print('tau_value = ', tau_value)\n", 261 | " print('loss = ',loss.item())\n", 262 | " print('\\n')\n" 263 | ], 264 | "execution_count": null, 265 | "outputs": [] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": { 270 | "id": "zclMtW3CaSNX" 271 | }, 272 | "source": [ 273 | "# 学習過程の動画作成" 274 | ] 275 | }, 276 | { 277 | "cell_type": "markdown", 278 | "metadata": { 279 | "id": "2zxCTHkJbBD9" 280 | }, 281 | "source": [ 282 | "**mp4動画の作成**" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "metadata": { 288 | "id": "BcIHq0zsY8OC" 289 | }, 290 | "source": [ 291 | "# images フォルダーの最後の画像を5枚コピー\n", 292 | "import shutil\n", 293 | "for i in range(21,26,1):\n", 294 | " shutil.copy('output.png', './images/%s.png'%str(int(i)).zfill(6))\n", 295 | "\n", 296 | "# ouput.mp4を一旦削除\n", 297 | "import os \n", 298 | "if os.path.exists('./output.mp4'):\n", 299 | " os.remove('./output.mp4')\n", 300 | "\n", 301 | "# images フォルダーの画像から動画を生成\n", 302 | "! 
ffmpeg -r 5 -i images/%06d.png -vcodec libx264 -pix_fmt yuv420p output.mp4" 303 | ], 304 | "execution_count": null, 305 | "outputs": [] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": { 310 | "id": "IzoB1G26bFwX" 311 | }, 312 | "source": [ 313 | "**mp4動画の再生**" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "metadata": { 319 | "id": "n1rs5DnwZvuh" 320 | }, 321 | "source": [ 322 | "from IPython.display import HTML\n", 323 | "from base64 import b64encode\n", 324 | " \n", 325 | "mp4 = open('./output.mp4', 'rb').read()\n", 326 | "data_url = 'data:video/mp4;base64,' + b64encode(mp4).decode()\n", 327 | "HTML(f\"\"\"\n", 328 | "\"\"\")" 331 | ], 332 | "execution_count": null, 333 | "outputs": [] 334 | } 335 | ] 336 | } 337 | -------------------------------------------------------------------------------- /DALL_e_sample.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "view-in-github" 8 | }, 9 | "source": [ 10 | "\"Open" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "id": "df134_Y0L9Wv" 17 | }, 18 | "source": [ 19 | "# SetUP" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": { 26 | "id": "ILu1-B-xLPqx" 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "!nvidia-smi -L" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "id": "EKnjNf_TLf5g" 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "!pip install torch==1.7.1+cu101 torchvision==0.8.2+cu101 -f https://download.pytorch.org/whl/torch_stable.html ftfy regex\n", 42 | "!pip install DALL-E\n", 43 | "!pip install ftfy\n", 44 | "!git clone https://github.com/openai/CLIP.git\n", 45 | "%cd /content/CLIP/" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": { 51 | "id": "92iGUGG8MGE2" 52 | }, 53 | "source": [ 54 | "# Import Library & Define" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": { 61 | "id": "qiKjXi57Lic9" 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "import torch\n", 66 | "import numpy as np\n", 67 | "import torchvision\n", 68 | "import torchvision.transforms.functional as TF\n", 69 | "import PIL\n", 70 | "import matplotlib.pyplot as plt\n", 71 | "import os\n", 72 | "import random\n", 73 | "import imageio\n", 74 | "from IPython import display\n", 75 | "from IPython.core.interactiveshell import InteractiveShell\n", 76 | "import glob\n", 77 | "from google.colab import output\n", 78 | "InteractiveShell.ast_node_interactivity = \"all\"\n", 79 | "\n", 80 | "# probably don't mess with this unless you're changing generator size\n", 81 | "im_shape = [512, 512, 3]\n", 82 | "sideX, sideY, channels = im_shape\n", 83 | "\n", 84 | "def displ(img, pre_scaled=True):\n", 85 | " img = np.array(img)[:,:,:]\n", 86 | " img = np.transpose(img, (1, 2, 0))\n", 87 | " if not pre_scaled:\n", 88 | " img = scale(img, 48*4, 32*4)\n", 89 | " imageio.imwrite(str(3) + '.png', np.array(img))\n", 90 | " return display.Image(str(3)+'.png')\n", 91 | "\n", 92 | "def gallery(array, ncols=2):\n", 93 | " nindex, height, width, intensity = array.shape\n", 94 | " nrows = nindex//ncols\n", 95 | " assert nindex == nrows*ncols\n", 96 | " # want result.shape = (height*nrows, width*ncols, intensity)\n", 97 | " result = (array.reshape(nrows, ncols, height, width, intensity)\n", 98 | " .swapaxes(1,2)\n", 99 | " 
.reshape(height*nrows, width*ncols, intensity))\n", 100 | " return result\n", 101 | "\n", 102 | "def card_padded(im, to_pad=3):\n", 103 | " return np.pad(np.pad(np.pad(im, [[1,1], [1,1], [0,0]],constant_values=0), [[2,2], [2,2], [0,0]],constant_values=1),\n", 104 | " [[to_pad,to_pad], [to_pad,to_pad], [0,0]],constant_values=0)\n", 105 | "\n", 106 | "def get_all(img):\n", 107 | " img = np.transpose(img, (0,2,3,1))\n", 108 | " cards = np.zeros((img.shape[0], sideX+12, sideY+12, 3))\n", 109 | " for i in range(len(img)):\n", 110 | " cards[i] = card_padded(img[i])\n", 111 | " print(img.shape)\n", 112 | " cards = gallery(cards)\n", 113 | " imageio.imwrite(str(3) + '.png', np.array(cards))\n", 114 | " return display.Image(str(3)+'.png')" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": { 120 | "id": "kiERo_W-MN0e" 121 | }, 122 | "source": [ 123 | "# Perceptor" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": { 130 | "id": "Yx6ejrn2LnUO" 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "import clip\n", 135 | "clip.available_models()\n", 136 | "\n", 137 | "# Load the model\n", 138 | "perceptor, preprocess = clip.load('ViT-B/32', jit=True)\n", 139 | "perceptor = perceptor.eval()" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": { 145 | "id": "j6SO-LlEMRm2" 146 | }, 147 | "source": [ 148 | "# Generator" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": { 155 | "id": "2X2_bsdnLp7Y" 156 | }, 157 | "outputs": [], 158 | "source": [ 159 | "import io\n", 160 | "import os, sys\n", 161 | "import requests\n", 162 | "import PIL\n", 163 | "import torch\n", 164 | "import torchvision.transforms as T\n", 165 | "import torchvision.transforms.functional as TF\n", 166 | "from dall_e import map_pixels, unmap_pixels, load_model\n", 167 | "\n", 168 | "target_image_size = sideX\n", 169 | "\n", 170 | "def preprocess(img):\n", 171 | " s = min(img.size)\n", 172 | " \n", 173 | " if s < target_image_size:\n", 174 | " raise ValueError(f'min dim for image {s} < {target_image_size}')\n", 175 | " \n", 176 | " r = target_image_size / s\n", 177 | " s = (round(r * img.size[1]), round(r * img.size[0]))\n", 178 | " img = TF.resize(img, s, interpolation=PIL.Image.LANCZOS)\n", 179 | " img = TF.center_crop(img, output_size=2 * [target_image_size])\n", 180 | " img = torch.unsqueeze(T.ToTensor()(img), 0)\n", 181 | " return map_pixels(img)\n", 182 | "\n", 183 | "model = load_model(\"https://cdn.openai.com/dall-e/decoder.pkl\", 'cuda')" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": { 189 | "id": "IrZovePXMUsM" 190 | }, 191 | "source": [ 192 | "# Text input" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": { 199 | "id": "QkRilIfDLtYP" 200 | }, 201 | "outputs": [], 202 | "source": [ 203 | "text_input = \"a beautiful and mysterious castle designed by Escher\" \n", 204 | "tau_value =1.2" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": { 210 | "id": "0QO-DW36MYM4" 211 | }, 212 | "source": [ 213 | "# Latent coordinate" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": { 220 | "id": "FHhEmANLLwKF" 221 | }, 222 | "outputs": [], 223 | "source": [ 224 | "class Pars(torch.nn.Module):\n", 225 | " def __init__(self):\n", 226 | " super(Pars, self).__init__()\n", 227 | " self.normu = torch.nn.Parameter(torch.randn(1, 8192, 64, 64).cuda())\n", 228 
| "\n", 229 | " def forward(self):\n", 230 | " # normu = torch.nn.functional.gumbel_softmax(self.normu.view(1, 8192, -1), dim=-1).view(1, 8192, 64, 64)\n", 231 | " normu = torch.nn.functional.gumbel_softmax(self.normu.view(1, 8192, -1), dim=-1, tau=tau_value).view(1, 8192, 64, 64)\n", 232 | " return normu\n", 233 | "\n", 234 | "lats = Pars().cuda()\n", 235 | "mapper = [lats.normu]\n", 236 | "optimizer = torch.optim.Adam([{'params': mapper, 'lr': .1}])\n", 237 | "eps = 0\n", 238 | "tx = clip.tokenize(text_input)\n", 239 | "t = perceptor.encode_text(tx.cuda()).detach().clone()\n", 240 | "nom = torchvision.transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711))\n", 241 | "\n", 242 | "with torch.no_grad():\n", 243 | " mult = 1\n", 244 | " al = unmap_pixels(torch.sigmoid(model(lats()).cpu().float())).numpy()\n", 245 | " for allls in al:\n", 246 | " displ(allls[:3])\n", 247 | " print('\\n')\n", 248 | " # print(torch.topk(lats().view(1, 8192, -1), k=3, dim=-1))" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": { 254 | "id": "e6rIEvpQMgyd" 255 | }, 256 | "source": [ 257 | "# Train" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "metadata": { 264 | "id": "m4GLJKWaLzIg" 265 | }, 266 | "outputs": [], 267 | "source": [ 268 | "def checkin(loss):\n", 269 | " print('''########################################################## ''',loss, '\\n',itt)\n", 270 | " \n", 271 | " with torch.no_grad():\n", 272 | " al = unmap_pixels(torch.sigmoid(model(lats())[:, :3]).cpu().float()).numpy()\n", 273 | " for allls in al:\n", 274 | " displ(allls)\n", 275 | " display.display(display.Image(str(3)+'.png'))\n", 276 | " print('\\n')\n", 277 | " # the people spoke and they love \"ding\"\n", 278 | " # output.eval_js('new Audio(\"https://freesound.org/data/previews/80/80921_1022651-lq.ogg\").play()')\n", 279 | "\n", 280 | "def ascend_txt():\n", 281 | " out = unmap_pixels(torch.sigmoid(model(lats())[:, :3].float()))\n", 282 | " cutn = 64 # improves quality\n", 283 | " p_s = []\n", 284 | " for ch in range(cutn):\n", 285 | " size = int(sideX*torch.zeros(1,).normal_(mean=.8, std=.3).clip(.5, .98))\n", 286 | " offsetx = torch.randint(0, sideX - size, ())\n", 287 | " offsety = torch.randint(0, sideX - size, ())\n", 288 | " apper = out[:, :, offsetx:offsetx + size, offsety:offsety + size]\n", 289 | " apper = torch.nn.functional.interpolate(apper, (224,224), mode='bilinear')\n", 290 | " p_s.append(apper)\n", 291 | " into = torch.cat(p_s, 0)\n", 292 | " # into = torch.nn.functional.interpolate(out, (224,224), mode='nearest')\n", 293 | " into = nom(into)\n", 294 | " iii = perceptor.encode_image(into)\n", 295 | " llls = lats()\n", 296 | " lat_l = 0\n", 297 | " return [lat_l, 10*-torch.cosine_similarity(t, iii).view(-1, 1).T.mean(1)]\n", 298 | "\n", 299 | "def train(i):\n", 300 | " loss1 = ascend_txt()\n", 301 | " loss = loss1[0] + loss1[1]\n", 302 | " loss = loss.mean()\n", 303 | " optimizer.zero_grad()\n", 304 | " loss.backward()\n", 305 | " optimizer.step()\n", 306 | " \n", 307 | " if itt % 100 == 0:\n", 308 | " checkin(loss1)\n", 309 | " shutil.copy('./3.png', './images/%s.png'%str(int(itt/100)).zfill(6))\n", 310 | "\n", 311 | "import shutil\n", 312 | "\n", 313 | "if os.path.isdir('images'):\n", 314 | " shutil.rmtree('images')\n", 315 | "os.makedirs('images', exist_ok=True)\n", 316 | "\n", 317 | "itt = 0\n", 318 | "for asatreat in range(1100):\n", 319 | " train(itt)\n", 320 | " itt+=1" 321 | ] 322 | }, 323 | { 324 | "cell_type": 
"markdown", 325 | "metadata": { 326 | "id": "tSr7K9KYMkOL" 327 | }, 328 | "source": [ 329 | "# Make movie" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": null, 335 | "metadata": { 336 | "id": "RVpbCMthL33Z" 337 | }, 338 | "outputs": [], 339 | "source": [ 340 | "if os.path.exists('./output.mp4'):\n", 341 | " os.remove('./output.mp4')\n", 342 | "\n", 343 | "!ffmpeg -r 2 -i images/%06d.png -vcodec libx264 -pix_fmt yuv420p output.mp4" 344 | ] 345 | } 346 | ], 347 | "metadata": { 348 | "accelerator": "GPU", 349 | "colab": { 350 | "authorship_tag": "ABX9TyMFIG/v1z8z/bWEAoh9r2qK", 351 | "include_colab_link": true, 352 | "name": "DALL_e_sample", 353 | "provenance": [] 354 | }, 355 | "kernelspec": { 356 | "display_name": "Python 3", 357 | "language": "python", 358 | "name": "python3" 359 | }, 360 | "language_info": { 361 | "codemirror_mode": { 362 | "name": "ipython", 363 | "version": 3 364 | }, 365 | "file_extension": ".py", 366 | "mimetype": "text/x-python", 367 | "name": "python", 368 | "nbconvert_exporter": "python", 369 | "pygments_lexer": "ipython3", 370 | "version": "3.7.9" 371 | } 372 | }, 373 | "nbformat": 4, 374 | "nbformat_minor": 1 375 | } 376 | -------------------------------------------------------------------------------- /SwapAE.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "SwapAE", 7 | "provenance": [], 8 | "authorship_tag": "ABX9TyPjOM3j/DkMSCkMdDsEhQV7", 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "language_info": { 16 | "name": "python" 17 | }, 18 | "accelerator": "GPU" 19 | }, 20 | "cells": [ 21 | { 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "id": "view-in-github", 25 | "colab_type": "text" 26 | }, 27 | "source": [ 28 | "\"Open" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "id": "1fEyCmb3Drl2" 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "# githubからコードをコピー\n", 40 | "! git clone https://github.com/bryandlee/naver-webtoon-faces.git\n", 41 | "%cd naver-webtoon-faces\n", 42 | " \n", 43 | "# 学習済みパラメータのダウンロード\n", 44 | "! pip install --upgrade gdown\n", 45 | "import gdown\n", 46 | "gdown.download('https://drive.google.com/uc?id=1gJ5WPFQIN26xYbujrEAKxG7YduE9S6ch', './checkpoint.zip', quiet=False)\n", 47 | "! 
unzip checkpoint.zip\n", 48 | " \n", 49 | "# resultsフォルダーを作成\n", 50 | "import os\n", 51 | "os.makedirs('results', exist_ok=True)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "source": [ 57 | "# 関数定義\n", 58 | "import os\n", 59 | "import cv2\n", 60 | "import matplotlib.pyplot as plt\n", 61 | "import torch\n", 62 | "import random\n", 63 | "import numpy as np\n", 64 | "from tqdm import tqdm\n", 65 | " \n", 66 | "def load_image(path, size):\n", 67 | " image = image2tensor(cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB))\n", 68 | " \n", 69 | " w, h = image.shape[-2:]\n", 70 | " if w != h:\n", 71 | " crop_size = min(w, h)\n", 72 | " left = (w - crop_size)//2\n", 73 | " right = left + crop_size\n", 74 | " top = (h - crop_size)//2\n", 75 | " bottom = top + crop_size\n", 76 | " image = image[:,:,left:right, top:bottom]\n", 77 | " \n", 78 | " if image.shape[-1] != size:\n", 79 | " image = torch.nn.functional.interpolate(image, (size, size), mode=\"bilinear\", align_corners=True)\n", 80 | " \n", 81 | " return image\n", 82 | " \n", 83 | "def image2tensor(image):\n", 84 | " image = torch.FloatTensor(image).permute(2,0,1).unsqueeze(0)/255.\n", 85 | " return (image-0.5)/0.5\n", 86 | " \n", 87 | "def tensor2image(tensor):\n", 88 | " tensor = tensor.clamp(-1., 1.).detach().squeeze().permute(1,2,0).cpu().numpy()\n", 89 | " return tensor*0.5 + 0.5\n", 90 | " \n", 91 | "def imshow(img, size=5, cmap='jet'):\n", 92 | " plt.figure(figsize=(size,size))\n", 93 | " plt.imshow(img, cmap=cmap)\n", 94 | " plt.axis('off')\n", 95 | " plt.show()\n", 96 | " \n", 97 | "def horizontal_concat(imgs):\n", 98 | " return torch.cat([img.unsqueeze(0) for img in imgs], 3) \n", 99 | " \n", 100 | "device = 'cuda:0'\n", 101 | "image_size = 256\n", 102 | "torch.set_grad_enabled(False)" 103 | ], 104 | "metadata": { 105 | "id": "9Unj3DF1DzBT" 106 | }, 107 | "execution_count": null, 108 | "outputs": [] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "source": [ 113 | "# SwapAEモデルのロード\n", 114 | "from model import Encoder, Generator\n", 115 | " \n", 116 | "ae_model_path = './checkpoint/002000.pt'\n", 117 | " \n", 118 | "encoder = Encoder(32).to(device)\n", 119 | "generator = Generator(32).to(device)\n", 120 | " \n", 121 | "ckpt = torch.load(ae_model_path, map_location=device)\n", 122 | "encoder.load_state_dict(ckpt[\"e_ema\"])\n", 123 | "generator.load_state_dict(ckpt[\"g_ema\"])\n", 124 | " \n", 125 | "encoder.eval()\n", 126 | "generator.eval()\n", 127 | " \n", 128 | "print(f'[SwapAE model loaded] {ae_model_path}')" 129 | ], 130 | "metadata": { 131 | "id": "GK-YnIxcD6ri" 132 | }, 133 | "execution_count": null, 134 | "outputs": [] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "source": [ 139 | "from stylegan2.model import Generator as StyleGAN\n", 140 | " \n", 141 | "stylegan_model_path = './checkpoint/stylegan2-naverwebtoon-800k.pt'\n", 142 | "stylegan_ckpt = torch.load(stylegan_model_path, map_location=device)\n", 143 | " \n", 144 | "latent_dim = stylegan_ckpt['args'].latent\n", 145 | " \n", 146 | "stylegan = StyleGAN(image_size, latent_dim, 8).to(device)\n", 147 | "stylegan.load_state_dict(stylegan_ckpt[\"g_ema\"], strict=False)\n", 148 | "stylegan.eval()\n", 149 | "print(f'[StyleGAN2 generator loaded] {stylegan_model_path}\\n')\n", 150 | " \n", 151 | "truncation = 0.7\n", 152 | "trunc = stylegan.mean_latent(4096).detach().clone()\n", 153 | " \n", 154 | "num_samples = 8\n", 155 | " \n", 156 | "latent = stylegan.get_latent(torch.randn(num_samples, latent_dim, device=device))\n", 157 | "imgs_gen, _ = stylegan([latent],\n", 
158 | " truncation=truncation,\n", 159 | " truncation_latent=trunc,\n", 160 | " input_is_latent=True,\n", 161 | " randomize_noise=True)\n", 162 | " \n", 163 | "print(\"StyleGAN2 generated images:\")\n", 164 | "imshow(tensor2image(horizontal_concat(imgs_gen)), size=20)\n", 165 | " \n", 166 | "structures, textures = encoder(imgs_gen)\n", 167 | "recon_results = generator(structures, textures)\n", 168 | " \n", 169 | "print(\"SwapAE reconstructions:\") \n", 170 | "imshow(tensor2image(horizontal_concat(recon_results)), size=20)\n", 171 | " \n", 172 | "print(\"Swapping results:\") \n", 173 | "swap_results = generator(structures, textures[0].unsqueeze(0).repeat(num_samples,1))\n", 174 | "imshow(tensor2image(horizontal_concat(swap_results)), size=20)" 175 | ], 176 | "metadata": { 177 | "id": "N5ZQWGNpD-2L" 178 | }, 179 | "execution_count": null, 180 | "outputs": [] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "source": [ 185 | "test_image_path = \"./inputs/6.jpg\"\n", 186 | "test_image = load_image(test_image_path, image_size)\n", 187 | " \n", 188 | "num_styles = 5\n", 189 | " \n", 190 | "latent = stylegan.get_latent(torch.randn(num_styles, latent_dim, device=device))\n", 191 | "imgs_gen, _ = stylegan([latent],\n", 192 | " truncation=truncation,\n", 193 | " truncation_latent=trunc,\n", 194 | " input_is_latent=True,\n", 195 | " randomize_noise=True)\n", 196 | " \n", 197 | "inputs = torch.cat([test_image.to(device), imgs_gen])\n", 198 | " \n", 199 | "results = horizontal_concat(inputs.cpu())\n", 200 | " \n", 201 | "structures, target_textures = encoder(inputs)\n", 202 | " \n", 203 | "structure = structures[0].unsqueeze(0).repeat(len(target_textures),1,1,1)\n", 204 | "source_texture = target_textures[0].unsqueeze(0).repeat(len(target_textures),1)\n", 205 | " \n", 206 | "for swap_loc in [1, 5]:\n", 207 | " textures = [source_texture for _ in range(swap_loc)] + [target_textures for _ in range(len(generator.layers) - swap_loc)] \n", 208 | " fake_imgs = generator(structure, textures, noises=0)\n", 209 | " \n", 210 | " results = torch.cat([results, horizontal_concat(fake_imgs).cpu()], dim=2)\n", 211 | " \n", 212 | "imshow(tensor2image(results), 23)\n", 213 | " \n", 214 | "cv2.imwrite('./results/out.jpg', cv2.cvtColor(255*tensor2image(results), cv2.COLOR_BGR2RGB))" 215 | ], 216 | "metadata": { 217 | "id": "1L8XDxnSEG5i" 218 | }, 219 | "execution_count": null, 220 | "outputs": [] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "source": [ 225 | "import imageio\n", 226 | " \n", 227 | "swap_loc = 1\n", 228 | " \n", 229 | "num_anchors = 10\n", 230 | "num_interp = 20\n", 231 | "anchors = stylegan.get_latent(torch.randn(num_anchors, 512, device=device))\n", 232 | " \n", 233 | "photo_input = test_image.to(device)\n", 234 | "ori_structure, ori_textures = encoder(photo_input)\n", 235 | " \n", 236 | "black_image = torch.zeros_like(test_image)\n", 237 | " \n", 238 | "with imageio.get_writer('results/exploration.gif', mode='I', duration=0.05, palettesize=256, subrectangles=False) as writer:\n", 239 | " \n", 240 | " for i in tqdm(range(num_anchors-1)):\n", 241 | " initial = anchors[i]\n", 242 | " final = anchors[i+1]\n", 243 | " \n", 244 | " for j in range(num_interp):\n", 245 | " latent = (float(num_interp-j) * initial + float(j) * final)/num_interp\n", 246 | " \n", 247 | " gen_img, _ = stylegan([latent],\n", 248 | " truncation=truncation,\n", 249 | " truncation_latent=trunc,\n", 250 | " input_is_latent=True,\n", 251 | " randomize_noise=True)\n", 252 | " \n", 253 | " _, target_texture = encoder(gen_img)\n", 254 
| " textures = [ori_textures for _ in range(swap_loc)] + [target_texture for _ in range(len(generator.layers) - swap_loc)]\n", 255 | " swap_img = generator(ori_structure, textures, noises=0)\n", 256 | " \n", 257 | " result = torch.cat([black_image, gen_img.cpu()], 3)\n", 258 | " result = torch.cat([\n", 259 | " result,\n", 260 | " torch.cat([test_image, swap_img.cpu()], 3)\n", 261 | " ], 2)\n", 262 | " \n", 263 | " writer.append_data((tensor2image(result)*255).astype(np.uint8))\n", 264 | " \n", 265 | "# output.mp4をリセット\n", 266 | "if os.path.exists('./output.mp4'):\n", 267 | " os.remove('./output.mp4')\n", 268 | " \n", 269 | "# GIFからmp4を作成\n", 270 | "! ffmpeg -i results/exploration.gif -movflags faststart -pix_fmt yuv420p -vf \"scale=trunc(iw/2)*2:trunc(ih/2)*2\" output.mp4" 271 | ], 272 | "metadata": { 273 | "id": "iWz354d1ERBa" 274 | }, 275 | "execution_count": null, 276 | "outputs": [] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "source": [ 281 | "# mp4動画の再生\n", 282 | "from IPython.display import HTML\n", 283 | "from base64 import b64encode\n", 284 | " \n", 285 | "mp4 = open('./output.mp4', 'rb').read()\n", 286 | "data_url = 'data:video/mp4;base64,' + b64encode(mp4).decode()\n", 287 | "HTML(f\"\"\"\n", 288 | "\"\"\")" 291 | ], 292 | "metadata": { 293 | "id": "MCD2bnp_EkCb" 294 | }, 295 | "execution_count": null, 296 | "outputs": [] 297 | } 298 | ] 299 | } -------------------------------------------------------------------------------- /VideoPose3D.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "view-in-github" 8 | }, 9 | "source": [ 10 | "\"Open" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "id": "vcUFywJRYlk0" 17 | }, 18 | "source": [ 19 | "# **Install Pytorch & Caffe2**" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": { 26 | "id": "yBHHFFzAagmJ" 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "!wget https://anaconda.org/pytorch/pytorch/1.2.0/download/linux-64/pytorch-1.2.0-py3.6_cuda10.0.130_cudnn7.6.2_0.tar.bz2\n", 31 | "!tar xvjf pytorch-1.2.0-py3.6_cuda10.0.130_cudnn7.6.2_0.tar.bz2\n", 32 | "!cp -r lib/python3.6/site-packages/* /usr/local/lib/python3.6/dist-packages/" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": { 38 | "id": "qECOVZuJZM5i" 39 | }, 40 | "source": [ 41 | "# check if Caffe2 was build" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "id": "3SQkqTLzbjWC" 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "# To check if Caffe2 build was successful\n", 53 | "!python -c 'from caffe2.python import core' 2>/dev/null && echo \"Success\" || echo \"Failure\"\n", 54 | "\n", 55 | "# To check if Caffe2 GPU build was successful\n", 56 | "!python -c 'from caffe2.python import workspace; print(workspace.NumCudaDevices())'" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": { 62 | "id": "iDxNoOIgZn4y" 63 | }, 64 | "source": [ 65 | "# Install COCO Dataset " 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": { 72 | "id": "PoEjPt55bqix" 73 | }, 74 | "outputs": [], 75 | "source": [ 76 | "!apt-get install python-dev\n", 77 | "!pip install cython\n", 78 | "!pip install pycocotools\n", 79 | "!git clone https://github.com/cocodataset/cocoapi.git\n", 80 | "!cd cocoapi/PythonAPI && make install\n", 81 | "\n", 82 | "import 
os\n", 83 | "os.environ['COCOAPI'] = \":/content/cocoapi\"" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": { 89 | "id": "RAVpAn6EZv3b" 90 | }, 91 | "source": [ 92 | "# Install Detectron" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": { 99 | "id": "kZQYD_SKbw0O" 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "!git clone https://github.com/facebookresearch/detectron\n", 104 | "!pip install -r detectron/requirements.txt\n", 105 | "!cd detectron && make\n", 106 | "!python detectron/detectron/tests/test_spatial_narrow_as_op.py" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": { 112 | "id": "FQOxtKmWZ5Ma" 113 | }, 114 | "source": [ 115 | "# Install VideoPose3D & Copy Video Script to Detectron Tools Folder" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": { 122 | "id": "6vRI4Rn3js85" 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "#copy file from VideoPose3d\n", 127 | "!git clone https://github.com/facebookresearch/VideoPose3D\n", 128 | "!cp VideoPose3D/inference/infer_video.py detectron/tools/infer_video.py" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": { 134 | "id": "GSDS81gsaNex" 135 | }, 136 | "source": [ 137 | "# Download Pretrained Human3.6m Coco Model" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": { 144 | "id": "av6RLcyPmuSH" 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "!mkdir VideoPose3D/checkpoint\n", 149 | "os.chdir('VideoPose3D/checkpoint')\n", 150 | "!wget https://dl.fbaipublicfiles.com/video-pose-3d/pretrained_h36m_detectron_coco.bin\n", 151 | "os.chdir('../..')" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": { 157 | "id": "popZ3evNaffh" 158 | }, 159 | "source": [ 160 | "# Download Youtube Video for 3D Pose Estimation (specify YOUTUBE_ID)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": { 167 | "id": "rDq3zWIfTCaj" 168 | }, 169 | "outputs": [], 170 | "source": [ 171 | "YOUTUBE_ID ='cgHZJiyWKIY'\n", 172 | "\n", 173 | "\n", 174 | "!pip install -q youtube-dl\n", 175 | "#download video\n", 176 | "!youtube-dl -f 'bestvideo[ext=mp4]' --output \"youtube.%(ext)s\" https://www.youtube.com/watch?v=$YOUTUBE_ID\n", 177 | "\n", 178 | "!mkdir videos \n", 179 | " \n", 180 | "# cut the 14 seconds\n", 181 | "!ffmpeg -y -loglevel info -i youtube.mp4 -ss 00:00:48 -t 00:00:14 videos/video.mp4\n", 182 | "\n", 183 | "# recommendet alternation to 50fps \n", 184 | "#!ffmpeg -i videos/video.mp4 -filter \"minterpolate='fps=50'\" -crf 0 videos/video50fps.mp4" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": { 190 | "id": "2mN3zRgbbKxK" 191 | }, 192 | "source": [ 193 | "# Compute 2D Coordinates with Detectron" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "metadata": { 200 | "id": "2inleQL4Y4qg" 201 | }, 202 | "outputs": [], 203 | "source": [ 204 | "!mkdir output\n", 205 | "!python detectron/tools/infer_video.py \\\n", 206 | " --cfg detectron/configs/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_s1x.yaml \\\n", 207 | " --output-dir output \\\n", 208 | " --image-ext mp4 \\\n", 209 | " --wts https://dl.fbaipublicfiles.com/detectron/37698009/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_s1x.yaml.08_45_57.YkrJgP6O/output/train/keypoints_coco_2014_train:keypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl 
\\\n", 210 | " videos\n", 211 | " \n", 212 | "#\t --wts https://dl.fbaipublicfiles.com/detectron/37698009/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_s1x.yaml.08_45_57.YkrJgP6O/output/train/keypoints_coco_2014_train:keypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl \\\n", 213 | "\n", 214 | " " 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": { 220 | "id": "lkedL1VTb_go" 221 | }, 222 | "source": [ 223 | "# Prepare Detectron Output to fit VideoPose3D Input" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "metadata": { 230 | "id": "QHrkZReqb2er" 231 | }, 232 | "outputs": [], 233 | "source": [ 234 | "\n", 235 | "!mkdir ./VideoPose3D/data/detectronoutput\n", 236 | "!cp output/video.mp4.npz VideoPose3D/data/detectronoutput/video.mp4.npz\n", 237 | "os.chdir('VideoPose3D/data') # This script must be launched from the \"data\" directory\n", 238 | "!python prepare_data_2d_custom.py -i detectronoutput -o myvideos\n", 239 | "os.chdir('../../')" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": { 245 | "id": "ktOkXbNbbiH4" 246 | }, 247 | "source": [ 248 | "# Compute 3D Joints with VideoPose3D" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": { 255 | "id": "J7Kp5czTfdRZ" 256 | }, 257 | "outputs": [], 258 | "source": [ 259 | "#os.chdir('../')\n", 260 | "\n", 261 | "#os.chdir('checkpoint')\n", 262 | "#!wget https://dl.fbaipublicfiles.com/video-pose-3d/pretrained_h36m_cpn.bin\n", 263 | "##!wget https://dl.fbaipublicfiles.com/video-pose-3d/d-pt-243.bin\n", 264 | "\n", 265 | "!cp ./videos/video.mp4 VideoPose3D/video.mp4\n", 266 | "os.chdir('VideoPose3D')\n", 267 | "\n", 268 | "\n", 269 | "#!python run.py -d custom -k MyCustomDatasetName -arc 3,3,3,3,3 -c checkpoint --evaluate pretrained_h36m_detectron_coco.bin --render --viz-subject S1 --viz-action custom --viz-camera 0 --viz-export My3dDataExport --viz-size 6\n", 270 | "#!python run.py -d custom -k MyCustomDatasetName -arc 3,3,3,3,3 -c checkpoint --evaluate pretrained_h36m_detectron_coco.bin --render --viz-subject video.mp4 --viz-action custom --viz-camera 0 --viz-video video.mp4 --viz-output output.mp4 --viz-size 6\n", 271 | "\n", 272 | "#!python run.py -e 80 -k gt -arc 3,3,3,3,3\n", 273 | "\n", 274 | "\n", 275 | "#!python run.py -d custom -k myvideos -arc 3,3,3,3,3 -c checkpoint --evaluate pretrained_h36m_detectron_coco.bin --viz-export My3dDataExport\n", 276 | "\n", 277 | "#!python run.py -d custom -k myvideos -arc 3,3,3,3,3 -c checkpoint --evaluate d-pt-243.bin --render --viz-subject video.mp4 --viz-action Directions --viz-video video.mp4 --viz-camera 0 --viz-output output_scater.mp4 --viz-size 5 --viz-downsample 1 --viz-skip 9\n", 278 | "\n", 279 | "#!python run.py -d custom -k myvideos -arc 3,3,3,3,3 -c checkpoint --evaluate pretrained_h36m_detectron_coco.bin --render --viz-subject video.mp4 --viz-action custom --viz-camera 0 --viz-video video.mp4 --viz-output output.mp4 --viz-size 6\n", 280 | "!python run.py -d custom -k myvideos -arc 3,3,3,3,3 -c checkpoint --evaluate pretrained_h36m_detectron_coco.bin --render --viz-subject video.mp4 --viz-action custom --viz-camera 0 --viz-video video.mp4 --viz-output output.mp4 --viz-export outputfile --viz-size 6\n", 281 | "\n", 282 | "#working version \n", 283 | "#!python run.py -k gt -arc 3,3,3,3,3 -c checkpoint --evaluate pretrained_h36m_cpn.bin --viz-export My3dDataExport --viz-output output.mp4" 284 | ] 285 | }, 286 | { 287 | "cell_type": 
"markdown", 288 | "metadata": { 289 | "id": "Vd_nbXLfcJzY" 290 | }, 291 | "source": [ 292 | "# Display Results - Joint Export " 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": null, 298 | "metadata": { 299 | "id": "mwHtOf9EE3Lg" 300 | }, 301 | "outputs": [], 302 | "source": [ 303 | "#inspect joints export \n", 304 | "\n", 305 | "import numpy as np\n", 306 | "data = np.load('outputfile.npy')\n", 307 | "lst = data\n", 308 | "for item in lst:\n", 309 | " print(item)\n", 310 | " \n" 311 | ] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "metadata": { 316 | "id": "wjDTuELfcVuZ" 317 | }, 318 | "source": [ 319 | "# Display Results - Joint Video" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": null, 325 | "metadata": { 326 | "id": "rbImoAtIRxdu" 327 | }, 328 | "outputs": [], 329 | "source": [ 330 | "#display video\n", 331 | "def show_local_mp4_video(file_name, width=640, height=480):\n", 332 | " import io\n", 333 | " import base64\n", 334 | " from IPython.display import HTML\n", 335 | " video_encoded = base64.b64encode(io.open(file_name, 'rb').read())\n", 336 | " return HTML(data=''''''.format(width, height, video_encoded.decode('ascii')))\n", 339 | "\n", 340 | "show_local_mp4_video('output.mp4', width=960, height=720)" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": { 346 | "id": "Yhw2Pe_HceM7" 347 | }, 348 | "source": [ 349 | "#Download Joint Export & Video" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": null, 355 | "metadata": { 356 | "id": "Ljh6cuahb7kW" 357 | }, 358 | "outputs": [], 359 | "source": [ 360 | "from google.colab import files\n", 361 | "\n", 362 | "\n", 363 | "files.download('output.mp4')\n", 364 | "files.download('outputfile.npy')" 365 | ] 366 | } 367 | ], 368 | "metadata": { 369 | "accelerator": "GPU", 370 | "colab": { 371 | "collapsed_sections": [], 372 | "include_colab_link": true, 373 | "name": "VideoPose3D", 374 | "provenance": [], 375 | "toc_visible": true 376 | }, 377 | "kernelspec": { 378 | "display_name": "Python 3", 379 | "language": "python", 380 | "name": "python3" 381 | }, 382 | "language_info": { 383 | "codemirror_mode": { 384 | "name": "ipython", 385 | "version": 3 386 | }, 387 | "file_extension": ".py", 388 | "mimetype": "text/x-python", 389 | "name": "python", 390 | "nbconvert_exporter": "python", 391 | "pygments_lexer": "ipython3", 392 | "version": "3.7.9" 393 | } 394 | }, 395 | "nbformat": 4, 396 | "nbformat_minor": 1 397 | } 398 | -------------------------------------------------------------------------------- /DeepDream.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "view-in-github" 8 | }, 9 | "source": [ 10 | "\"Open" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "id": "F4RBFfIWNbG0" 17 | }, 18 | "source": [ 19 | "## セットアップ\n", 20 | "ライブラリーの読み込み、クラスと関数の定義" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": { 27 | "id": "qRScWg_VNqvj" 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "import tensorflow as tf\n", 32 | "import numpy as np\n", 33 | "import matplotlib as mpl\n", 34 | "import IPython.display as display\n", 35 | "import PIL.Image\n", 36 | "from tensorflow.keras.preprocessing import image\n", 37 | "\n", 38 | "# Input image\n", 39 | "def input(image, max_dim=None):\n", 40 | " img = PIL.Image.open(image)\n", 41 
| " if max_dim:\n", 42 | " img.thumbnail((max_dim, max_dim))\n", 43 | " return np.array(img)\n", 44 | "\n", 45 | "# Normalize an image\n", 46 | "def deprocess(img):\n", 47 | " img = 255*(img + 1.0)/2.0\n", 48 | " return tf.cast(img, tf.uint8)\n", 49 | "\n", 50 | "# Display an image\n", 51 | "def show(img):\n", 52 | " display.display(PIL.Image.fromarray(np.array(img)))\n", 53 | "\n", 54 | "# Calc loss\n", 55 | "def calc_loss(img, model):\n", 56 | " img_batch = tf.expand_dims(img, axis=0)\n", 57 | " layer_activations = model(img_batch)\n", 58 | " if len(layer_activations) == 1:\n", 59 | " layer_activations = [layer_activations]\n", 60 | "\n", 61 | " losses = []\n", 62 | " for act in layer_activations:\n", 63 | " loss = tf.math.reduce_mean(act)\n", 64 | " losses.append(loss)\n", 65 | "\n", 66 | " return tf.reduce_sum(losses)\n", 67 | "\n", 68 | "# Class DeepDream\n", 69 | "class DeepDream(tf.Module):\n", 70 | " def __init__(self, model):\n", 71 | " self.model = model\n", 72 | "\n", 73 | " @tf.function(\n", 74 | " input_signature=(\n", 75 | " tf.TensorSpec(shape=[None,None,3], dtype=tf.float32),\n", 76 | " tf.TensorSpec(shape=[], dtype=tf.int32),\n", 77 | " tf.TensorSpec(shape=[], dtype=tf.float32),)\n", 78 | " )\n", 79 | " def __call__(self, img, steps, step_size):\n", 80 | " loss = tf.constant(0.0)\n", 81 | " for n in tf.range(steps):\n", 82 | " with tf.GradientTape() as tape:\n", 83 | " tape.watch(img)\n", 84 | " loss = calc_loss(img, self.model)\n", 85 | "\n", 86 | " gradients = tape.gradient(loss, img)\n", 87 | " gradients /= tf.math.reduce_std(gradients) + 1e-8 \n", 88 | " \n", 89 | " img = img + gradients*step_size\n", 90 | " img = tf.clip_by_value(img, -1, 1)\n", 91 | "\n", 92 | " return loss, img\n", 93 | "\n", 94 | "# run_simple\n", 95 | "def run_deep_dream_simple(img, steps=100, step_size=0.01):\n", 96 | " img = tf.keras.applications.inception_v3.preprocess_input(img)\n", 97 | " img = tf.convert_to_tensor(img)\n", 98 | " step_size = tf.convert_to_tensor(step_size)\n", 99 | " steps_remaining = steps\n", 100 | " step = 0\n", 101 | " while steps_remaining:\n", 102 | " if steps_remaining>100:\n", 103 | " run_steps = tf.constant(100)\n", 104 | " else:\n", 105 | " run_steps = tf.constant(steps_remaining)\n", 106 | " steps_remaining -= run_steps\n", 107 | " step += run_steps\n", 108 | "\n", 109 | " loss, img = deepdream(img, run_steps, tf.constant(step_size))\n", 110 | "\n", 111 | " result = deprocess(img) \n", 112 | " return result\n", 113 | "\n", 114 | "# run_octave\n", 115 | "def octave(original_img):\n", 116 | " OCTAVE_SCALE = 1.30\n", 117 | " img = tf.constant(np.array(original_img))\n", 118 | " base_shape = tf.shape(img)[:-1]\n", 119 | " float_base_shape = tf.cast(base_shape, tf.float32)\n", 120 | "\n", 121 | " for n in range(-2, 3):\n", 122 | " new_shape = tf.cast(float_base_shape*(OCTAVE_SCALE**n), tf.int32)\n", 123 | " img = tf.image.resize(img, new_shape).numpy()\n", 124 | " img = run_deep_dream_simple(img=img, steps=50, step_size=0.01)\n", 125 | " img = tf.image.resize(img, base_shape) \n", 126 | " img = tf.image.convert_image_dtype(img/255.0, dtype=tf.uint8)\n", 127 | " return img" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": { 133 | "id": "f0WWczheOwDf" 134 | }, 135 | "source": [ 136 | "# サンプルデータのダウンロード" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": { 143 | "id": "5VsZijq0M7kW" 144 | }, 145 | "outputs": [], 146 | "source": [ 147 | "!git clone https://github.com/cedro3/Sample.git" 148 | ] 149 | 
}, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": { 153 | "id": "O2oFtzu-ETlo" 154 | }, 155 | "source": [ 156 | "## モデルの作成" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": { 163 | "id": "VkHkYEqbDC7E" 164 | }, 165 | "outputs": [], 166 | "source": [ 167 | "# ベースモデル InceptionV3 のダウンロード\n", 168 | "base_model = tf.keras.applications.InceptionV3(include_top=False, weights='imagenet')\n", 169 | "\n", 170 | "# Maximize the activations of these layers\n", 171 | "names = ['mixed3', 'mixed5']\n", 172 | "layers = [base_model.get_layer(name).output for name in names]\n", 173 | "\n", 174 | "# Create the feature extraction model\n", 175 | "dream_model = tf.keras.Model(inputs=base_model.input, outputs=layers)\n", 176 | "\n", 177 | "# make model\n", 178 | "deepdream = DeepDream(dream_model)" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": { 184 | "id": "B-VFUjetXFi-" 185 | }, 186 | "source": [ 187 | "# octave バージョン" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": { 194 | "id": "T39U0ZWSNDbi" 195 | }, 196 | "outputs": [], 197 | "source": [ 198 | "# 静止画をDeepDreamに変換(octave)\n", 199 | "original_img = input('./Sample/animal_pic/dog.png')\n", 200 | "img = octave(original_img)\n", 201 | "show(original_img)\n", 202 | "show(img)" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "metadata": { 209 | "id": "OfT1RmgEUH9d" 210 | }, 211 | "outputs": [], 212 | "source": [ 213 | "# ビデオを静止画に変換\n", 214 | "import os\n", 215 | "import shutil\n", 216 | "import cv2\n", 217 | "\n", 218 | "# 既にimagesフォルダーがあれば削除\n", 219 | "if os.path.isdir('images'):\n", 220 | " shutil.rmtree('images')\n", 221 | "\n", 222 | "os.makedirs('images', exist_ok=True)\n", 223 | " \n", 224 | "def video_2_images(video_file= './Sample/video/elephant.mp4', # ビデオの指定\n", 225 | " image_dir='./images/', \n", 226 | " image_file='%s.png'):\n", 227 | " \n", 228 | " # Initial setting\n", 229 | " i = 0\n", 230 | " interval = 6\n", 231 | " length = 300 # 最大フレーム数\n", 232 | " \n", 233 | " cap = cv2.VideoCapture(video_file)\n", 234 | " while(cap.isOpened()):\n", 235 | " flag, frame = cap.read() \n", 236 | " if flag == False: \n", 237 | " break\n", 238 | " if i == length*interval:\n", 239 | " break\n", 240 | " if i % interval == 0: \n", 241 | " cv2.imwrite(image_dir+image_file % str(int(i/interval)).zfill(6), frame)\n", 242 | " i += 1 \n", 243 | " cap.release() \n", 244 | " \n", 245 | "def main():\n", 246 | " video_2_images()\n", 247 | " \n", 248 | "if __name__ == '__main__':\n", 249 | " main()" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "metadata": { 256 | "id": "pRTrM2SZyoSh" 257 | }, 258 | "outputs": [], 259 | "source": [ 260 | "# 静止画をDeepDream画像へ変換(octave)\n", 261 | "import glob\n", 262 | "from tqdm import tqdm\n", 263 | "\n", 264 | "files=[]\n", 265 | "for name in sorted(glob.glob('./images/*.png')):\n", 266 | " files.append(name)\n", 267 | "\n", 268 | "for file in tqdm(files):\n", 269 | " original_img=input(file)\n", 270 | " dream_img = octave(original_img)\n", 271 | " PIL.Image.fromarray(np.array(dream_img)).save(file) " 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": { 278 | "id": "x8jrxvrgcJ2Q" 279 | }, 280 | "outputs": [], 281 | "source": [ 282 | "# DeepDream画像をmp4に変換\n", 283 | "!ffmpeg -r 6 -i images/%06d.png -vcodec libx264 -pix_fmt yuv420p output.mp4" 284 | ] 285 | }, 286 | { 287 | 
"cell_type": "code", 288 | "execution_count": null, 289 | "metadata": { 290 | "id": "aVLTmQL7cJ_8" 291 | }, 292 | "outputs": [], 293 | "source": [ 294 | "# mp4動画の再生\n", 295 | "from IPython.display import HTML\n", 296 | "from base64 import b64encode\n", 297 | "\n", 298 | "mp4 = open('./output.mp4', 'rb').read()\n", 299 | "data_url = 'data:video/mp4;base64,' + b64encode(mp4).decode()\n", 300 | "HTML(f\"\"\"\n", 301 | "\"\"\")" 304 | ] 305 | }, 306 | { 307 | "cell_type": "markdown", 308 | "metadata": { 309 | "id": "pQ5t215rUPlS" 310 | }, 311 | "source": [ 312 | "# simple バージョン" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": null, 318 | "metadata": { 319 | "id": "_xWdtQ_IR7CR" 320 | }, 321 | "outputs": [], 322 | "source": [ 323 | "# 静止画をDeepDreamに変換(simple)\n", 324 | "original_img = input('./Sample/animal_pic/dog.png')\n", 325 | "img = run_deep_dream_simple(original_img)\n", 326 | "show(original_img)\n", 327 | "show(img)" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": null, 333 | "metadata": { 334 | "id": "bG_RI44NTLhE" 335 | }, 336 | "outputs": [], 337 | "source": [ 338 | "# ビデオを静止画に変換\n", 339 | "import os\n", 340 | "import shutil\n", 341 | "import cv2\n", 342 | "\n", 343 | "# 既にimagesフォルダーがあれば削除\n", 344 | "if os.path.isdir('images'):\n", 345 | " shutil.rmtree('images')\n", 346 | "\n", 347 | "os.makedirs('images', exist_ok=True)\n", 348 | " \n", 349 | "def video_2_images(video_file= './Sample/video/elephant.mp4', # ビデオの指定\n", 350 | " image_dir='./images/', \n", 351 | " image_file='%s.png'):\n", 352 | " \n", 353 | " # Initial setting\n", 354 | " i = 0\n", 355 | " interval = 6\n", 356 | " length = 300 # 最大フレーム数\n", 357 | " \n", 358 | " cap = cv2.VideoCapture(video_file)\n", 359 | " while(cap.isOpened()):\n", 360 | " flag, frame = cap.read() \n", 361 | " if flag == False: \n", 362 | " break\n", 363 | " if i == length*interval:\n", 364 | " break\n", 365 | " if i % interval == 0: \n", 366 | " cv2.imwrite(image_dir+image_file % str(int(i/interval)).zfill(6), frame)\n", 367 | " i += 1 \n", 368 | " cap.release() \n", 369 | " \n", 370 | "def main():\n", 371 | " video_2_images()\n", 372 | " \n", 373 | "if __name__ == '__main__':\n", 374 | " main()" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": null, 380 | "metadata": { 381 | "id": "gcyGJkVFtDsK" 382 | }, 383 | "outputs": [], 384 | "source": [ 385 | "# 静止画をDeepDream画像へ変換(simple)\n", 386 | "import glob\n", 387 | "from tqdm import tqdm\n", 388 | "\n", 389 | "files=[]\n", 390 | "for name in sorted(glob.glob('./images/*.png')):\n", 391 | " files.append(name)\n", 392 | "\n", 393 | "for file in tqdm(files):\n", 394 | " original_img=input(file)\n", 395 | " dream_img = run_deep_dream_simple(img=original_img, steps=100, step_size=0.01)\n", 396 | " PIL.Image.fromarray(np.array(dream_img)).save(file) " 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": null, 402 | "metadata": { 403 | "id": "macVyBdETipx" 404 | }, 405 | "outputs": [], 406 | "source": [ 407 | "# DeepDream画像をmp4に変換\n", 408 | "!ffmpeg -r 6 -i images/%06d.png -vcodec libx264 -pix_fmt yuv420p output2.mp4" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": null, 414 | "metadata": { 415 | "id": "m8jIlhi9Tt-H" 416 | }, 417 | "outputs": [], 418 | "source": [ 419 | "# mp4動画の再生\n", 420 | "from IPython.display import HTML\n", 421 | "from base64 import b64encode\n", 422 | "\n", 423 | "mp4 = open('./output2.mp4', 'rb').read()\n", 424 | "data_url = 'data:video/mp4;base64,' 
+ b64encode(mp4).decode()\n", 425 | "HTML(f\"\"\"\n", 426 | "\"\"\")" 429 | ] 430 | } 431 | ], 432 | "metadata": { 433 | "accelerator": "GPU", 434 | "colab": { 435 | "collapsed_sections": [], 436 | "include_colab_link": true, 437 | "name": "DeepDream", 438 | "provenance": [] 439 | }, 440 | "kernelspec": { 441 | "display_name": "Python 3", 442 | "language": "python", 443 | "name": "python3" 444 | }, 445 | "language_info": { 446 | "codemirror_mode": { 447 | "name": "ipython", 448 | "version": 3 449 | }, 450 | "file_extension": ".py", 451 | "mimetype": "text/x-python", 452 | "name": "python", 453 | "nbconvert_exporter": "python", 454 | "pygments_lexer": "ipython3", 455 | "version": "3.7.9" 456 | } 457 | }, 458 | "nbformat": 4, 459 | "nbformat_minor": 1 460 | } 461 | -------------------------------------------------------------------------------- /infinite_nature_demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "infinite_nature_demo", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "display_name": "Python 3", 13 | "name": "python3" 14 | }, 15 | "accelerator": "GPU" 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "markdown", 20 | "metadata": { 21 | "id": "view-in-github", 22 | "colab_type": "text" 23 | }, 24 | "source": [ 25 | "\"Open" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": { 31 | "id": "EvBQf1UZNu0j" 32 | }, 33 | "source": [ 34 | "# セットアップ" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "metadata": { 40 | "id": "hypYi7EJNxJ6", 41 | "cellView": "form" 42 | }, 43 | "source": [ 44 | "#@title download model & install library\n", 45 | "%%shell\n", 46 | "echo Fetching code from github...\n", 47 | "\n", 48 | "apt install subversion\n", 49 | "svn export --force https://github.com/google-research/google-research/trunk/infinite_nature\n", 50 | "\n", 51 | "echo\n", 52 | "echo Fetching trained model weights...\n", 53 | "rm -f autocruise_input*.pkl\n", 54 | "rm -f ckpt.tar.gz\n", 55 | "rm -rf ckpt\n", 56 | "wget https://storage.googleapis.com/gresearch/infinite_nature_public/autocruise_input1.pkl\n", 57 | "wget https://storage.googleapis.com/gresearch/infinite_nature_public/autocruise_input2.pkl\n", 58 | "wget https://storage.googleapis.com/gresearch/infinite_nature_public/autocruise_input3.pkl\n", 59 | "wget https://storage.googleapis.com/gresearch/infinite_nature_public/ckpt.tar.gz\n", 60 | "tar -xf ckpt.tar.gz\n", 61 | "\n", 62 | "echo\n", 63 | "echo Installing required dependencies...\n", 64 | "pip install -r infinite_nature/requirements.txt\n", 65 | "\n", 66 | "echo\n", 67 | "echo Fetching tf_mesh_renderer and compiling kernels...\n", 68 | "cd infinite_nature\n", 69 | "rm -rf tf_mesh_renderer\n", 70 | "source download_tf_mesh_renderer.sh\n", 71 | "\n", 72 | "echo Done.\n" 73 | ], 74 | "execution_count": null, 75 | "outputs": [] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "metadata": { 80 | "id": "08MXs7cBPDwO", 81 | "cellView": "form" 82 | }, 83 | "source": [ 84 | "#@title setting path\n", 85 | "import tensorflow as tf\n", 86 | "import os\n", 87 | "import sys\n", 88 | "\n", 89 | "# Make sure dynamic linking can find tensorflow libraries.\n", 90 | "os.system('ldconfig ' + tf.sysconfig.get_lib())\n", 91 | "\n", 92 | "# Make sure python can find our libraries.\n", 93 | "sys.path.append('infinite_nature')\n", 94 | 
"sys.path.append('infinite_nature/tf_mesh_renderer/mesh_renderer')\n", 95 | "\n", 96 | "# Make sure the mesh renderer library knows where to load its .so file from.\n", 97 | "os.environ['TEST_SRCDIR'] = 'infinite_nature'" 98 | ], 99 | "execution_count": null, 100 | "outputs": [] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "metadata": { 105 | "id": "nvJVkxMbGy6D", 106 | "cellView": "form" 107 | }, 108 | "source": [ 109 | "#@title build model & difine function\n", 110 | "import imageio\n", 111 | "import IPython\n", 112 | "import numpy as np\n", 113 | "import pickle\n", 114 | "import tensorflow as tf\n", 115 | "import tensorflow_hub as hub\n", 116 | "\n", 117 | "import config\n", 118 | "import fly_camera\n", 119 | "import infinite_nature_lib\n", 120 | "from PIL import Image ###\n", 121 | "\n", 122 | "# Build model and restore checkpoint.\n", 123 | "config.set_training(False)\n", 124 | "model_path = \"ckpt/model.ckpt-6935893\"\n", 125 | "render_refine, style_encoding = infinite_nature_lib.load_model(model_path)\n", 126 | "initial_rgbds = [\n", 127 | " pickle.load(open(\"autocruise_input1.pkl\", \"rb\"))['input_rgbd'],\n", 128 | " pickle.load(open(\"autocruise_input2.pkl\", \"rb\"))['input_rgbd'],\n", 129 | " pickle.load(open(\"autocruise_input3.pkl\", \"rb\"))['input_rgbd']]\n", 130 | "\n", 131 | "# Code for an autopilot demo. We expose two functions that will be invoked\n", 132 | "# from an HTML/JS frontend: reset and step.\n", 133 | "\n", 134 | "# The state that we need to remember while flying:\n", 135 | "state = {\n", 136 | " 'intrinsics': None,\n", 137 | " 'pose': None,\n", 138 | " 'rgbd': None,\n", 139 | " 'start_rgbd': None,\n", 140 | " 'style_noise': None,\n", 141 | " 'next_pose_function': None,\n", 142 | " 'direction_offset': None, # Direction controlled by user's mouse clicks.\n", 143 | "}\n", 144 | "\n", 145 | "def current_image_as_png():\n", 146 | " imgdata = tf.image.encode_png(\n", 147 | " tf.image.convert_image_dtype(state['rgbd'][..., :3], dtype=tf.uint8))\n", 148 | " \n", 149 | " img = IPython.display.Image(data=imgdata.numpy()) \n", 150 | " global cnt\n", 151 | " with open('pic/'+str(cnt).zfill(6)+'.png', 'wb') as png:\n", 152 | " png.write(img.data)\n", 153 | " print('\\r{0}'.format(cnt), end='')\n", 154 | " cnt += 1\n", 155 | "\n", 156 | " return IPython.display.Image(data=imgdata.numpy())\n", 157 | "\n", 158 | "def reset(rgbd=None):\n", 159 | " if rgbd is None:\n", 160 | " rgbd = state['start_rgbd']\n", 161 | "\n", 162 | " height, width, _ = rgbd.shape\n", 163 | " aspect_ratio = width / float(height)\n", 164 | "\n", 165 | " rgbd = tf.image.resize(rgbd, [160, 256])\n", 166 | " state['rgbd'] = rgbd\n", 167 | " state['start_rgbd'] = rgbd\n", 168 | " state['pose'] = np.array(\n", 169 | " [[1.0, 0.0, 0.0, 0.0],\n", 170 | " [0.0, 1.0, 0.0, 0.0],\n", 171 | " [0.0, 0.0, 1.0, 0.0]],\n", 172 | " dtype=np.float32)\n", 173 | " # 0.8 focal_x corresponds to a FOV of ~64 degrees.\n", 174 | " state['intrinsics'] = np.array(\n", 175 | " [0.8, 0.8 * aspect_ratio, .5, .5],\n", 176 | " dtype=np.float32)\n", 177 | " state['direction_offset'] = (0.0, 0.0)\n", 178 | " state['style_noise'] = style_encoding(rgbd)\n", 179 | " state['next_pose_function'] = fly_camera.fly_dynamic(\n", 180 | " state['intrinsics'],\n", 181 | " state['pose'],\n", 182 | " turn_function=(lambda _: state['direction_offset']))\n", 183 | " return current_image_as_png()\n", 184 | "\n", 185 | "\n", 186 | "def step(offsetx, offsety):\n", 187 | " state['direction_offset'] = (offsetx, offsety)\n", 188 | " next_pose = 
state['next_pose_function'](state['rgbd'])\n", 189 | " next_rgbd = render_refine(\n", 190 | " state['rgbd'], state['style_noise'],\n", 191 | " state['pose'], state['intrinsics'],\n", 192 | " next_pose, state['intrinsics'])\n", 193 | " state['pose'] = next_pose\n", 194 | " state['rgbd'] = next_rgbd\n", 195 | " return current_image_as_png()\n", 196 | "\n", 197 | "\n", 198 | "# To run on user-supplied images, we use MiDaS V2 to obtain initial disparity.\n", 199 | "midas_model = hub.load('https://tfhub.dev/intel/midas/v2/2', tags=['serve'])\n", 200 | "\n", 201 | "def midas_disparity(rgb):\n", 202 | " \"\"\"Computes MiDaS v2 disparity on an RGB input image.\n", 203 | "\n", 204 | " Args:\n", 205 | " rgb: [H, W, 3] Range [0.0, 1.0].\n", 206 | " Returns:\n", 207 | " [H, W, 1] MiDaS disparity resized to the input size and in the range\n", 208 | " [0.0, 1.0]\n", 209 | " \"\"\"\n", 210 | " size = rgb.shape[:2]\n", 211 | " resized = tf.image.resize(rgb, [384, 384], tf.image.ResizeMethod.BICUBIC) #384, 384\n", 212 | " # MiDaS networks wants [1, C, H, W]\n", 213 | " midas_input = tf.transpose(resized, [2, 0, 1])[tf.newaxis]\n", 214 | " prediction = midas_model.signatures['serving_default'](midas_input)['default'][0]\n", 215 | " disp_min = tf.reduce_min(prediction)\n", 216 | " disp_max = tf.reduce_max(prediction)\n", 217 | " prediction = (prediction - disp_min) / (disp_max - disp_min)\n", 218 | " return tf.image.resize(\n", 219 | " prediction[..., tf.newaxis], size, method=tf.image.ResizeMethod.AREA)\n", 220 | "\n", 221 | "\n", 222 | "def load_initial(i):\n", 223 | " return reset(rgbd=initial_rgbds[i])\n", 224 | "\n", 225 | "\n", 226 | "def load_image(data):\n", 227 | " # Data converted from JS ends up as a string, needs to be converted to\n", 228 | " # bytes using Latin-1 encoding (which just maps 0-255 to 0-255).\n", 229 | " data = data.encode('Latin-1')\n", 230 | " rgb = tf.image.decode_image(data, channels=3, dtype=tf.float32)\n", 231 | " resized = tf.image.resize(rgb, [160, 256], tf.image.ResizeMethod.AREA)\n", 232 | " rgbd = tf.concat([resized, midas_disparity(resized)], axis=-1)\n", 233 | " return reset(rgbd=rgbd)\n" 234 | ], 235 | "execution_count": null, 236 | "outputs": [] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "metadata": { 241 | "id": "sCuRX1liUEVM", 242 | "cellView": "form" 243 | }, 244 | "source": [ 245 | "#@title setting html\n", 246 | "import IPython\n", 247 | "from google.colab import output\n", 248 | "\n", 249 | "# The front-end for our interactive demo.\n", 250 | "\n", 251 | "html='''\n", 252 | "\n", 307 | "

[HTML front-end stripped in extraction — it rendered the "Infinite Nature interactive demo" heading, the output image (click over the image to steer), the control buttons Restart / Play / Pause / Step with the note "Click Play to run or Step to advance frame by frame.", and the starting-image choices Image 1 / Image 2 / Image 3 / Upload… under "Select starting image (be patient…)"; the markup and its accompanying script wiring were not preserved in this dump.]
\n", 318 | "\n", 417 | "'''\n" 418 | ], 419 | "execution_count": null, 420 | "outputs": [] 421 | }, 422 | { 423 | "cell_type": "markdown", 424 | "metadata": { 425 | "id": "RHzgjz_nqoca" 426 | }, 427 | "source": [ 428 | "# ビデオ生成" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "metadata": { 434 | "cellView": "form", 435 | "id": "EX8id-WDdLnM" 436 | }, 437 | "source": [ 438 | "#@title 1. 静止画保存フォルダークリア\n", 439 | "import os\n", 440 | "import shutil\n", 441 | "if os.path.isdir('pic'):\n", 442 | " shutil.rmtree('pic')\n", 443 | "os.makedirs('pic', exist_ok=True)\n", 444 | "cnt = 0" 445 | ], 446 | "execution_count": null, 447 | "outputs": [] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "metadata": { 452 | "id": "X9kIasyhmXhe", 453 | "cellView": "form" 454 | }, 455 | "source": [ 456 | "#@title 2. インタラクティブデモ(静止画自動保存)\n", 457 | "display(IPython.display.HTML(html))\n", 458 | "output.register_callback('load_initial', load_initial)\n", 459 | "output.register_callback('load_image', load_image)\n", 460 | "output.register_callback('reset', reset)\n", 461 | "output.register_callback('step', step)" 462 | ], 463 | "execution_count": null, 464 | "outputs": [] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "metadata": { 469 | "id": "GT6Kl3lGuH-G", 470 | "cellView": "form" 471 | }, 472 | "source": [ 473 | "#@title 3. 静止画からmp4動画を作成\n", 474 | "# 既に output.mp4 があれば削除\n", 475 | "import os\n", 476 | "if os.path.exists('./output.mp4'):\n", 477 | " os.remove('./output.mp4')\n", 478 | "\n", 479 | "# pic フォルダーの静止画から動画を作成\n", 480 | "! ffmpeg -r 10 -i pic/%6d.png\\\n", 481 | " -vcodec libx264 -pix_fmt yuv420p output.mp4" 482 | ], 483 | "execution_count": null, 484 | "outputs": [] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "metadata": { 489 | "id": "laulM36fxr-3", 490 | "cellView": "form" 491 | }, 492 | "source": [ 493 | "#@title 4. 
mp4動画の再生\n", 494 | "from IPython.display import HTML\n", 495 | "from base64 import b64encode\n", 496 | "\n", 497 | "mp4 = open('./output.mp4', 'rb').read()\n", 498 | "data_url = 'data:video/mp4;base64,' + b64encode(mp4).decode()\n", 499 | "HTML(f\"\"\"\n", 500 | "\"\"\")" 503 | ], 504 | "execution_count": null, 505 | "outputs": [] 506 | } 507 | ] 508 | } -------------------------------------------------------------------------------- /PIFuHD_movie.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "PIFuHD_movie", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "accelerator": "GPU" 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "markdown", 20 | "metadata": { 21 | "id": "view-in-github", 22 | "colab_type": "text" 23 | }, 24 | "source": [ 25 | "\"Open" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": { 31 | "id": "WYhlsDkg1Hwb" 32 | }, 33 | "source": [ 34 | "## セットアップ" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "metadata": { 40 | "id": "8B1jmr82DtjG" 41 | }, 42 | "source": [ 43 | "# ライブラリー取得\n", 44 | "!pip install torch==1.6.0+cu101 torchvision==0.7.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html\n", 45 | "!pip install pytorch3d\n", 46 | "\n", 47 | "# githubからpifuhdのコードをコピー\n", 48 | "!git clone https://github.com/facebookresearch/pifuhd\n", 49 | "\n", 50 | "# githubからpose-estimationのコードをコピーし、学習済み重みをダウンロード\n", 51 | "!git clone https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch.git\n", 52 | "%cd /content/lightweight-human-pose-estimation.pytorch/\n", 53 | "!wget https://download.01.org/opencv/openvino_training_extensions/models/human_pose_estimation/checkpoint_iter_370000.pth\n", 54 | "\n", 55 | "# pifuhdの学習済み重みをダンロード\n", 56 | "%cd /content/pifuhd/\n", 57 | "!sh ./scripts/download_trained_model.sh\n", 58 | "\n", 59 | "# サンプルビデオダウンロード\n", 60 | "import gdown\n", 61 | "gdown.download('https://drive.google.com/uc?id=1rrccXA-k-45cUx1MDoLH6O3M4RF5pA_E', 'movie.zip', quiet=False)\n", 62 | "! 
unzip movie.zip" 63 | ], 64 | "execution_count": null, 65 | "outputs": [] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": { 70 | "id": "QvQm-A8ESKb2" 71 | }, 72 | "source": [ 73 | "## サンプルビデオから静止画を切り出す\n", 74 | "video_file = : ビデオ指定\\\n", 75 | "interval = : 静止画の切り出し間隔" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "metadata": { 81 | "id": "E0iVZpOdwC3d" 82 | }, 83 | "source": [ 84 | "import os\n", 85 | "import shutil\n", 86 | "import cv2\n", 87 | " \n", 88 | "# imagesフォルダーリセット\n", 89 | "if os.path.isdir('images'):\n", 90 | " shutil.rmtree('images') \n", 91 | "os.makedirs('images', exist_ok=True)\n", 92 | " \n", 93 | "def video_2_images(video_file= './movie/01.mp4', \n", 94 | " image_dir='./images/', \n", 95 | " image_file='%s.png'):\n", 96 | " \n", 97 | " # Initial setting\n", 98 | " i = 0\n", 99 | " interval = 3\n", 100 | " length = 300 # リミッター\n", 101 | " \n", 102 | " cap = cv2.VideoCapture(video_file)\n", 103 | " while(cap.isOpened()):\n", 104 | " flag, frame = cap.read() \n", 105 | " if flag == False: \n", 106 | " break\n", 107 | " if i == length*interval:\n", 108 | " break\n", 109 | " if i % interval == 0: \n", 110 | " cv2.imwrite(image_dir+image_file % str(int(i/interval)).zfill(6), frame)\n", 111 | " i += 1 \n", 112 | " cap.release() \n", 113 | " \n", 114 | "video_2_images()\n", 115 | "list_d = os.listdir('./images')\n", 116 | "num = len(list_d)\n", 117 | "print(num)" 118 | ], 119 | "execution_count": null, 120 | "outputs": [] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": { 125 | "id": "0x-pUOPT6bIM" 126 | }, 127 | "source": [ 128 | "## クロップ" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "metadata": { 134 | "id": "PdRcDXe38lHB" 135 | }, 136 | "source": [ 137 | "%cd /content/lightweight-human-pose-estimation.pytorch/\n", 138 | "import torch\n", 139 | "import cv2\n", 140 | "import numpy as np\n", 141 | "from models.with_mobilenet import PoseEstimationWithMobileNet\n", 142 | "from modules.keypoints import extract_keypoints, group_keypoints\n", 143 | "from modules.load_state import load_state\n", 144 | "from modules.pose import Pose, track_poses\n", 145 | "import demo\n", 146 | "\n", 147 | "def get_rect(net, images, height_size):\n", 148 | " net = net.eval()\n", 149 | "\n", 150 | " stride = 8\n", 151 | " upsample_ratio = 4\n", 152 | " num_keypoints = Pose.num_kpts\n", 153 | " previous_poses = []\n", 154 | " delay = 33\n", 155 | " for image in images:\n", 156 | " rect_path = image.replace('.%s' % (image.split('.')[-1]), '_rect.txt')\n", 157 | " img = cv2.imread(image, cv2.IMREAD_COLOR)\n", 158 | " orig_img = img.copy()\n", 159 | " orig_img = img.copy()\n", 160 | " heatmaps, pafs, scale, pad = demo.infer_fast(net, img, height_size, stride, upsample_ratio, cpu=False)\n", 161 | "\n", 162 | " total_keypoints_num = 0\n", 163 | " all_keypoints_by_type = []\n", 164 | " for kpt_idx in range(num_keypoints): # 19th for bg\n", 165 | " total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type, total_keypoints_num)\n", 166 | "\n", 167 | " pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs) #, demo=True)\n", 168 | " for kpt_id in range(all_keypoints.shape[0]):\n", 169 | " all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale\n", 170 | " all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale\n", 171 | " current_poses = []\n", 172 | "\n", 173 | " rects = []\n", 174 | " for n in range(len(pose_entries)):\n", 175 | " 
if len(pose_entries[n]) == 0:\n", 176 | " continue\n", 177 | " pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1\n", 178 | " valid_keypoints = []\n", 179 | " for kpt_id in range(num_keypoints):\n", 180 | " if pose_entries[n][kpt_id] != -1.0: # keypoint was found\n", 181 | " pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0])\n", 182 | " pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1])\n", 183 | " valid_keypoints.append([pose_keypoints[kpt_id, 0], pose_keypoints[kpt_id, 1]])\n", 184 | " valid_keypoints = np.array(valid_keypoints)\n", 185 | " \n", 186 | " if pose_entries[n][10] != -1.0 or pose_entries[n][13] != -1.0:\n", 187 | " pmin = valid_keypoints.min(0)\n", 188 | " pmax = valid_keypoints.max(0)\n", 189 | "\n", 190 | " center = (0.5 * (pmax[:2] + pmin[:2])).astype(np.int)\n", 191 | " radius = int(0.65 * max(pmax[0]-pmin[0], pmax[1]-pmin[1]))\n", 192 | " elif pose_entries[n][10] == -1.0 and pose_entries[n][13] == -1.0 and pose_entries[n][8] != -1.0 and pose_entries[n][11] != -1.0:\n", 193 | " # if leg is missing, use pelvis to get cropping\n", 194 | " center = (0.5 * (pose_keypoints[8] + pose_keypoints[11])).astype(np.int)\n", 195 | " radius = int(1.45*np.sqrt(((center[None,:] - valid_keypoints)**2).sum(1)).max(0))\n", 196 | " center[1] += int(0.05*radius)\n", 197 | " else:\n", 198 | " center = np.array([img.shape[1]//2,img.shape[0]//2])\n", 199 | " radius = max(img.shape[1]//2,img.shape[0]//2)\n", 200 | "\n", 201 | " x1 = center[0] - radius\n", 202 | " y1 = center[1] - radius\n", 203 | "\n", 204 | " rects.append([x1, y1, 2*radius, 2*radius])\n", 205 | "\n", 206 | " np.savetxt(rect_path, np.array(rects), fmt='%d')\n", 207 | "\n", 208 | "net = PoseEstimationWithMobileNet()\n", 209 | "checkpoint = torch.load('checkpoint_iter_370000.pth', map_location='cpu')\n", 210 | "load_state(net, checkpoint)\n", 211 | "\n", 212 | "# get_rect ループ\n", 213 | "from tqdm import trange\n", 214 | "for i in trange(num):\n", 215 | " image_path = '/content/pifuhd/images/'+str(i).zfill(6)+'.png'\n", 216 | " get_rect(net.cuda(), [image_path], 512)\n", 217 | "\n", 218 | "# 全ての rect.txt を同じ値に揃える\n", 219 | "import shutil\n", 220 | "for i in range(1,num):\n", 221 | " shutil.copyfile('/content/pifuhd/images/000000_rect.txt', '/content/pifuhd/images/'+str(i).zfill(6)+'_rect.txt')" 222 | ], 223 | "execution_count": null, 224 | "outputs": [] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": { 229 | "id": "FlWPwwuj6fTb" 230 | }, 231 | "source": [ 232 | "## objファイルの作成" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "metadata": { 238 | "id": "40UE4Cp37HV1" 239 | }, 240 | "source": [ 241 | "import os\n", 242 | "import shutil\n", 243 | "\n", 244 | "# results/pifuhd_final/reconフォルダーリセット\n", 245 | "if os.path.isdir('results/pifuhd_final/recon'):\n", 246 | " shutil.rmtree('results/pifuhd_final/recon') \n", 247 | "os.makedirs('results/pifuhd_final/recon', exist_ok=True)\n", 248 | "\n", 249 | "# objファイル作成\n", 250 | "%cd /content/pifuhd/\n", 251 | "!python -m apps.simple_test -r 256 --use_rect -i './images'\n" 252 | ], 253 | "execution_count": null, 254 | "outputs": [] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": { 259 | "id": "gFR6HYh36sEU" 260 | }, 261 | "source": [ 262 | "## objファイルから3Dモデル画像生成" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": { 268 | "id": "bELoZ47I1NW-" 269 | }, 270 | "source": [ 271 | "**関数定義**" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "metadata": { 
277 | "id": "zcEs-e3zX2xd" 278 | }, 279 | "source": [ 280 | "import io\n", 281 | "import os\n", 282 | "import torch\n", 283 | "from skimage.io import imread\n", 284 | "import numpy as np\n", 285 | "import cv2\n", 286 | "from tqdm import tqdm_notebook as tqdm\n", 287 | "import base64\n", 288 | "from IPython.display import HTML\n", 289 | "\n", 290 | "# Util function for loading meshes\n", 291 | "from pytorch3d.io import load_objs_as_meshes\n", 292 | "\n", 293 | "from IPython.display import HTML\n", 294 | "from base64 import b64encode\n", 295 | "\n", 296 | "# Data structures and functions for rendering\n", 297 | "from pytorch3d.structures import Meshes\n", 298 | "from pytorch3d.renderer import (\n", 299 | " look_at_view_transform,\n", 300 | " OpenGLOrthographicCameras, \n", 301 | " PointLights, \n", 302 | " DirectionalLights, \n", 303 | " Materials, \n", 304 | " RasterizationSettings, \n", 305 | " MeshRenderer, \n", 306 | " MeshRasterizer, \n", 307 | " HardPhongShader,\n", 308 | " TexturesVertex\n", 309 | ")\n", 310 | "\n", 311 | "def set_renderer():\n", 312 | " # Setup\n", 313 | " device = torch.device(\"cuda:0\")\n", 314 | " torch.cuda.set_device(device)\n", 315 | "\n", 316 | " # Initialize an OpenGL perspective camera.\n", 317 | " R, T = look_at_view_transform(2.0, 0, 180) \n", 318 | " cameras = OpenGLOrthographicCameras(device=device, R=R, T=T)\n", 319 | "\n", 320 | " raster_settings = RasterizationSettings(\n", 321 | " image_size=512, \n", 322 | " blur_radius=0.0, \n", 323 | " faces_per_pixel=1, \n", 324 | " bin_size = None, \n", 325 | " max_faces_per_bin = None\n", 326 | " )\n", 327 | "\n", 328 | " lights = PointLights(device=device, location=((2.0, 2.0, 2.0),))\n", 329 | "\n", 330 | " renderer = MeshRenderer(\n", 331 | " rasterizer=MeshRasterizer(\n", 332 | " cameras=cameras, \n", 333 | " raster_settings=raster_settings\n", 334 | " ),\n", 335 | " shader=HardPhongShader(\n", 336 | " device=device, \n", 337 | " cameras=cameras,\n", 338 | " lights=lights\n", 339 | " )\n", 340 | " )\n", 341 | " return renderer\n", 342 | "\n", 343 | "def get_verts_rgb_colors(obj_path):\n", 344 | " rgb_colors = []\n", 345 | "\n", 346 | " f = open(obj_path)\n", 347 | " lines = f.readlines()\n", 348 | " for line in lines:\n", 349 | " ls = line.split(' ')\n", 350 | " if len(ls) == 7:\n", 351 | " rgb_colors.append(ls[-3:])\n", 352 | "\n", 353 | " return np.array(rgb_colors, dtype='float32')[None, :, :]\n", 354 | "\n", 355 | "def generate_image_from_obj(obj_path, image_path, renderer): \n", 356 | " input_image = cv2.imread(image_path)\n", 357 | " input_image = input_image[:,:input_image.shape[1]//3]\n", 358 | " input_image = cv2.resize(input_image, (512,512))\n", 359 | "\n", 360 | " # Setup\n", 361 | " device = torch.device(\"cuda:0\")\n", 362 | " torch.cuda.set_device(device)\n", 363 | "\n", 364 | " # Load obj file\n", 365 | " verts_rgb_colors = get_verts_rgb_colors(obj_path)\n", 366 | " verts_rgb_colors = torch.from_numpy(verts_rgb_colors).to(device)\n", 367 | " textures = TexturesVertex(verts_features=verts_rgb_colors)\n", 368 | "\n", 369 | " # Load obj\n", 370 | " mesh = load_objs_as_meshes([obj_path], device=device)\n", 371 | "\n", 372 | " # Set mesh\n", 373 | " vers = mesh._verts_list\n", 374 | " faces = mesh._faces_list\n", 375 | " mesh_w_tex = Meshes(vers, faces, textures)\n", 376 | "\n", 377 | " R, T = look_at_view_transform(1.8, 0, 0, device=device)\n", 378 | " images_w_tex = renderer(mesh_w_tex, R=R, T=T)\n", 379 | " images_w_tex = np.clip(images_w_tex[0, ..., :3].cpu().numpy(), 0.0, 1.0)[:, :, ::-1] * 
255\n", 380 | " cv2.imwrite('./out1/'+filename, images_w_tex) \n", 381 | "\n", 382 | " image = np.concatenate([input_image, images_w_tex], axis=1)\n", 383 | " cv2.imwrite('./out2/'+filename, image)\n" 384 | ], 385 | "execution_count": null, 386 | "outputs": [] 387 | }, 388 | { 389 | "cell_type": "markdown", 390 | "metadata": { 391 | "id": "uS8khy7e1T4c" 392 | }, 393 | "source": [ 394 | "**3Dモデル画像生成**" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "metadata": { 400 | "id": "V-ajEW-eepOl" 401 | }, 402 | "source": [ 403 | "import os\n", 404 | "import shutil\n", 405 | "from tqdm import trange\n", 406 | "\n", 407 | "if os.path.isdir('out1'):\n", 408 | " shutil.rmtree('out1')\n", 409 | "os.makedirs('out1', exist_ok=True)\n", 410 | "\n", 411 | "if os.path.isdir('out2'):\n", 412 | " shutil.rmtree('out2')\n", 413 | "os.makedirs('out2', exist_ok=True)\n", 414 | "\n", 415 | "for i in trange(num):\n", 416 | " filename = str(i).zfill(6)+'.png'\n", 417 | " out_img_path = '/content/pifuhd/results/pifuhd_final/recon/result_'+str(i).zfill(6)+'_256.png'\n", 418 | " obj_path = '/content/pifuhd/results/pifuhd_final/recon/result_'+str(i).zfill(6)+'_256.obj'\n", 419 | " renderer = set_renderer()\n", 420 | " generate_image_from_obj(obj_path, out_img_path, renderer)" 421 | ], 422 | "execution_count": null, 423 | "outputs": [] 424 | }, 425 | { 426 | "cell_type": "markdown", 427 | "metadata": { 428 | "id": "ilbSPGC86x86" 429 | }, 430 | "source": [ 431 | "## 画像から動画の生成" 432 | ] 433 | }, 434 | { 435 | "cell_type": "markdown", 436 | "metadata": { 437 | "id": "_nHKzjrG4B_N" 438 | }, 439 | "source": [ 440 | "**3Dモデル**" 441 | ] 442 | }, 443 | { 444 | "cell_type": "code", 445 | "metadata": { 446 | "id": "PqjTPzNf8fnN" 447 | }, 448 | "source": [ 449 | "# 既に output1.mp4 があれば削除\n", 450 | "import os\n", 451 | "if os.path.exists('./output1.mp4'):\n", 452 | " os.remove('./output1.mp4')\n", 453 | "\n", 454 | "! ffmpeg -r 10 -i out1/%6d.png\\\n", 455 | " -vcodec libx264 -pix_fmt yuv420p output1.mp4" 456 | ], 457 | "execution_count": null, 458 | "outputs": [] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "metadata": { 463 | "id": "CEF9wEih3bCz" 464 | }, 465 | "source": [ 466 | "# --- mp4動画の再生 ---\n", 467 | "from IPython.display import HTML\n", 468 | "from base64 import b64encode\n", 469 | "\n", 470 | "mp4 = open('./output1.mp4', 'rb').read()\n", 471 | "data_url = 'data:video/mp4;base64,' + b64encode(mp4).decode()\n", 472 | "HTML(f\"\"\"\n", 473 | "\"\"\")" 476 | ], 477 | "execution_count": null, 478 | "outputs": [] 479 | }, 480 | { 481 | "cell_type": "markdown", 482 | "metadata": { 483 | "id": "fUDyy7Bs4UE0" 484 | }, 485 | "source": [ 486 | "**ビデオ+3Dモデル**" 487 | ] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "metadata": { 492 | "id": "jtEeBO8N20w7" 493 | }, 494 | "source": [ 495 | "# 既に output2.mp4 があれば削除\n", 496 | "import os\n", 497 | "if os.path.exists('./output2.mp4'):\n", 498 | " os.remove('./output2.mp4')\n", 499 | "\n", 500 | "! 
ffmpeg -r 10 -i out2/%6d.png\\\n", 501 | " -vcodec libx264 -pix_fmt yuv420p output2.mp4" 502 | ], 503 | "execution_count": null, 504 | "outputs": [] 505 | }, 506 | { 507 | "cell_type": "code", 508 | "metadata": { 509 | "id": "nzvADdr43dt4" 510 | }, 511 | "source": [ 512 | "# --- mp4動画の再生 ---\n", 513 | "from IPython.display import HTML\n", 514 | "from base64 import b64encode\n", 515 | "\n", 516 | "mp4 = open('./output2.mp4', 'rb').read()\n", 517 | "data_url = 'data:video/mp4;base64,' + b64encode(mp4).decode()\n", 518 | "HTML(f\"\"\"\n", 519 | "\"\"\")" 522 | ], 523 | "execution_count": null, 524 | "outputs": [] 525 | } 526 | ] 527 | } --------------------------------------------------------------------------------
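In each of the mp4-playback cells above (DeepDream's output.mp4/output2.mp4, infinite_nature's output.mp4, and PIFuHD_movie's output1.mp4/output2.mp4), the video element inside the HTML(f"""…""") literal did not survive extraction; only the base64 data-URL construction remains. Below is a minimal sketch of the usual Colab inline-playback pattern those cells rely on — the show_mp4 helper name and the width/controls/loop attributes are illustrative assumptions, not the notebooks' original markup.

from base64 import b64encode
from IPython.display import HTML

def show_mp4(path, width=512):
    # Read the finished mp4 and wrap it in a base64 data URL so the notebook
    # can play it inline without serving the file separately.
    data_url = 'data:video/mp4;base64,' + b64encode(open(path, 'rb').read()).decode()
    # The markup below is an assumed reconstruction of the stripped video element.
    return HTML(f"""
    <video width={width} controls loop>
      <source src="{data_url}" type="video/mp4">
    </video>
    """)

# e.g. show_mp4('./output2.mp4') to play the side-by-side video + 3D-model result generated above.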