├── Big_Sleep.ipynb
├── pymaf_demo.ipynb
├── LSE_OpenPose.ipynb
├── CLIP_search.ipynb
├── StyleCLIP_global.ipynb
├── PIFuHD_Demo.ipynb
├── DALL_E_demo.ipynb
├── ArtLine_make_gif.ipynb
├── CLIP_demo.ipynb
├── DALL_E.ipynb
├── DALL_e_sample.ipynb
├── SwapAE.ipynb
├── VideoPose3D.ipynb
├── DeepDream.ipynb
├── infinite_nature_demo.ipynb
└── PIFuHD_movie.ipynb
/Big_Sleep.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Big Sleep",
7 | "provenance": [],
8 | "collapsed_sections": [],
9 | "toc_visible": true,
10 | "include_colab_link": true
11 | },
12 | "kernelspec": {
13 | "name": "python3",
14 | "display_name": "Python 3"
15 | },
16 | "accelerator": "GPU"
17 | },
18 | "cells": [
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {
22 | "id": "view-in-github",
23 | "colab_type": "text"
24 | },
25 | "source": [
26 | ""
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {
32 | "id": "w5HPIGUSA9jf"
33 | },
34 | "source": [
35 | "# セットアップ"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "metadata": {
41 | "id": "9c-dAOUylyYt"
42 | },
43 | "source": [
44 | "# 接続GPUのチェック\n",
45 | "! nvidia-smi -L"
46 | ],
47 | "execution_count": null,
48 | "outputs": []
49 | },
50 | {
51 | "cell_type": "code",
52 | "metadata": {
53 | "id": "D2jUsCZXaqcw"
54 | },
55 | "source": [
56 | "# big-sleep インストール\n",
57 | "!pip install big-sleep --upgrade"
58 | ],
59 | "execution_count": null,
60 | "outputs": []
61 | },
62 | {
63 | "cell_type": "markdown",
64 | "metadata": {
65 | "id": "E232NSiQBi6X"
66 | },
67 | "source": [
68 | "# テキストから画像生成"
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "metadata": {
74 | "id": "2_JetNvHCP6l"
75 | },
76 | "source": [
77 | "TEXT = 'an armchair in the shape of an avocado' "
78 | ],
79 | "execution_count": null,
80 | "outputs": []
81 | },
82 | {
83 | "cell_type": "code",
84 | "metadata": {
85 | "id": "NfJ0RMCAauV3"
86 | },
87 | "source": [
88 | "from tqdm.notebook import trange\n",
89 | "from IPython.display import Image, display\n",
90 | "from big_sleep import Imagine\n",
91 | "import random\n",
92 | "\n",
93 | "# パラメータ設定\n",
94 | "SAVE_EVERY = 100 \n",
95 | "SAVE_PROGRESS = False \n",
96 | "LEARNING_RATE = 8e-2 \n",
97 | "ITERATIONS = 1050 \n",
98 | "SEED = random.randint(0, 10000) \n",
99 | "\n",
100 | "# モデルセッティング\n",
101 | "model = Imagine(\n",
102 | " text = TEXT,\n",
103 | " save_every = SAVE_EVERY,\n",
104 | " lr = LEARNING_RATE,\n",
105 | " iterations = ITERATIONS,\n",
106 | " save_progress = SAVE_PROGRESS,\n",
107 | " seed = SEED\n",
108 | ")\n",
109 | "\n",
110 | "# 探索ループ\n",
111 | "for epoch in range(1):\n",
112 | " for i in trange(1000, desc = 'iteration'):\n",
113 | " model.train_step(epoch, i)\n",
114 | "\n",
115 | " if i == 0 or i % model.save_every != 0:\n",
116 | " continue\n",
117 | "\n",
118 | " filename = TEXT.replace(' ', '_')\n",
119 | " image = Image(f'./{filename}.png')\n",
120 | " display(image)"
121 | ],
122 | "execution_count": null,
123 | "outputs": []
124 | }
125 | ]
126 | }
--------------------------------------------------------------------------------
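A note on Big_Sleep.ipynb above: the loop writes every checkpoint to the same `./{filename}.png`, so only the last image survives the run. Below is a minimal sketch (my addition, not part of the notebook) that keeps an indexed copy of each displayed checkpoint, assuming it is called right after `display(image)` inside the loop.

```python
import os
import shutil

os.makedirs('progress', exist_ok=True)

def snapshot(filename, step):
    """Copy the latest checkpoint PNG to an indexed file so the run can be reviewed later."""
    shutil.copy(f'./{filename}.png', f'progress/{step:06d}.png')

# e.g. inside the loop, right after display(image):
# snapshot(filename, i)
```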
/pymaf_demo.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "accelerator": "GPU",
6 | "colab": {
7 | "name": "pymaf_demo",
8 | "provenance": [],
9 | "collapsed_sections": [],
10 | "toc_visible": true,
11 | "include_colab_link": true
12 | },
13 | "kernelspec": {
14 | "display_name": "Python 3",
15 | "name": "python3"
16 | },
17 | "language_info": {
18 | "name": "python"
19 | }
20 | },
21 | "cells": [
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {
25 | "id": "view-in-github",
26 | "colab_type": "text"
27 | },
28 | "source": [
29 | "
"
30 | ]
31 | },
32 | {
33 | "cell_type": "markdown",
34 | "metadata": {
35 | "id": "mryOv98_qlod"
36 | },
37 | "source": [
38 | "# セットアップ"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "metadata": {
44 | "id": "TzS8Y_oIWzNt"
45 | },
46 | "source": [
47 | "# GPUチェッック\n",
48 | "!nvidia-smi -L"
49 | ],
50 | "execution_count": null,
51 | "outputs": []
52 | },
53 | {
54 | "cell_type": "code",
55 | "metadata": {
56 | "id": "Plo2YZZmaDtY",
57 | "collapsed": true
58 | },
59 | "source": [
60 | "# githubからコードを取得\n",
61 | "! git clone https://github.com/HongwenZhang/PyMAF.git\n",
62 | "%cd PyMAF\n",
63 | "\n",
64 | "# 必要なファイルをダウンロード\n",
65 | "! pip install --upgrade gdown\n",
66 | "import gdown\n",
67 | "gdown.download('https://drive.google.com/u/1/uc?id=1XvE73SWbwYMoPTZncmHGwipbsrE4-zyq', 'pymaf.zip', quiet=False)\n",
68 | "! unzip pymaf.zip\n",
69 | "! rm pymaf.zip\n",
70 | "\n",
71 | "# pytorchバージョン変更\n",
72 | "! pip install -U https://download.pytorch.org/whl/cu100/torch-1.1.0-cp37-cp37m-linux_x86_64.whl\n",
73 | "! pip install -U https://download.pytorch.org/whl/cu100/torchvision-0.3.0-cp37-cp37m-linux_x86_64.whl\n",
74 | "\n",
75 | "# ライブラリーインストール\n",
76 | "! pip install -r requirements.txt\n",
77 | "! pip install imageio==2.4.1\n",
78 | "! pip install pyglet==1.5.27"
79 | ],
80 | "execution_count": null,
81 | "outputs": []
82 | },
83 | {
84 | "cell_type": "markdown",
85 | "metadata": {
86 | "id": "dXa9qrbwqwuA"
87 | },
88 | "source": [
89 | "# 3Dポーズ推定"
90 | ]
91 | },
92 | {
93 | "cell_type": "code",
94 | "metadata": {
95 | "id": "KmHCtKpHqt9V"
96 | },
97 | "source": [
98 | "! python3 demo.py --checkpoint=data/pretrained_model/PyMAF_model_checkpoint.pt\\\n",
99 | " --vid_file ./dance.mp4"
100 | ],
101 | "execution_count": null,
102 | "outputs": []
103 | },
104 | {
105 | "cell_type": "code",
106 | "metadata": {
107 | "id": "mwlxnPuCxxqD"
108 | },
109 | "source": [
110 | "# Play the generated video\n",
111 | "from IPython.display import HTML\n",
112 | "from base64 import b64encode\n",
113 | "\n",
114 | "def video(path):\n",
115 | " mp4 = open(path,'rb').read()\n",
116 | " data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
117 | " return HTML('' % data_url)\n",
118 | "\n",
119 | "video('output/dance/dance_result.mp4')"
120 | ],
121 | "execution_count": null,
122 | "outputs": []
123 | },
124 | {
125 | "cell_type": "markdown",
126 | "metadata": {
127 | "id": "2SnSeng37gpg"
128 | },
129 | "source": [
130 | "# フレームレート調整"
131 | ]
132 | },
133 | {
134 | "cell_type": "code",
135 | "metadata": {
136 | "id": "THTbfAFNutBn"
137 | },
138 | "source": [
139 | "! ffmpeg -r 30 -i output/dance/dance_mp4_output/%6d.png\\\n",
140 | " -vcodec libx264 -pix_fmt yuv420p out_dance.mp4"
141 | ],
142 | "execution_count": null,
143 | "outputs": []
144 | },
145 | {
146 | "cell_type": "code",
147 | "metadata": {
148 | "id": "5VQv5qu2vsAc"
149 | },
150 | "source": [
151 | "# Play the generated video\n",
152 | "from IPython.display import HTML\n",
153 | "from base64 import b64encode\n",
154 | "\n",
155 | "def video(path):\n",
156 | " mp4 = open(path,'rb').read()\n",
157 | " data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
158 | " return HTML('' % data_url)\n",
159 | "\n",
160 | "video('out_dance.mp4')"
161 | ],
162 | "execution_count": null,
163 | "outputs": []
164 | }
165 | ]
166 | }
167 |
--------------------------------------------------------------------------------
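A note on pymaf_demo.ipynb above: the final ffmpeg call re-encodes the rendered frames at a fixed `-r 30`. A small sketch (an assumption, not part of the notebook) for probing the source clip's frame rate first, so the `-r` value can be matched to it; `./dance.mp4` is the path used by the demo cell.

```python
import cv2

# Probe the input clip used by demo.py so the ffmpeg -r value can match it.
cap = cv2.VideoCapture('./dance.mp4')
fps = cap.get(cv2.CAP_PROP_FPS)
frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
cap.release()

print(f'source fps: {fps:.2f}, frames: {int(frame_count)}')
```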
/LSE_OpenPose.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "accelerator": "GPU",
6 | "colab": {
7 | "name": "LSE OpenPose",
8 | "provenance": [],
9 | "collapsed_sections": [],
10 | "include_colab_link": true
11 | },
12 | "kernelspec": {
13 | "display_name": "Python 3",
14 | "language": "python",
15 | "name": "python3"
16 | },
17 | "language_info": {
18 | "codemirror_mode": {
19 | "name": "ipython",
20 | "version": 3
21 | },
22 | "file_extension": ".py",
23 | "mimetype": "text/x-python",
24 | "name": "python",
25 | "nbconvert_exporter": "python",
26 | "pygments_lexer": "ipython3",
27 | "version": "3.7.9"
28 | }
29 | },
30 | "cells": [
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {
34 | "id": "view-in-github",
35 | "colab_type": "text"
36 | },
37 | "source": [
38 | "
"
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "metadata": {
44 | "id": "X38L6tanrnrB"
45 | },
46 | "source": [
47 | "## セットアップ"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "metadata": {
53 | "id": "FOdkDhb6ga6N"
54 | },
55 | "source": [
56 | "import os\n",
57 | "from os.path import exists, join, basename, splitext\n",
58 | "\n",
59 | "git_repo_url = 'https://github.com/CMU-Perceptual-Computing-Lab/openpose.git'\n",
60 | "project_name = splitext(basename(git_repo_url))[0]\n",
61 | "if not exists(project_name):\n",
62 | " # see: https://github.com/CMU-Perceptual-Computing-Lab/openpose/issues/949\n",
63 | " # install new CMake becaue of CUDA10\n",
64 | " !wget -q https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.tar.gz\n",
65 | " !tar xfz cmake-3.13.0-Linux-x86_64.tar.gz --strip-components=1 -C /usr/local\n",
66 | " # clone openpose\n",
67 | " !git clone -q --depth 1 $git_repo_url\n",
68 | " !sed -i 's/execute_process(COMMAND git checkout master WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}\\/3rdparty\\/caffe)/execute_process(COMMAND git checkout f019d0dfe86f49d1140961f8c7dec22130c83154 WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}\\/3rdparty\\/caffe)/g' openpose/CMakeLists.txt\n",
69 | " # install system dependencies\n",
70 | " !apt-get -qq install -y libatlas-base-dev libprotobuf-dev libleveldb-dev libsnappy-dev libhdf5-serial-dev protobuf-compiler libgflags-dev libgoogle-glog-dev liblmdb-dev opencl-headers ocl-icd-opencl-dev libviennacl-dev\n",
71 | " # install python dependencies\n",
72 | " !pip install -q youtube-dl\n",
73 | " # build openpose\n",
74 | " !cd openpose && rm -rf build || true && mkdir build && cd build && cmake .. && make -j`nproc`"
75 | ],
76 | "execution_count": null,
77 | "outputs": []
78 | },
79 | {
80 | "cell_type": "markdown",
81 | "metadata": {
82 | "id": "n5L3Z5YVrZ2R"
83 | },
84 | "source": [
85 | "## ダウンロードする Youtube ビデオの確認"
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "metadata": {
91 | "id": "xIt-eyIDO6XG"
92 | },
93 | "source": [
94 | "from IPython.display import YouTubeVideo\n",
95 | "YOUTUBE_ID ='Sa3k_7ZtoCA'\n",
96 | "YouTubeVideo(YOUTUBE_ID)"
97 | ],
98 | "execution_count": null,
99 | "outputs": []
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "metadata": {
104 | "id": "5qq6g97DwMON"
105 | },
106 | "source": [
107 | "## Youtube ビデオのダウンロードと編集\n",
108 | "・ビデオをダウンロードし20秒に編集して、content/video.mp4に保存します。"
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "metadata": {
114 | "id": "oNASdyyiO65I"
115 | },
116 | "source": [
117 | "# download the youtube with the given ID\n",
118 | "!youtube-dl -f 'bestvideo[ext=mp4]' --output \"youtube.%(ext)s\" https://www.youtube.com/watch?v=$YOUTUBE_ID\n",
119 | "# cut the seconds 20 from 03:13\n",
120 | "!ffmpeg -y -loglevel info -i youtube.mp4 -ss 00:03:13.0 -t 20 video.mp4"
121 | ],
122 | "execution_count": null,
123 | "outputs": []
124 | },
125 | {
126 | "cell_type": "markdown",
127 | "metadata": {
128 | "id": "pPiFxw36wjsL"
129 | },
130 | "source": [
131 | "## 動画からポーズ推定\n",
132 | "・content/video.mp4からポーズ推定した動画を作成\\\n",
133 | "・自分の用意した動画を使う場合は、content/video.mp4を置き換えて下さい。\n"
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "metadata": {
139 | "id": "knxPE2w9wXX6"
140 | },
141 | "source": [
142 | "# detect poses\n",
143 | "!cd openpose && ./build/examples/openpose/openpose.bin --video ../video.mp4 --write_json ./output/ --display 0 --write_video ../openpose.avi --face --hand\n",
144 | "# convert the result into MP4\n",
145 | "!ffmpeg -y -loglevel info -i openpose.avi output.mp4"
146 | ],
147 | "execution_count": null,
148 | "outputs": []
149 | },
150 | {
151 | "cell_type": "markdown",
152 | "metadata": {
153 | "id": "kDDkgCCSrFTv"
154 | },
155 | "source": [
156 | "・動画の再生"
157 | ]
158 | },
159 | {
160 | "cell_type": "code",
161 | "metadata": {
162 | "id": "nZ3Ud9zLgOoQ"
163 | },
164 | "source": [
165 | "def show_local_mp4_video(file_name, width=640, height=480):\n",
166 | " import io\n",
167 | " import base64\n",
168 | " from IPython.display import HTML\n",
169 | " video_encoded = base64.b64encode(io.open(file_name, 'rb').read())\n",
170 | " return HTML(data=''''''.format(width, height, video_encoded.decode('ascii')))\n",
173 | "\n",
174 | "show_local_mp4_video('output.mp4', width=960, height=720)"
175 | ],
176 | "execution_count": null,
177 | "outputs": []
178 | }
179 | ]
180 | }
--------------------------------------------------------------------------------
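A note on LSE_OpenPose.ipynb above: the OpenPose run writes per-frame keypoints to `./output/` via `--write_json`, but the notebook never reads them back. A rough sketch of how those files could be parsed; the field layout (`people[i]['pose_keypoints_2d']` as flat x, y, confidence triples) is assumed from OpenPose's documented JSON output, so verify it against your own files.

```python
import glob
import json

# Read the first per-frame JSON written by the openpose.bin run above.
files = sorted(glob.glob('openpose/output/*.json'))
with open(files[0]) as f:
    frame = json.load(f)

for person in frame['people']:
    flat = person['pose_keypoints_2d']  # assumed layout: x, y, confidence per joint
    joints = [(flat[i], flat[i + 1], flat[i + 2]) for i in range(0, len(flat), 3)]
    print(f'{len(joints)} body joints, first joint: {joints[0]}')
```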
/CLIP_search.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "accelerator": "GPU",
6 | "colab": {
7 | "name": "CLIP_search",
8 | "provenance": [],
9 | "collapsed_sections": [],
10 | "toc_visible": true,
11 | "include_colab_link": true
12 | },
13 | "kernelspec": {
14 | "display_name": "Python 3",
15 | "language": "python",
16 | "name": "python3"
17 | },
18 | "language_info": {
19 | "codemirror_mode": {
20 | "name": "ipython",
21 | "version": 3
22 | },
23 | "file_extension": ".py",
24 | "mimetype": "text/x-python",
25 | "name": "python",
26 | "nbconvert_exporter": "python",
27 | "pygments_lexer": "ipython3",
28 | "version": "3.7.9"
29 | }
30 | },
31 | "cells": [
32 | {
33 | "cell_type": "markdown",
34 | "metadata": {
35 | "id": "view-in-github",
36 | "colab_type": "text"
37 | },
38 | "source": [
39 | "
"
40 | ]
41 | },
42 | {
43 | "cell_type": "markdown",
44 | "metadata": {
45 | "id": "EXnkFjoZy9kd"
46 | },
47 | "source": [
48 | "# セットアップ"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "metadata": {
54 | "id": "itUEF1Ltb5r3"
55 | },
56 | "source": [
57 | "# Pytorchバージョン変更\n",
58 | "! pip install torch==1.7.1+cu110 torchvision==0.8.2+cu110 -f https://download.pytorch.org/whl/torch_stable.html \n",
59 | "\n",
60 | "# CLIP関連コードのコピー\n",
61 | "! git clone https://github.com/openai/CLIP.git\n",
62 | "%cd /content/CLIP/\n",
63 | "\n",
64 | "# CLIPのモデル化\n",
65 | "! pip install ftfy regex\n",
66 | "import clip\n",
67 | "model, preprocess = clip.load('ViT-B/32', jit=True) \n",
68 | "model = model.eval() \n",
69 | "\n",
70 | "# サンプル画像ダウンロード\n",
71 | "! pip install --upgrade gdown\n",
72 | "import gdown\n",
73 | "gdown.download('https://drive.google.com/uc?id=1xIYYYzw9aZhjhyjMM12nz4XjnWUzpp6v', 'img.zip', quiet=False)\n",
74 | "! unzip img.zip"
75 | ],
76 | "execution_count": null,
77 | "outputs": []
78 | },
79 | {
80 | "cell_type": "markdown",
81 | "metadata": {
82 | "id": "6H6tX85TKA0n"
83 | },
84 | "source": [
85 | "# 検索する画像の読み込み\n"
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "metadata": {
91 | "id": "d6cpiIFHp9N6"
92 | },
93 | "source": [
94 | "# --- 画像の前処理 ----\n",
95 | "import torch\n",
96 | "import numpy as np\n",
97 | "from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize\n",
98 | "from PIL import Image\n",
99 | "import glob\n",
100 | "from tqdm import tqdm\n",
101 | "\n",
102 | "# 前処理設定\n",
103 | "preprocess = Compose([\n",
104 | " Resize(224, interpolation=Image.BICUBIC),\n",
105 | " CenterCrop(224),\n",
106 | " ToTensor()\n",
107 | "])\n",
108 | "image_mean = torch.tensor([0.48145466, 0.4578275, 0.40821073]).cuda()\n",
109 | "image_std = torch.tensor([0.26862954, 0.26130258, 0.27577711]).cuda()\n",
110 | "\n",
111 | "\n",
112 | "# 画像の読み込み\n",
113 | "images =[]\n",
114 | "files = glob.glob('./img/*.png')\n",
115 | "files.sort()\n",
116 | "\n",
117 | "for i, file in enumerate(tqdm(files)):\n",
118 | " image = preprocess(Image.open(file).convert(\"RGB\"))\n",
119 | " images.append(image)\n",
120 | "\n",
121 | "image_input = torch.tensor(np.stack(images)).cuda()\n",
122 | "image_input -= image_mean[:, None, None]\n",
123 | "image_input /= image_std[:, None, None]\n",
124 | "\n",
125 | "print('image_input.shape = ', image_input.shape)"
126 | ],
127 | "execution_count": null,
128 | "outputs": []
129 | },
130 | {
131 | "cell_type": "markdown",
132 | "metadata": {
133 | "id": "L_uKiB2nKQJX"
134 | },
135 | "source": [
136 | "# 検索テキストの入力\n"
137 | ]
138 | },
139 | {
140 | "cell_type": "code",
141 | "metadata": {
142 | "id": "C4S__zCGy2MT"
143 | },
144 | "source": [
145 | "text = 'She is a charming woman with blonde hair and blue eyes'\n",
146 | "text_input = clip.tokenize(text)\n",
147 | "text_input = text_input.cuda()\n",
148 | "\n",
149 | "print('text_input = ', text_input)\n",
150 | "print('text_input.shape = ', text_input.shape)"
151 | ],
152 | "execution_count": null,
153 | "outputs": []
154 | },
155 | {
156 | "cell_type": "markdown",
157 | "metadata": {
158 | "id": "2u4oePsAKdJO"
159 | },
160 | "source": [
161 | "# 画像とテキストのcos類似度の計算\n",
162 | "\n"
163 | ]
164 | },
165 | {
166 | "cell_type": "code",
167 | "metadata": {
168 | "id": "iqjF6NbBCT0a"
169 | },
170 | "source": [
171 | "# --- 画像とテキストのCOS類似度の計算 ----\n",
172 | "\n",
173 | "# 特徴ベクトルを抽出\n",
174 | "with torch.no_grad():\n",
175 | " image_features = model.encode_image(image_input).float()\n",
176 | " text_features = model.encode_text(text_input).float()\n",
177 | " text_features /= text_features.norm(dim=-1, keepdim=True) \n",
178 | "\n",
179 | "# COS類似度を計算\n",
180 | "text_probs = torch.cosine_similarity(image_features, text_features)\n",
181 | "\n",
182 | "print('image_features.shape = ', image_features.shape)\n",
183 | "print('text_features.shape = ', text_features.shape)\n",
184 | "print('text_probs.shape = ', text_probs.shape)"
185 | ],
186 | "execution_count": null,
187 | "outputs": []
188 | },
189 | {
190 | "cell_type": "markdown",
191 | "metadata": {
192 | "id": "OIM5PWmSKlVm"
193 | },
194 | "source": [
195 | "# 検索結果の表示"
196 | ]
197 | },
198 | {
199 | "cell_type": "code",
200 | "metadata": {
201 | "id": "LxPbA0_ScWUk"
202 | },
203 | "source": [
204 | "# --- 検索結果の表示 ---\n",
205 | "\n",
206 | "import matplotlib.pyplot as plt\n",
207 | "\n",
208 | "# 検索テキスト表示\n",
209 | "print('text = ', text)\n",
210 | "print()\n",
211 | "\n",
212 | "# COS類似度の高い順にインデックスをソート\n",
213 | "x = np.argsort(-text_probs.cpu(), axis=0)\n",
214 | "\n",
215 | "# COS類似度TOP3を表示\n",
216 | "fig = plt.figure(figsize=(30, 40))\n",
217 | "for i in range(3):\n",
218 | " name = str(x[i].item()).zfill(6)+'.png'\n",
219 | " img = Image.open('./img/'+name) \n",
220 | " images = np.asarray(img)\n",
221 | " ax = fig.add_subplot(10, 10, i+1, xticks=[], yticks=[])\n",
222 | " image_plt = np.array(images)\n",
223 | " ax.imshow(image_plt)\n",
224 | " cos_value = round(text_probs[x[i].item()].item(), 3)\n",
225 | " ax.set_xlabel(cos_value, fontsize=12) \n",
226 | "plt.show()\n",
227 | "plt.close() "
228 | ],
229 | "execution_count": null,
230 | "outputs": []
231 | }
232 | ]
233 | }
234 |
--------------------------------------------------------------------------------
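A note on CLIP_search.ipynb above: the image features only need to be encoded once, so repeated queries can reuse them. A minimal sketch of a query helper built from the same calls the notebook already makes (`clip.tokenize`, `model.encode_text`, `torch.cosine_similarity`); it assumes the earlier cells have run, so `model`, `files`, and `image_features` are defined.

```python
import torch
import clip

def search(query, image_features, top_k=3):
    """Rank the preprocessed images against a new text query."""
    tokens = clip.tokenize(query).cuda()
    with torch.no_grad():
        text_features = model.encode_text(tokens).float()
    text_features /= text_features.norm(dim=-1, keepdim=True)
    sims = torch.cosine_similarity(image_features, text_features)
    values, indices = sims.topk(top_k)
    return [(files[i.item()], round(v.item(), 3)) for i, v in zip(indices, values)]

# e.g. search('a man wearing glasses', image_features)
```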
/StyleCLIP_global.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "StyleCLIP_global",
7 | "provenance": [],
8 | "collapsed_sections": [],
9 | "include_colab_link": true
10 | },
11 | "kernelspec": {
12 | "name": "python3",
13 | "display_name": "Python 3"
14 | },
15 | "language_info": {
16 | "name": "python"
17 | },
18 | "accelerator": "GPU"
19 | },
20 | "cells": [
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {
24 | "id": "view-in-github",
25 | "colab_type": "text"
26 | },
27 | "source": [
28 | "
"
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {
34 | "id": "cFh0PtHAw5ax"
35 | },
36 | "source": [
37 | "# セットアップ"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "metadata": {
43 | "id": "5hlml6ebZ9xa"
44 | },
45 | "source": [
46 | "%tensorflow_version 1.x\n",
47 | "! pip install torch==1.7.1+cu110 torchvision==0.8.2+cu110 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html\n",
48 | "! pip install ftfy regex tqdm\n",
49 | "!pip install git+https://github.com/openai/CLIP.git\n",
50 | "! git clone https://github.com/orpatashnik/StyleCLIP"
51 | ],
52 | "execution_count": null,
53 | "outputs": []
54 | },
55 | {
56 | "cell_type": "code",
57 | "metadata": {
58 | "id": "WaZbI-6maJin"
59 | },
60 | "source": [
61 | "# input dataset name \n",
62 | "dataset_name='ffhq' # input dataset name, currently, only support ffhq\n",
63 | "\n",
64 | "% cd StyleCLIP/global/\n",
65 | "\n",
66 | "# input prepare data \n",
67 | "!python GetCode.py --dataset_name $dataset_name --code_type 'w'\n",
68 | "!python GetCode.py --dataset_name $dataset_name --code_type 's'\n",
69 | "!python GetCode.py --dataset_name $dataset_name --code_type 's_mean_std'\n",
70 | "\n",
71 | "import tensorflow as tf\n",
72 | "import numpy as np \n",
73 | "import torch\n",
74 | "import clip\n",
75 | "from PIL import Image\n",
76 | "import pickle\n",
77 | "import copy\n",
78 | "import matplotlib.pyplot as plt\n",
79 | "from MapTS import GetFs,GetBoundary,GetDt\n",
80 | "from manipulate import Manipulator\n",
81 | "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
82 | "model, preprocess = clip.load(\"ViT-B/32\", device=device)\n",
83 | "\n",
84 | "M=Manipulator(dataset_name='ffhq')\n",
85 | "fs3=np.load('./npy/ffhq/fs3.npy')\n",
86 | "np.set_printoptions(suppress=True)"
87 | ],
88 | "execution_count": null,
89 | "outputs": []
90 | },
91 | {
92 | "cell_type": "markdown",
93 | "metadata": {
94 | "id": "NQNEpDYfpup0"
95 | },
96 | "source": [
97 | "# GUIによる画像編集"
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "metadata": {
103 | "id": "u6Y31y0hpXbF"
104 | },
105 | "source": [
106 | "# 画像選択\n",
107 | "img_indexs=[1259]\n",
108 | "dlatent_tmp=[tmp[img_indexs] for tmp in M.dlatents]\n",
109 | "M.num_images=len(img_indexs)"
110 | ],
111 | "execution_count": null,
112 | "outputs": []
113 | },
114 | {
115 | "cell_type": "code",
116 | "metadata": {
117 | "id": "mlekymCvpXlo"
118 | },
119 | "source": [
120 | "#テキスト入力\n",
121 | "neutral='face'\n",
122 | "target='smile face'\n",
123 | "classnames=[target,neutral]\n",
124 | "dt=GetDt(classnames,model)"
125 | ],
126 | "execution_count": null,
127 | "outputs": []
128 | },
129 | {
130 | "cell_type": "code",
131 | "metadata": {
132 | "id": "hjAf0M3ttM7x"
133 | },
134 | "source": [
135 | "#@markdown ###元画像表示\n",
136 | "beta = 0.1 \n",
137 | "alpha = 0 \n",
138 | "M.alpha=[alpha]\n",
139 | "boundary_tmp2,c=GetBoundary(fs3,dt,M,threshold=beta)\n",
140 | "codes=M.MSCode(dlatent_tmp,boundary_tmp2)\n",
141 | "out=M.GenerateImg(codes)\n",
142 | "Image.fromarray(out[0,0])"
143 | ],
144 | "execution_count": null,
145 | "outputs": []
146 | },
147 | {
148 | "cell_type": "code",
149 | "metadata": {
150 | "id": "oXgtSUvVpX64"
151 | },
152 | "source": [
153 | "#@markdown ###編集画像表示\n",
154 | "beta = 0.1 #@param {type:\"slider\", min:0.08, max:0.3, step:0.01}\n",
155 | "alpha = 2 #@param {type:\"slider\", min:-10, max:10, step:0.1}\n",
156 | "M.alpha=[alpha]\n",
157 | "boundary_tmp2,c=GetBoundary(fs3,dt,M,threshold=beta)\n",
158 | "codes=M.MSCode(dlatent_tmp,boundary_tmp2)\n",
159 | "out=M.GenerateImg(codes)\n",
160 | "Image.fromarray(out[0,0])"
161 | ],
162 | "execution_count": null,
163 | "outputs": []
164 | },
165 | {
166 | "cell_type": "markdown",
167 | "metadata": {
168 | "id": "VKF-2zrFqILy"
169 | },
170 | "source": [
171 | "# 画像編集ビデオ"
172 | ]
173 | },
174 | {
175 | "cell_type": "code",
176 | "metadata": {
177 | "id": "-fhnU7Lnq2Tj"
178 | },
179 | "source": [
180 | "# 画像の選択\n",
181 | "img_indexs=[1276]\n",
182 | "dlatent_tmp=[tmp[img_indexs] for tmp in M.dlatents]\n",
183 | "M.num_images=len(img_indexs)"
184 | ],
185 | "execution_count": null,
186 | "outputs": []
187 | },
188 | {
189 | "cell_type": "code",
190 | "metadata": {
191 | "id": "DxUlRCeOqn-J"
192 | },
193 | "source": [
194 | "# テキスト入力\n",
195 | "neutral='face with hair'\n",
196 | "target='Curly Hair'\n",
197 | "classnames=[target,neutral]\n",
198 | "dt=GetDt(classnames,model)"
199 | ],
200 | "execution_count": null,
201 | "outputs": []
202 | },
203 | {
204 | "cell_type": "code",
205 | "metadata": {
206 | "id": "07NGF7LFqoHD"
207 | },
208 | "source": [
209 | "# 段階的な編集画像の保存\n",
210 | "import os\n",
211 | "import shutil\n",
212 | "if os.path.isdir('pic'):\n",
213 | " shutil.rmtree('pic')\n",
214 | "os.makedirs('pic', exist_ok=True)\n",
215 | "cnt = 0\n",
216 | "for i in range(0,20,1):\n",
217 | " beta = 0.1 \n",
218 | " alpha = i/10 \n",
219 | " M.alpha=[alpha]\n",
220 | " boundary_tmp2,c=GetBoundary(fs3,dt,M,threshold=beta)\n",
221 | " codes=M.MSCode(dlatent_tmp,boundary_tmp2)\n",
222 | " out=M.GenerateImg(codes)\n",
223 | " pic = Image.fromarray(out[0,0])\n",
224 | " pic.save('./pic/'+str(cnt).zfill(6)+'.png') \n",
225 | " cnt +=1\n",
226 | "\n",
227 | "for i in range(20,0,-1):\n",
228 | " beta = 0.1 \n",
229 | " alpha = i/10 \n",
230 | " M.alpha=[alpha]\n",
231 | " boundary_tmp2,c=GetBoundary(fs3,dt,M,threshold=beta)\n",
232 | " codes=M.MSCode(dlatent_tmp,boundary_tmp2)\n",
233 | " out=M.GenerateImg(codes)\n",
234 | " pic = Image.fromarray(out[0,0])\n",
235 | " pic.save('./pic/'+str(cnt).zfill(6)+'.png') \n",
236 | " cnt +=1 "
237 | ],
238 | "execution_count": null,
239 | "outputs": []
240 | },
241 | {
242 | "cell_type": "code",
243 | "metadata": {
244 | "id": "CKYEVW4PJXWk"
245 | },
246 | "source": [
247 | "# 段階的な編集画像を動画に変換\n",
248 | "! ffmpeg -r 10 -i pic/%6d.png\\\n",
249 | " -vcodec libx264 -pix_fmt yuv420p output.mp4"
250 | ],
251 | "execution_count": null,
252 | "outputs": []
253 | },
254 | {
255 | "cell_type": "code",
256 | "metadata": {
257 | "id": "9caPRkYaFCiJ"
258 | },
259 | "source": [
260 | "# Play the generated video\n",
261 | "from IPython.display import HTML\n",
262 | "from base64 import b64encode\n",
263 | "\n",
264 | "def video(path):\n",
265 | " mp4 = open(path,'rb').read()\n",
266 | " data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
267 | " return HTML('' % data_url)\n",
268 | "\n",
269 | "video('output.mp4')"
270 | ],
271 | "execution_count": null,
272 | "outputs": []
273 | }
274 | ]
275 | }
--------------------------------------------------------------------------------
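A note on StyleCLIP_global.ipynb above: the video loop sweeps `alpha` (edit strength) at a fixed `beta`. Below is a companion sketch (my addition, reusing the same calls as the notebook) that holds `alpha` fixed and sweeps `beta`, the threshold passed to `GetBoundary` for selecting style channels, which controls how localised the edit is.

```python
from PIL import Image

# Hold the edit strength fixed and vary the channel-selection threshold.
alpha = 2.0
M.alpha = [alpha]
for beta in [0.08, 0.1, 0.15, 0.2, 0.3]:
    boundary_tmp2, c = GetBoundary(fs3, dt, M, threshold=beta)
    codes = M.MSCode(dlatent_tmp, boundary_tmp2)
    out = M.GenerateImg(codes)
    Image.fromarray(out[0, 0]).save(f'beta_{beta:.2f}.png')
```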
/PIFuHD_Demo.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "colab_type": "text",
7 | "id": "view-in-github"
8 | },
9 | "source": [
10 | "
"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {
16 | "id": "WYhlsDkg1Hwb"
17 | },
18 | "source": [
19 | "## セットアップ"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": null,
25 | "metadata": {
26 | "id": "8B1jmr82DtjG"
27 | },
28 | "outputs": [],
29 | "source": [
30 | "# ライブラリー取得\n",
31 | "!pip install torch==1.6.0+cu101 torchvision==0.7.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html\n",
32 | "!pip install pytorch3d\n",
33 | "\n",
34 | "# githubからpifuhdのコードをコピー\n",
35 | "!git clone https://github.com/facebookresearch/pifuhd\n",
36 | "\n",
37 | "# githubからpose-estimationのコードをコピーし、学習済み重みをダウンロード\n",
38 | "!git clone https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch.git\n",
39 | "%cd /content/lightweight-human-pose-estimation.pytorch/\n",
40 | "!wget https://download.01.org/opencv/openvino_training_extensions/models/human_pose_estimation/checkpoint_iter_370000.pth\n",
41 | "\n",
42 | "# pifuhdの学習済み重みをダウンロード\n",
43 | "%cd /content/pifuhd/\n",
44 | "!sh ./scripts/download_trained_model.sh"
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "metadata": {
50 | "id": "QvQm-A8ESKb2"
51 | },
52 | "source": [
53 | "## PIFuHDの実行"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": null,
59 | "metadata": {
60 | "id": "jaV_7Yi8fM-B"
61 | },
62 | "outputs": [],
63 | "source": [
64 | "# 自分の画像のアップロード(このブロックを実行しなければテスト画像を使用します)\n",
65 | "# Google Chrome 推奨(Safariではエラーが出ます)\n",
66 | "%cd /content/pifuhd/sample_images\n",
67 | "from google.colab import files\n",
68 | "filename = list(files.upload().keys())[0]"
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": null,
74 | "metadata": {
75 | "id": "AEzmmB01SOZp"
76 | },
77 | "outputs": [],
78 | "source": [
79 | "# セッティング\n",
80 | "import os\n",
81 | "\n",
82 | "try:\n",
83 | " image_path = '/content/pifuhd/sample_images/%s' % filename\n",
84 | "except:\n",
85 | " image_path = '/content/pifuhd/sample_images/test.png' # example image\n",
86 | "image_dir = os.path.dirname(image_path)\n",
87 | "file_name = os.path.splitext(os.path.basename(image_path))[0]\n",
88 | "\n",
89 | "# output pathes\n",
90 | "obj_path = '/content/pifuhd/results/pifuhd_final/recon/result_%s_256.obj' % file_name\n",
91 | "out_img_path = '/content/pifuhd/results/pifuhd_final/recon/result_%s_256.png' % file_name\n",
92 | "video_path = '/content/pifuhd/results/pifuhd_final/recon/result_%s_256.mp4' % file_name\n",
93 | "video_display_path = '/content/pifuhd/results/pifuhd_final/result_%s_256_display.mp4' % file_name"
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": null,
99 | "metadata": {
100 | "id": "PdRcDXe38lHB"
101 | },
102 | "outputs": [],
103 | "source": [
104 | "# クロッピング\n",
105 | "%cd /content/lightweight-human-pose-estimation.pytorch/\n",
106 | "import torch\n",
107 | "import cv2\n",
108 | "import numpy as np\n",
109 | "from models.with_mobilenet import PoseEstimationWithMobileNet\n",
110 | "from modules.keypoints import extract_keypoints, group_keypoints\n",
111 | "from modules.load_state import load_state\n",
112 | "from modules.pose import Pose, track_poses\n",
113 | "import demo\n",
114 | "\n",
115 | "def get_rect(net, images, height_size):\n",
116 | " net = net.eval()\n",
117 | "\n",
118 | " stride = 8\n",
119 | " upsample_ratio = 4\n",
120 | " num_keypoints = Pose.num_kpts\n",
121 | " previous_poses = []\n",
122 | " delay = 33\n",
123 | " for image in images:\n",
124 | " rect_path = image.replace('.%s' % (image.split('.')[-1]), '_rect.txt')\n",
125 | " img = cv2.imread(image, cv2.IMREAD_COLOR)\n",
126 | " orig_img = img.copy()\n",
127 | " orig_img = img.copy()\n",
128 | " heatmaps, pafs, scale, pad = demo.infer_fast(net, img, height_size, stride, upsample_ratio, cpu=False)\n",
129 | "\n",
130 | " total_keypoints_num = 0\n",
131 | " all_keypoints_by_type = []\n",
132 | " for kpt_idx in range(num_keypoints): # 19th for bg\n",
133 | " total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type, total_keypoints_num)\n",
134 | "\n",
135 | " pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs)\n",
136 | " for kpt_id in range(all_keypoints.shape[0]):\n",
137 | " all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale\n",
138 | " all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale\n",
139 | " current_poses = []\n",
140 | "\n",
141 | " rects = []\n",
142 | " for n in range(len(pose_entries)):\n",
143 | " if len(pose_entries[n]) == 0:\n",
144 | " continue\n",
145 | " pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1\n",
146 | " valid_keypoints = []\n",
147 | " for kpt_id in range(num_keypoints):\n",
148 | " if pose_entries[n][kpt_id] != -1.0: # keypoint was found\n",
149 | " pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0])\n",
150 | " pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1])\n",
151 | " valid_keypoints.append([pose_keypoints[kpt_id, 0], pose_keypoints[kpt_id, 1]])\n",
152 | " valid_keypoints = np.array(valid_keypoints)\n",
153 | " \n",
154 | " if pose_entries[n][10] != -1.0 or pose_entries[n][13] != -1.0:\n",
155 | " pmin = valid_keypoints.min(0)\n",
156 | " pmax = valid_keypoints.max(0)\n",
157 | "\n",
158 | " center = (0.5 * (pmax[:2] + pmin[:2])).astype(np.int)\n",
159 | " radius = int(0.65 * max(pmax[0]-pmin[0], pmax[1]-pmin[1]))\n",
160 | " elif pose_entries[n][10] == -1.0 and pose_entries[n][13] == -1.0 and pose_entries[n][8] != -1.0 and pose_entries[n][11] != -1.0:\n",
161 | " # if leg is missing, use pelvis to get cropping\n",
162 | " center = (0.5 * (pose_keypoints[8] + pose_keypoints[11])).astype(np.int)\n",
163 | " radius = int(1.45*np.sqrt(((center[None,:] - valid_keypoints)**2).sum(1)).max(0))\n",
164 | " center[1] += int(0.05*radius)\n",
165 | " else:\n",
166 | " center = np.array([img.shape[1]//2,img.shape[0]//2])\n",
167 | " radius = max(img.shape[1]//2,img.shape[0]//2)\n",
168 | "\n",
169 | " x1 = center[0] - radius\n",
170 | " y1 = center[1] - radius\n",
171 | "\n",
172 | " rects.append([x1, y1, 2*radius, 2*radius])\n",
173 | "\n",
174 | " np.savetxt(rect_path, np.array(rects), fmt='%d')\n",
175 | "\n",
176 | "net = PoseEstimationWithMobileNet()\n",
177 | "checkpoint = torch.load('checkpoint_iter_370000.pth', map_location='cpu')\n",
178 | "load_state(net, checkpoint)\n",
179 | "\n",
180 | "get_rect(net.cuda(), [image_path], 512)"
181 | ]
182 | },
183 | {
184 | "cell_type": "code",
185 | "execution_count": null,
186 | "metadata": {
187 | "id": "5995t2PnQTmG"
188 | },
189 | "outputs": [],
190 | "source": [
191 | "# レンダリング\n",
192 | "%cd /content/pifuhd/\n",
193 | "!python -m apps.simple_test -r 256 --use_rect -i $image_dir"
194 | ]
195 | },
196 | {
197 | "cell_type": "code",
198 | "execution_count": null,
199 | "metadata": {
200 | "id": "afwL_-ROCmDf"
201 | },
202 | "outputs": [],
203 | "source": [
204 | "# mp4の作成\n",
205 | "from lib.colab_util import generate_video_from_obj, set_renderer, video\n",
206 | "\n",
207 | "renderer = set_renderer()\n",
208 | "generate_video_from_obj(obj_path, out_img_path, video_path, renderer)\n",
209 | "\n",
210 | "# we cannot play a mp4 video generated by cv2\n",
211 | "!ffmpeg -i $video_path -vcodec libx264 $video_display_path -y -loglevel quiet\n",
212 | "video(video_display_path)"
213 | ]
214 | }
215 | ],
216 | "metadata": {
217 | "accelerator": "GPU",
218 | "colab": {
219 | "collapsed_sections": [],
220 | "include_colab_link": true,
221 | "name": "PIFuHD Demo",
222 | "provenance": [],
223 | "toc_visible": true
224 | },
225 | "kernelspec": {
226 | "display_name": "Python 3",
227 | "language": "python",
228 | "name": "python3"
229 | },
230 | "language_info": {
231 | "codemirror_mode": {
232 | "name": "ipython",
233 | "version": 3
234 | },
235 | "file_extension": ".py",
236 | "mimetype": "text/x-python",
237 | "name": "python",
238 | "nbconvert_exporter": "python",
239 | "pygments_lexer": "ipython3",
240 | "version": "3.7.9"
241 | }
242 | },
243 | "nbformat": 4,
244 | "nbformat_minor": 1
245 | }
246 |
--------------------------------------------------------------------------------
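A note on PIFuHD_Demo.ipynb above: besides playing the preview video inline, the reconstructed mesh itself can be pulled down from the Colab runtime. A short sketch (assumes the cells above have run, so `obj_path` and `video_display_path` are defined).

```python
from google.colab import files

# Download the reconstructed mesh and the re-encoded preview video to the local machine.
files.download(obj_path)             # result_<name>_256.obj
files.download(video_display_path)   # result_<name>_256_display.mp4
```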
/DALL_E_demo.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "accelerator": "GPU",
6 | "colab": {
7 | "name": "DALL_E_demo",
8 | "provenance": [],
9 | "collapsed_sections": [],
10 | "include_colab_link": true
11 | },
12 | "kernelspec": {
13 | "display_name": "Python 3",
14 | "name": "python3"
15 | }
16 | },
17 | "cells": [
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {
21 | "id": "view-in-github",
22 | "colab_type": "text"
23 | },
24 | "source": [
25 | "
"
26 | ]
27 | },
28 | {
29 | "cell_type": "markdown",
30 | "metadata": {
31 | "id": "2nD1n0xEBcko"
32 | },
33 | "source": [
34 | "# セットアップ"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "metadata": {
40 | "id": "F4iTie2EKrbb"
41 | },
42 | "source": [
43 | "# 1.Pytorchバージョン変更\n",
44 | "! pip install torch==1.7.1+cu110 torchvision==0.8.2+cu110 -f https://download.pytorch.org/whl/torch_stable.html \n",
45 | "\n",
46 | "# 2.Pytorch画像処理ライブラリー・インストール\n",
47 | "! pip install kornia==0.5.0\n",
48 | "\n",
49 | "# 3.CLIP関連コードのコピー\n",
50 | "! git clone https://github.com/openai/CLIP.git\n",
51 | "%cd /content/CLIP/\n",
52 | "\n",
53 | "# 4.CLIPのモデル化\n",
54 | "! pip install ftfy regex\n",
55 | "import clip\n",
56 | "model, preprocess = clip.load('ViT-B/32', jit=True) \n",
57 | "model = model.eval() \n",
58 | "\n",
59 | "# 5.DALL-Eのモデル化\n",
60 | "! pip install DALL-E\n",
61 | "from dall_e import map_pixels, unmap_pixels, load_model\n",
62 | "dec = load_model(\"https://cdn.openai.com/dall-e/decoder.pkl\", 'cuda') \n"
63 | ],
64 | "execution_count": null,
65 | "outputs": []
66 | },
67 | {
68 | "cell_type": "markdown",
69 | "metadata": {
70 | "id": "KAcixx9Z3XYH"
71 | },
72 | "source": [
73 | "# ライブラリー・インポート&関数定義\n"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "metadata": {
79 | "id": "piJOg9MY7khd"
80 | },
81 | "source": [
82 | "import torch\n",
83 | "import numpy as np\n",
84 | "import torchvision\n",
85 | "import torchvision.transforms.functional as TF\n",
86 | "import torchvision.transforms as T\n",
87 | "import kornia\n",
88 | "import PIL\n",
89 | "import os, io, sys\n",
90 | "import random\n",
91 | "import imageio\n",
92 | "from IPython import display\n",
93 | "from IPython.core.interactiveshell import InteractiveShell\n",
94 | "InteractiveShell.ast_node_interactivity = \"all\"\n",
95 | "from google.colab import output\n",
96 | "import requests\n",
97 | "\n",
98 | "# 初期設定\n",
99 | "im_shape = [512, 512, 3]\n",
100 | "sideX, sideY, channels = im_shape\n",
101 | "target_image_size = sideX\n",
102 | "tau_value = 2.\n",
103 | "\n",
104 | "# 画像表示・保存\n",
105 | "def displ(img):\n",
106 | " img = np.array(img)[:,:,:]\n",
107 | " img = np.transpose(img, (1, 2, 0))\n",
108 | " imageio.imwrite('output.png', np.array(img))\n",
109 | " return display.Image('output.png')\n",
110 | "\n",
111 | "# 画像のランダム切り出し\n",
112 | "def augment(out, cutn=16):\n",
113 | " p_s = []\n",
114 | " for ch in range(cutn):\n",
115 | " sizey = int(torch.zeros(1,).uniform_(.5, .99)*sideY)\n",
116 | " sizex = int(torch.zeros(1,).uniform_(.5, .99)*sideX)\n",
117 | " offsetx = torch.randint(0, sideX - sizex, ())\n",
118 | " offsety = torch.randint(0, sideY - sizey, ())\n",
119 | " apper = out[:, :, offsetx:offsetx + sizex, offsety:offsety + sizey]\n",
120 | " apper = apper + .1*torch.rand(1,1,1,1).cuda()*torch.randn_like(apper, requires_grad=True)\n",
121 | " apper = torch.nn.functional.interpolate(apper, (224,224), mode='bilinear')\n",
122 | " p_s.append(apper)\n",
123 | " into = augs(torch.cat(p_s, 0))\n",
124 | " return into\n",
125 | "\n",
126 | "# 正規化と回転設定\n",
127 | "nom = torchvision.transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711))\n",
128 | "augs = kornia.augmentation.RandomRotation(30).cuda()\n"
129 | ],
130 | "execution_count": null,
131 | "outputs": []
132 | },
133 | {
134 | "cell_type": "markdown",
135 | "metadata": {
136 | "id": "XaocGDQXz3Zx"
137 | },
138 | "source": [
139 | "# テキストから特徴ベクトルを抽出"
140 | ]
141 | },
142 | {
143 | "cell_type": "code",
144 | "metadata": {
145 | "id": "PGBTOiJqWgZ3"
146 | },
147 | "source": [
148 | "# テキスト入力\n",
149 | "text_input = 'a beautiful and mysterious house designed by Escher'\n",
150 | "\n",
151 | "# テキストを特徴ベクトルに変換\n",
152 | "token = clip.tokenize(text_input) \n",
153 | "text_v = model.encode_text(token.cuda()).detach().clone() "
154 | ],
155 | "execution_count": null,
156 | "outputs": []
157 | },
158 | {
159 | "cell_type": "code",
160 | "metadata": {
161 | "id": "TSqoQrpGCUp0"
162 | },
163 | "source": [
164 | "# 【チェック】token, text_vのシェイプ\n",
165 | "print('token.shape = ', token.shape)\n",
166 | "print('token = ', token)\n",
167 | "print('text_v.shape = ', text_v.shape)"
168 | ],
169 | "execution_count": null,
170 | "outputs": []
171 | },
172 | {
173 | "cell_type": "markdown",
174 | "metadata": {
175 | "id": "oiAD3aRNMC4l"
176 | },
177 | "source": [
178 | "# パラメータ・最適化手法の設定"
179 | ]
180 | },
181 | {
182 | "cell_type": "code",
183 | "metadata": {
184 | "id": "GdCh2D8Dt8Xd"
185 | },
186 | "source": [
187 | "# パラメータの設定\n",
188 | "class Pars(torch.nn.Module):\n",
189 | " def __init__(self):\n",
190 | " super(Pars, self).__init__()\n",
191 | " hots = torch.nn.functional.one_hot((torch.arange(0, 8192).to(torch.int64)), num_classes=8192)\n",
192 | " rng = torch.zeros(1, 64*64, 8192).uniform_()\n",
193 | " for i in range(64*64):\n",
194 | " rng[0,i] = hots[[np.random.randint(8191)]]\n",
195 | " rng = rng.permute(0, 2, 1)\n",
196 | " self.normu = torch.nn.Parameter(rng.cuda().view(1, 8192, 64*64))\n",
197 | " \n",
198 | " def forward(self): \n",
199 | " normu = torch.nn.functional.gumbel_softmax(self.normu.reshape(1,64*64,8192), dim=1, tau=tau_value).view(1, 8192, 64, 64)\n",
200 | " return normu \n",
201 | "\n",
202 | "# 最適化手法の設定\n",
203 | "latent = Pars().cuda() \n",
204 | "param = [latent.normu] \n",
205 | "optimizer = torch.optim.Adam([{'params': param, 'lr': .01}]) \n"
206 | ],
207 | "execution_count": null,
208 | "outputs": []
209 | },
210 | {
211 | "cell_type": "code",
212 | "metadata": {
213 | "id": "3ZyFGu6IC5Rx"
214 | },
215 | "source": [
216 | "# 【チェック】パラメータから画像生成\n",
217 | "with torch.no_grad():\n",
218 | " out = unmap_pixels(torch.sigmoid(dec(latent())[:, :3].float()))\n",
219 | " displ(out.cpu()[0])\n",
220 | "\n",
221 | " print('latent().shape = ', latent().shape)\n",
222 | " print('dec(latent()).shape = ', dec(latent()).shape)\n",
223 | " print('out.shape = ', out.shape) "
224 | ],
225 | "execution_count": null,
226 | "outputs": []
227 | },
228 | {
229 | "cell_type": "markdown",
230 | "metadata": {
231 | "id": "WztSrRF23Rqg"
232 | },
233 | "source": [
234 | "# 学習"
235 | ]
236 | },
237 | {
238 | "cell_type": "code",
239 | "metadata": {
240 | "id": "NwYNUzzovPEW"
241 | },
242 | "source": [
243 | "# 学習ループ\n",
244 | "for iteration in range(1001):\n",
245 | "\n",
246 | " # --- 順伝播 ---\n",
247 | " # パラメータから画像を生成\n",
248 | " out = unmap_pixels(torch.sigmoid(dec(latent())[:, :3].float()))\n",
249 | " # 画像をランダム切り出し・回転 \n",
250 | " into = augment(out)\n",
251 | " # 画像を正規化\n",
252 | " into = nom((into))\n",
253 | " # 画像から特徴ベクトルを取得\n",
254 | " image_v = model.encode_image(into)\n",
255 | " # テキストと画像の特徴ベクトルのCOS類似度を計算 \n",
256 | " loss = -torch.cosine_similarity(text_v, image_v).mean() \n",
257 | "\n",
258 | " # 逆伝播\n",
259 | " optimizer.zero_grad()\n",
260 | " loss.backward()\n",
261 | " optimizer.step() \n",
262 | "\n",
263 | " # 学習率の調整\n",
264 | " for g in optimizer.param_groups:\n",
265 | " g['lr'] = g['lr']*1.005\n",
266 | " g['lr'] = min(g['lr'], .12)\n",
267 | "\n",
268 | " # ログ表示 \n",
269 | " if iteration % 50 == 0:\n",
270 | " with torch.no_grad():\n",
271 | "\n",
272 | " # 生成画像の表示・保存\n",
273 | " out = unmap_pixels(torch.sigmoid(dec(latent())[:, :3]).float()) ###\n",
274 | " displ(out.cpu()[0]) ###\n",
275 | "\n",
276 | " # データ表示\n",
277 | " print('iter = ',iteration)\n",
278 | " for g in optimizer.param_groups:\n",
279 | " print('lr = ', g['lr'])\n",
280 | " print('tau_value = ', tau_value)\n",
281 | " print('loss = ',loss.item())\n",
282 | " print('\\n')\n"
283 | ],
284 | "execution_count": null,
285 | "outputs": []
286 | }
287 | ]
288 | }
289 |
--------------------------------------------------------------------------------
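A note on DALL_E_demo.ipynb above: the training loss is the negative mean cosine similarity between the text feature and the 16 augmented-crop image features. A small equivalence sketch (my addition, assuming `text_v` and `image_v` as computed in the loop): normalising both and taking a dot product gives the same value.

```python
import torch

# Cosine similarity expressed as a dot product of L2-normalised features.
t = text_v / text_v.norm(dim=-1, keepdim=True)      # (1, 512) text feature
v = image_v / image_v.norm(dim=-1, keepdim=True)    # (16, 512) features of the 16 crops
loss_alt = -(v @ t.T).mean()

# Should match: -torch.cosine_similarity(text_v, image_v).mean()
print(loss_alt.item())
```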
/ArtLine_make_gif.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "ArtLine_make_gif",
7 | "provenance": [],
8 | "collapsed_sections": [],
9 | "include_colab_link": true
10 | },
11 | "kernelspec": {
12 | "name": "python3",
13 | "display_name": "Python 3"
14 | },
15 | "accelerator": "GPU"
16 | },
17 | "cells": [
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {
21 | "id": "view-in-github",
22 | "colab_type": "text"
23 | },
24 | "source": [
25 | "
"
26 | ]
27 | },
28 | {
29 | "cell_type": "markdown",
30 | "metadata": {
31 | "id": "eOhPqC6fysD4"
32 | },
33 | "source": [
34 | "# **ArtLine_make_gif**\n",
35 | "**Create** **Amazing** **Line** **Art**."
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "metadata": {
41 | "id": "xzHW4dq4ys7_"
42 | },
43 | "source": [
44 | "# copy github\n",
45 | "!git clone https://github.com/vijishmadhavan/ArtLine.git ArtLine\n",
46 | "%cd ArtLine/\n",
47 | "\n",
48 | "# get libralies\n",
49 | "!pip install -r colab_requirements.txt\n",
50 | "!pip install -q youtube-dl"
51 | ],
52 | "execution_count": null,
53 | "outputs": []
54 | },
55 | {
56 | "cell_type": "markdown",
57 | "metadata": {
58 | "id": "2cjGDScH86iU"
59 | },
60 | "source": [
61 | "# **Runtime**\n",
62 | "\n",
63 | "* Hardware Accelerator = GPU \n",
64 | "You have to click twice\n"
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "metadata": {
70 | "id": "qnC6OObV3sNk"
71 | },
72 | "source": [
73 | "import fastai\n",
74 | "from fastai.vision import *\n",
75 | "from fastai.utils.mem import *\n",
76 | "from fastai.vision import open_image, load_learner, image, torch\n",
77 | "import numpy as np\n",
78 | "import urllib.request\n",
79 | "import PIL.Image\n",
80 | "from io import BytesIO\n",
81 | "import torchvision.transforms as T\n",
82 | "from PIL import Image\n",
83 | "import requests\n",
84 | "from io import BytesIO\n",
85 | "import fastai\n",
86 | "from fastai.vision import *\n",
87 | "from fastai.utils.mem import *\n",
88 | "from fastai.vision import open_image, load_learner, image, torch\n",
89 | "import numpy as np\n",
90 | "import urllib.request\n",
91 | "import PIL.Image\n",
92 | "from io import BytesIO\n",
93 | "import torchvision.transforms as T\n",
94 | "\n",
95 | "class FeatureLoss(nn.Module):\n",
96 | " def __init__(self, m_feat, layer_ids, layer_wgts):\n",
97 | " super().__init__()\n",
98 | " self.m_feat = m_feat\n",
99 | " self.loss_features = [self.m_feat[i] for i in layer_ids]\n",
100 | " self.hooks = hook_outputs(self.loss_features, detach=False)\n",
101 | " self.wgts = layer_wgts\n",
102 | " self.metric_names = ['pixel',] + [f'feat_{i}' for i in range(len(layer_ids))\n",
103 | " ] + [f'gram_{i}' for i in range(len(layer_ids))]\n",
104 | "\n",
105 | " def make_features(self, x, clone=False):\n",
106 | " self.m_feat(x)\n",
107 | " return [(o.clone() if clone else o) for o in self.hooks.stored]\n",
108 | " \n",
109 | " def forward(self, input, target):\n",
110 | " out_feat = self.make_features(target, clone=True)\n",
111 | " in_feat = self.make_features(input)\n",
112 | " self.feat_losses = [base_loss(input,target)]\n",
113 | " self.feat_losses += [base_loss(f_in, f_out)*w\n",
114 | " for f_in, f_out, w in zip(in_feat, out_feat, self.wgts)]\n",
115 | " self.feat_losses += [base_loss(gram_matrix(f_in), gram_matrix(f_out))*w**2 * 5e3\n",
116 | " for f_in, f_out, w in zip(in_feat, out_feat, self.wgts)]\n",
117 | " self.metrics = dict(zip(self.metric_names, self.feat_losses))\n",
118 | " return sum(self.feat_losses)\n",
119 | " \n",
120 | " def __del__(self): self.hooks.remove()"
121 | ],
122 | "execution_count": null,
123 | "outputs": []
124 | },
125 | {
126 | "cell_type": "code",
127 | "metadata": {
128 | "id": "qmLIGUuu3vp5"
129 | },
130 | "source": [
131 | "MODEL_URL = \"https://www.dropbox.com/s/p9lynpwygjmeed2/ArtLine_500.pkl?dl=1 \"\n",
132 | "urllib.request.urlretrieve(MODEL_URL, \"ArtLine_500.pkl\")\n",
133 | "path = Path(\".\")\n",
134 | "learn=load_learner(path, 'ArtLine_500.pkl')"
135 | ],
136 | "execution_count": null,
137 | "outputs": []
138 | },
139 | {
140 | "cell_type": "code",
141 | "metadata": {
142 | "id": "teAfEed9GOdX"
143 | },
144 | "source": [
145 | "# check YouTubeVideo\n",
146 | "from IPython.display import YouTubeVideo\n",
147 | "YOUTUBE_ID ='m0u0uAhoxq4'\n",
148 | "YouTubeVideo(YOUTUBE_ID)"
149 | ],
150 | "execution_count": null,
151 | "outputs": []
152 | },
153 | {
154 | "cell_type": "code",
155 | "metadata": {
156 | "id": "dl9MwHi89M4C"
157 | },
158 | "source": [
159 | "# download YouTubeVideo\n",
160 | "!rm -rf youtube.mp4\n",
161 | "!youtube-dl -f 'bestvideo[ext=mp4]' --output \"youtube.%(ext)s\" https://www.youtube.com/watch?v=$YOUTUBE_ID"
162 | ],
163 | "execution_count": null,
164 | "outputs": []
165 | },
166 | {
167 | "cell_type": "code",
168 | "metadata": {
169 | "id": "yoVhz1KEiWcV"
170 | },
171 | "source": [
172 | "# edit YouTubeVideo\n",
173 | "import os\n",
174 | "os.makedirs('video', exist_ok=True)\n",
175 | "!ffmpeg -i youtube.mp4 -filter:v 'crop=300:300:170:0' -ss 00:00:01 -t 00:00:02 -async 1 ./video/takikuri.mp4"
176 | ],
177 | "execution_count": null,
178 | "outputs": []
179 | },
180 | {
181 | "cell_type": "code",
182 | "metadata": {
183 | "id": "_LUgHbTyMlJt"
184 | },
185 | "source": [
186 | "# video2frames\n",
187 | "import os\n",
188 | "import cv2\n",
189 | "\n",
190 | "os.makedirs('images', exist_ok=True)\n",
191 | " \n",
192 | "def video_2_frames(video_file='./video/takikuri.mp4', \n",
193 | " image_dir='./images/', \n",
194 | " image_file='img_%s.png'):\n",
195 | " \n",
196 | " # Initial setting\n",
197 | " i = 0\n",
198 | " interval = 3\n",
199 | " length = 30\n",
200 | " \n",
201 | " cap = cv2.VideoCapture(video_file)\n",
202 | " while(cap.isOpened()):\n",
203 | " flag, frame = cap.read() \n",
204 | " if flag == False: \n",
205 | " break\n",
206 | " if i == length*interval:\n",
207 | " break\n",
208 | " if i % interval == 0: \n",
209 | " cv2.imwrite(image_dir+image_file % str(i).zfill(6), frame)\n",
210 | " print('Save', image_dir+image_file % str(i).zfill(6))\n",
211 | " i += 1 \n",
212 | " cap.release() \n",
213 | " \n",
214 | "def main():\n",
215 | " video_2_frames()\n",
216 | " \n",
217 | "if __name__ == '__main__':\n",
218 | " main() \n"
219 | ],
220 | "execution_count": null,
221 | "outputs": []
222 | },
223 | {
224 | "cell_type": "code",
225 | "metadata": {
226 | "id": "aeSLtsxqHqEV"
227 | },
228 | "source": [
229 | "# frames2ArtLines\n",
230 | "import os\n",
231 | "import torchvision.utils as vutils\n",
232 | "\n",
233 | "os.makedirs('out', exist_ok=True)\n",
234 | "\n",
235 | "input_path = './images' \n",
236 | "output_path = './out'\n",
237 | "files = os.listdir(input_path)\n",
238 | "files.sort()\n",
239 | "\n",
240 | "temp =[add_metrics]\n",
241 | "\n",
242 | "for file in files:\n",
243 | " print(file)\n",
244 | " if file == '.ipynb_checkpoints':\n",
245 | " continue\n",
246 | " img = PIL.Image.open(input_path+'/'+file).convert(\"RGB\")\n",
247 | " img_t = T.ToTensor()(img)\n",
248 | " img_fast = Image(img_t)\n",
249 | " p,img_hr,b = learn.predict(img_fast)\n",
250 | " vutils.save_image(img_hr,output_path+'/'+file)\n",
251 | " "
252 | ],
253 | "execution_count": null,
254 | "outputs": []
255 | },
256 | {
257 | "cell_type": "code",
258 | "metadata": {
259 | "id": "NbmPUrXcQNRm"
260 | },
261 | "source": [
262 | "# ArtLines2GIF\n",
263 | "from PIL import Image\n",
264 | "import glob\n",
265 | " \n",
266 | "files = sorted(glob.glob('./out/*.png'))\n",
267 | "images = list(map(lambda file: Image.open(file), files))\n",
268 | "images[0].save('./takikuri.gif', save_all=True, \n",
269 | " append_images=images[1:], \n",
270 | " duration=100, loop=0)"
271 | ],
272 | "execution_count": null,
273 | "outputs": []
274 | },
275 | {
276 | "cell_type": "code",
277 | "metadata": {
278 | "id": "Tu3MDH6Q1pzV"
279 | },
280 | "source": [
281 | "# display GIF\n",
282 | "from IPython.display import Image\n",
283 | "Image('./takikuri.gif', format='png')"
284 | ],
285 | "execution_count": null,
286 | "outputs": []
287 | }
288 | ]
289 | }
--------------------------------------------------------------------------------
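A note on ArtLine_make_gif.ipynb above: the same `learn.predict` call used on the extracted video frames also works on a single picture. Below is a minimal sketch (my addition, reusing the objects defined in the notebook); the frame path is one produced by `video_2_frames`, and `Image` is re-imported from fastai because later cells shadow it with PIL's and IPython's `Image`.

```python
import PIL.Image
import torchvision.transforms as T
import torchvision.utils as vutils
from fastai.vision import Image as FastaiImage  # avoid the Image name shadowing above

# Run ArtLine on one frame instead of the whole sequence.
img = PIL.Image.open('./images/img_000000.png').convert('RGB')
img_t = T.ToTensor()(img)
p, img_hr, b = learn.predict(FastaiImage(img_t))
vutils.save_image(img_hr, './single_artline.png')
```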
/CLIP_demo.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "colab_type": "text",
7 | "id": "view-in-github"
8 | },
9 | "source": [
10 | "
"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {
16 | "id": "EXnkFjoZy9kd"
17 | },
18 | "source": [
19 | "# セットアップ"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": null,
25 | "metadata": {
26 | "id": "0BpdJkdBssk9"
27 | },
28 | "outputs": [],
29 | "source": [
30 | "# --- セットアップ ---\n",
31 | "\n",
32 | "# 1.pytorchバージョン変更\n",
33 | "! pip install torch==1.7.1+cu101 torchvision==0.8.2+cu101 -f https://download.pytorch.org/whl/torch_stable.html #ftfy regex\n",
34 | "\n",
35 | "# 2.GithubからCLIPをコピー\n",
36 | "! git clone https://github.com/openai/CLIP.git\n",
37 | "%cd CLIP/clip\n",
38 | "\n",
39 | "# 3.CLIPモデルの重みをダウンロード\n",
40 | "MODELS = {\n",
41 | " \"RN50\": \"https://openaipublic.azureedge.net/clip/models/afeb0e10f9e5a86da6080e35cf09123aca3b358a0c3e3b6c78a7b63bc04b6762/RN50.pt\",\n",
42 | " \"RN101\": \"https://openaipublic.azureedge.net/clip/models/8fa8567bab74a42d41c5915025a8e4538c3bdbe8804a470a72f30b0d94fab599/RN101.pt\",\n",
43 | " \"RN50x4\": \"https://openaipublic.azureedge.net/clip/models/7e526bd135e493cef0776de27d5f42653e6b4c8bf9e0f653bb11773263205fdd/RN50x4.pt\",\n",
44 | " \"ViT-B/32\": \"https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt\", \n",
45 | "}\n",
46 | "! wget {MODELS[\"ViT-B/32\"]} -O model.pt\n",
47 | "\n",
48 | "# 4.simple_tokenizer インストール\n",
49 | "! pip install ftfy regex\n",
50 | "from simple_tokenizer import *\n",
51 | "tokenizer = SimpleTokenizer()\n",
52 | "\n",
53 | "# 5.サンプル画像ダウンロード\n",
54 | "! pip install --upgrade gdown\n",
55 | "import gdown\n",
56 | "gdown.download('https://drive.google.com/uc?id=1vcxH6JOtwh_-FoZ8SNXYlHF9qCi3YoDH', 'food_101.zip', quiet=False)\n",
57 | "! unzip food_101.zip"
58 | ]
59 | },
60 | {
61 | "cell_type": "markdown",
62 | "metadata": {
63 | "id": "qJCVYoXrK1ty"
64 | },
65 | "source": [
66 | "# CLIPモデルの仕様確認"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": null,
72 | "metadata": {
73 | "id": "IBRVTY9lbGm8"
74 | },
75 | "outputs": [],
76 | "source": [
77 | "# --- CLIPモデルの仕様確認 ----\n",
78 | "\n",
79 | "import numpy as np\n",
80 | "import torch\n",
81 | "\n",
82 | "model = torch.jit.load(\"model.pt\").cuda().eval()\n",
83 | "input_resolution = model.input_resolution.item()\n",
84 | "context_length = model.context_length.item()\n",
85 | "vocab_size = model.vocab_size.item()\n",
86 | "\n",
87 | "print(\"Model parameters:\", f\"{np.sum([int(np.prod(p.shape)) for p in model.parameters()]):,}\")\n",
88 | "print(\"Input resolution:\", input_resolution)\n",
89 | "print(\"Context length:\", context_length)\n",
90 | "print(\"Vocab size:\", vocab_size)"
91 | ]
92 | },
93 | {
94 | "cell_type": "markdown",
95 | "metadata": {
96 | "id": "kmQXf9wrK-6t"
97 | },
98 | "source": [
99 | "# simple_tokenizer の動作確認"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": null,
105 | "metadata": {
106 | "id": "LFyjzC85LKrH"
107 | },
108 | "outputs": [],
109 | "source": [
110 | "# テキストをトークンへ変換1\n",
111 | "index = tokenizer.encode('I ate an apple')\n",
112 | "print(index)"
113 | ]
114 | },
115 | {
116 | "cell_type": "code",
117 | "execution_count": null,
118 | "metadata": {
119 | "id": "SgmKMg6mV5RA"
120 | },
121 | "outputs": [],
122 | "source": [
123 | "# テキストをトークンへ変換2\n",
124 | "index = tokenizer.encode('image segmentation')\n",
125 | "print(index)"
126 | ]
127 | },
128 | {
129 | "cell_type": "markdown",
130 | "metadata": {
131 | "id": "6H6tX85TKA0n"
132 | },
133 | "source": [
134 | "# 画像の前処理\n"
135 | ]
136 | },
137 | {
138 | "cell_type": "code",
139 | "execution_count": null,
140 | "metadata": {
141 | "id": "d6cpiIFHp9N6"
142 | },
143 | "outputs": [],
144 | "source": [
145 | "# --- 画像の前処理 ----\n",
146 | "\n",
147 | "from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize\n",
148 | "from PIL import Image\n",
149 | "import glob\n",
150 | "\n",
151 | "# 設定\n",
152 | "preprocess = Compose([\n",
153 | " Resize(input_resolution, interpolation=Image.BICUBIC),\n",
154 | " CenterCrop(input_resolution),\n",
155 | " ToTensor()\n",
156 | "])\n",
157 | "\n",
158 | "image_mean = torch.tensor([0.48145466, 0.4578275, 0.40821073]).cuda()\n",
159 | "image_std = torch.tensor([0.26862954, 0.26130258, 0.27577711]).cuda()\n",
160 | "\n",
161 | "# 前処理実行\n",
162 | "images =[]\n",
163 | "files = glob.glob('./food_101/*.jpg')\n",
164 | "files.sort()\n",
165 | "for file in files:\n",
166 | " image = preprocess(Image.open(file).convert(\"RGB\"))\n",
167 | " images.append(image)\n",
168 | "\n",
169 | "image_input = torch.tensor(np.stack(images)).cuda()\n",
170 | "image_input -= image_mean[:, None, None]\n",
171 | "image_input /= image_std[:, None, None]\n",
172 | "\n",
173 | "print('image_input.shape = ', image_input.shape)"
174 | ]
175 | },
176 | {
177 | "cell_type": "markdown",
178 | "metadata": {
179 | "id": "L_uKiB2nKQJX"
180 | },
181 | "source": [
182 | "# テキストの前処理"
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "execution_count": null,
188 | "metadata": {
189 | "id": "C4S__zCGy2MT"
190 | },
191 | "outputs": [],
192 | "source": [
193 | "# --- テキストの前処理 ----\n",
194 | "\n",
195 | "# 分類ラベルの設定\n",
196 | "labels = ['takoyaki', 'susi', 'spagetti', 'ramen', 'pizza', 'omelette', 'humburger', 'gyoza']\n",
197 | "\n",
198 | "# ラベルを文の形のトークンへ変換\n",
199 | "text_descriptions = [f\"This is a photo of a {label}\" for label in labels] \n",
200 | "sot_token = tokenizer.encoder['<|startoftext|>']\n",
201 | "eot_token = tokenizer.encoder['<|endoftext|>']\n",
202 | "text_tokens = [[sot_token] + tokenizer.encode(desc) + [eot_token] for desc in text_descriptions]\n",
203 | "text_input = torch.zeros(len(text_tokens), model.context_length, dtype=torch.long)\n",
204 | "\n",
205 | "# トークンをテンソルに変換\n",
206 | "for i, tokens in enumerate(text_tokens):\n",
207 | " text_input[i, :len(tokens)] = torch.tensor(tokens)\n",
208 | "\n",
209 | "text_input = text_input.cuda()"
210 | ]
211 | },
212 | {
213 | "cell_type": "code",
214 | "execution_count": null,
215 | "metadata": {
216 | "id": "1tFp8PHOKLdE"
217 | },
218 | "outputs": [],
219 | "source": [
220 | "# 各データの先頭を表示\n",
221 | "print(text_descriptions[0]) \n",
222 | "print(text_tokens[0])\n",
223 | "print(text_input[0])\n",
224 | "print(text_input.shape)"
225 | ]
226 | },
227 | {
228 | "cell_type": "markdown",
229 | "metadata": {
230 | "id": "2u4oePsAKdJO"
231 | },
232 | "source": [
233 | "# 画像とテキストのcos類似度を計算"
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": null,
239 | "metadata": {
240 | "id": "iqjF6NbBCT0a"
241 | },
242 | "outputs": [],
243 | "source": [
244 | "# --- 画像とテキストのCOS類似度を計算 ----\n",
245 | "\n",
246 | "# CLIPモデルで画像とテキストの特徴を抽出\n",
247 | "with torch.no_grad():\n",
248 | " image_features = model.encode_image(image_input).float()\n",
249 | " text_features = model.encode_text(text_input).float()\n",
250 | " text_features /= text_features.norm(dim=-1, keepdim=True) \n",
251 | "\n",
252 | "# 画像の特徴とテキストの特徴からCOS類似度を計算\n",
253 | "text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)\n",
254 | "top_probs, top_labels = text_probs.cpu().topk(5, dim=-1)\n",
255 | "\n",
256 | "print(image_features.shape)\n",
257 | "print(text_features.shape)"
258 | ]
259 | },
260 | {
261 | "cell_type": "code",
262 | "execution_count": null,
263 | "metadata": {
264 | "id": "EYqMIccpabig"
265 | },
266 | "outputs": [],
267 | "source": [
268 | "# COS類似度の計算結果をそのまま表示\n",
269 | "print(text_probs)"
270 | ]
271 | },
272 | {
273 | "cell_type": "markdown",
274 | "metadata": {
275 | "id": "OIM5PWmSKlVm"
276 | },
277 | "source": [
278 | "# 予測結果の表示"
279 | ]
280 | },
281 | {
282 | "cell_type": "code",
283 | "execution_count": null,
284 | "metadata": {
285 | "id": "s5HrieUc34n_"
286 | },
287 | "outputs": [],
288 | "source": [
289 | "# --- 予測結果の表示 ---\n",
290 | "\n",
291 | "import matplotlib.pyplot as plt\n",
292 | "\n",
293 | "def pred_disp(i, image):\n",
294 | " plt.figure(figsize=(8, 4))\n",
295 | " plt.subplot(1, 2, 1)\n",
296 | " plt.imshow(image.permute(1, 2, 0))\n",
297 | " plt.axis(\"off\")\n",
298 | "\n",
299 | " plt.subplot(1, 2, 2)\n",
300 | " y = np.arange(top_probs.shape[-1])\n",
301 | " plt.grid()\n",
302 | " plt.barh(y, top_probs[i])\n",
303 | " plt.gca().invert_yaxis()\n",
304 | " plt.gca().set_axisbelow(True)\n",
305 | " plt.yticks(y, [labels[index] for index in top_labels[i].numpy()])\n",
306 | " plt.xlabel(\"probability\")\n",
307 | "\n",
308 | " plt.subplots_adjust(wspace=0.5)\n",
309 | " plt.show()\n",
310 | "\n",
311 | "for i, image in enumerate(images):\n",
312 | " pred_disp(i, image)"
313 | ]
314 | }
315 | ],
316 | "metadata": {
317 | "accelerator": "GPU",
318 | "colab": {
319 | "collapsed_sections": [],
320 | "include_colab_link": true,
321 | "name": "CLIP_demo",
322 | "provenance": [],
323 | "toc_visible": true
324 | },
325 | "kernelspec": {
326 | "display_name": "Python 3",
327 | "language": "python",
328 | "name": "python3"
329 | },
330 | "language_info": {
331 | "codemirror_mode": {
332 | "name": "ipython",
333 | "version": 3
334 | },
335 | "file_extension": ".py",
336 | "mimetype": "text/x-python",
337 | "name": "python",
338 | "nbconvert_exporter": "python",
339 | "pygments_lexer": "ipython3",
340 | "version": "3.7.9"
341 | }
342 | },
343 | "nbformat": 4,
344 | "nbformat_minor": 1
345 | }
346 |
--------------------------------------------------------------------------------
/DALL_E.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "accelerator": "GPU",
6 | "colab": {
7 | "name": "DALL_E",
8 | "provenance": [],
9 | "collapsed_sections": [],
10 | "toc_visible": true,
11 | "include_colab_link": true
12 | },
13 | "kernelspec": {
14 | "display_name": "Python 3",
15 | "name": "python3"
16 | }
17 | },
18 | "cells": [
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {
22 | "id": "view-in-github",
23 | "colab_type": "text"
24 | },
25 | "source": [
26 | "
"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {
32 | "id": "2nD1n0xEBcko"
33 | },
34 | "source": [
35 | "# セットアップ"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "metadata": {
41 | "id": "N65H8lL1cR1V"
42 | },
43 | "source": [
44 | "# GPUスペック確認\n",
45 | "!nvidia-smi -L"
46 | ],
47 | "execution_count": null,
48 | "outputs": []
49 | },
50 | {
51 | "cell_type": "code",
52 | "metadata": {
53 | "id": "F4iTie2EKrbb"
54 | },
55 | "source": [
56 | "# Pytorchバージョン変更\n",
57 | "! pip install torch==1.7.1+cu110 torchvision==0.8.2+cu110 -f https://download.pytorch.org/whl/torch_stable.html \n",
58 | "\n",
59 | "# Pytorch画像処理ライブラリー・インストール\n",
60 | "! pip install kornia==0.5.0\n",
61 | "\n",
62 | "# CLIP関連コードのコピー\n",
63 | "! git clone https://github.com/openai/CLIP.git\n",
64 | "%cd /content/CLIP/\n",
65 | "\n",
66 | "# CLIPのモデル化\n",
67 | "! pip install ftfy regex\n",
68 | "import clip\n",
69 | "model, preprocess = clip.load('ViT-B/32', jit=True) \n",
70 | "model = model.eval() \n",
71 | "\n",
72 | "# DALL-Eのモデル化\n",
73 | "! pip install DALL-E\n",
74 | "from dall_e import map_pixels, unmap_pixels, load_model\n",
75 | "dec = load_model(\"https://cdn.openai.com/dall-e/decoder.pkl\", 'cuda') \n"
76 | ],
77 | "execution_count": null,
78 | "outputs": []
79 | },
80 | {
81 | "cell_type": "markdown",
82 | "metadata": {
83 | "id": "KAcixx9Z3XYH"
84 | },
85 | "source": [
86 | "# ライブラリー・インポート&関数定義\n"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "metadata": {
92 | "id": "piJOg9MY7khd"
93 | },
94 | "source": [
95 | "import torch\n",
96 | "import numpy as np\n",
97 | "import torchvision\n",
98 | "import torchvision.transforms.functional as TF\n",
99 | "import torchvision.transforms as T\n",
100 | "import kornia\n",
101 | "import PIL\n",
102 | "import os, io, sys\n",
103 | "import random\n",
104 | "import imageio\n",
105 | "from IPython import display\n",
106 | "from IPython.core.interactiveshell import InteractiveShell\n",
107 | "InteractiveShell.ast_node_interactivity = \"all\"\n",
108 | "from google.colab import output\n",
109 | "import requests\n",
110 | "\n",
111 | "# 初期設定\n",
112 | "im_shape = [512, 512, 3]\n",
113 | "sideX, sideY, channels = im_shape\n",
114 | "target_image_size = sideX\n",
115 | "tau_value = 2.\n",
116 | "\n",
117 | "# 画像表示・保存\n",
118 | "def displ(img):\n",
119 | " img = np.array(img)[:,:,:]\n",
120 | " img = np.transpose(img, (1, 2, 0))\n",
121 | " imageio.imwrite('output.png', np.array(img))\n",
122 | " return display.Image('output.png')\n",
123 | "\n",
124 | "# 画像のランダム切り出し\n",
125 | "def augment(out, cutn=16):\n",
126 | " p_s = []\n",
127 | " for ch in range(cutn):\n",
128 | " sizey = int(torch.zeros(1,).uniform_(.5, .99)*sideY)\n",
129 | " sizex = int(torch.zeros(1,).uniform_(.5, .99)*sideX)\n",
130 | " offsetx = torch.randint(0, sideX - sizex, ())\n",
131 | " offsety = torch.randint(0, sideY - sizey, ())\n",
132 | " apper = out[:, :, offsetx:offsetx + sizex, offsety:offsety + sizey]\n",
133 | " apper = apper + .1*torch.rand(1,1,1,1).cuda()*torch.randn_like(apper, requires_grad=True)\n",
134 | " apper = torch.nn.functional.interpolate(apper, (224,224), mode='bilinear')\n",
135 | " p_s.append(apper)\n",
136 | " into = augs(torch.cat(p_s, 0))\n",
137 | " return into\n",
138 | "\n",
139 | "# 正規化と回転設定\n",
140 | "nom = torchvision.transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711))\n",
141 | "augs = kornia.augmentation.RandomRotation(30).cuda()\n",
142 | "\n",
143 | "# パラメータの設定\n",
144 | "class Pars(torch.nn.Module):\n",
145 | " def __init__(self):\n",
146 | " super(Pars, self).__init__()\n",
147 | " hots = torch.nn.functional.one_hot((torch.arange(0, 8192).to(torch.int64)), num_classes=8192)\n",
148 | " rng = torch.zeros(1, 64*64, 8192).uniform_()\n",
149 | " for i in range(64*64):\n",
150 | " rng[0,i] = hots[[np.random.randint(8191)]]\n",
151 | " rng = rng.permute(0, 2, 1)\n",
152 | " self.normu = torch.nn.Parameter(rng.cuda().view(1, 8192, 64*64))\n",
153 | " \n",
154 | " def forward(self): \n",
155 | " normu = torch.nn.functional.gumbel_softmax(self.normu.reshape(1,64*64,8192), dim=1, tau=tau_value).view(1, 8192, 64, 64)\n",
156 | " return normu \n"
157 | ],
158 | "execution_count": null,
159 | "outputs": []
160 | },
161 | {
162 | "cell_type": "markdown",
163 | "metadata": {
164 | "id": "XaocGDQXz3Zx"
165 | },
166 | "source": [
167 | "# テキストから画像の生成"
168 | ]
169 | },
170 | {
171 | "cell_type": "markdown",
172 | "metadata": {
173 | "id": "mGWtvVB-arNH"
174 | },
175 | "source": [
176 | "**テキストから特徴ベクトルを抽出**"
177 | ]
178 | },
179 | {
180 | "cell_type": "code",
181 | "metadata": {
182 | "id": "PGBTOiJqWgZ3"
183 | },
184 | "source": [
185 | "# テキスト入力\n",
186 | "text_input = 'an armchair in the shape of an avocado'\n",
187 | "\n",
188 | "# テキストを特徴ベクトルに変換\n",
189 | "token = clip.tokenize(text_input) \n",
190 | "text_v = model.encode_text(token.cuda()).detach().clone() "
191 | ],
192 | "execution_count": null,
193 | "outputs": []
194 | },
195 | {
196 | "cell_type": "markdown",
197 | "metadata": {
198 | "id": "WztSrRF23Rqg"
199 | },
200 | "source": [
201 | "**学習**"
202 | ]
203 | },
204 | {
205 | "cell_type": "code",
206 | "metadata": {
207 | "id": "NwYNUzzovPEW"
208 | },
209 | "source": [
210 | "# パラメータリセット\n",
211 | "latent = Pars().cuda() \n",
212 | "param = [latent.normu] \n",
213 | "optimizer = torch.optim.Adam([{'params': param, 'lr': .01}]) \n",
214 | "\n",
215 | "# images フォルダーリセット\n",
216 | "import os\n",
217 | "import shutil\n",
218 | "if os.path.isdir('images'):\n",
219 | " shutil.rmtree('images')\n",
220 | "os.makedirs('images', exist_ok=True)\n",
221 | "\n",
222 | "# 学習ループ\n",
223 | "for iteration in range(1001):\n",
224 | "\n",
225 | " # --- 順伝播 ---\n",
226 | " # パラメータから画像を生成\n",
227 | " out = unmap_pixels(torch.sigmoid(dec(latent())[:, :3].float()))\n",
228 | " # 画像をランダム切り出し・回転 \n",
229 | " into = augment(out)\n",
230 | " # 画像を正規化\n",
231 | " into = nom((into))\n",
232 | " # 画像から特徴ベクトルを取得\n",
233 | " image_v = model.encode_image(into)\n",
234 | " # テキストと画像の特徴ベクトルのCOS類似度を計算 \n",
235 | " loss = -torch.cosine_similarity(text_v, image_v).mean() \n",
236 | "\n",
237 | " # 逆伝播\n",
238 | " optimizer.zero_grad()\n",
239 | " loss.backward()\n",
240 | " optimizer.step() \n",
241 | "\n",
242 | " # 学習率の調整\n",
243 | " for g in optimizer.param_groups:\n",
244 | " g['lr'] = g['lr']*1.005\n",
245 | " g['lr'] = min(g['lr'], .12)\n",
246 | "\n",
247 | " # ログ表示 \n",
248 | " if iteration % 50 == 0:\n",
249 | " with torch.no_grad():\n",
250 | "\n",
251 | " # 生成画像の表示・保存\n",
252 | " out = unmap_pixels(torch.sigmoid(dec(latent())[:, :3]).float()) \n",
253 | " displ(out.cpu()[0]) \n",
254 | " shutil.copy('output.png', './images/%s.png'%str(int(iteration/50)).zfill(6))\n",
255 | "\n",
256 | " # データ表示\n",
257 | " print('iter = ',iteration)\n",
258 | " for g in optimizer.param_groups:\n",
259 | " print('lr = ', g['lr'])\n",
260 | " print('tau_value = ', tau_value)\n",
261 | " print('loss = ',loss.item())\n",
262 | " print('\\n')\n"
263 | ],
264 | "execution_count": null,
265 | "outputs": []
266 | },
267 | {
268 | "cell_type": "markdown",
269 | "metadata": {
270 | "id": "zclMtW3CaSNX"
271 | },
272 | "source": [
273 | "# 学習過程の動画作成"
274 | ]
275 | },
276 | {
277 | "cell_type": "markdown",
278 | "metadata": {
279 | "id": "2zxCTHkJbBD9"
280 | },
281 | "source": [
282 | "**mp4動画の作成**"
283 | ]
284 | },
285 | {
286 | "cell_type": "code",
287 | "metadata": {
288 | "id": "BcIHq0zsY8OC"
289 | },
290 | "source": [
291 | "# images フォルダーの最後の画像を5枚コピー\n",
292 | "import shutil\n",
293 | "for i in range(21,26,1):\n",
294 | " shutil.copy('output.png', './images/%s.png'%str(int(i)).zfill(6))\n",
295 | "\n",
296 | "# ouput.mp4を一旦削除\n",
297 | "import os \n",
298 | "if os.path.exists('./output.mp4'):\n",
299 | " os.remove('./output.mp4')\n",
300 | "\n",
301 | "# images フォルダーの画像から動画を生成\n",
302 | "! ffmpeg -r 5 -i images/%06d.png -vcodec libx264 -pix_fmt yuv420p output.mp4"
303 | ],
304 | "execution_count": null,
305 | "outputs": []
306 | },
307 | {
308 | "cell_type": "markdown",
309 | "metadata": {
310 | "id": "IzoB1G26bFwX"
311 | },
312 | "source": [
313 | "**mp4動画の再生**"
314 | ]
315 | },
316 | {
317 | "cell_type": "code",
318 | "metadata": {
319 | "id": "n1rs5DnwZvuh"
320 | },
321 | "source": [
322 | "from IPython.display import HTML\n",
323 | "from base64 import b64encode\n",
324 | " \n",
325 | "mp4 = open('./output.mp4', 'rb').read()\n",
326 | "data_url = 'data:video/mp4;base64,' + b64encode(mp4).decode()\n",
327 | "HTML(f\"\"\"\n",
328 | "\"\"\")"
331 | ],
332 | "execution_count": null,
333 | "outputs": []
334 | }
335 | ]
336 | }
337 |
--------------------------------------------------------------------------------
/DALL_e_sample.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "colab_type": "text",
7 | "id": "view-in-github"
8 | },
9 | "source": [
10 | "
"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {
16 | "id": "df134_Y0L9Wv"
17 | },
18 | "source": [
19 | "# SetUP"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": null,
25 | "metadata": {
26 | "id": "ILu1-B-xLPqx"
27 | },
28 | "outputs": [],
29 | "source": [
30 | "!nvidia-smi -L"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": null,
36 | "metadata": {
37 | "id": "EKnjNf_TLf5g"
38 | },
39 | "outputs": [],
40 | "source": [
41 | "!pip install torch==1.7.1+cu101 torchvision==0.8.2+cu101 -f https://download.pytorch.org/whl/torch_stable.html ftfy regex\n",
42 | "!pip install DALL-E\n",
43 | "!pip install ftfy\n",
44 | "!git clone https://github.com/openai/CLIP.git\n",
45 | "%cd /content/CLIP/"
46 | ]
47 | },
48 | {
49 | "cell_type": "markdown",
50 | "metadata": {
51 | "id": "92iGUGG8MGE2"
52 | },
53 | "source": [
54 | "# Import Library & Define"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": null,
60 | "metadata": {
61 | "id": "qiKjXi57Lic9"
62 | },
63 | "outputs": [],
64 | "source": [
65 | "import torch\n",
66 | "import numpy as np\n",
67 | "import torchvision\n",
68 | "import torchvision.transforms.functional as TF\n",
69 | "import PIL\n",
70 | "import matplotlib.pyplot as plt\n",
71 | "import os\n",
72 | "import random\n",
73 | "import imageio\n",
74 | "from IPython import display\n",
75 | "from IPython.core.interactiveshell import InteractiveShell\n",
76 | "import glob\n",
77 | "from google.colab import output\n",
78 | "InteractiveShell.ast_node_interactivity = \"all\"\n",
79 | "\n",
80 | "# probably don't mess with this unless you're changing generator size\n",
81 | "im_shape = [512, 512, 3]\n",
82 | "sideX, sideY, channels = im_shape\n",
83 | "\n",
84 | "def displ(img, pre_scaled=True):\n",
85 | " img = np.array(img)[:,:,:]\n",
86 | " img = np.transpose(img, (1, 2, 0))\n",
87 | " if not pre_scaled:\n",
88 | " img = scale(img, 48*4, 32*4)\n",
89 | " imageio.imwrite(str(3) + '.png', np.array(img))\n",
90 | " return display.Image(str(3)+'.png')\n",
91 | "\n",
92 | "def gallery(array, ncols=2):\n",
93 | " nindex, height, width, intensity = array.shape\n",
94 | " nrows = nindex//ncols\n",
95 | " assert nindex == nrows*ncols\n",
96 | " # want result.shape = (height*nrows, width*ncols, intensity)\n",
97 | " result = (array.reshape(nrows, ncols, height, width, intensity)\n",
98 | " .swapaxes(1,2)\n",
99 | " .reshape(height*nrows, width*ncols, intensity))\n",
100 | " return result\n",
101 | "\n",
102 | "def card_padded(im, to_pad=3):\n",
103 | " return np.pad(np.pad(np.pad(im, [[1,1], [1,1], [0,0]],constant_values=0), [[2,2], [2,2], [0,0]],constant_values=1),\n",
104 | " [[to_pad,to_pad], [to_pad,to_pad], [0,0]],constant_values=0)\n",
105 | "\n",
106 | "def get_all(img):\n",
107 | " img = np.transpose(img, (0,2,3,1))\n",
108 | " cards = np.zeros((img.shape[0], sideX+12, sideY+12, 3))\n",
109 | " for i in range(len(img)):\n",
110 | " cards[i] = card_padded(img[i])\n",
111 | " print(img.shape)\n",
112 | " cards = gallery(cards)\n",
113 | " imageio.imwrite(str(3) + '.png', np.array(cards))\n",
114 | " return display.Image(str(3)+'.png')"
115 | ]
116 | },
117 | {
118 | "cell_type": "markdown",
119 | "metadata": {
120 | "id": "kiERo_W-MN0e"
121 | },
122 | "source": [
123 | "# Perceptor"
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "execution_count": null,
129 | "metadata": {
130 | "id": "Yx6ejrn2LnUO"
131 | },
132 | "outputs": [],
133 | "source": [
134 | "import clip\n",
135 | "clip.available_models()\n",
136 | "\n",
137 | "# Load the model\n",
138 | "perceptor, preprocess = clip.load('ViT-B/32', jit=True)\n",
139 | "perceptor = perceptor.eval()"
140 | ]
141 | },
142 | {
143 | "cell_type": "markdown",
144 | "metadata": {
145 | "id": "j6SO-LlEMRm2"
146 | },
147 | "source": [
148 | "# Generator"
149 | ]
150 | },
151 | {
152 | "cell_type": "code",
153 | "execution_count": null,
154 | "metadata": {
155 | "id": "2X2_bsdnLp7Y"
156 | },
157 | "outputs": [],
158 | "source": [
159 | "import io\n",
160 | "import os, sys\n",
161 | "import requests\n",
162 | "import PIL\n",
163 | "import torch\n",
164 | "import torchvision.transforms as T\n",
165 | "import torchvision.transforms.functional as TF\n",
166 | "from dall_e import map_pixels, unmap_pixels, load_model\n",
167 | "\n",
168 | "target_image_size = sideX\n",
169 | "\n",
170 | "def preprocess(img):\n",
171 | " s = min(img.size)\n",
172 | " \n",
173 | " if s < target_image_size:\n",
174 | " raise ValueError(f'min dim for image {s} < {target_image_size}')\n",
175 | " \n",
176 | " r = target_image_size / s\n",
177 | " s = (round(r * img.size[1]), round(r * img.size[0]))\n",
178 | " img = TF.resize(img, s, interpolation=PIL.Image.LANCZOS)\n",
179 | " img = TF.center_crop(img, output_size=2 * [target_image_size])\n",
180 | " img = torch.unsqueeze(T.ToTensor()(img), 0)\n",
181 | " return map_pixels(img)\n",
182 | "\n",
183 | "model = load_model(\"https://cdn.openai.com/dall-e/decoder.pkl\", 'cuda')"
184 | ]
185 | },
186 | {
187 | "cell_type": "markdown",
188 | "metadata": {
189 | "id": "IrZovePXMUsM"
190 | },
191 | "source": [
192 | "# Text input"
193 | ]
194 | },
195 | {
196 | "cell_type": "code",
197 | "execution_count": null,
198 | "metadata": {
199 | "id": "QkRilIfDLtYP"
200 | },
201 | "outputs": [],
202 | "source": [
203 | "text_input = \"a beautiful and mysterious castle designed by Escher\" \n",
204 | "tau_value =1.2"
205 | ]
206 | },
207 | {
208 | "cell_type": "markdown",
209 | "metadata": {
210 | "id": "0QO-DW36MYM4"
211 | },
212 | "source": [
213 | "# Latent coordinate"
214 | ]
215 | },
216 | {
217 | "cell_type": "code",
218 | "execution_count": null,
219 | "metadata": {
220 | "id": "FHhEmANLLwKF"
221 | },
222 | "outputs": [],
223 | "source": [
224 | "class Pars(torch.nn.Module):\n",
225 | " def __init__(self):\n",
226 | " super(Pars, self).__init__()\n",
227 | " self.normu = torch.nn.Parameter(torch.randn(1, 8192, 64, 64).cuda())\n",
228 | "\n",
229 | " def forward(self):\n",
230 | " # normu = torch.nn.functional.gumbel_softmax(self.normu.view(1, 8192, -1), dim=-1).view(1, 8192, 64, 64)\n",
231 | " normu = torch.nn.functional.gumbel_softmax(self.normu.view(1, 8192, -1), dim=-1, tau=tau_value).view(1, 8192, 64, 64)\n",
232 | " return normu\n",
233 | "\n",
234 | "lats = Pars().cuda()\n",
235 | "mapper = [lats.normu]\n",
236 | "optimizer = torch.optim.Adam([{'params': mapper, 'lr': .1}])\n",
237 | "eps = 0\n",
238 | "tx = clip.tokenize(text_input)\n",
239 | "t = perceptor.encode_text(tx.cuda()).detach().clone()\n",
240 | "nom = torchvision.transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711))\n",
241 | "\n",
242 | "with torch.no_grad():\n",
243 | " mult = 1\n",
244 | " al = unmap_pixels(torch.sigmoid(model(lats()).cpu().float())).numpy()\n",
245 | " for allls in al:\n",
246 | " displ(allls[:3])\n",
247 | " print('\\n')\n",
248 | " # print(torch.topk(lats().view(1, 8192, -1), k=3, dim=-1))"
249 | ]
250 | },
251 | {
252 | "cell_type": "markdown",
253 | "metadata": {
254 | "id": "e6rIEvpQMgyd"
255 | },
256 | "source": [
257 | "# Train"
258 | ]
259 | },
260 | {
261 | "cell_type": "code",
262 | "execution_count": null,
263 | "metadata": {
264 | "id": "m4GLJKWaLzIg"
265 | },
266 | "outputs": [],
267 | "source": [
268 | "def checkin(loss):\n",
269 | " print('''########################################################## ''',loss, '\\n',itt)\n",
270 | " \n",
271 | " with torch.no_grad():\n",
272 | " al = unmap_pixels(torch.sigmoid(model(lats())[:, :3]).cpu().float()).numpy()\n",
273 | " for allls in al:\n",
274 | " displ(allls)\n",
275 | " display.display(display.Image(str(3)+'.png'))\n",
276 | " print('\\n')\n",
277 | " # the people spoke and they love \"ding\"\n",
278 | " # output.eval_js('new Audio(\"https://freesound.org/data/previews/80/80921_1022651-lq.ogg\").play()')\n",
279 | "\n",
280 | "def ascend_txt():\n",
281 | " out = unmap_pixels(torch.sigmoid(model(lats())[:, :3].float()))\n",
282 | " cutn = 64 # improves quality\n",
283 | " p_s = []\n",
284 | " for ch in range(cutn):\n",
285 | " size = int(sideX*torch.zeros(1,).normal_(mean=.8, std=.3).clip(.5, .98))\n",
286 | " offsetx = torch.randint(0, sideX - size, ())\n",
287 | " offsety = torch.randint(0, sideX - size, ())\n",
288 | " apper = out[:, :, offsetx:offsetx + size, offsety:offsety + size]\n",
289 | " apper = torch.nn.functional.interpolate(apper, (224,224), mode='bilinear')\n",
290 | " p_s.append(apper)\n",
291 | " into = torch.cat(p_s, 0)\n",
292 | " # into = torch.nn.functional.interpolate(out, (224,224), mode='nearest')\n",
293 | " into = nom(into)\n",
294 | " iii = perceptor.encode_image(into)\n",
295 | " llls = lats()\n",
296 | " lat_l = 0\n",
297 | " return [lat_l, 10*-torch.cosine_similarity(t, iii).view(-1, 1).T.mean(1)]\n",
298 | "\n",
299 | "def train(i):\n",
300 | " loss1 = ascend_txt()\n",
301 | " loss = loss1[0] + loss1[1]\n",
302 | " loss = loss.mean()\n",
303 | " optimizer.zero_grad()\n",
304 | " loss.backward()\n",
305 | " optimizer.step()\n",
306 | " \n",
307 | " if itt % 100 == 0:\n",
308 | " checkin(loss1)\n",
309 | " shutil.copy('./3.png', './images/%s.png'%str(int(itt/100)).zfill(6))\n",
310 | "\n",
311 | "import shutil\n",
312 | "\n",
313 | "if os.path.isdir('images'):\n",
314 | " shutil.rmtree('images')\n",
315 | "os.makedirs('images', exist_ok=True)\n",
316 | "\n",
317 | "itt = 0\n",
318 | "for asatreat in range(1100):\n",
319 | " train(itt)\n",
320 | " itt+=1"
321 | ]
322 | },
323 | {
324 | "cell_type": "markdown",
325 | "metadata": {
326 | "id": "tSr7K9KYMkOL"
327 | },
328 | "source": [
329 | "# Make movie"
330 | ]
331 | },
332 | {
333 | "cell_type": "code",
334 | "execution_count": null,
335 | "metadata": {
336 | "id": "RVpbCMthL33Z"
337 | },
338 | "outputs": [],
339 | "source": [
340 | "if os.path.exists('./output.mp4'):\n",
341 | " os.remove('./output.mp4')\n",
342 | "\n",
343 | "!ffmpeg -r 2 -i images/%06d.png -vcodec libx264 -pix_fmt yuv420p output.mp4"
344 | ]
345 | }
346 | ],
347 | "metadata": {
348 | "accelerator": "GPU",
349 | "colab": {
350 | "authorship_tag": "ABX9TyMFIG/v1z8z/bWEAoh9r2qK",
351 | "include_colab_link": true,
352 | "name": "DALL_e_sample",
353 | "provenance": []
354 | },
355 | "kernelspec": {
356 | "display_name": "Python 3",
357 | "language": "python",
358 | "name": "python3"
359 | },
360 | "language_info": {
361 | "codemirror_mode": {
362 | "name": "ipython",
363 | "version": 3
364 | },
365 | "file_extension": ".py",
366 | "mimetype": "text/x-python",
367 | "name": "python",
368 | "nbconvert_exporter": "python",
369 | "pygments_lexer": "ipython3",
370 | "version": "3.7.9"
371 | }
372 | },
373 | "nbformat": 4,
374 | "nbformat_minor": 1
375 | }
376 |
--------------------------------------------------------------------------------
/SwapAE.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "SwapAE",
7 | "provenance": [],
8 | "authorship_tag": "ABX9TyPjOM3j/DkMSCkMdDsEhQV7",
9 | "include_colab_link": true
10 | },
11 | "kernelspec": {
12 | "name": "python3",
13 | "display_name": "Python 3"
14 | },
15 | "language_info": {
16 | "name": "python"
17 | },
18 | "accelerator": "GPU"
19 | },
20 | "cells": [
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {
24 | "id": "view-in-github",
25 | "colab_type": "text"
26 | },
27 | "source": [
28 | "
"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": null,
34 | "metadata": {
35 | "id": "1fEyCmb3Drl2"
36 | },
37 | "outputs": [],
38 | "source": [
39 | "# githubからコードをコピー\n",
40 | "! git clone https://github.com/bryandlee/naver-webtoon-faces.git\n",
41 | "%cd naver-webtoon-faces\n",
42 | " \n",
43 | "# 学習済みパラメータのダウンロード\n",
44 | "! pip install --upgrade gdown\n",
45 | "import gdown\n",
46 | "gdown.download('https://drive.google.com/uc?id=1gJ5WPFQIN26xYbujrEAKxG7YduE9S6ch', './checkpoint.zip', quiet=False)\n",
47 | "! unzip checkpoint.zip\n",
48 | " \n",
49 | "# resultsフォルダーを作成\n",
50 | "import os\n",
51 | "os.makedirs('results', exist_ok=True)"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "source": [
57 | "# 関数定義\n",
58 | "import os\n",
59 | "import cv2\n",
60 | "import matplotlib.pyplot as plt\n",
61 | "import torch\n",
62 | "import random\n",
63 | "import numpy as np\n",
64 | "from tqdm import tqdm\n",
65 | " \n",
66 | "def load_image(path, size):\n",
67 | " image = image2tensor(cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB))\n",
68 | " \n",
69 | " w, h = image.shape[-2:]\n",
70 | " if w != h:\n",
71 | " crop_size = min(w, h)\n",
72 | " left = (w - crop_size)//2\n",
73 | " right = left + crop_size\n",
74 | " top = (h - crop_size)//2\n",
75 | " bottom = top + crop_size\n",
76 | " image = image[:,:,left:right, top:bottom]\n",
77 | " \n",
78 | " if image.shape[-1] != size:\n",
79 | " image = torch.nn.functional.interpolate(image, (size, size), mode=\"bilinear\", align_corners=True)\n",
80 | " \n",
81 | " return image\n",
82 | " \n",
83 | "def image2tensor(image):\n",
84 | " image = torch.FloatTensor(image).permute(2,0,1).unsqueeze(0)/255.\n",
85 | " return (image-0.5)/0.5\n",
86 | " \n",
87 | "def tensor2image(tensor):\n",
88 | " tensor = tensor.clamp(-1., 1.).detach().squeeze().permute(1,2,0).cpu().numpy()\n",
89 | " return tensor*0.5 + 0.5\n",
90 | " \n",
91 | "def imshow(img, size=5, cmap='jet'):\n",
92 | " plt.figure(figsize=(size,size))\n",
93 | " plt.imshow(img, cmap=cmap)\n",
94 | " plt.axis('off')\n",
95 | " plt.show()\n",
96 | " \n",
97 | "def horizontal_concat(imgs):\n",
98 | " return torch.cat([img.unsqueeze(0) for img in imgs], 3) \n",
99 | " \n",
100 | "device = 'cuda:0'\n",
101 | "image_size = 256\n",
102 | "torch.set_grad_enabled(False)"
103 | ],
104 | "metadata": {
105 | "id": "9Unj3DF1DzBT"
106 | },
107 | "execution_count": null,
108 | "outputs": []
109 | },
110 | {
111 | "cell_type": "code",
112 | "source": [
113 | "# SwapAEモデルのロード\n",
114 | "from model import Encoder, Generator\n",
115 | " \n",
116 | "ae_model_path = './checkpoint/002000.pt'\n",
117 | " \n",
118 | "encoder = Encoder(32).to(device)\n",
119 | "generator = Generator(32).to(device)\n",
120 | " \n",
121 | "ckpt = torch.load(ae_model_path, map_location=device)\n",
122 | "encoder.load_state_dict(ckpt[\"e_ema\"])\n",
123 | "generator.load_state_dict(ckpt[\"g_ema\"])\n",
124 | " \n",
125 | "encoder.eval()\n",
126 | "generator.eval()\n",
127 | " \n",
128 | "print(f'[SwapAE model loaded] {ae_model_path}')"
129 | ],
130 | "metadata": {
131 | "id": "GK-YnIxcD6ri"
132 | },
133 | "execution_count": null,
134 | "outputs": []
135 | },
136 | {
137 | "cell_type": "code",
138 | "source": [
139 | "from stylegan2.model import Generator as StyleGAN\n",
140 | " \n",
141 | "stylegan_model_path = './checkpoint/stylegan2-naverwebtoon-800k.pt'\n",
142 | "stylegan_ckpt = torch.load(stylegan_model_path, map_location=device)\n",
143 | " \n",
144 | "latent_dim = stylegan_ckpt['args'].latent\n",
145 | " \n",
146 | "stylegan = StyleGAN(image_size, latent_dim, 8).to(device)\n",
147 | "stylegan.load_state_dict(stylegan_ckpt[\"g_ema\"], strict=False)\n",
148 | "stylegan.eval()\n",
149 | "print(f'[StyleGAN2 generator loaded] {stylegan_model_path}\\n')\n",
150 | " \n",
151 | "truncation = 0.7\n",
152 | "trunc = stylegan.mean_latent(4096).detach().clone()\n",
153 | " \n",
154 | "num_samples = 8\n",
155 | " \n",
156 | "latent = stylegan.get_latent(torch.randn(num_samples, latent_dim, device=device))\n",
157 | "imgs_gen, _ = stylegan([latent],\n",
158 | " truncation=truncation,\n",
159 | " truncation_latent=trunc,\n",
160 | " input_is_latent=True,\n",
161 | " randomize_noise=True)\n",
162 | " \n",
163 | "print(\"StyleGAN2 generated images:\")\n",
164 | "imshow(tensor2image(horizontal_concat(imgs_gen)), size=20)\n",
165 | " \n",
166 | "structures, textures = encoder(imgs_gen)\n",
167 | "recon_results = generator(structures, textures)\n",
168 | " \n",
169 | "print(\"SwapAE reconstructions:\") \n",
170 | "imshow(tensor2image(horizontal_concat(recon_results)), size=20)\n",
171 | " \n",
172 | "print(\"Swapping results:\") \n",
173 | "swap_results = generator(structures, textures[0].unsqueeze(0).repeat(num_samples,1))\n",
174 | "imshow(tensor2image(horizontal_concat(swap_results)), size=20)"
175 | ],
176 | "metadata": {
177 | "id": "N5ZQWGNpD-2L"
178 | },
179 | "execution_count": null,
180 | "outputs": []
181 | },
182 | {
183 | "cell_type": "code",
184 | "source": [
185 | "test_image_path = \"./inputs/6.jpg\"\n",
186 | "test_image = load_image(test_image_path, image_size)\n",
187 | " \n",
188 | "num_styles = 5\n",
189 | " \n",
190 | "latent = stylegan.get_latent(torch.randn(num_styles, latent_dim, device=device))\n",
191 | "imgs_gen, _ = stylegan([latent],\n",
192 | " truncation=truncation,\n",
193 | " truncation_latent=trunc,\n",
194 | " input_is_latent=True,\n",
195 | " randomize_noise=True)\n",
196 | " \n",
197 | "inputs = torch.cat([test_image.to(device), imgs_gen])\n",
198 | " \n",
199 | "results = horizontal_concat(inputs.cpu())\n",
200 | " \n",
201 | "structures, target_textures = encoder(inputs)\n",
202 | " \n",
203 | "structure = structures[0].unsqueeze(0).repeat(len(target_textures),1,1,1)\n",
204 | "source_texture = target_textures[0].unsqueeze(0).repeat(len(target_textures),1)\n",
205 | " \n",
206 | "for swap_loc in [1, 5]:\n",
207 | " textures = [source_texture for _ in range(swap_loc)] + [target_textures for _ in range(len(generator.layers) - swap_loc)] \n",
208 | " fake_imgs = generator(structure, textures, noises=0)\n",
209 | " \n",
210 | " results = torch.cat([results, horizontal_concat(fake_imgs).cpu()], dim=2)\n",
211 | " \n",
212 | "imshow(tensor2image(results), 23)\n",
213 | " \n",
214 | "cv2.imwrite('./results/out.jpg', cv2.cvtColor(255*tensor2image(results), cv2.COLOR_BGR2RGB))"
215 | ],
216 | "metadata": {
217 | "id": "1L8XDxnSEG5i"
218 | },
219 | "execution_count": null,
220 | "outputs": []
221 | },
222 | {
223 | "cell_type": "code",
224 | "source": [
225 | "import imageio\n",
226 | " \n",
227 | "swap_loc = 1\n",
228 | " \n",
229 | "num_anchors = 10\n",
230 | "num_interp = 20\n",
231 | "anchors = stylegan.get_latent(torch.randn(num_anchors, 512, device=device))\n",
232 | " \n",
233 | "photo_input = test_image.to(device)\n",
234 | "ori_structure, ori_textures = encoder(photo_input)\n",
235 | " \n",
236 | "black_image = torch.zeros_like(test_image)\n",
237 | " \n",
238 | "with imageio.get_writer('results/exploration.gif', mode='I', duration=0.05, palettesize=256, subrectangles=False) as writer:\n",
239 | " \n",
240 | " for i in tqdm(range(num_anchors-1)):\n",
241 | " initial = anchors[i]\n",
242 | " final = anchors[i+1]\n",
243 | " \n",
244 | " for j in range(num_interp):\n",
245 | " latent = (float(num_interp-j) * initial + float(j) * final)/num_interp\n",
246 | " \n",
247 | " gen_img, _ = stylegan([latent],\n",
248 | " truncation=truncation,\n",
249 | " truncation_latent=trunc,\n",
250 | " input_is_latent=True,\n",
251 | " randomize_noise=True)\n",
252 | " \n",
253 | " _, target_texture = encoder(gen_img)\n",
254 | " textures = [ori_textures for _ in range(swap_loc)] + [target_texture for _ in range(len(generator.layers) - swap_loc)]\n",
255 | " swap_img = generator(ori_structure, textures, noises=0)\n",
256 | " \n",
257 | " result = torch.cat([black_image, gen_img.cpu()], 3)\n",
258 | " result = torch.cat([\n",
259 | " result,\n",
260 | " torch.cat([test_image, swap_img.cpu()], 3)\n",
261 | " ], 2)\n",
262 | " \n",
263 | " writer.append_data((tensor2image(result)*255).astype(np.uint8))\n",
264 | " \n",
265 | "# output.mp4をリセット\n",
266 | "if os.path.exists('./output.mp4'):\n",
267 | " os.remove('./output.mp4')\n",
268 | " \n",
269 | "# GIFからmp4を作成\n",
270 | "! ffmpeg -i results/exploration.gif -movflags faststart -pix_fmt yuv420p -vf \"scale=trunc(iw/2)*2:trunc(ih/2)*2\" output.mp4"
271 | ],
272 | "metadata": {
273 | "id": "iWz354d1ERBa"
274 | },
275 | "execution_count": null,
276 | "outputs": []
277 | },
278 | {
279 | "cell_type": "code",
280 | "source": [
281 | "# mp4動画の再生\n",
282 | "from IPython.display import HTML\n",
283 | "from base64 import b64encode\n",
284 | " \n",
285 | "mp4 = open('./output.mp4', 'rb').read()\n",
286 | "data_url = 'data:video/mp4;base64,' + b64encode(mp4).decode()\n",
287 | "HTML(f\"\"\"\n",
288 | "\"\"\")"
291 | ],
292 | "metadata": {
293 | "id": "MCD2bnp_EkCb"
294 | },
295 | "execution_count": null,
296 | "outputs": []
297 | }
298 | ]
299 | }
--------------------------------------------------------------------------------
/VideoPose3D.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "colab_type": "text",
7 | "id": "view-in-github"
8 | },
9 | "source": [
10 | "
"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {
16 | "id": "vcUFywJRYlk0"
17 | },
18 | "source": [
19 | "# **Install Pytorch & Caffe2**"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": null,
25 | "metadata": {
26 | "id": "yBHHFFzAagmJ"
27 | },
28 | "outputs": [],
29 | "source": [
30 | "!wget https://anaconda.org/pytorch/pytorch/1.2.0/download/linux-64/pytorch-1.2.0-py3.6_cuda10.0.130_cudnn7.6.2_0.tar.bz2\n",
31 | "!tar xvjf pytorch-1.2.0-py3.6_cuda10.0.130_cudnn7.6.2_0.tar.bz2\n",
32 | "!cp -r lib/python3.6/site-packages/* /usr/local/lib/python3.6/dist-packages/"
33 | ]
34 | },
35 | {
36 | "cell_type": "markdown",
37 | "metadata": {
38 | "id": "qECOVZuJZM5i"
39 | },
40 | "source": [
41 | "# check if Caffe2 was build"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": null,
47 | "metadata": {
48 | "id": "3SQkqTLzbjWC"
49 | },
50 | "outputs": [],
51 | "source": [
52 | "# To check if Caffe2 build was successful\n",
53 | "!python -c 'from caffe2.python import core' 2>/dev/null && echo \"Success\" || echo \"Failure\"\n",
54 | "\n",
55 | "# To check if Caffe2 GPU build was successful\n",
56 | "!python -c 'from caffe2.python import workspace; print(workspace.NumCudaDevices())'"
57 | ]
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "metadata": {
62 | "id": "iDxNoOIgZn4y"
63 | },
64 | "source": [
65 | "# Install COCO Dataset "
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": null,
71 | "metadata": {
72 | "id": "PoEjPt55bqix"
73 | },
74 | "outputs": [],
75 | "source": [
76 | "!apt-get install python-dev\n",
77 | "!pip install cython\n",
78 | "!pip install pycocotools\n",
79 | "!git clone https://github.com/cocodataset/cocoapi.git\n",
80 | "!cd cocoapi/PythonAPI && make install\n",
81 | "\n",
82 | "import os\n",
83 | "os.environ['COCOAPI'] = \":/content/cocoapi\""
84 | ]
85 | },
86 | {
87 | "cell_type": "markdown",
88 | "metadata": {
89 | "id": "RAVpAn6EZv3b"
90 | },
91 | "source": [
92 | "# Install Detectron"
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": null,
98 | "metadata": {
99 | "id": "kZQYD_SKbw0O"
100 | },
101 | "outputs": [],
102 | "source": [
103 | "!git clone https://github.com/facebookresearch/detectron\n",
104 | "!pip install -r detectron/requirements.txt\n",
105 | "!cd detectron && make\n",
106 | "!python detectron/detectron/tests/test_spatial_narrow_as_op.py"
107 | ]
108 | },
109 | {
110 | "cell_type": "markdown",
111 | "metadata": {
112 | "id": "FQOxtKmWZ5Ma"
113 | },
114 | "source": [
115 | "# Install VideoPose3D & Copy Video Script to Detectron Tools Folder"
116 | ]
117 | },
118 | {
119 | "cell_type": "code",
120 | "execution_count": null,
121 | "metadata": {
122 | "id": "6vRI4Rn3js85"
123 | },
124 | "outputs": [],
125 | "source": [
126 | "#copy file from VideoPose3d\n",
127 | "!git clone https://github.com/facebookresearch/VideoPose3D\n",
128 | "!cp VideoPose3D/inference/infer_video.py detectron/tools/infer_video.py"
129 | ]
130 | },
131 | {
132 | "cell_type": "markdown",
133 | "metadata": {
134 | "id": "GSDS81gsaNex"
135 | },
136 | "source": [
137 | "# Download Pretrained Human3.6m Coco Model"
138 | ]
139 | },
140 | {
141 | "cell_type": "code",
142 | "execution_count": null,
143 | "metadata": {
144 | "id": "av6RLcyPmuSH"
145 | },
146 | "outputs": [],
147 | "source": [
148 | "!mkdir VideoPose3D/checkpoint\n",
149 | "os.chdir('VideoPose3D/checkpoint')\n",
150 | "!wget https://dl.fbaipublicfiles.com/video-pose-3d/pretrained_h36m_detectron_coco.bin\n",
151 | "os.chdir('../..')"
152 | ]
153 | },
154 | {
155 | "cell_type": "markdown",
156 | "metadata": {
157 | "id": "popZ3evNaffh"
158 | },
159 | "source": [
160 | "# Download Youtube Video for 3D Pose Estimation (specify YOUTUBE_ID)"
161 | ]
162 | },
163 | {
164 | "cell_type": "code",
165 | "execution_count": null,
166 | "metadata": {
167 | "id": "rDq3zWIfTCaj"
168 | },
169 | "outputs": [],
170 | "source": [
171 | "YOUTUBE_ID ='cgHZJiyWKIY'\n",
172 | "\n",
173 | "\n",
174 | "!pip install -q youtube-dl\n",
175 | "#download video\n",
176 | "!youtube-dl -f 'bestvideo[ext=mp4]' --output \"youtube.%(ext)s\" https://www.youtube.com/watch?v=$YOUTUBE_ID\n",
177 | "\n",
178 | "!mkdir videos \n",
179 | " \n",
180 | "# cut the 14 seconds\n",
181 | "!ffmpeg -y -loglevel info -i youtube.mp4 -ss 00:00:48 -t 00:00:14 videos/video.mp4\n",
182 | "\n",
183 | "# recommendet alternation to 50fps \n",
184 | "#!ffmpeg -i videos/video.mp4 -filter \"minterpolate='fps=50'\" -crf 0 videos/video50fps.mp4"
185 | ]
186 | },
187 | {
188 | "cell_type": "markdown",
189 | "metadata": {
190 | "id": "2mN3zRgbbKxK"
191 | },
192 | "source": [
193 | "# Compute 2D Coordinates with Detectron"
194 | ]
195 | },
196 | {
197 | "cell_type": "code",
198 | "execution_count": null,
199 | "metadata": {
200 | "id": "2inleQL4Y4qg"
201 | },
202 | "outputs": [],
203 | "source": [
204 | "!mkdir output\n",
205 | "!python detectron/tools/infer_video.py \\\n",
206 | " --cfg detectron/configs/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_s1x.yaml \\\n",
207 | " --output-dir output \\\n",
208 | " --image-ext mp4 \\\n",
209 | " --wts https://dl.fbaipublicfiles.com/detectron/37698009/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_s1x.yaml.08_45_57.YkrJgP6O/output/train/keypoints_coco_2014_train:keypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl \\\n",
210 | " videos\n",
211 | " \n",
212 | "#\t --wts https://dl.fbaipublicfiles.com/detectron/37698009/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_s1x.yaml.08_45_57.YkrJgP6O/output/train/keypoints_coco_2014_train:keypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl \\\n",
213 | "\n",
214 | " "
215 | ]
216 | },
217 | {
218 | "cell_type": "markdown",
219 | "metadata": {
220 | "id": "lkedL1VTb_go"
221 | },
222 | "source": [
223 | "# Prepare Detectron Output to fit VideoPose3D Input"
224 | ]
225 | },
226 | {
227 | "cell_type": "code",
228 | "execution_count": null,
229 | "metadata": {
230 | "id": "QHrkZReqb2er"
231 | },
232 | "outputs": [],
233 | "source": [
234 | "\n",
235 | "!mkdir ./VideoPose3D/data/detectronoutput\n",
236 | "!cp output/video.mp4.npz VideoPose3D/data/detectronoutput/video.mp4.npz\n",
237 | "os.chdir('VideoPose3D/data') # This script must be launched from the \"data\" directory\n",
238 | "!python prepare_data_2d_custom.py -i detectronoutput -o myvideos\n",
239 | "os.chdir('../../')"
240 | ]
241 | },
242 | {
243 | "cell_type": "markdown",
244 | "metadata": {
245 | "id": "ktOkXbNbbiH4"
246 | },
247 | "source": [
248 | "# Compute 3D Joints with VideoPose3D"
249 | ]
250 | },
251 | {
252 | "cell_type": "code",
253 | "execution_count": null,
254 | "metadata": {
255 | "id": "J7Kp5czTfdRZ"
256 | },
257 | "outputs": [],
258 | "source": [
259 | "#os.chdir('../')\n",
260 | "\n",
261 | "#os.chdir('checkpoint')\n",
262 | "#!wget https://dl.fbaipublicfiles.com/video-pose-3d/pretrained_h36m_cpn.bin\n",
263 | "##!wget https://dl.fbaipublicfiles.com/video-pose-3d/d-pt-243.bin\n",
264 | "\n",
265 | "!cp ./videos/video.mp4 VideoPose3D/video.mp4\n",
266 | "os.chdir('VideoPose3D')\n",
267 | "\n",
268 | "\n",
269 | "#!python run.py -d custom -k MyCustomDatasetName -arc 3,3,3,3,3 -c checkpoint --evaluate pretrained_h36m_detectron_coco.bin --render --viz-subject S1 --viz-action custom --viz-camera 0 --viz-export My3dDataExport --viz-size 6\n",
270 | "#!python run.py -d custom -k MyCustomDatasetName -arc 3,3,3,3,3 -c checkpoint --evaluate pretrained_h36m_detectron_coco.bin --render --viz-subject video.mp4 --viz-action custom --viz-camera 0 --viz-video video.mp4 --viz-output output.mp4 --viz-size 6\n",
271 | "\n",
272 | "#!python run.py -e 80 -k gt -arc 3,3,3,3,3\n",
273 | "\n",
274 | "\n",
275 | "#!python run.py -d custom -k myvideos -arc 3,3,3,3,3 -c checkpoint --evaluate pretrained_h36m_detectron_coco.bin --viz-export My3dDataExport\n",
276 | "\n",
277 | "#!python run.py -d custom -k myvideos -arc 3,3,3,3,3 -c checkpoint --evaluate d-pt-243.bin --render --viz-subject video.mp4 --viz-action Directions --viz-video video.mp4 --viz-camera 0 --viz-output output_scater.mp4 --viz-size 5 --viz-downsample 1 --viz-skip 9\n",
278 | "\n",
279 | "#!python run.py -d custom -k myvideos -arc 3,3,3,3,3 -c checkpoint --evaluate pretrained_h36m_detectron_coco.bin --render --viz-subject video.mp4 --viz-action custom --viz-camera 0 --viz-video video.mp4 --viz-output output.mp4 --viz-size 6\n",
280 | "!python run.py -d custom -k myvideos -arc 3,3,3,3,3 -c checkpoint --evaluate pretrained_h36m_detectron_coco.bin --render --viz-subject video.mp4 --viz-action custom --viz-camera 0 --viz-video video.mp4 --viz-output output.mp4 --viz-export outputfile --viz-size 6\n",
281 | "\n",
282 | "#working version \n",
283 | "#!python run.py -k gt -arc 3,3,3,3,3 -c checkpoint --evaluate pretrained_h36m_cpn.bin --viz-export My3dDataExport --viz-output output.mp4"
284 | ]
285 | },
286 | {
287 | "cell_type": "markdown",
288 | "metadata": {
289 | "id": "Vd_nbXLfcJzY"
290 | },
291 | "source": [
292 | "# Display Results - Joint Export "
293 | ]
294 | },
295 | {
296 | "cell_type": "code",
297 | "execution_count": null,
298 | "metadata": {
299 | "id": "mwHtOf9EE3Lg"
300 | },
301 | "outputs": [],
302 | "source": [
303 | "#inspect joints export \n",
304 | "\n",
305 | "import numpy as np\n",
306 | "data = np.load('outputfile.npy')\n",
307 | "lst = data\n",
308 | "for item in lst:\n",
309 | " print(item)\n",
310 | " \n"
311 | ]
312 | },
313 | {
314 | "cell_type": "markdown",
315 | "metadata": {
316 | "id": "wjDTuELfcVuZ"
317 | },
318 | "source": [
319 | "# Display Results - Joint Video"
320 | ]
321 | },
322 | {
323 | "cell_type": "code",
324 | "execution_count": null,
325 | "metadata": {
326 | "id": "rbImoAtIRxdu"
327 | },
328 | "outputs": [],
329 | "source": [
330 | "#display video\n",
331 | "def show_local_mp4_video(file_name, width=640, height=480):\n",
332 | " import io\n",
333 | " import base64\n",
334 | " from IPython.display import HTML\n",
335 | " video_encoded = base64.b64encode(io.open(file_name, 'rb').read())\n",
336 | " return HTML(data=''''''.format(width, height, video_encoded.decode('ascii')))\n",
339 | "\n",
340 | "show_local_mp4_video('output.mp4', width=960, height=720)"
341 | ]
342 | },
343 | {
344 | "cell_type": "markdown",
345 | "metadata": {
346 | "id": "Yhw2Pe_HceM7"
347 | },
348 | "source": [
349 | "#Download Joint Export & Video"
350 | ]
351 | },
352 | {
353 | "cell_type": "code",
354 | "execution_count": null,
355 | "metadata": {
356 | "id": "Ljh6cuahb7kW"
357 | },
358 | "outputs": [],
359 | "source": [
360 | "from google.colab import files\n",
361 | "\n",
362 | "\n",
363 | "files.download('output.mp4')\n",
364 | "files.download('outputfile.npy')"
365 | ]
366 | }
367 | ],
368 | "metadata": {
369 | "accelerator": "GPU",
370 | "colab": {
371 | "collapsed_sections": [],
372 | "include_colab_link": true,
373 | "name": "VideoPose3D",
374 | "provenance": [],
375 | "toc_visible": true
376 | },
377 | "kernelspec": {
378 | "display_name": "Python 3",
379 | "language": "python",
380 | "name": "python3"
381 | },
382 | "language_info": {
383 | "codemirror_mode": {
384 | "name": "ipython",
385 | "version": 3
386 | },
387 | "file_extension": ".py",
388 | "mimetype": "text/x-python",
389 | "name": "python",
390 | "nbconvert_exporter": "python",
391 | "pygments_lexer": "ipython3",
392 | "version": "3.7.9"
393 | }
394 | },
395 | "nbformat": 4,
396 | "nbformat_minor": 1
397 | }
398 |
--------------------------------------------------------------------------------
/DeepDream.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "colab_type": "text",
7 | "id": "view-in-github"
8 | },
9 | "source": [
10 | "
"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {
16 | "id": "F4RBFfIWNbG0"
17 | },
18 | "source": [
19 | "## セットアップ\n",
20 | "ライブラリーの読み込み、クラスと関数の定義"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": null,
26 | "metadata": {
27 | "id": "qRScWg_VNqvj"
28 | },
29 | "outputs": [],
30 | "source": [
31 | "import tensorflow as tf\n",
32 | "import numpy as np\n",
33 | "import matplotlib as mpl\n",
34 | "import IPython.display as display\n",
35 | "import PIL.Image\n",
36 | "from tensorflow.keras.preprocessing import image\n",
37 | "\n",
38 | "# Input image\n",
39 | "def input(image, max_dim=None):\n",
40 | " img = PIL.Image.open(image)\n",
41 | " if max_dim:\n",
42 | " img.thumbnail((max_dim, max_dim))\n",
43 | " return np.array(img)\n",
44 | "\n",
45 | "# Normalize an image\n",
46 | "def deprocess(img):\n",
47 | " img = 255*(img + 1.0)/2.0\n",
48 | " return tf.cast(img, tf.uint8)\n",
49 | "\n",
50 | "# Display an image\n",
51 | "def show(img):\n",
52 | " display.display(PIL.Image.fromarray(np.array(img)))\n",
53 | "\n",
54 | "# Calc loss\n",
55 | "def calc_loss(img, model):\n",
56 | " img_batch = tf.expand_dims(img, axis=0)\n",
57 | " layer_activations = model(img_batch)\n",
58 | " if len(layer_activations) == 1:\n",
59 | " layer_activations = [layer_activations]\n",
60 | "\n",
61 | " losses = []\n",
62 | " for act in layer_activations:\n",
63 | " loss = tf.math.reduce_mean(act)\n",
64 | " losses.append(loss)\n",
65 | "\n",
66 | " return tf.reduce_sum(losses)\n",
67 | "\n",
68 | "# Class DeepDream\n",
69 | "class DeepDream(tf.Module):\n",
70 | " def __init__(self, model):\n",
71 | " self.model = model\n",
72 | "\n",
73 | " @tf.function(\n",
74 | " input_signature=(\n",
75 | " tf.TensorSpec(shape=[None,None,3], dtype=tf.float32),\n",
76 | " tf.TensorSpec(shape=[], dtype=tf.int32),\n",
77 | " tf.TensorSpec(shape=[], dtype=tf.float32),)\n",
78 | " )\n",
79 | " def __call__(self, img, steps, step_size):\n",
80 | " loss = tf.constant(0.0)\n",
81 | " for n in tf.range(steps):\n",
82 | " with tf.GradientTape() as tape:\n",
83 | " tape.watch(img)\n",
84 | " loss = calc_loss(img, self.model)\n",
85 | "\n",
86 | " gradients = tape.gradient(loss, img)\n",
87 | " gradients /= tf.math.reduce_std(gradients) + 1e-8 \n",
88 | " \n",
89 | " img = img + gradients*step_size\n",
90 | " img = tf.clip_by_value(img, -1, 1)\n",
91 | "\n",
92 | " return loss, img\n",
93 | "\n",
94 | "# run_simple\n",
95 | "def run_deep_dream_simple(img, steps=100, step_size=0.01):\n",
96 | " img = tf.keras.applications.inception_v3.preprocess_input(img)\n",
97 | " img = tf.convert_to_tensor(img)\n",
98 | " step_size = tf.convert_to_tensor(step_size)\n",
99 | " steps_remaining = steps\n",
100 | " step = 0\n",
101 | " while steps_remaining:\n",
102 | " if steps_remaining>100:\n",
103 | " run_steps = tf.constant(100)\n",
104 | " else:\n",
105 | " run_steps = tf.constant(steps_remaining)\n",
106 | " steps_remaining -= run_steps\n",
107 | " step += run_steps\n",
108 | "\n",
109 | " loss, img = deepdream(img, run_steps, tf.constant(step_size))\n",
110 | "\n",
111 | " result = deprocess(img) \n",
112 | " return result\n",
113 | "\n",
114 | "# run_octave\n",
115 | "def octave(original_img):\n",
116 | " OCTAVE_SCALE = 1.30\n",
117 | " img = tf.constant(np.array(original_img))\n",
118 | " base_shape = tf.shape(img)[:-1]\n",
119 | " float_base_shape = tf.cast(base_shape, tf.float32)\n",
120 | "\n",
121 | " for n in range(-2, 3):\n",
122 | " new_shape = tf.cast(float_base_shape*(OCTAVE_SCALE**n), tf.int32)\n",
123 | " img = tf.image.resize(img, new_shape).numpy()\n",
124 | " img = run_deep_dream_simple(img=img, steps=50, step_size=0.01)\n",
125 | " img = tf.image.resize(img, base_shape) \n",
126 | " img = tf.image.convert_image_dtype(img/255.0, dtype=tf.uint8)\n",
127 | " return img"
128 | ]
129 | },
130 | {
131 | "cell_type": "markdown",
132 | "metadata": {
133 | "id": "f0WWczheOwDf"
134 | },
135 | "source": [
136 | "# サンプルデータのダウンロード"
137 | ]
138 | },
139 | {
140 | "cell_type": "code",
141 | "execution_count": null,
142 | "metadata": {
143 | "id": "5VsZijq0M7kW"
144 | },
145 | "outputs": [],
146 | "source": [
147 | "!git clone https://github.com/cedro3/Sample.git"
148 | ]
149 | },
150 | {
151 | "cell_type": "markdown",
152 | "metadata": {
153 | "id": "O2oFtzu-ETlo"
154 | },
155 | "source": [
156 | "## モデルの作成"
157 | ]
158 | },
159 | {
160 | "cell_type": "code",
161 | "execution_count": null,
162 | "metadata": {
163 | "id": "VkHkYEqbDC7E"
164 | },
165 | "outputs": [],
166 | "source": [
167 | "# ベースモデル InceptionV3 のダウンロード\n",
168 | "base_model = tf.keras.applications.InceptionV3(include_top=False, weights='imagenet')\n",
169 | "\n",
170 | "# Maximize the activations of these layers\n",
171 | "names = ['mixed3', 'mixed5']\n",
172 | "layers = [base_model.get_layer(name).output for name in names]\n",
173 | "\n",
174 | "# Create the feature extraction model\n",
175 | "dream_model = tf.keras.Model(inputs=base_model.input, outputs=layers)\n",
176 | "\n",
177 | "# make model\n",
178 | "deepdream = DeepDream(dream_model)"
179 | ]
180 | },
181 | {
182 | "cell_type": "markdown",
183 | "metadata": {
184 | "id": "B-VFUjetXFi-"
185 | },
186 | "source": [
187 | "# octave バージョン"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": null,
193 | "metadata": {
194 | "id": "T39U0ZWSNDbi"
195 | },
196 | "outputs": [],
197 | "source": [
198 | "# 静止画をDeepDreamに変換(octave)\n",
199 | "original_img = input('./Sample/animal_pic/dog.png')\n",
200 | "img = octave(original_img)\n",
201 | "show(original_img)\n",
202 | "show(img)"
203 | ]
204 | },
205 | {
206 | "cell_type": "code",
207 | "execution_count": null,
208 | "metadata": {
209 | "id": "OfT1RmgEUH9d"
210 | },
211 | "outputs": [],
212 | "source": [
213 | "# ビデオを静止画に変換\n",
214 | "import os\n",
215 | "import shutil\n",
216 | "import cv2\n",
217 | "\n",
218 | "# 既にimagesフォルダーがあれば削除\n",
219 | "if os.path.isdir('images'):\n",
220 | " shutil.rmtree('images')\n",
221 | "\n",
222 | "os.makedirs('images', exist_ok=True)\n",
223 | " \n",
224 | "def video_2_images(video_file= './Sample/video/elephant.mp4', # ビデオの指定\n",
225 | " image_dir='./images/', \n",
226 | " image_file='%s.png'):\n",
227 | " \n",
228 | " # Initial setting\n",
229 | " i = 0\n",
230 | " interval = 6\n",
231 | " length = 300 # 最大フレーム数\n",
232 | " \n",
233 | " cap = cv2.VideoCapture(video_file)\n",
234 | " while(cap.isOpened()):\n",
235 | " flag, frame = cap.read() \n",
236 | " if flag == False: \n",
237 | " break\n",
238 | " if i == length*interval:\n",
239 | " break\n",
240 | " if i % interval == 0: \n",
241 | " cv2.imwrite(image_dir+image_file % str(int(i/interval)).zfill(6), frame)\n",
242 | " i += 1 \n",
243 | " cap.release() \n",
244 | " \n",
245 | "def main():\n",
246 | " video_2_images()\n",
247 | " \n",
248 | "if __name__ == '__main__':\n",
249 | " main()"
250 | ]
251 | },
252 | {
253 | "cell_type": "code",
254 | "execution_count": null,
255 | "metadata": {
256 | "id": "pRTrM2SZyoSh"
257 | },
258 | "outputs": [],
259 | "source": [
260 | "# 静止画をDeepDream画像へ変換(octave)\n",
261 | "import glob\n",
262 | "from tqdm import tqdm\n",
263 | "\n",
264 | "files=[]\n",
265 | "for name in sorted(glob.glob('./images/*.png')):\n",
266 | " files.append(name)\n",
267 | "\n",
268 | "for file in tqdm(files):\n",
269 | " original_img=input(file)\n",
270 | " dream_img = octave(original_img)\n",
271 | " PIL.Image.fromarray(np.array(dream_img)).save(file) "
272 | ]
273 | },
274 | {
275 | "cell_type": "code",
276 | "execution_count": null,
277 | "metadata": {
278 | "id": "x8jrxvrgcJ2Q"
279 | },
280 | "outputs": [],
281 | "source": [
282 | "# DeepDream画像をmp4に変換\n",
283 | "!ffmpeg -r 6 -i images/%06d.png -vcodec libx264 -pix_fmt yuv420p output.mp4"
284 | ]
285 | },
286 | {
287 | "cell_type": "code",
288 | "execution_count": null,
289 | "metadata": {
290 | "id": "aVLTmQL7cJ_8"
291 | },
292 | "outputs": [],
293 | "source": [
294 | "# mp4動画の再生\n",
295 | "from IPython.display import HTML\n",
296 | "from base64 import b64encode\n",
297 | "\n",
298 | "mp4 = open('./output.mp4', 'rb').read()\n",
299 | "data_url = 'data:video/mp4;base64,' + b64encode(mp4).decode()\n",
300 | "HTML(f\"\"\"\n",
301 | "\"\"\")"
304 | ]
305 | },
306 | {
307 | "cell_type": "markdown",
308 | "metadata": {
309 | "id": "pQ5t215rUPlS"
310 | },
311 | "source": [
312 | "# simple バージョン"
313 | ]
314 | },
315 | {
316 | "cell_type": "code",
317 | "execution_count": null,
318 | "metadata": {
319 | "id": "_xWdtQ_IR7CR"
320 | },
321 | "outputs": [],
322 | "source": [
323 | "# 静止画をDeepDreamに変換(simple)\n",
324 | "original_img = input('./Sample/animal_pic/dog.png')\n",
325 | "img = run_deep_dream_simple(original_img)\n",
326 | "show(original_img)\n",
327 | "show(img)"
328 | ]
329 | },
330 | {
331 | "cell_type": "code",
332 | "execution_count": null,
333 | "metadata": {
334 | "id": "bG_RI44NTLhE"
335 | },
336 | "outputs": [],
337 | "source": [
338 | "# ビデオを静止画に変換\n",
339 | "import os\n",
340 | "import shutil\n",
341 | "import cv2\n",
342 | "\n",
343 | "# 既にimagesフォルダーがあれば削除\n",
344 | "if os.path.isdir('images'):\n",
345 | " shutil.rmtree('images')\n",
346 | "\n",
347 | "os.makedirs('images', exist_ok=True)\n",
348 | " \n",
349 | "def video_2_images(video_file= './Sample/video/elephant.mp4', # ビデオの指定\n",
350 | " image_dir='./images/', \n",
351 | " image_file='%s.png'):\n",
352 | " \n",
353 | " # Initial setting\n",
354 | " i = 0\n",
355 | " interval = 6\n",
356 | " length = 300 # 最大フレーム数\n",
357 | " \n",
358 | " cap = cv2.VideoCapture(video_file)\n",
359 | " while(cap.isOpened()):\n",
360 | " flag, frame = cap.read() \n",
361 | " if flag == False: \n",
362 | " break\n",
363 | " if i == length*interval:\n",
364 | " break\n",
365 | " if i % interval == 0: \n",
366 | " cv2.imwrite(image_dir+image_file % str(int(i/interval)).zfill(6), frame)\n",
367 | " i += 1 \n",
368 | " cap.release() \n",
369 | " \n",
370 | "def main():\n",
371 | " video_2_images()\n",
372 | " \n",
373 | "if __name__ == '__main__':\n",
374 | " main()"
375 | ]
376 | },
377 | {
378 | "cell_type": "code",
379 | "execution_count": null,
380 | "metadata": {
381 | "id": "gcyGJkVFtDsK"
382 | },
383 | "outputs": [],
384 | "source": [
385 | "# 静止画をDeepDream画像へ変換(simple)\n",
386 | "import glob\n",
387 | "from tqdm import tqdm\n",
388 | "\n",
389 | "files=[]\n",
390 | "for name in sorted(glob.glob('./images/*.png')):\n",
391 | " files.append(name)\n",
392 | "\n",
393 | "for file in tqdm(files):\n",
394 | " original_img=input(file)\n",
395 | " dream_img = run_deep_dream_simple(img=original_img, steps=100, step_size=0.01)\n",
396 | " PIL.Image.fromarray(np.array(dream_img)).save(file) "
397 | ]
398 | },
399 | {
400 | "cell_type": "code",
401 | "execution_count": null,
402 | "metadata": {
403 | "id": "macVyBdETipx"
404 | },
405 | "outputs": [],
406 | "source": [
407 | "# DeepDream画像をmp4に変換\n",
408 | "!ffmpeg -r 6 -i images/%06d.png -vcodec libx264 -pix_fmt yuv420p output2.mp4"
409 | ]
410 | },
411 | {
412 | "cell_type": "code",
413 | "execution_count": null,
414 | "metadata": {
415 | "id": "m8jIlhi9Tt-H"
416 | },
417 | "outputs": [],
418 | "source": [
419 | "# mp4動画の再生\n",
420 | "from IPython.display import HTML\n",
421 | "from base64 import b64encode\n",
422 | "\n",
423 | "mp4 = open('./output2.mp4', 'rb').read()\n",
424 | "data_url = 'data:video/mp4;base64,' + b64encode(mp4).decode()\n",
425 | "HTML(f\"\"\"\n",
426 | "\"\"\")"
429 | ]
430 | }
431 | ],
432 | "metadata": {
433 | "accelerator": "GPU",
434 | "colab": {
435 | "collapsed_sections": [],
436 | "include_colab_link": true,
437 | "name": "DeepDream",
438 | "provenance": []
439 | },
440 | "kernelspec": {
441 | "display_name": "Python 3",
442 | "language": "python",
443 | "name": "python3"
444 | },
445 | "language_info": {
446 | "codemirror_mode": {
447 | "name": "ipython",
448 | "version": 3
449 | },
450 | "file_extension": ".py",
451 | "mimetype": "text/x-python",
452 | "name": "python",
453 | "nbconvert_exporter": "python",
454 | "pygments_lexer": "ipython3",
455 | "version": "3.7.9"
456 | }
457 | },
458 | "nbformat": 4,
459 | "nbformat_minor": 1
460 | }
461 |
--------------------------------------------------------------------------------
/infinite_nature_demo.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "infinite_nature_demo",
7 | "provenance": [],
8 | "collapsed_sections": [],
9 | "include_colab_link": true
10 | },
11 | "kernelspec": {
12 | "display_name": "Python 3",
13 | "name": "python3"
14 | },
15 | "accelerator": "GPU"
16 | },
17 | "cells": [
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {
21 | "id": "view-in-github",
22 | "colab_type": "text"
23 | },
24 | "source": [
25 | "
"
26 | ]
27 | },
28 | {
29 | "cell_type": "markdown",
30 | "metadata": {
31 | "id": "EvBQf1UZNu0j"
32 | },
33 | "source": [
34 | "# セットアップ"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "metadata": {
40 | "id": "hypYi7EJNxJ6",
41 | "cellView": "form"
42 | },
43 | "source": [
44 | "#@title download model & install library\n",
45 | "%%shell\n",
46 | "echo Fetching code from github...\n",
47 | "\n",
48 | "apt install subversion\n",
49 | "svn export --force https://github.com/google-research/google-research/trunk/infinite_nature\n",
50 | "\n",
51 | "echo\n",
52 | "echo Fetching trained model weights...\n",
53 | "rm -f autocruise_input*.pkl\n",
54 | "rm -f ckpt.tar.gz\n",
55 | "rm -rf ckpt\n",
56 | "wget https://storage.googleapis.com/gresearch/infinite_nature_public/autocruise_input1.pkl\n",
57 | "wget https://storage.googleapis.com/gresearch/infinite_nature_public/autocruise_input2.pkl\n",
58 | "wget https://storage.googleapis.com/gresearch/infinite_nature_public/autocruise_input3.pkl\n",
59 | "wget https://storage.googleapis.com/gresearch/infinite_nature_public/ckpt.tar.gz\n",
60 | "tar -xf ckpt.tar.gz\n",
61 | "\n",
62 | "echo\n",
63 | "echo Installing required dependencies...\n",
64 | "pip install -r infinite_nature/requirements.txt\n",
65 | "\n",
66 | "echo\n",
67 | "echo Fetching tf_mesh_renderer and compiling kernels...\n",
68 | "cd infinite_nature\n",
69 | "rm -rf tf_mesh_renderer\n",
70 | "source download_tf_mesh_renderer.sh\n",
71 | "\n",
72 | "echo Done.\n"
73 | ],
74 | "execution_count": null,
75 | "outputs": []
76 | },
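{
"cell_type": "code",
"metadata": {},
"source": [
"# (Added sanity check, not part of the original demo) Confirm that the model\n",
"# checkpoint and the three autocruise start images were downloaded before moving on.\n",
"import os\n",
"\n",
"for f in ['autocruise_input1.pkl', 'autocruise_input2.pkl', 'autocruise_input3.pkl']:\n",
"    print(f, os.path.exists(f))\n",
"print('ckpt/', os.path.isdir('ckpt'))"
],
"execution_count": null,
"outputs": []
},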
77 | {
78 | "cell_type": "code",
79 | "metadata": {
80 | "id": "08MXs7cBPDwO",
81 | "cellView": "form"
82 | },
83 | "source": [
84 | "#@title setting path\n",
85 | "import tensorflow as tf\n",
86 | "import os\n",
87 | "import sys\n",
88 | "\n",
89 | "# Make sure dynamic linking can find tensorflow libraries.\n",
90 | "os.system('ldconfig ' + tf.sysconfig.get_lib())\n",
91 | "\n",
92 | "# Make sure python can find our libraries.\n",
93 | "sys.path.append('infinite_nature')\n",
94 | "sys.path.append('infinite_nature/tf_mesh_renderer/mesh_renderer')\n",
95 | "\n",
96 | "# Make sure the mesh renderer library knows where to load its .so file from.\n",
97 | "os.environ['TEST_SRCDIR'] = 'infinite_nature'"
98 | ],
99 | "execution_count": null,
100 | "outputs": []
101 | },
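{
"cell_type": "code",
"metadata": {},
"source": [
"# (Added sanity check, not part of the original demo) With 'infinite_nature' on\n",
"# sys.path, the modules used in the next cell should now be importable.\n",
"import importlib.util\n",
"\n",
"for mod in ['config', 'fly_camera', 'infinite_nature_lib']:\n",
"    print(mod, importlib.util.find_spec(mod) is not None)"
],
"execution_count": null,
"outputs": []
},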
102 | {
103 | "cell_type": "code",
104 | "metadata": {
105 | "id": "nvJVkxMbGy6D",
106 | "cellView": "form"
107 | },
108 | "source": [
109 | "#@title build model & difine function\n",
110 | "import imageio\n",
111 | "import IPython\n",
112 | "import numpy as np\n",
113 | "import pickle\n",
114 | "import tensorflow as tf\n",
115 | "import tensorflow_hub as hub\n",
116 | "\n",
117 | "import config\n",
118 | "import fly_camera\n",
119 | "import infinite_nature_lib\n",
120 | "from PIL import Image ###\n",
121 | "\n",
122 | "# Build model and restore checkpoint.\n",
123 | "config.set_training(False)\n",
124 | "model_path = \"ckpt/model.ckpt-6935893\"\n",
125 | "render_refine, style_encoding = infinite_nature_lib.load_model(model_path)\n",
126 | "initial_rgbds = [\n",
127 | " pickle.load(open(\"autocruise_input1.pkl\", \"rb\"))['input_rgbd'],\n",
128 | " pickle.load(open(\"autocruise_input2.pkl\", \"rb\"))['input_rgbd'],\n",
129 | " pickle.load(open(\"autocruise_input3.pkl\", \"rb\"))['input_rgbd']]\n",
130 | "\n",
131 | "# Code for an autopilot demo. We expose two functions that will be invoked\n",
132 | "# from an HTML/JS frontend: reset and step.\n",
133 | "\n",
134 | "# The state that we need to remember while flying:\n",
135 | "state = {\n",
136 | " 'intrinsics': None,\n",
137 | " 'pose': None,\n",
138 | " 'rgbd': None,\n",
139 | " 'start_rgbd': None,\n",
140 | " 'style_noise': None,\n",
141 | " 'next_pose_function': None,\n",
142 | " 'direction_offset': None, # Direction controlled by user's mouse clicks.\n",
143 | "}\n",
144 | "\n",
145 | "def current_image_as_png():\n",
146 | " imgdata = tf.image.encode_png(\n",
147 | " tf.image.convert_image_dtype(state['rgbd'][..., :3], dtype=tf.uint8))\n",
148 | " \n",
149 | " img = IPython.display.Image(data=imgdata.numpy()) \n",
150 | " global cnt\n",
151 | " with open('pic/'+str(cnt).zfill(6)+'.png', 'wb') as png:\n",
152 | " png.write(img.data)\n",
153 | " print('\\r{0}'.format(cnt), end='')\n",
154 | " cnt += 1\n",
155 | "\n",
156 | " return IPython.display.Image(data=imgdata.numpy())\n",
157 | "\n",
158 | "def reset(rgbd=None):\n",
159 | " if rgbd is None:\n",
160 | " rgbd = state['start_rgbd']\n",
161 | "\n",
162 | " height, width, _ = rgbd.shape\n",
163 | " aspect_ratio = width / float(height)\n",
164 | "\n",
165 | " rgbd = tf.image.resize(rgbd, [160, 256])\n",
166 | " state['rgbd'] = rgbd\n",
167 | " state['start_rgbd'] = rgbd\n",
168 | " state['pose'] = np.array(\n",
169 | " [[1.0, 0.0, 0.0, 0.0],\n",
170 | " [0.0, 1.0, 0.0, 0.0],\n",
171 | " [0.0, 0.0, 1.0, 0.0]],\n",
172 | " dtype=np.float32)\n",
173 | " # 0.8 focal_x corresponds to a FOV of ~64 degrees.\n",
174 | " state['intrinsics'] = np.array(\n",
175 | " [0.8, 0.8 * aspect_ratio, .5, .5],\n",
176 | " dtype=np.float32)\n",
177 | " state['direction_offset'] = (0.0, 0.0)\n",
178 | " state['style_noise'] = style_encoding(rgbd)\n",
179 | " state['next_pose_function'] = fly_camera.fly_dynamic(\n",
180 | " state['intrinsics'],\n",
181 | " state['pose'],\n",
182 | " turn_function=(lambda _: state['direction_offset']))\n",
183 | " return current_image_as_png()\n",
184 | "\n",
185 | "\n",
186 | "def step(offsetx, offsety):\n",
187 | " state['direction_offset'] = (offsetx, offsety)\n",
188 | " next_pose = state['next_pose_function'](state['rgbd'])\n",
189 | " next_rgbd = render_refine(\n",
190 | " state['rgbd'], state['style_noise'],\n",
191 | " state['pose'], state['intrinsics'],\n",
192 | " next_pose, state['intrinsics'])\n",
193 | " state['pose'] = next_pose\n",
194 | " state['rgbd'] = next_rgbd\n",
195 | " return current_image_as_png()\n",
196 | "\n",
197 | "\n",
198 | "# To run on user-supplied images, we use MiDaS V2 to obtain initial disparity.\n",
199 | "midas_model = hub.load('https://tfhub.dev/intel/midas/v2/2', tags=['serve'])\n",
200 | "\n",
201 | "def midas_disparity(rgb):\n",
202 | " \"\"\"Computes MiDaS v2 disparity on an RGB input image.\n",
203 | "\n",
204 | " Args:\n",
205 | " rgb: [H, W, 3] Range [0.0, 1.0].\n",
206 | " Returns:\n",
207 | " [H, W, 1] MiDaS disparity resized to the input size and in the range\n",
208 | " [0.0, 1.0]\n",
209 | " \"\"\"\n",
210 | " size = rgb.shape[:2]\n",
211 | " resized = tf.image.resize(rgb, [384, 384], tf.image.ResizeMethod.BICUBIC) #384, 384\n",
212 | " # MiDaS networks wants [1, C, H, W]\n",
213 | " midas_input = tf.transpose(resized, [2, 0, 1])[tf.newaxis]\n",
214 | " prediction = midas_model.signatures['serving_default'](midas_input)['default'][0]\n",
215 | " disp_min = tf.reduce_min(prediction)\n",
216 | " disp_max = tf.reduce_max(prediction)\n",
217 | " prediction = (prediction - disp_min) / (disp_max - disp_min)\n",
218 | " return tf.image.resize(\n",
219 | " prediction[..., tf.newaxis], size, method=tf.image.ResizeMethod.AREA)\n",
220 | "\n",
221 | "\n",
222 | "def load_initial(i):\n",
223 | " return reset(rgbd=initial_rgbds[i])\n",
224 | "\n",
225 | "\n",
226 | "def load_image(data):\n",
227 | " # Data converted from JS ends up as a string, needs to be converted to\n",
228 | " # bytes using Latin-1 encoding (which just maps 0-255 to 0-255).\n",
229 | " data = data.encode('Latin-1')\n",
230 | " rgb = tf.image.decode_image(data, channels=3, dtype=tf.float32)\n",
231 | " resized = tf.image.resize(rgb, [160, 256], tf.image.ResizeMethod.AREA)\n",
232 | " rgbd = tf.concat([resized, midas_disparity(resized)], axis=-1)\n",
233 | " return reset(rgbd=rgbd)\n"
234 | ],
235 | "execution_count": null,
236 | "outputs": []
237 | },
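{
"cell_type": "code",
"metadata": {},
"source": [
"# (Hedged sketch, not from the original notebook) The HTML/JS front-end that normally\n",
"# drives reset()/step() interactively is not shown here, but the same API can be used\n",
"# non-interactively: current_image_as_png() writes each rendered frame to pic/ using the\n",
"# global counter cnt, so a straight-ahead fly-through of the first bundled scene might\n",
"# look like this. The frame count of 50 is an arbitrary choice.\n",
"import os\n",
"\n",
"os.makedirs('pic', exist_ok=True)\n",
"cnt = 0                 # frame counter consumed by current_image_as_png()\n",
"load_initial(0)         # start from autocruise_input1.pkl\n",
"for _ in range(50):\n",
"    step(0.0, 0.0)      # zero mouse offset: keep flying straight ahead"
],
"execution_count": null,
"outputs": []
},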
238 | {
239 | "cell_type": "code",
240 | "metadata": {
241 | "id": "sCuRX1liUEVM",
242 | "cellView": "form"
243 | },
244 | "source": [
245 | "#@title setting html\n",
246 | "import IPython\n",
247 | "from google.colab import output\n",
248 | "\n",
249 | "# The front-end for our interactive demo.\n",
250 | "\n",
251 | "html='''\n",
252 | "\n",
307 | "