├── .gitignore
├── networks
│   ├── .gitattributes
│   ├── stylegan2ada-density1000.pkl
│   └── stylegan2-stratigraphy6000.pkl
├── stylegan2-ada
│   ├── docs
│   │   ├── stylegan2-ada-teaser-1024x252.png
│   │   ├── stylegan2-ada-teaser-600x400.png
│   │   ├── stylegan2-ada-training-curves.png
│   │   ├── train-help.txt
│   │   └── license.html
│   ├── metrics
│   │   ├── __init__.py
│   │   ├── inception_score.py
│   │   ├── metric_defaults.py
│   │   ├── frechet_inception_distance.py
│   │   ├── kernel_inception_distance.py
│   │   ├── metric_base.py
│   │   ├── perceptual_path_length.py
│   │   ├── linear_separability.py
│   │   └── precision_recall.py
│   ├── training
│   │   ├── __init__.py
│   │   ├── dataset.py
│   │   ├── training_loop.py
│   │   └── loss.py
│   ├── dnnlib
│   │   ├── tflib
│   │   │   ├── ops
│   │   │   │   ├── __init__.py
│   │   │   │   ├── fused_bias_act.cu
│   │   │   │   └── fused_bias_act.py
│   │   │   ├── __init__.py
│   │   │   ├── custom_ops.py
│   │   │   ├── autosummary.py
│   │   │   └── tfutil.py
│   │   ├── __init__.py
│   │   └── util.py
│   ├── Dockerfile
│   ├── LICENSE.txt
│   ├── style_mixing.py
│   ├── generate.py
│   ├── calc_metrics.py
│   └── projector.py
├── README.md
└── generate_samples_example.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 |
--------------------------------------------------------------------------------
/networks/.gitattributes:
--------------------------------------------------------------------------------
1 | *.pkl filter=lfs diff=lfs merge=lfs -text
2 |
--------------------------------------------------------------------------------
/stylegan2-ada/docs/stylegan2-ada-teaser-1024x252.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vpuzyrev/geolgan/HEAD/stylegan2-ada/docs/stylegan2-ada-teaser-1024x252.png
--------------------------------------------------------------------------------
/stylegan2-ada/docs/stylegan2-ada-teaser-600x400.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vpuzyrev/geolgan/HEAD/stylegan2-ada/docs/stylegan2-ada-teaser-600x400.png
--------------------------------------------------------------------------------
/stylegan2-ada/docs/stylegan2-ada-training-curves.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vpuzyrev/geolgan/HEAD/stylegan2-ada/docs/stylegan2-ada-training-curves.png
--------------------------------------------------------------------------------
/networks/stylegan2ada-density1000.pkl:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:2f6cafdca734e58da1e0f09cd39419f22c7f584242f0bcaeaa7c11afd1c0e544
3 | size 351309918
4 |
--------------------------------------------------------------------------------
/networks/stylegan2-stratigraphy6000.pkl:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:acb408f1c3fd8dca8ae1ff83c2953fe94c025719cc5d2fe126b5723c3c09c3eb
3 | size 300815087
4 |
--------------------------------------------------------------------------------
/stylegan2-ada/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | # empty
10 |
--------------------------------------------------------------------------------
/stylegan2-ada/training/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | # empty
10 |
--------------------------------------------------------------------------------
/stylegan2-ada/dnnlib/tflib/ops/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | # empty
10 |
--------------------------------------------------------------------------------
/stylegan2-ada/dnnlib/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | from .util import EasyDict, make_cache_dir_path
10 |
--------------------------------------------------------------------------------
/stylegan2-ada/dnnlib/tflib/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | from . import autosummary
10 | from . import network
11 | from . import optimizer
12 | from . import tfutil
13 | from . import custom_ops
14 |
15 | from .tfutil import *
16 | from .network import Network
17 |
18 | from .optimizer import Optimizer
19 |
20 | from .custom_ops import get_plugin
21 |
--------------------------------------------------------------------------------
/stylegan2-ada/Dockerfile:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | ARG BASE_IMAGE=nvcr.io/nvidia/tensorflow:20.10-tf1-py3
10 | FROM $BASE_IMAGE
11 |
12 | RUN pip install scipy==1.3.3
13 | RUN pip install requests==2.22.0
14 | RUN pip install Pillow==6.2.1
15 | RUN pip install h5py==2.9.0
16 | RUN pip install imageio==2.9.0
17 | RUN pip install imageio-ffmpeg==0.4.2
18 | RUN pip install tqdm==4.49.0
19 |
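20 | # Typical usage (illustrative; the image tag is arbitrary and GPU access requires
21 | # the NVIDIA Container Toolkit):
22 | #   docker build --tag geolgan:latest .
23 | #   docker run --gpus all -it --rm -v $(pwd):/scratch --workdir /scratch geolgan:latest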
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Geophysical model generation with GANs
2 |
3 | Deep neural networks are widely used to estimate subsurface parameters from geophysical measurements. To be efficient, they require large training datasets consisting of hundreds of thousands to millions of different models, which is often not possible to assemble from real data or manually generated models. A recent approach to artificial data generation, based on generative adversarial networks (GANs), can address this problem.
4 |
5 | We present a generator of 2D subsurface models based on StyleGAN2 (Karras, Laine, et al., 2020) and StyleGAN2 with adaptive discriminator augmentation (ADA; Karras, Aittala, et al., 2020) and apply it to the generation of synthetic density and stratigraphy models. As a training set, we use a representative set of subsurface models generated with the Badlands modelling code (Salles et al., 2018). Once trained to sufficient accuracy, our GANs generate detailed and varied artificial geological models in real time, allowing multiple synthetic density and stratigraphy models to be created in a cost-effective manner. A similar approach can be used to create subsurface models with other physical properties, such as velocity models. The proposed method can serve as a useful augmentation tool for various deep learning codes, facilitating the development of more advanced tools for real-time estimation of subsurface parameters from collected data.
6 |
7 |
8 | ## Requirements
9 |
10 | Re-training the networks will require the following (similar to https://github.com/NVlabs/stylegan2/):
11 |
12 | * Linux and Windows are supported, but we recommend Linux for performance and compatibility reasons.
13 | * 64-bit Python 3.6 or 3.7. We recommend Anaconda3 with numpy 1.14.3 or newer.
14 | * We recommend TensorFlow 1.14, which we used for all experiments in the paper, but TensorFlow 1.15 is also supported on Linux. TensorFlow 2.x is not supported.
15 | * On Windows you need to use TensorFlow 1.14, as the standard 1.15 installation does not include necessary C++ headers.
16 | * 1–8 high-end NVIDIA GPUs with at least 12 GB of GPU memory, NVIDIA drivers, CUDA 10.0 toolkit and cuDNN 7.5.
17 |
18 | ## Getting started
19 |
20 | Pre-trained networks are stored as `*.pkl` files in the `networks` folder.
21 |
22 | Running the StyleGAN2 generator for stratigraphic models (requires the original StyleGAN2 code, e.g. https://github.com/NVlabs/stylegan2 checked out as `stylegan2/`):
23 |
24 | ```.bash
25 | python stylegan2/run_generator.py generate-images --network=networks/stylegan2-stratigraphy6000.pkl \
26 | --seeds=0-100 --truncation-psi=0.50 --result-dir=OUTPUT_DIR
27 | ```
28 |
29 | Running the StyleGAN2-ADA generator for density models (ADA_DIR is the `stylegan2-ada` directory bundled with this repository, or your own StyleGAN2-ADA checkout):
30 | 
31 | ```.bash
32 | python ADA_DIR/generate.py --network=networks/stylegan2ada-density1000.pkl --seeds=0-100 \
33 |     --trunc=0.50 --outdir=OUTPUT_DIR
34 | ```
35 |
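36 | The networks can also be used directly from Python. The following is a minimal sketch mirroring the logic of `stylegan2-ada/generate.py`; it assumes the `stylegan2-ada` sources and TensorFlow 1.x are on the Python path and that it is run from the repository root:
37 | 
38 | ```.python
39 | import pickle
40 | import numpy as np
41 | import PIL.Image
42 | import dnnlib.tflib as tflib
43 | 
44 | tflib.init_tf()
45 | with open('networks/stylegan2-stratigraphy6000.pkl', 'rb') as f:
46 |     _G, _D, Gs = pickle.load(f)  # Gs = moving average of the generator weights
47 | 
48 | z = np.random.RandomState(0).randn(1, *Gs.input_shape[1:])  # latent code for seed 0
49 | label = np.zeros([1] + Gs.input_shapes[1][1:])  # zero label for unconditional networks
50 | images = Gs.run(z, label, truncation_psi=0.5,
51 |                 output_transform=dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True))
52 | PIL.Image.fromarray(images[0], 'RGB').save('seed0000.png')
53 | ```
54 | 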
--------------------------------------------------------------------------------
/generate_samples_example.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numpy as np\n",
10 | "import tensorflow as tf\n",
11 | "import tensorflow.keras\n",
12 | "import PIL\n",
13 | "from tensorflow.keras import optimizers\n",
14 | "import multiprocessing as mp"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": null,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "# Device and version checks\n",
24 | "\n",
25 | "#!printenv\n",
26 | "\n",
27 | "#from tensorflow.python.client import device_lib\n",
28 | "#device_lib.list_local_devices()\n",
29 | "\n",
30 | "#print(tf.__version__)\n",
31 | "#tf.test.is_gpu_available()\n",
32 | "\n",
33 | "#print('PIL',PIL.__version__)\n",
34 | "\n",
35 | "#print(f'--> Number of CPUs: {mp.cpu_count()}')\n",
36 | "#print(f'--> Number of GPUs: {len(tf.config.experimental.list_physical_devices(\"GPU\"))}')\n",
37 | "\n",
38 | "#!nvcc stylegan2/test_nvcc.cu -o stylegan2/test_nvcc -run"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": null,
44 | "metadata": {},
45 | "outputs": [],
46 | "source": [
47 |     "# Select which GPU is visible to TensorFlow (zero-based device ID)\n",
48 |     "%env CUDA_VISIBLE_DEVICES=0\n",
49 | "\n",
50 | "!python3 stylegan2/run_generator.py generate-images --network=networks/stylegan2-stratigraphy6000.pkl --seeds=0-100 --truncation-psi=0.5 --result-dir=results/"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": null,
56 | "metadata": {},
57 | "outputs": [],
58 | "source": []
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": null,
63 | "metadata": {},
64 | "outputs": [],
65 | "source": [
66 | "# Checking metrics (see https://github.com/NVlabs/stylegan2#evaluation-metrics)\n",
67 |     "%env CUDA_VISIBLE_DEVICES=0\n",
68 | "\n",
69 | "!python3 stylegan2/run_metrics.py --data-dir=datasets --network=networks/stylegan2-stratigraphy6000.pkl --metrics=fid50k --dataset=data5000_strati"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": null,
75 | "metadata": {},
76 | "outputs": [],
77 | "source": []
78 | }
79 | ],
80 | "metadata": {
81 | "kernelspec": {
82 | "display_name": "Python 3",
83 | "language": "python",
84 | "name": "python3"
85 | },
86 | "language_info": {
87 | "codemirror_mode": {
88 | "name": "ipython",
89 | "version": 3
90 | },
91 | "file_extension": ".py",
92 | "mimetype": "text/x-python",
93 | "name": "python",
94 | "nbconvert_exporter": "python",
95 | "pygments_lexer": "ipython3",
96 | "version": "3.6.9"
97 | }
98 | },
99 | "nbformat": 4,
100 | "nbformat_minor": 2
101 | }
102 |
--------------------------------------------------------------------------------
/stylegan2-ada/metrics/inception_score.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | """Inception Score (IS) from the paper
10 | "Improved techniques for training GANs"."""
11 |
12 | import pickle
13 | import numpy as np
14 | import tensorflow as tf
15 | import dnnlib
16 | import dnnlib.tflib as tflib
17 |
18 | from metrics import metric_base
19 |
20 | #----------------------------------------------------------------------------
21 |
22 | class IS(metric_base.MetricBase):
23 | def __init__(self, num_images, num_splits, minibatch_per_gpu, **kwargs):
24 | super().__init__(**kwargs)
25 | self.num_images = num_images
26 | self.num_splits = num_splits
27 | self.minibatch_per_gpu = minibatch_per_gpu
28 |
29 | def _evaluate(self, Gs, G_kwargs, num_gpus, **_kwargs): # pylint: disable=arguments-differ
30 | minibatch_size = num_gpus * self.minibatch_per_gpu
31 | with dnnlib.util.open_url('https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/inception_v3_softmax.pkl') as f:
32 | inception = pickle.load(f)
33 | activations = np.empty([self.num_images, inception.output_shape[1]], dtype=np.float32)
34 |
35 | # Construct TensorFlow graph.
36 | result_expr = []
37 | for gpu_idx in range(num_gpus):
38 | with tf.device(f'/gpu:{gpu_idx}'):
39 | Gs_clone = Gs.clone()
40 | inception_clone = inception.clone()
41 | latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:])
42 | labels = self._get_random_labels_tf(self.minibatch_per_gpu)
43 | images = Gs_clone.get_output_for(latents, labels, **G_kwargs)
44 | if images.shape[1] == 1: images = tf.tile(images, [1, 3, 1, 1])
45 | images = tflib.convert_images_to_uint8(images)
46 | result_expr.append(inception_clone.get_output_for(images))
47 |
48 | # Calculate activations for fakes.
49 | for begin in range(0, self.num_images, minibatch_size):
50 | self._report_progress(begin, self.num_images)
51 | end = min(begin + minibatch_size, self.num_images)
52 | activations[begin:end] = np.concatenate(tflib.run(result_expr), axis=0)[:end-begin]
53 |
54 | # Calculate IS.
55 | scores = []
56 | for i in range(self.num_splits):
57 | part = activations[i * self.num_images // self.num_splits : (i + 1) * self.num_images // self.num_splits]
58 | kl = part * (np.log(part) - np.log(np.expand_dims(np.mean(part, 0), 0)))
59 | kl = np.mean(np.sum(kl, 1))
60 | scores.append(np.exp(kl))
61 | self._report_result(np.mean(scores), suffix='_mean')
62 | self._report_result(np.std(scores), suffix='_std')
63 |
64 | #----------------------------------------------------------------------------
65 |
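66 | # Reference (illustrative note, not part of the original file): with p(y|x) the
67 | # Inception softmax output for image x and p(y) its marginal over a split, the loop
68 | # above computes IS = exp( E_x[ KL(p(y|x) || p(y)) ] ) per split, then reports the
69 | # mean and standard deviation across the `num_splits` splits.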
--------------------------------------------------------------------------------
/stylegan2-ada/metrics/metric_defaults.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | """Default metric definitions."""
10 |
11 | from dnnlib import EasyDict
12 |
13 | #----------------------------------------------------------------------------
14 |
15 | metric_defaults = EasyDict([(args.name, args) for args in [
16 | # ADA paper.
17 | EasyDict(name='fid50k_full', class_name='metrics.frechet_inception_distance.FID', max_reals=None, num_fakes=50000, minibatch_per_gpu=8, force_dataset_args=dict(shuffle=False, max_images=None, repeat=False, mirror_augment=False)),
18 | EasyDict(name='kid50k_full', class_name='metrics.kernel_inception_distance.KID', max_reals=1000000, num_fakes=50000, minibatch_per_gpu=8, force_dataset_args=dict(shuffle=False, max_images=None, repeat=False, mirror_augment=False)),
19 | EasyDict(name='pr50k3_full', class_name='metrics.precision_recall.PR', max_reals=200000, num_fakes=50000, nhood_size=3, minibatch_per_gpu=8, row_batch_size=10000, col_batch_size=10000, force_dataset_args=dict(shuffle=False, max_images=None, repeat=False, mirror_augment=False)),
20 | EasyDict(name='is50k', class_name='metrics.inception_score.IS', num_images=50000, num_splits=10, minibatch_per_gpu=8, force_dataset_args=dict(shuffle=False, max_images=None)),
21 |
22 | # Legacy: StyleGAN2.
23 | EasyDict(name='fid50k', class_name='metrics.frechet_inception_distance.FID', max_reals=50000, num_fakes=50000, minibatch_per_gpu=8, force_dataset_args=dict(shuffle=False, max_images=None)),
24 | EasyDict(name='kid50k', class_name='metrics.kernel_inception_distance.KID', max_reals=50000, num_fakes=50000, minibatch_per_gpu=8, force_dataset_args=dict(shuffle=False, max_images=None)),
25 | EasyDict(name='pr50k3', class_name='metrics.precision_recall.PR', max_reals=50000, num_fakes=50000, nhood_size=3, minibatch_per_gpu=8, row_batch_size=10000, col_batch_size=10000, force_dataset_args=dict(shuffle=False, max_images=None)),
26 | EasyDict(name='ppl2_wend', class_name='metrics.perceptual_path_length.PPL', num_samples=50000, epsilon=1e-4, space='w', sampling='end', crop=False, minibatch_per_gpu=2, force_dataset_args=dict(shuffle=False, max_images=None), force_G_kwargs=dict(dtype='float32', mapping_dtype='float32', num_fp16_res=0)),
27 |
28 | # Legacy: StyleGAN.
29 | EasyDict(name='ppl_zfull', class_name='metrics.perceptual_path_length.PPL', num_samples=50000, epsilon=1e-4, space='z', sampling='full', crop=True, minibatch_per_gpu=2, force_dataset_args=dict(shuffle=False, max_images=None), force_G_kwargs=dict(dtype='float32', mapping_dtype='float32', num_fp16_res=0)),
30 | EasyDict(name='ppl_wfull', class_name='metrics.perceptual_path_length.PPL', num_samples=50000, epsilon=1e-4, space='w', sampling='full', crop=True, minibatch_per_gpu=2, force_dataset_args=dict(shuffle=False, max_images=None), force_G_kwargs=dict(dtype='float32', mapping_dtype='float32', num_fp16_res=0)),
31 | EasyDict(name='ppl_zend', class_name='metrics.perceptual_path_length.PPL', num_samples=50000, epsilon=1e-4, space='z', sampling='end', crop=True, minibatch_per_gpu=2, force_dataset_args=dict(shuffle=False, max_images=None), force_G_kwargs=dict(dtype='float32', mapping_dtype='float32', num_fp16_res=0)),
32 | EasyDict(name='ppl_wend', class_name='metrics.perceptual_path_length.PPL', num_samples=50000, epsilon=1e-4, space='w', sampling='end', crop=True, minibatch_per_gpu=2, force_dataset_args=dict(shuffle=False, max_images=None), force_G_kwargs=dict(dtype='float32', mapping_dtype='float32', num_fp16_res=0)),
33 | EasyDict(name='ls', class_name='metrics.linear_separability.LS', num_samples=200000, num_keep=100000, attrib_indices=range(40), minibatch_per_gpu=4, force_dataset_args=dict(shuffle=False, max_images=None)),
34 | ]])
35 |
36 | #----------------------------------------------------------------------------
37 |
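38 | # Example (an illustrative sketch, not part of the original file): each entry above is
39 | # consumed by constructing the class named in `class_name` with the remaining fields as
40 | # constructor kwargs, e.g. via dnnlib.util.construct_class_by_name:
41 | #
42 | #   import dnnlib
43 | #   metric = dnnlib.util.construct_class_by_name(**metric_defaults['fid50k'])
44 | #   metric.configure(dataset_args=dict(path='datasets/data5000_strati'))
45 | #   metric.run('networks/stylegan2-stratigraphy6000.pkl', num_gpus=1)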
--------------------------------------------------------------------------------
/stylegan2-ada/docs/train-help.txt:
--------------------------------------------------------------------------------
1 | usage: train.py [-h] --outdir DIR [--gpus INT] [--snap INT] [--seed INT] [-n]
2 | --data PATH [--res INT] [--mirror BOOL] [--metrics LIST]
3 | [--metricdata PATH]
4 | [--cfg {auto,stylegan2,paper256,paper512,paper1024,cifar,cifarbaseline}]
5 | [--gamma FLOAT] [--kimg INT] [--aug {noaug,ada,fixed,adarv}]
6 | [--p FLOAT] [--target TARGET]
7 | [--augpipe {blit,geom,color,filter,noise,cutout,bg,bgc,bgcf,bgcfn,bgcfnc}]
8 | [--cmethod {nocmethod,bcr,zcr,pagan,wgangp,auxrot,spectralnorm,shallowmap,adropout}]
9 | [--dcap FLOAT] [--resume RESUME] [--freezed INT]
10 |
11 | Train a GAN using the techniques described in the paper
12 | "Training Generative Adversarial Networks with Limited Data".
13 |
14 | optional arguments:
15 | -h, --help show this help message and exit
16 |
17 | general options:
18 | --outdir DIR Where to save the results (required)
19 | --gpus INT Number of GPUs to use (default: 1 gpu)
20 | --snap INT Snapshot interval (default: 50 ticks)
21 | --seed INT Random seed (default: 1000)
22 | -n, --dry-run Print training options and exit
23 |
24 | training dataset:
25 | --data PATH Training dataset path (required)
26 | --res INT Dataset resolution (default: highest available)
27 | --mirror BOOL Augment dataset with x-flips (default: false)
28 |
29 | metrics:
30 | --metrics LIST Comma-separated list or "none" (default: fid50k_full)
31 | --metricdata PATH Dataset to evaluate metrics against (optional)
32 |
33 | base config:
34 | --cfg {auto,stylegan2,paper256,paper512,paper1024,cifar,cifarbaseline}
35 | Base config (default: auto)
36 | --gamma FLOAT Override R1 gamma
37 | --kimg INT Override training duration
38 |
39 | discriminator augmentation:
40 | --aug {noaug,ada,fixed,adarv}
41 | Augmentation mode (default: ada)
42 | --p FLOAT Specify augmentation probability for --aug=fixed
43 | --target TARGET Override ADA target for --aug=ada and --aug=adarv
44 | --augpipe {blit,geom,color,filter,noise,cutout,bg,bgc,bgcf,bgcfn,bgcfnc}
45 | Augmentation pipeline (default: bgc)
46 |
47 | comparison methods:
48 | --cmethod {nocmethod,bcr,zcr,pagan,wgangp,auxrot,spectralnorm,shallowmap,adropout}
49 | Comparison method (default: nocmethod)
50 | --dcap FLOAT Multiplier for discriminator capacity
51 |
52 | transfer learning:
53 | --resume RESUME Resume from network pickle (default: noresume)
54 | --freezed INT Freeze-D (default: 0 discriminator layers)
55 |
56 | examples:
57 |
58 | # Train custom dataset using 1 GPU.
59 | python train.py --outdir=~/training-runs --gpus=1 --data=~/datasets/custom
60 |
61 | # Train class-conditional CIFAR-10 using 2 GPUs.
62 | python train.py --outdir=~/training-runs --gpus=2 --data=~/datasets/cifar10c \
63 | --cfg=cifar
64 |
65 | # Transfer learn MetFaces from FFHQ using 4 GPUs.
66 | python train.py --outdir=~/training-runs --gpus=4 --data=~/datasets/metfaces \
67 | --cfg=paper1024 --mirror=1 --resume=ffhq1024 --snap=10
68 |
69 | # Reproduce original StyleGAN2 config F.
70 | python train.py --outdir=~/training-runs --gpus=8 --data=~/datasets/ffhq \
71 | --cfg=stylegan2 --res=1024 --mirror=1 --aug=noaug
72 |
73 | available base configs (--cfg):
74 | auto Automatically select reasonable defaults based on resolution
75 | and GPU count. Good starting point for new datasets.
76 | stylegan2 Reproduce results for StyleGAN2 config F at 1024x1024.
77 | paper256 Reproduce results for FFHQ and LSUN Cat at 256x256.
78 | paper512 Reproduce results for BreCaHAD and AFHQ at 512x512.
79 | paper1024 Reproduce results for MetFaces at 1024x1024.
80 | cifar Reproduce results for CIFAR-10 (tuned configuration).
81 | cifarbaseline Reproduce results for CIFAR-10 (baseline configuration).
82 |
83 | transfer learning source networks (--resume):
84 | ffhq256 FFHQ trained at 256x256 resolution.
85 | ffhq512 FFHQ trained at 512x512 resolution.
86 | ffhq1024 FFHQ trained at 1024x1024 resolution.
87 | celebahq256 CelebA-HQ trained at 256x256 resolution.
88 | lsundog256 LSUN Dog trained at 256x256 resolution.
89 |   <path or URL>  Custom network pickle.
90 |
--------------------------------------------------------------------------------
/stylegan2-ada/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2020, NVIDIA Corporation. All rights reserved.
2 |
3 |
4 | NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator Augmentation (ADA)
5 |
6 |
7 | =======================================================================
8 |
9 | 1. Definitions
10 |
11 | "Licensor" means any person or entity that distributes its Work.
12 |
13 | "Software" means the original work of authorship made available under
14 | this License.
15 |
16 | "Work" means the Software and any additions to or derivative works of
17 | the Software that are made available under this License.
18 |
19 | The terms "reproduce," "reproduction," "derivative works," and
20 | "distribution" have the meaning as provided under U.S. copyright law;
21 | provided, however, that for the purposes of this License, derivative
22 | works shall not include works that remain separable from, or merely
23 | link (or bind by name) to the interfaces of, the Work.
24 |
25 | Works, including the Software, are "made available" under this License
26 | by including in or with the Work either (a) a copyright notice
27 | referencing the applicability of this License to the Work, or (b) a
28 | copy of this License.
29 |
30 | 2. License Grants
31 |
32 | 2.1 Copyright Grant. Subject to the terms and conditions of this
33 | License, each Licensor grants to you a perpetual, worldwide,
34 | non-exclusive, royalty-free, copyright license to reproduce,
35 | prepare derivative works of, publicly display, publicly perform,
36 | sublicense and distribute its Work and any resulting derivative
37 | works in any form.
38 |
39 | 3. Limitations
40 |
41 | 3.1 Redistribution. You may reproduce or distribute the Work only
42 | if (a) you do so under this License, (b) you include a complete
43 | copy of this License with your distribution, and (c) you retain
44 | without modification any copyright, patent, trademark, or
45 | attribution notices that are present in the Work.
46 |
47 | 3.2 Derivative Works. You may specify that additional or different
48 | terms apply to the use, reproduction, and distribution of your
49 | derivative works of the Work ("Your Terms") only if (a) Your Terms
50 | provide that the use limitation in Section 3.3 applies to your
51 | derivative works, and (b) you identify the specific derivative
52 | works that are subject to Your Terms. Notwithstanding Your Terms,
53 | this License (including the redistribution requirements in Section
54 | 3.1) will continue to apply to the Work itself.
55 |
56 | 3.3 Use Limitation. The Work and any derivative works thereof only
57 | may be used or intended for use non-commercially. Notwithstanding
58 | the foregoing, NVIDIA and its affiliates may use the Work and any
59 | derivative works commercially. As used herein, "non-commercially"
60 | means for research or evaluation purposes only.
61 |
62 | 3.4 Patent Claims. If you bring or threaten to bring a patent claim
63 | against any Licensor (including any claim, cross-claim or
64 | counterclaim in a lawsuit) to enforce any patents that you allege
65 | are infringed by any Work, then your rights under this License from
66 | such Licensor (including the grant in Section 2.1) will terminate
67 | immediately.
68 |
69 | 3.5 Trademarks. This License does not grant any rights to use any
70 | Licensor’s or its affiliates’ names, logos, or trademarks, except
71 | as necessary to reproduce the notices described in this License.
72 |
73 | 3.6 Termination. If you violate any term of this License, then your
74 | rights under this License (including the grant in Section 2.1) will
75 | terminate immediately.
76 |
77 | 4. Disclaimer of Warranty.
78 |
79 | THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
80 | KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
81 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
82 | NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
83 | THIS LICENSE.
84 |
85 | 5. Limitation of Liability.
86 |
87 | EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
88 | THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
89 | SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
90 | INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
91 | OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
92 | (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
93 | LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
94 | COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
95 | THE POSSIBILITY OF SUCH DAMAGES.
96 |
97 | =======================================================================
98 |
--------------------------------------------------------------------------------
/stylegan2-ada/metrics/frechet_inception_distance.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | """Frechet Inception Distance (FID) from the paper
10 | "GANs trained by a two time-scale update rule converge to a local Nash equilibrium"."""
11 |
12 | import os
13 | import pickle
14 | import numpy as np
15 | import scipy
16 | import tensorflow as tf
17 | import dnnlib
18 | import dnnlib.tflib as tflib
19 |
20 | from metrics import metric_base
21 |
22 | #----------------------------------------------------------------------------
23 |
24 | class FID(metric_base.MetricBase):
25 | def __init__(self, max_reals, num_fakes, minibatch_per_gpu, use_cached_real_stats=True, **kwargs):
26 | super().__init__(**kwargs)
27 | self.max_reals = max_reals
28 | self.num_fakes = num_fakes
29 | self.minibatch_per_gpu = minibatch_per_gpu
30 | self.use_cached_real_stats = use_cached_real_stats
31 |
32 | def _evaluate(self, Gs, G_kwargs, num_gpus, **_kwargs): # pylint: disable=arguments-differ
33 | minibatch_size = num_gpus * self.minibatch_per_gpu
34 | with dnnlib.util.open_url('https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/inception_v3_features.pkl') as f: # identical to http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz
35 | feature_net = pickle.load(f)
36 |
37 | # Calculate statistics for reals.
38 | cache_file = self._get_cache_file_for_reals(max_reals=self.max_reals)
39 | os.makedirs(os.path.dirname(cache_file), exist_ok=True)
40 | if self.use_cached_real_stats and os.path.isfile(cache_file):
41 | with open(cache_file, 'rb') as f:
42 | mu_real, sigma_real = pickle.load(f)
43 | else:
44 | nfeat = feature_net.output_shape[1]
45 | mu_real = np.zeros(nfeat)
46 | sigma_real = np.zeros([nfeat, nfeat])
47 | num_real = 0
48 | for images, _labels, num in self._iterate_reals(minibatch_size):
49 | if self.max_reals is not None:
50 | num = min(num, self.max_reals - num_real)
51 | if images.shape[1] == 1:
52 | images = np.tile(images, [1, 3, 1, 1])
53 | for feat in list(feature_net.run(images, num_gpus=num_gpus, assume_frozen=True))[:num]:
54 | mu_real += feat
55 | sigma_real += np.outer(feat, feat)
56 | num_real += 1
57 | if self.max_reals is not None and num_real >= self.max_reals:
58 | break
59 | mu_real /= num_real
60 | sigma_real /= num_real
61 | sigma_real -= np.outer(mu_real, mu_real)
62 | with open(cache_file, 'wb') as f:
63 | pickle.dump((mu_real, sigma_real), f)
64 |
65 | # Construct TensorFlow graph.
66 | result_expr = []
67 | for gpu_idx in range(num_gpus):
68 | with tf.device('/gpu:%d' % gpu_idx):
69 | Gs_clone = Gs.clone()
70 | feature_net_clone = feature_net.clone()
71 | latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:])
72 | labels = self._get_random_labels_tf(self.minibatch_per_gpu)
73 | images = Gs_clone.get_output_for(latents, labels, **G_kwargs)
74 | if images.shape[1] == 1: images = tf.tile(images, [1, 3, 1, 1])
75 | images = tflib.convert_images_to_uint8(images)
76 | result_expr.append(feature_net_clone.get_output_for(images))
77 |
78 | # Calculate statistics for fakes.
79 | feat_fake = []
80 | for begin in range(0, self.num_fakes, minibatch_size):
81 | self._report_progress(begin, self.num_fakes)
82 | feat_fake += list(np.concatenate(tflib.run(result_expr), axis=0))
83 | feat_fake = np.stack(feat_fake[:self.num_fakes])
84 | mu_fake = np.mean(feat_fake, axis=0)
85 | sigma_fake = np.cov(feat_fake, rowvar=False)
86 |
87 | # Calculate FID.
88 | m = np.square(mu_fake - mu_real).sum()
89 | s, _ = scipy.linalg.sqrtm(np.dot(sigma_fake, sigma_real), disp=False) # pylint: disable=no-member
90 | dist = m + np.trace(sigma_fake + sigma_real - 2*s)
91 | self._report_result(np.real(dist))
92 |
93 | #----------------------------------------------------------------------------
94 |
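95 | # Reference (illustrative note, not part of the original file): for Gaussian fits
96 | # N(mu_r, S_r) and N(mu_f, S_f) of the real and fake Inception features, _evaluate()
97 | # computes the closed-form Frechet distance
98 | #   FID = ||mu_f - mu_r||^2 + Tr(S_f + S_r - 2 (S_f S_r)^(1/2)),
99 | # with the matrix square root evaluated via scipy.linalg.sqrtm.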
--------------------------------------------------------------------------------
/stylegan2-ada/metrics/kernel_inception_distance.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | """Kernel Inception Distance (KID) from the paper
10 | "Demystifying MMD GANs"."""
11 |
12 | import os
13 | import pickle
14 | import numpy as np
15 | import tensorflow as tf
16 | import dnnlib
17 | import dnnlib.tflib as tflib
18 |
19 | from metrics import metric_base
20 |
21 | #----------------------------------------------------------------------------
22 |
23 | def compute_kid(feat_real, feat_fake, num_subsets=100, max_subset_size=1000):
24 |     n = feat_real.shape[1]  # feature dimensionality
25 |     m = min(min(feat_real.shape[0], feat_fake.shape[0]), max_subset_size)  # samples per subset
26 |     t = 0
27 |     for _subset_idx in range(num_subsets):
28 |         x = feat_fake[np.random.choice(feat_fake.shape[0], m, replace=False)]
29 |         y = feat_real[np.random.choice(feat_real.shape[0], m, replace=False)]
30 |         a = (x @ x.T / n + 1) ** 3 + (y @ y.T / n + 1) ** 3  # cubic polynomial kernel k(u, v) = (u.v/n + 1)^3
31 |         b = (x @ y.T / n + 1) ** 3
32 |         t += (a.sum() - np.diag(a).sum()) / (m - 1) - b.sum() * 2 / m  # unbiased MMD^2 estimate for this subset
33 |     return t / num_subsets / m  # average over subsets
34 |
35 | #----------------------------------------------------------------------------
36 |
37 | class KID(metric_base.MetricBase):
38 | def __init__(self, max_reals, num_fakes, minibatch_per_gpu, use_cached_real_stats=True, **kwargs):
39 | super().__init__(**kwargs)
40 | self.max_reals = max_reals
41 | self.num_fakes = num_fakes
42 | self.minibatch_per_gpu = minibatch_per_gpu
43 | self.use_cached_real_stats = use_cached_real_stats
44 |
45 | def _evaluate(self, Gs, G_kwargs, num_gpus, **_kwargs): # pylint: disable=arguments-differ
46 | minibatch_size = num_gpus * self.minibatch_per_gpu
47 | with dnnlib.util.open_url('https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/inception_v3_features.pkl') as f: # identical to http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz
48 | feature_net = pickle.load(f)
49 |
50 | # Calculate statistics for reals.
51 | cache_file = self._get_cache_file_for_reals(max_reals=self.max_reals)
52 | os.makedirs(os.path.dirname(cache_file), exist_ok=True)
53 | if self.use_cached_real_stats and os.path.isfile(cache_file):
54 | with open(cache_file, 'rb') as f:
55 | feat_real = pickle.load(f)
56 | else:
57 | feat_real = []
58 | for images, _labels, num in self._iterate_reals(minibatch_size):
59 | if self.max_reals is not None:
60 | num = min(num, self.max_reals - len(feat_real))
61 | if images.shape[1] == 1:
62 | images = np.tile(images, [1, 3, 1, 1])
63 | feat_real += list(feature_net.run(images, num_gpus=num_gpus, assume_frozen=True))[:num]
64 | if self.max_reals is not None and len(feat_real) >= self.max_reals:
65 | break
66 | feat_real = np.stack(feat_real)
67 | with open(cache_file, 'wb') as f:
68 | pickle.dump(feat_real, f)
69 |
70 | # Construct TensorFlow graph.
71 | result_expr = []
72 | for gpu_idx in range(num_gpus):
73 | with tf.device('/gpu:%d' % gpu_idx):
74 | Gs_clone = Gs.clone()
75 | feature_net_clone = feature_net.clone()
76 | latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:])
77 | labels = self._get_random_labels_tf(self.minibatch_per_gpu)
78 | images = Gs_clone.get_output_for(latents, labels, **G_kwargs)
79 | if images.shape[1] == 1: images = tf.tile(images, [1, 3, 1, 1])
80 | images = tflib.convert_images_to_uint8(images)
81 | result_expr.append(feature_net_clone.get_output_for(images))
82 |
83 | # Calculate statistics for fakes.
84 | feat_fake = []
85 | for begin in range(0, self.num_fakes, minibatch_size):
86 | self._report_progress(begin, self.num_fakes)
87 | feat_fake += list(np.concatenate(tflib.run(result_expr), axis=0))
88 | feat_fake = np.stack(feat_fake[:self.num_fakes])
89 |
90 | # Calculate KID.
91 | kid = compute_kid(feat_real, feat_fake)
92 | self._report_result(np.real(kid), fmt='%-12.8f')
93 |
94 | #----------------------------------------------------------------------------
95 |
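96 | # Minimal self-check (an illustrative sketch, not part of the original file): KID
97 | # between two samples drawn from the same distribution should be close to zero up
98 | # to estimator noise.
99 | #
100 | #   feat_a = np.random.randn(2000, 2048).astype(np.float32)
101 | #   feat_b = np.random.randn(2000, 2048).astype(np.float32)
102 | #   print(compute_kid(feat_a, feat_b))  # ~0; real-vs-fake features score higher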
--------------------------------------------------------------------------------
/stylegan2-ada/style_mixing.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | """Generate style mixing image matrix using pretrained network pickle."""
10 |
11 | import argparse
12 | import os
13 | import pickle
14 | import re
15 |
16 | import numpy as np
17 | import PIL.Image
18 |
19 | import dnnlib
20 | import dnnlib.tflib as tflib
21 |
22 | #----------------------------------------------------------------------------
23 |
24 | def style_mixing_example(network_pkl, row_seeds, col_seeds, truncation_psi, col_styles, outdir, minibatch_size=4):
25 | tflib.init_tf()
26 | print('Loading networks from "%s"...' % network_pkl)
27 | with dnnlib.util.open_url(network_pkl) as fp:
28 | _G, _D, Gs = pickle.load(fp)
29 |
30 | w_avg = Gs.get_var('dlatent_avg') # [component]
31 | Gs_syn_kwargs = {
32 | 'output_transform': dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True),
33 | 'randomize_noise': False,
34 | 'minibatch_size': minibatch_size
35 | }
36 |
37 | print('Generating W vectors...')
38 | all_seeds = list(set(row_seeds + col_seeds))
39 | all_z = np.stack([np.random.RandomState(seed).randn(*Gs.input_shape[1:]) for seed in all_seeds]) # [minibatch, component]
40 | all_w = Gs.components.mapping.run(all_z, None) # [minibatch, layer, component]
41 | all_w = w_avg + (all_w - w_avg) * truncation_psi # [minibatch, layer, component]
42 | w_dict = {seed: w for seed, w in zip(all_seeds, list(all_w))} # [layer, component]
43 |
44 | print('Generating images...')
45 | all_images = Gs.components.synthesis.run(all_w, **Gs_syn_kwargs) # [minibatch, height, width, channel]
46 | image_dict = {(seed, seed): image for seed, image in zip(all_seeds, list(all_images))}
47 |
48 | print('Generating style-mixed images...')
49 | for row_seed in row_seeds:
50 | for col_seed in col_seeds:
51 | w = w_dict[row_seed].copy()
52 | w[col_styles] = w_dict[col_seed][col_styles]
53 | image = Gs.components.synthesis.run(w[np.newaxis], **Gs_syn_kwargs)[0]
54 | image_dict[(row_seed, col_seed)] = image
55 |
56 | print('Saving images...')
57 | os.makedirs(outdir, exist_ok=True)
58 | for (row_seed, col_seed), image in image_dict.items():
59 | PIL.Image.fromarray(image, 'RGB').save(f'{outdir}/{row_seed}-{col_seed}.png')
60 |
61 | print('Saving image grid...')
62 | _N, _C, H, W = Gs.output_shape
63 | canvas = PIL.Image.new('RGB', (W * (len(col_seeds) + 1), H * (len(row_seeds) + 1)), 'black')
64 | for row_idx, row_seed in enumerate([None] + row_seeds):
65 | for col_idx, col_seed in enumerate([None] + col_seeds):
66 | if row_seed is None and col_seed is None:
67 | continue
68 | key = (row_seed, col_seed)
69 | if row_seed is None:
70 | key = (col_seed, col_seed)
71 | if col_seed is None:
72 | key = (row_seed, row_seed)
73 | canvas.paste(PIL.Image.fromarray(image_dict[key], 'RGB'), (W * col_idx, H * row_idx))
74 | canvas.save(f'{outdir}/grid.png')
75 |
76 | #----------------------------------------------------------------------------
77 |
78 | def _parse_num_range(s):
79 | '''Accept either a comma separated list of numbers 'a,b,c' or a range 'a-c' and return as a list of ints.'''
80 |
81 | range_re = re.compile(r'^(\d+)-(\d+)$')
82 | m = range_re.match(s)
83 | if m:
84 | return list(range(int(m.group(1)), int(m.group(2))+1))
85 | vals = s.split(',')
86 | return [int(x) for x in vals]
87 |
88 | #----------------------------------------------------------------------------
89 |
90 | _examples = '''examples:
91 |
92 | python %(prog)s --outdir=out --trunc=1 --rows=85,100,75,458,1500 --cols=55,821,1789,293 \\
93 | --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metfaces.pkl
94 | '''
95 |
96 | #----------------------------------------------------------------------------
97 |
98 | def main():
99 | parser = argparse.ArgumentParser(
100 | description='Generate style mixing image matrix using pretrained network pickle.',
101 | epilog=_examples,
102 | formatter_class=argparse.RawDescriptionHelpFormatter
103 | )
104 |
105 | parser.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True)
106 | parser.add_argument('--rows', dest='row_seeds', type=_parse_num_range, help='Random seeds to use for image rows', required=True)
107 | parser.add_argument('--cols', dest='col_seeds', type=_parse_num_range, help='Random seeds to use for image columns', required=True)
108 | parser.add_argument('--styles', dest='col_styles', type=_parse_num_range, help='Style layer range (default: %(default)s)', default='0-6')
109 | parser.add_argument('--trunc', dest='truncation_psi', type=float, help='Truncation psi (default: %(default)s)', default=0.5)
110 | parser.add_argument('--outdir', help='Where to save the output images', required=True, metavar='DIR')
111 |
112 | args = parser.parse_args()
113 | style_mixing_example(**vars(args))
114 |
115 | #----------------------------------------------------------------------------
116 |
117 | if __name__ == "__main__":
118 | main()
119 |
120 | #----------------------------------------------------------------------------
121 |
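122 | # Example with a network shipped in this repository (illustrative; paths are relative
123 | # to the repository root):
124 | #   python stylegan2-ada/style_mixing.py --outdir=out --rows=0,1,2 --cols=3,4,5 \
125 | #       --network=networks/stylegan2ada-density1000.pkl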
--------------------------------------------------------------------------------
/stylegan2-ada/generate.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | """Generate images using pretrained network pickle."""
10 |
11 | import argparse
12 | import os
13 | import pickle
14 | import re
15 |
16 | import numpy as np
17 | import PIL.Image
18 |
19 | import dnnlib
20 | import dnnlib.tflib as tflib
21 |
22 | #----------------------------------------------------------------------------
23 |
24 | def generate_images(network_pkl, seeds, truncation_psi, outdir, class_idx, dlatents_npz):
25 | tflib.init_tf()
26 | print('Loading networks from "%s"...' % network_pkl)
27 | with dnnlib.util.open_url(network_pkl) as fp:
28 | _G, _D, Gs = pickle.load(fp)
29 |
30 | os.makedirs(outdir, exist_ok=True)
31 |
32 | # Render images for a given dlatent vector.
33 | if dlatents_npz is not None:
34 | print(f'Generating images from dlatents file "{dlatents_npz}"')
35 | dlatents = np.load(dlatents_npz)['dlatents']
36 | assert dlatents.shape[1:] == (18, 512) # [N, 18, 512]
37 | imgs = Gs.components.synthesis.run(dlatents, output_transform=dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True))
38 | for i, img in enumerate(imgs):
39 | fname = f'{outdir}/dlatent{i:02d}.png'
40 | print (f'Saved {fname}')
41 | PIL.Image.fromarray(img, 'RGB').save(fname)
42 | return
43 |
44 | # Render images for dlatents initialized from random seeds.
45 | Gs_kwargs = {
46 | 'output_transform': dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True),
47 | 'randomize_noise': False
48 | }
49 | if truncation_psi is not None:
50 | Gs_kwargs['truncation_psi'] = truncation_psi
51 |
52 | noise_vars = [var for name, var in Gs.components.synthesis.vars.items() if name.startswith('noise')]
53 | label = np.zeros([1] + Gs.input_shapes[1][1:])
54 | if class_idx is not None:
55 | label[:, class_idx] = 1
56 |
57 | for seed_idx, seed in enumerate(seeds):
58 | print('Generating image for seed %d (%d/%d) ...' % (seed, seed_idx, len(seeds)))
59 | rnd = np.random.RandomState(seed)
60 | z = rnd.randn(1, *Gs.input_shape[1:]) # [minibatch, component]
61 | tflib.set_vars({var: rnd.randn(*var.shape.as_list()) for var in noise_vars}) # [height, width]
62 | images = Gs.run(z, label, **Gs_kwargs) # [minibatch, height, width, channel]
63 | PIL.Image.fromarray(images[0], 'RGB').save(f'{outdir}/seed{seed:04d}.png')
64 |
65 | #----------------------------------------------------------------------------
66 |
67 | def _parse_num_range(s):
68 | '''Accept either a comma separated list of numbers 'a,b,c' or a range 'a-c' and return as a list of ints.'''
69 |
70 | range_re = re.compile(r'^(\d+)-(\d+)$')
71 | m = range_re.match(s)
72 | if m:
73 | return list(range(int(m.group(1)), int(m.group(2))+1))
74 | vals = s.split(',')
75 | return [int(x) for x in vals]
76 |
77 | #----------------------------------------------------------------------------
78 |
79 | _examples = '''examples:
80 |
81 | # Generate curated MetFaces images without truncation (Fig.10 left)
82 | python %(prog)s --outdir=out --trunc=1 --seeds=85,265,297,849 \\
83 | --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metfaces.pkl
84 |
85 | # Generate uncurated MetFaces images with truncation (Fig.12 upper left)
86 | python %(prog)s --outdir=out --trunc=0.7 --seeds=600-605 \\
87 | --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metfaces.pkl
88 |
89 | # Generate class conditional CIFAR-10 images (Fig.17 left, Car)
90 | python %(prog)s --outdir=out --trunc=1 --seeds=0-35 --class=1 \\
91 | --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/cifar10.pkl
92 |
93 | # Render image from projected latent vector
94 | python %(prog)s --outdir=out --dlatents=out/dlatents.npz \\
95 | --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/ffhq.pkl
96 | '''
97 |
98 | #----------------------------------------------------------------------------
99 |
100 | def main():
101 | parser = argparse.ArgumentParser(
102 | description='Generate images using pretrained network pickle.',
103 | epilog=_examples,
104 | formatter_class=argparse.RawDescriptionHelpFormatter
105 | )
106 |
107 | parser.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True)
108 | g = parser.add_mutually_exclusive_group(required=True)
109 | g.add_argument('--seeds', type=_parse_num_range, help='List of random seeds')
110 | g.add_argument('--dlatents', dest='dlatents_npz', help='Generate images for saved dlatents')
111 | parser.add_argument('--trunc', dest='truncation_psi', type=float, help='Truncation psi (default: %(default)s)', default=0.5)
112 | parser.add_argument('--class', dest='class_idx', type=int, help='Class label (default: unconditional)')
113 | parser.add_argument('--outdir', help='Where to save the output images', required=True, metavar='DIR')
114 |
115 | args = parser.parse_args()
116 | generate_images(**vars(args))
117 |
118 | #----------------------------------------------------------------------------
119 |
120 | if __name__ == "__main__":
121 | main()
122 |
123 | #----------------------------------------------------------------------------
124 |
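125 | # Example with the density network shipped in this repository (illustrative; paths
126 | # are relative to the repository root):
127 | #   python stylegan2-ada/generate.py --outdir=out --trunc=0.5 --seeds=0-100 \
128 | #       --network=networks/stylegan2ada-density1000.pkl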
--------------------------------------------------------------------------------
/stylegan2-ada/metrics/metric_base.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | """Common definitions for quality metrics."""
10 |
11 | import os
12 | import time
13 | import hashlib
14 | import pickle
15 | import numpy as np
16 | import tensorflow as tf
17 | import dnnlib
18 | import dnnlib.tflib as tflib
19 |
20 | from training import dataset
21 |
22 | #----------------------------------------------------------------------------
23 | # Base class for metrics.
24 |
25 | class MetricBase:
26 | def __init__(self, name, force_dataset_args={}, force_G_kwargs={}):
27 | # Constructor args.
28 | self.name = name
29 | self.force_dataset_args = force_dataset_args
30 | self.force_G_kwargs = force_G_kwargs
31 |
32 | # Configuration.
33 | self._dataset_args = dnnlib.EasyDict()
34 | self._run_dir = None
35 | self._progress_fn = None
36 |
37 | # Internal state.
38 | self._results = []
39 | self._network_name = ''
40 | self._eval_time = 0
41 | self._dataset = None
42 |
43 | def configure(self, dataset_args={}, run_dir=None, progress_fn=None):
44 | self._dataset_args = dnnlib.EasyDict(dataset_args)
45 | self._dataset_args.update(self.force_dataset_args)
46 | self._run_dir = run_dir
47 | self._progress_fn = progress_fn
48 |
49 | def run(self, network_pkl, num_gpus=1, G_kwargs=dict(is_validation=True)):
50 | self._results = []
51 | self._network_name = os.path.splitext(os.path.basename(network_pkl))[0]
52 | self._eval_time = 0
53 | self._dataset = None
54 |
55 | with tf.Graph().as_default(), tflib.create_session().as_default(): # pylint: disable=not-context-manager
56 | self._report_progress(0, 1)
57 | time_begin = time.time()
58 | with dnnlib.util.open_url(network_pkl) as f:
59 | G, D, Gs = pickle.load(f)
60 |
61 | G_kwargs = dnnlib.EasyDict(G_kwargs)
62 | G_kwargs.update(self.force_G_kwargs)
63 | self._evaluate(G=G, D=D, Gs=Gs, G_kwargs=G_kwargs, num_gpus=num_gpus)
64 |
65 | self._eval_time = time.time() - time_begin # pylint: disable=attribute-defined-outside-init
66 | self._report_progress(1, 1)
67 | if self._dataset is not None:
68 | self._dataset.close()
69 | self._dataset = None
70 |
71 | result_str = self.get_result_str()
72 | print(result_str)
73 | if self._run_dir is not None and os.path.isdir(self._run_dir):
74 | with open(os.path.join(self._run_dir, f'metric-{self.name}.txt'), 'at') as f:
75 | f.write(result_str + '\n')
76 |
77 | def get_result_str(self):
78 | title = self._network_name
79 | if len(title) > 29:
80 | title = '...' + title[-26:]
81 | result_str = f'{title:<30s} time {dnnlib.util.format_time(self._eval_time):<12s}'
82 | for res in self._results:
83 | result_str += f' {self.name}{res.suffix} {res.fmt % res.value}'
84 | return result_str.strip()
85 |
86 | def update_autosummaries(self):
87 | for res in self._results:
88 | tflib.autosummary.autosummary('Metrics/' + self.name + res.suffix, res.value)
89 |
90 | def _evaluate(self, **_kwargs):
91 | raise NotImplementedError # to be overridden by subclasses
92 |
93 | def _report_result(self, value, suffix='', fmt='%-10.4f'):
94 | self._results += [dnnlib.EasyDict(value=value, suffix=suffix, fmt=fmt)]
95 |
96 | def _report_progress(self, cur, total):
97 | if self._progress_fn is not None:
98 | self._progress_fn(cur, total)
99 |
100 | def _get_cache_file_for_reals(self, extension='pkl', **kwargs):
101 | all_args = dnnlib.EasyDict(metric_name=self.name)
102 | all_args.update(self._dataset_args)
103 | all_args.update(kwargs)
104 | md5 = hashlib.md5(repr(sorted(all_args.items())).encode('utf-8'))
105 | dataset_name = os.path.splitext(os.path.basename(self._dataset_args.path))[0]
106 | return dnnlib.make_cache_dir_path('metrics', f'{md5.hexdigest()}-{self.name}-{dataset_name}.{extension}')
107 |
108 | def _get_dataset_obj(self):
109 | if self._dataset is None:
110 | self._dataset = dataset.load_dataset(**self._dataset_args)
111 | return self._dataset
112 |
113 | def _iterate_reals(self, minibatch_size):
114 | print(f'Calculating real image statistics for {self.name}...')
115 | dataset_obj = self._get_dataset_obj()
116 | while True:
117 | images = []
118 | labels = []
119 | for _ in range(minibatch_size):
120 | image, label = dataset_obj.get_minibatch_np(1)
121 | if image is None:
122 | break
123 | images.append(image)
124 | labels.append(label)
125 | num = len(images)
126 | if num == 0:
127 | break
128 | images = np.concatenate(images + [images[-1]] * (minibatch_size - num), axis=0)
129 | labels = np.concatenate(labels + [labels[-1]] * (minibatch_size - num), axis=0)
130 | yield images, labels, num
131 | if num < minibatch_size:
132 | break
133 |
134 | def _get_random_labels_tf(self, minibatch_size):
135 | return self._get_dataset_obj().get_random_labels_tf(minibatch_size)
136 |
137 | #----------------------------------------------------------------------------
138 |
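139 | # Illustrative sketch (not part of the original file): a custom metric plugs in by
140 | # subclassing MetricBase and overriding _evaluate(), reporting via _report_result():
141 | #
142 | #   class Dummy(MetricBase):
143 | #       def _evaluate(self, Gs, **_kwargs):  # pylint: disable=arguments-differ
144 | #           self._report_result(float(np.prod(Gs.output_shape[1:])), suffix='_npix')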
--------------------------------------------------------------------------------
/stylegan2-ada/docs/license.html:
--------------------------------------------------------------------------------
Nvidia Source Code License-NC

NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator Augmentation (ADA)

1. Definitions

“Licensor” means any person or entity that distributes its Work.

“Software” means the original work of authorship made available under this License.

“Work” means the Software and any additions to or derivative works of the Software that are made available under this License.

The terms “reproduce,” “reproduction,” “derivative works,” and “distribution” have the meaning as provided under U.S. copyright law; provided, however, that for the purposes of this License, derivative works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work.

Works, including the Software, are “made available” under this License by including in or with the Work either (a) a copyright notice referencing the applicability of this License to the Work, or (b) a copy of this License.

2. License Grants

2.1 Copyright Grant. Subject to the terms and conditions of this License, each Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free, copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense and distribute its Work and any resulting derivative works in any form.

3. Limitations

3.1 Redistribution. You may reproduce or distribute the Work only if (a) you do so under this License, (b) you include a complete copy of this License with your distribution, and (c) you retain without modification any copyright, patent, trademark, or attribution notices that are present in the Work.

3.2 Derivative Works. You may specify that additional or different terms apply to the use, reproduction, and distribution of your derivative works of the Work (“Your Terms”) only if (a) Your Terms provide that the use limitation in Section 3.3 applies to your derivative works, and (b) you identify the specific derivative works that are subject to Your Terms. Notwithstanding Your Terms, this License (including the redistribution requirements in Section 3.1) will continue to apply to the Work itself.

3.3 Use Limitation. The Work and any derivative works thereof only may be used or intended for use non-commercially. Notwithstanding the foregoing, NVIDIA and its affiliates may use the Work and any derivative works commercially. As used herein, “non-commercially” means for research or evaluation purposes only.

3.4 Patent Claims. If you bring or threaten to bring a patent claim against any Licensor (including any claim, cross-claim or counterclaim in a lawsuit) to enforce any patents that you allege are infringed by any Work, then your rights under this License from such Licensor (including the grant in Section 2.1) will terminate immediately.

3.5 Trademarks. This License does not grant any rights to use any Licensor’s or its affiliates’ names, logos, or trademarks, except as necessary to reproduce the notices described in this License.

3.6 Termination. If you violate any term of this License, then your rights under this License (including the grant in Section 2.1) will terminate immediately.

4. Disclaimer of Warranty.

THE WORK IS PROVIDED “AS IS” WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS LICENSE.

5. Limitation of Liability.

EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
--------------------------------------------------------------------------------
/stylegan2-ada/metrics/perceptual_path_length.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | """Perceptual Path Length (PPL) from the paper
10 | "A Style-Based Generator Architecture for Generative Adversarial Networks"."""
11 |
12 | import pickle
13 | import numpy as np
14 | import tensorflow as tf
15 | import dnnlib
16 | import dnnlib.tflib as tflib
17 |
18 | from metrics import metric_base
19 |
20 | #----------------------------------------------------------------------------
21 |
22 | # Normalize batch of vectors.
23 | def normalize(v):
24 | return v / tf.sqrt(tf.reduce_sum(tf.square(v), axis=-1, keepdims=True))
25 |
26 | # Spherical interpolation of a batch of vectors.
27 | def slerp(a, b, t):
28 | a = normalize(a)
29 | b = normalize(b)
30 | d = tf.reduce_sum(a * b, axis=-1, keepdims=True)
31 | p = t * tf.math.acos(d)
32 | c = normalize(b - d * a)
33 | d = a * tf.math.cos(p) + c * tf.math.sin(p)
34 | return normalize(d)
35 |
36 | #----------------------------------------------------------------------------
37 |
38 | class PPL(metric_base.MetricBase):
39 | def __init__(self, num_samples, epsilon, space, sampling, crop, minibatch_per_gpu, **kwargs):
40 | assert space in ['z', 'w']
41 | assert sampling in ['full', 'end']
42 | super().__init__(**kwargs)
43 | self.num_samples = num_samples
44 | self.epsilon = epsilon
45 | self.space = space
46 | self.sampling = sampling
47 | self.crop = crop
48 | self.minibatch_per_gpu = minibatch_per_gpu
49 |
50 | def _evaluate(self, Gs, G_kwargs, num_gpus, **_kwargs): # pylint: disable=arguments-differ
51 | minibatch_size = num_gpus * self.minibatch_per_gpu
52 |
53 | # Construct TensorFlow graph.
54 | distance_expr = []
55 | for gpu_idx in range(num_gpus):
56 | with tf.device(f'/gpu:{gpu_idx}'):
57 | Gs_clone = Gs.clone()
58 | noise_vars = [var for name, var in Gs_clone.components.synthesis.vars.items() if name.startswith('noise')]
59 |
60 | # Generate random latents and interpolation t-values.
61 | lat_t01 = tf.random_normal([self.minibatch_per_gpu * 2] + Gs_clone.input_shape[1:])
62 | lerp_t = tf.random_uniform([self.minibatch_per_gpu], 0.0, 1.0 if self.sampling == 'full' else 0.0)
63 | labels = tf.reshape(tf.tile(self._get_random_labels_tf(self.minibatch_per_gpu), [1, 2]), [self.minibatch_per_gpu * 2, -1])
64 |
65 | # Interpolate in W or Z.
66 | if self.space == 'w':
67 | dlat_t01 = Gs_clone.components.mapping.get_output_for(lat_t01, labels, **G_kwargs)
68 | dlat_t01 = tf.cast(dlat_t01, tf.float32)
69 | dlat_t0, dlat_t1 = dlat_t01[0::2], dlat_t01[1::2]
70 | dlat_e0 = tflib.lerp(dlat_t0, dlat_t1, lerp_t[:, np.newaxis, np.newaxis])
71 | dlat_e1 = tflib.lerp(dlat_t0, dlat_t1, lerp_t[:, np.newaxis, np.newaxis] + self.epsilon)
72 | dlat_e01 = tf.reshape(tf.stack([dlat_e0, dlat_e1], axis=1), dlat_t01.shape)
73 | else: # space == 'z'
74 | lat_t0, lat_t1 = lat_t01[0::2], lat_t01[1::2]
75 | lat_e0 = slerp(lat_t0, lat_t1, lerp_t[:, np.newaxis])
76 | lat_e1 = slerp(lat_t0, lat_t1, lerp_t[:, np.newaxis] + self.epsilon)
77 | lat_e01 = tf.reshape(tf.stack([lat_e0, lat_e1], axis=1), lat_t01.shape)
78 | dlat_e01 = Gs_clone.components.mapping.get_output_for(lat_e01, labels, **G_kwargs)
79 |
80 | # Synthesize images.
81 | with tf.control_dependencies([var.initializer for var in noise_vars]): # use same noise inputs for the entire minibatch
82 | images = Gs_clone.components.synthesis.get_output_for(dlat_e01, randomize_noise=False, **G_kwargs)
83 | images = tf.cast(images, tf.float32)
84 |
85 | # Crop only the face region.
86 | if self.crop:
87 | c = int(images.shape[2] // 8)
88 | images = images[:, :, c*3 : c*7, c*2 : c*6]
89 |
90 | # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images.
91 | factor = images.shape[2] // 256
92 | if factor > 1:
93 | images = tf.reshape(images, [-1, images.shape[1], images.shape[2] // factor, factor, images.shape[3] // factor, factor])
94 | images = tf.reduce_mean(images, axis=[3,5])
95 |
96 | # Scale dynamic range from [-1,1] to [0,255] for VGG.
97 | images = (images + 1) * (255 / 2)
98 | if images.shape[1] == 1: images = tf.tile(images, [1, 3, 1, 1])
99 |
100 | # Evaluate perceptual distance.
101 | img_e0, img_e1 = images[0::2], images[1::2]
102 | with dnnlib.util.open_url('https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/vgg16_zhang_perceptual.pkl') as f:
103 | distance_measure = pickle.load(f)
104 | distance_expr.append(distance_measure.get_output_for(img_e0, img_e1) * (1 / self.epsilon**2))
105 |
106 | # Sampling loop.
107 | all_distances = []
108 | for begin in range(0, self.num_samples, minibatch_size):
109 | self._report_progress(begin, self.num_samples)
110 | all_distances += tflib.run(distance_expr)
111 | all_distances = np.concatenate(all_distances, axis=0)
112 |
113 | # Reject outliers.
114 | lo = np.percentile(all_distances, 1, interpolation='lower')
115 | hi = np.percentile(all_distances, 99, interpolation='higher')
116 | filtered_distances = np.extract(np.logical_and(lo <= all_distances, all_distances <= hi), all_distances)
117 | self._report_result(np.mean(filtered_distances))
118 |
119 | #----------------------------------------------------------------------------
120 |
--------------------------------------------------------------------------------
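To make the PPL computation above concrete, here is a hedged NumPy sketch of its two ingredients: slerp between normalized latents, and the finite-difference estimate d = dist(G(t), G(t + eps)) / eps**2. The toy generator G and the L2 stand-in for the VGG16 perceptual distance are assumptions for illustration, not the repo's models.

import numpy as np

def normalize(v):
    return v / np.sqrt(np.sum(np.square(v), axis=-1, keepdims=True))

def slerp(a, b, t):
    # Mirrors the TF slerp above: interpolate along the great circle between a and b.
    a, b = normalize(a), normalize(b)
    d = np.sum(a * b, axis=-1, keepdims=True)
    p = t * np.arccos(d)
    c = normalize(b - d * a)
    return normalize(a * np.cos(p) + c * np.sin(p))

rng = np.random.default_rng(0)
z0, z1 = rng.standard_normal((2, 512))
eps = 1e-4
t = rng.uniform(0.0, 1.0)

G = lambda z: np.tanh(z[:64])                  # hypothetical toy generator
dist = lambda a, b: float(np.sum((a - b)**2))  # L2 stand-in for the LPIPS distance
ppl_sample = dist(G(slerp(z0, z1, t)), G(slerp(z0, z1, t + eps))) / eps**2
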
/stylegan2-ada/calc_metrics.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | """Calculate quality metrics for previous training run or pretrained network pickle."""
10 |
11 | import os
12 | import argparse
13 | import json
14 | import pickle
15 | import dnnlib
16 | import dnnlib.tflib as tflib
17 |
18 | from metrics import metric_defaults
19 |
20 | #----------------------------------------------------------------------------
21 |
22 | class UserError(Exception):
23 | pass
24 |
25 | #----------------------------------------------------------------------------
26 |
27 | def calc_metrics(network_pkl, metric_names, metricdata, mirror, gpus):
28 | tflib.init_tf()
29 |
30 | # Initialize metrics.
31 | metrics = []
32 | for name in metric_names:
33 | if name not in metric_defaults.metric_defaults:
34 | raise UserError('\n'.join(['--metrics can only contain the following values:', 'none'] + list(metric_defaults.metric_defaults.keys())))
35 | metrics.append(dnnlib.util.construct_class_by_name(**metric_defaults.metric_defaults[name]))
36 |
37 | # Load network.
38 | if not dnnlib.util.is_url(network_pkl, allow_file_urls=True) and not os.path.isfile(network_pkl):
39 | raise UserError('--network must point to a file or URL')
40 | print(f'Loading network from "{network_pkl}"...')
41 | with dnnlib.util.open_url(network_pkl) as f:
42 | _G, _D, Gs = pickle.load(f)
43 | Gs.print_layers()
44 |
45 | # Look up training options.
46 | run_dir = None
47 | training_options = None
48 | if os.path.isfile(network_pkl):
49 | potential_run_dir = os.path.dirname(network_pkl)
50 | potential_json_file = os.path.join(potential_run_dir, 'training_options.json')
51 | if os.path.isfile(potential_json_file):
52 | print(f'Looking up training options from "{potential_json_file}"...')
53 | run_dir = potential_run_dir
54 | with open(potential_json_file, 'rt') as f:
55 | training_options = json.load(f, object_pairs_hook=dnnlib.EasyDict)
56 | if training_options is None:
57 | print('Could not look up training options; will rely on --metricdata and --mirror')
58 |
59 | # Choose dataset options.
60 | dataset_options = dnnlib.EasyDict()
61 | if training_options is not None:
62 | dataset_options.update(training_options.metric_dataset_args)
63 | dataset_options.resolution = Gs.output_shapes[0][-1]
64 | dataset_options.max_label_size = Gs.input_shapes[1][-1]
65 | if metricdata is not None:
66 | if not os.path.isdir(metricdata):
67 | raise UserError('--metricdata must point to a directory containing *.tfrecords')
68 | dataset_options.path = metricdata
69 | if mirror is not None:
70 | dataset_options.mirror_augment = mirror
71 | if 'path' not in dataset_options:
72 | raise UserError('--metricdata must be specified explicitly')
73 |
74 | # Print dataset options.
75 | print()
76 | print('Dataset options:')
77 | print(json.dumps(dataset_options, indent=2))
78 |
79 | # Evaluate metrics.
80 | for metric in metrics:
81 | print()
82 | print(f'Evaluating {metric.name}...')
83 | metric.configure(dataset_args=dataset_options, run_dir=run_dir)
84 | metric.run(network_pkl=network_pkl, num_gpus=gpus)
85 |
86 | #----------------------------------------------------------------------------
87 |
88 | def _str_to_bool(v):
89 | if isinstance(v, bool):
90 | return v
91 | if v.lower() in ('yes', 'true', 't', 'y', '1'):
92 | return True
93 | if v.lower() in ('no', 'false', 'f', 'n', '0'):
94 | return False
95 | raise argparse.ArgumentTypeError('Boolean value expected.')
96 |
97 | def _parse_comma_sep(s):
98 | if s is None or s.lower() == 'none' or s == '':
99 | return []
100 | return s.split(',')
101 |
102 | #----------------------------------------------------------------------------
103 |
104 | _cmdline_help_epilog = '''examples:
105 |
106 | # Previous training run: look up options automatically, save result to text file.
107 | python %(prog)s --metrics=pr50k3_full \\
108 | --network=~/training-runs/00000-ffhq10k-res64-auto1/network-snapshot-000000.pkl
109 |
110 | # Pretrained network pickle: specify dataset explicitly, print result to stdout.
111 | python %(prog)s --metrics=fid50k_full --metricdata=~/datasets/ffhq --mirror=1 \\
112 | --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/ffhq.pkl
113 |
114 | available metrics:
115 |
116 | ADA paper:
117 | fid50k_full Frechet inception distance against the full dataset.
118 | kid50k_full Kernel inception distance against the full dataset.
119 | pr50k3_full Precision and recall against the full dataset.
120 | is50k Inception score for CIFAR-10.
121 |
122 | Legacy: StyleGAN2
123 | fid50k Frechet inception distance against 50k real images.
124 | kid50k Kernel inception distance against 50k real images.
125 | pr50k3 Precision and recall against 50k real images.
126 | ppl2_wend Perceptual path length in W at path endpoints against full image.
127 |
128 | Legacy: StyleGAN
129 | ppl_zfull Perceptual path length in Z for full paths against cropped image.
130 | ppl_wfull Perceptual path length in W for full paths against cropped image.
131 | ppl_zend Perceptual path length in Z at path endpoints against cropped image.
132 | ppl_wend Perceptual path length in W at path endpoints against cropped image.
133 | ls Linear separability with respect to CelebA attributes.
134 | '''
135 |
136 | #----------------------------------------------------------------------------
137 |
138 | def main():
139 | parser = argparse.ArgumentParser(
140 | description='Calculate quality metrics for previous training run or pretrained network pickle.',
141 | epilog=_cmdline_help_epilog,
142 | formatter_class=argparse.RawDescriptionHelpFormatter
143 | )
144 |
145 | parser.add_argument('--network', help='Network pickle filename or URL', dest='network_pkl', metavar='PATH')
146 | parser.add_argument('--metrics', help='Comma-separated list or "none" (default: %(default)s)', dest='metric_names', type=_parse_comma_sep, default='fid50k_full', metavar='LIST')
147 | parser.add_argument('--metricdata', help='Dataset to evaluate metrics against (default: look up from training options)', metavar='PATH')
148 | parser.add_argument('--mirror', help='Whether the dataset was augmented with x-flips during training (default: look up from training options)', type=_str_to_bool, metavar='BOOL')
149 | parser.add_argument('--gpus', help='Number of GPUs to use (default: %(default)s)', type=int, default=1, metavar='INT')
150 |
151 | args = parser.parse_args()
152 | try:
153 | calc_metrics(**vars(args))
154 | except UserError as err:
155 | print(f'Error: {err}')
156 | exit(1)
157 |
158 | #----------------------------------------------------------------------------
159 |
160 | if __name__ == "__main__":
161 | main()
162 |
163 | #----------------------------------------------------------------------------
164 |
--------------------------------------------------------------------------------
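The metric dispatch in calc_metrics() leans on dnnlib.util.construct_class_by_name, which resolves a dotted class_name from metric_defaults and instantiates it with the remaining kwargs. A stripped-down sketch of that mechanism follows; the helper shown is a stand-in, and the FID entry is illustrative rather than copied from metric_defaults.py.

import importlib

def construct_class_by_name_sketch(*, class_name, **kwargs):
    # Split 'pkg.module.Class' into module path and attribute, then instantiate.
    module_name, _, attr = class_name.rpartition('.')
    cls = getattr(importlib.import_module(module_name), attr)
    return cls(**kwargs)

metric_defaults_sketch = {
    'fid50k_full': dict(class_name='metrics.frechet_inception_distance.FID',
                        name='fid50k_full', num_images=50000, minibatch_per_gpu=8),
}
# Inside the repo this would resolve and build the metric object:
# metric = construct_class_by_name_sketch(**metric_defaults_sketch['fid50k_full'])
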
/stylegan2-ada/dnnlib/tflib/custom_ops.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | """TensorFlow custom ops builder.
10 | """
11 |
12 | import glob
13 | import os
14 | import re
15 | import uuid
16 | import hashlib
17 | import tempfile
18 | import shutil
19 | import tensorflow as tf
20 | from tensorflow.python.client import device_lib # pylint: disable=no-name-in-module
21 |
22 | from .. import util
23 |
24 | #----------------------------------------------------------------------------
25 | # Global options.
26 |
27 | cuda_cache_path = None
28 | cuda_cache_version_tag = 'v1'
29 | do_not_hash_included_headers = True # Speed up compilation by assuming that headers included by the CUDA code never change.
30 | verbose = True # Print status messages to stdout.
31 |
32 | #----------------------------------------------------------------------------
33 | # Internal helper funcs.
34 |
35 | def _find_compiler_bindir():
36 | hostx64_paths = sorted(glob.glob('C:/Program Files (x86)/Microsoft Visual Studio/*/Professional/VC/Tools/MSVC/*/bin/Hostx64/x64'), reverse=True)
37 | if hostx64_paths != []:
38 | return hostx64_paths[0]
39 | hostx64_paths = sorted(glob.glob('C:/Program Files (x86)/Microsoft Visual Studio/*/BuildTools/VC/Tools/MSVC/*/bin/Hostx64/x64'), reverse=True)
40 | if hostx64_paths != []:
41 | return hostx64_paths[0]
42 | hostx64_paths = sorted(glob.glob('C:/Program Files (x86)/Microsoft Visual Studio/*/Community/VC/Tools/MSVC/*/bin/Hostx64/x64'), reverse=True)
43 | if hostx64_paths != []:
44 | return hostx64_paths[0]
45 | vc_bin_dir = 'C:/Program Files (x86)/Microsoft Visual Studio 14.0/vc/bin'
46 | if os.path.isdir(vc_bin_dir):
47 | return vc_bin_dir
48 | return None
49 |
50 | def _get_compute_cap(device):
51 | caps_str = device.physical_device_desc
52 | m = re.search('compute capability: (\\d+).(\\d+)', caps_str)
53 | major = m.group(1)
54 | minor = m.group(2)
55 | return (major, minor)
56 |
57 | def _get_cuda_gpu_arch_string():
58 | gpus = [x for x in device_lib.list_local_devices() if x.device_type == 'GPU']
59 | if len(gpus) == 0:
60 | raise RuntimeError('No GPU devices found')
61 | (major, minor) = _get_compute_cap(gpus[0])
62 | return 'sm_%s%s' % (major, minor)
63 |
64 | def _run_cmd(cmd):
65 | with os.popen(cmd) as pipe:
66 | output = pipe.read()
67 | status = pipe.close()
68 | if status is not None:
69 | raise RuntimeError('NVCC returned an error. See below for full command line and output log:\n\n%s\n\n%s' % (cmd, output))
70 |
71 | def _prepare_nvcc_cli(opts):
72 | cmd = 'nvcc ' + opts.strip()
73 | cmd += ' --disable-warnings'
74 | cmd += ' --include-path "%s"' % tf.sysconfig.get_include()
75 | cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'protobuf_archive', 'src')
76 | cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'com_google_absl')
77 | cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'eigen_archive')
78 |
79 | compiler_bindir = _find_compiler_bindir()
80 | if compiler_bindir is None:
81 | # Require that _find_compiler_bindir succeeds on Windows. Allow
82 | # nvcc to use whatever is the default on Linux.
83 | if os.name == 'nt':
84 | raise RuntimeError('Could not find MSVC/GCC/CLANG installation on this computer. Check compiler_bindir_search_path list in "%s".' % __file__)
85 | else:
86 | cmd += ' --compiler-bindir "%s"' % compiler_bindir
87 | cmd += ' 2>&1'
88 | return cmd
89 |
90 | #----------------------------------------------------------------------------
91 | # Main entry point.
92 |
93 | _plugin_cache = dict()
94 |
95 | def get_plugin(cuda_file, extra_nvcc_options=[]):
96 | cuda_file_base = os.path.basename(cuda_file)
97 | cuda_file_name, cuda_file_ext = os.path.splitext(cuda_file_base)
98 |
99 | # Already in cache?
100 | if cuda_file in _plugin_cache:
101 | return _plugin_cache[cuda_file]
102 |
103 | # Setup plugin.
104 | if verbose:
105 | print('Setting up TensorFlow plugin "%s": ' % cuda_file_base, end='', flush=True)
106 | try:
107 | # Hash CUDA source.
108 | md5 = hashlib.md5()
109 | with open(cuda_file, 'rb') as f:
110 | md5.update(f.read())
111 | md5.update(b'\n')
112 |
113 | # Hash headers included by the CUDA code by running it through the preprocessor.
114 | if not do_not_hash_included_headers:
115 | if verbose:
116 | print('Preprocessing... ', end='', flush=True)
117 | with tempfile.TemporaryDirectory() as tmp_dir:
118 | tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + cuda_file_ext)
119 | _run_cmd(_prepare_nvcc_cli('"%s" --preprocess -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir)))
120 | with open(tmp_file, 'rb') as f:
121 | bad_file_str = ('"' + cuda_file.replace('\\', '/') + '"').encode('utf-8') # __FILE__ in error check macros
122 | good_file_str = ('"' + cuda_file_base + '"').encode('utf-8')
123 | for ln in f:
124 | if not ln.startswith(b'# ') and not ln.startswith(b'#line '): # ignore line number pragmas
125 | ln = ln.replace(bad_file_str, good_file_str)
126 | md5.update(ln)
127 | md5.update(b'\n')
128 |
129 | # Select compiler options.
130 | compile_opts = ''
131 | if os.name == 'nt':
132 | compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.lib')
133 | elif os.name == 'posix':
134 | compile_opts += f' --compiler-options \'-fPIC\''
135 | compile_opts += f' --compiler-options \'{" ".join(tf.sysconfig.get_compile_flags())}\''
136 | compile_opts += f' --linker-options \'{" ".join(tf.sysconfig.get_link_flags())}\''
137 | else:
138 | assert False # not Windows or Linux, w00t?
139 | compile_opts += f' --gpu-architecture={_get_cuda_gpu_arch_string()}'
140 | compile_opts += ' --use_fast_math'
141 | for opt in extra_nvcc_options:
142 | compile_opts += ' ' + opt
143 | nvcc_cmd = _prepare_nvcc_cli(compile_opts)
144 |
145 | # Hash build configuration.
146 | md5.update(('nvcc_cmd: ' + nvcc_cmd).encode('utf-8') + b'\n')
147 | md5.update(('tf.VERSION: ' + tf.VERSION).encode('utf-8') + b'\n')
148 | md5.update(('cuda_cache_version_tag: ' + cuda_cache_version_tag).encode('utf-8') + b'\n')
149 |
150 | # Compile if not already compiled.
151 | cache_dir = util.make_cache_dir_path('tflib-cudacache') if cuda_cache_path is None else cuda_cache_path
152 | bin_file_ext = '.dll' if os.name == 'nt' else '.so'
153 | bin_file = os.path.join(cache_dir, cuda_file_name + '_' + md5.hexdigest() + bin_file_ext)
154 | if not os.path.isfile(bin_file):
155 | if verbose:
156 | print('Compiling... ', end='', flush=True)
157 | with tempfile.TemporaryDirectory() as tmp_dir:
158 | tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + bin_file_ext)
159 | _run_cmd(nvcc_cmd + ' "%s" --shared -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir))
160 | os.makedirs(cache_dir, exist_ok=True)
161 | intermediate_file = os.path.join(cache_dir, cuda_file_name + '_' + uuid.uuid4().hex + '_tmp' + bin_file_ext)
162 | shutil.copyfile(tmp_file, intermediate_file)
163 | os.rename(intermediate_file, bin_file) # atomic
164 |
165 | # Load.
166 | if verbose:
167 | print('Loading... ', end='', flush=True)
168 | plugin = tf.load_op_library(bin_file)
169 |
170 | # Add to cache.
171 | _plugin_cache[cuda_file] = plugin
172 | if verbose:
173 | print('Done.', flush=True)
174 | return plugin
175 |
176 | except:
177 | if verbose:
178 | print('Failed!', flush=True)
179 | raise
180 |
181 | #----------------------------------------------------------------------------
182 |
--------------------------------------------------------------------------------
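custom_ops.get_plugin() above is essentially a content-addressed build cache: the MD5 digest covers the CUDA source, the full nvcc command line, and the TF version, so any change to those inputs forces a rebuild while identical inputs reuse the cached binary. A minimal sketch of the same idea (function name and layout are assumptions, not repo API):

import hashlib
import os

def cached_binary_path(source_path, build_cmd, cache_dir, ext='.so'):
    md5 = hashlib.md5()
    with open(source_path, 'rb') as f:
        md5.update(f.read())                           # hash the source itself
    md5.update(('cmd: ' + build_cmd).encode('utf-8'))  # and the exact build configuration
    name = os.path.splitext(os.path.basename(source_path))[0]
    return os.path.join(cache_dir, f'{name}_{md5.hexdigest()}{ext}')

# Compile only when the digest-named binary is missing; writing to a temp file and
# os.rename()-ing it into place keeps the cache safe under concurrent builders.
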
/stylegan2-ada/dnnlib/tflib/autosummary.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | """Helper for adding automatically tracked values to Tensorboard.
10 |
11 | Autosummary creates an identity op that internally keeps track of the input
12 | values and automatically shows up in TensorBoard. The reported value
13 | represents an average over input components. The average is accumulated
14 | constantly over time and flushed when save_summaries() is called.
15 |
16 | Notes:
17 | - The output tensor must be used as an input for something else in the
18 | graph. Otherwise, the autosummary op will not get executed, and the average
19 | value will not get accumulated.
20 | - It is perfectly fine to include autosummaries with the same name in
21 | several places throughout the graph, even if they are executed concurrently.
22 | - It is ok to also pass in a python scalar or numpy array. In this case, it
23 | is added to the average immediately.
24 | """
25 |
26 | from collections import OrderedDict
27 | import numpy as np
28 | import tensorflow as tf
29 | from tensorboard import summary as summary_lib
30 | from tensorboard.plugins.custom_scalar import layout_pb2
31 |
32 | from . import tfutil
33 | from .tfutil import TfExpression
34 | from .tfutil import TfExpressionEx
35 |
36 | # Enable "Custom scalars" tab in TensorBoard for advanced formatting.
37 | # Disabled by default to reduce tfevents file size.
38 | enable_custom_scalars = False
39 |
40 | _dtype = tf.float64
41 | _vars = OrderedDict() # name => [var, ...]
42 | _immediate = OrderedDict() # name => update_op, update_value
43 | _finalized = False
44 | _merge_op = None
45 |
46 |
47 | def _create_var(name: str, value_expr: TfExpression) -> TfExpression:
48 | """Internal helper for creating autosummary accumulators."""
49 | assert not _finalized
50 | name_id = name.replace("/", "_")
51 | v = tf.cast(value_expr, _dtype)
52 |
53 | if v.shape.is_fully_defined():
54 | size = np.prod(v.shape.as_list())
55 | size_expr = tf.constant(size, dtype=_dtype)
56 | else:
57 | size = None
58 | size_expr = tf.reduce_prod(tf.cast(tf.shape(v), _dtype))
59 |
60 | if size == 1:
61 | if v.shape.ndims != 0:
62 | v = tf.reshape(v, [])
63 | v = [size_expr, v, tf.square(v)]
64 | else:
65 | v = [size_expr, tf.reduce_sum(v), tf.reduce_sum(tf.square(v))]
66 | v = tf.cond(tf.is_finite(v[1]), lambda: tf.stack(v), lambda: tf.zeros(3, dtype=_dtype))
67 |
68 | with tfutil.absolute_name_scope("Autosummary/" + name_id), tf.control_dependencies(None):
69 | var = tf.Variable(tf.zeros(3, dtype=_dtype), trainable=False) # [sum(1), sum(x), sum(x**2)]
70 | update_op = tf.cond(tf.is_variable_initialized(var), lambda: tf.assign_add(var, v), lambda: tf.assign(var, v))
71 |
72 | if name in _vars:
73 | _vars[name].append(var)
74 | else:
75 | _vars[name] = [var]
76 | return update_op
77 |
78 |
79 | def autosummary(name: str, value: TfExpressionEx, passthru: TfExpressionEx = None, condition: TfExpressionEx = True) -> TfExpressionEx:
80 | """Create a new autosummary.
81 |
82 | Args:
83 | name: Name to use in TensorBoard
84 | value: TensorFlow expression or python value to track
85 | passthru: Optionally return this TF node without modifications but tack an autosummary update side-effect to this node.
86 |
87 | Example use of the passthru mechanism:
88 |
89 | n = autosummary('l2loss', loss, passthru=n)
90 |
91 | This is a shorthand for the following code:
92 |
93 | with tf.control_dependencies([autosummary('l2loss', loss)]):
94 | n = tf.identity(n)
95 | """
96 | tfutil.assert_tf_initialized()
97 | name_id = name.replace("/", "_")
98 |
99 | if tfutil.is_tf_expression(value):
100 | with tf.name_scope("summary_" + name_id), tf.device(value.device):
101 | condition = tf.convert_to_tensor(condition, name='condition')
102 | update_op = tf.cond(condition, lambda: tf.group(_create_var(name, value)), tf.no_op)
103 | with tf.control_dependencies([update_op]):
104 | return tf.identity(value if passthru is None else passthru)
105 |
106 | else: # python scalar or numpy array
107 | assert not tfutil.is_tf_expression(passthru)
108 | assert not tfutil.is_tf_expression(condition)
109 | if condition:
110 | if name not in _immediate:
111 | with tfutil.absolute_name_scope("Autosummary/" + name_id), tf.device(None), tf.control_dependencies(None):
112 | update_value = tf.placeholder(_dtype)
113 | update_op = _create_var(name, update_value)
114 | _immediate[name] = update_op, update_value
115 | update_op, update_value = _immediate[name]
116 | tfutil.run(update_op, {update_value: value})
117 | return value if passthru is None else passthru
118 |
119 |
120 | def finalize_autosummaries() -> None:
121 | """Create the necessary ops to include autosummaries in TensorBoard report.
122 | Note: This should be done only once per graph.
123 | """
124 | global _finalized
125 | tfutil.assert_tf_initialized()
126 |
127 | if _finalized:
128 | return None
129 |
130 | _finalized = True
131 | tfutil.init_uninitialized_vars([var for vars_list in _vars.values() for var in vars_list])
132 |
133 | # Create summary ops.
134 | with tf.device(None), tf.control_dependencies(None):
135 | for name, vars_list in _vars.items():
136 | name_id = name.replace("/", "_")
137 | with tfutil.absolute_name_scope("Autosummary/" + name_id):
138 | moments = tf.add_n(vars_list)
139 | moments /= moments[0]
140 | with tf.control_dependencies([moments]): # read before resetting
141 | reset_ops = [tf.assign(var, tf.zeros(3, dtype=_dtype)) for var in vars_list]
142 | with tf.name_scope(None), tf.control_dependencies(reset_ops): # reset before reporting
143 | mean = moments[1]
144 | std = tf.sqrt(moments[2] - tf.square(moments[1]))
145 | tf.summary.scalar(name, mean)
146 | if enable_custom_scalars:
147 | tf.summary.scalar("xCustomScalars/" + name + "/margin_lo", mean - std)
148 | tf.summary.scalar("xCustomScalars/" + name + "/margin_hi", mean + std)
149 |
150 | # Setup layout for custom scalars.
151 | layout = None
152 | if enable_custom_scalars:
153 | cat_dict = OrderedDict()
154 | for series_name in sorted(_vars.keys()):
155 | p = series_name.split("/")
156 | cat = p[0] if len(p) >= 2 else ""
157 | chart = "/".join(p[1:-1]) if len(p) >= 3 else p[-1]
158 | if cat not in cat_dict:
159 | cat_dict[cat] = OrderedDict()
160 | if chart not in cat_dict[cat]:
161 | cat_dict[cat][chart] = []
162 | cat_dict[cat][chart].append(series_name)
163 | categories = []
164 | for cat_name, chart_dict in cat_dict.items():
165 | charts = []
166 | for chart_name, series_names in chart_dict.items():
167 | series = []
168 | for series_name in series_names:
169 | series.append(layout_pb2.MarginChartContent.Series(
170 | value=series_name,
171 | lower="xCustomScalars/" + series_name + "/margin_lo",
172 | upper="xCustomScalars/" + series_name + "/margin_hi"))
173 | margin = layout_pb2.MarginChartContent(series=series)
174 | charts.append(layout_pb2.Chart(title=chart_name, margin=margin))
175 | categories.append(layout_pb2.Category(title=cat_name, chart=charts))
176 | layout = summary_lib.custom_scalar_pb(layout_pb2.Layout(category=categories))
177 | return layout
178 |
179 | def save_summaries(file_writer, global_step=None):
180 | """Call FileWriter.add_summary() with all summaries in the default graph,
181 | automatically finalizing and merging them on the first call.
182 | """
183 | global _merge_op
184 | tfutil.assert_tf_initialized()
185 |
186 | if _merge_op is None:
187 | layout = finalize_autosummaries()
188 | if layout is not None:
189 | file_writer.add_summary(layout)
190 | with tf.device(None), tf.control_dependencies(None):
191 | _merge_op = tf.summary.merge_all()
192 |
193 | file_writer.add_summary(_merge_op.eval(), global_step)
194 |
--------------------------------------------------------------------------------
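The accumulator behind each autosummary is just the three running moments [sum(1), sum(x), sum(x**2)]; finalize_autosummaries() divides by the count and derives mean and std at flush time. A NumPy sketch of that bookkeeping (illustrative only, not repo code):

import numpy as np

acc = np.zeros(3)  # [sum(1), sum(x), sum(x**2)], as in the autosummary variables
for batch in [np.array([1.0, 2.0]), np.array([3.0])]:
    acc += [batch.size, batch.sum(), np.square(batch).sum()]

moments = acc / acc[0]  # normalize by count, as in the flush step
mean = moments[1]
std = np.sqrt(moments[2] - moments[1]**2)
print(mean, std)        # 2.0, ~0.8165
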
/stylegan2-ada/dnnlib/tflib/ops/fused_bias_act.cu:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | //
3 | // NVIDIA CORPORATION and its licensors retain all intellectual property
4 | // and proprietary rights in and to this software, related documentation
5 | // and any modifications thereto. Any use, reproduction, disclosure or
6 | // distribution of this software and related documentation without an express
7 | // license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | #define EIGEN_USE_GPU
10 | #define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
11 | #include "tensorflow/core/framework/op.h"
12 | #include "tensorflow/core/framework/op_kernel.h"
13 | #include "tensorflow/core/framework/shape_inference.h"
14 | #include <stdio.h>
15 |
16 | using namespace tensorflow;
17 | using namespace tensorflow::shape_inference;
18 |
19 | #define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) do { cudaError_t err = CUDA_CALL; OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal(cudaGetErrorName(err))); } while (false)
20 |
21 | //------------------------------------------------------------------------
22 | // CUDA kernel.
23 |
24 | template <class T>
25 | struct FusedBiasActKernelParams
26 | {
27 | const T* x; // [sizeX]
28 | const T* b; // [sizeB] or NULL
29 | const T* xref; // [sizeX] or NULL
30 | const T* yref; // [sizeX] or NULL
31 | T* y; // [sizeX]
32 |
33 | int grad;
34 | int axis;
35 | int act;
36 | float alpha;
37 | float gain;
38 | float clamp;
39 |
40 | int sizeX;
41 | int sizeB;
42 | int stepB;
43 | int loopX;
44 | };
45 |
46 | template <class T>
47 | static __global__ void FusedBiasActKernel(const FusedBiasActKernelParams<T> p)
48 | {
49 | const float expRange = 80.0f;
50 | const float halfExpRange = 40.0f;
51 | const float seluScale = 1.0507009873554804934193349852946f;
52 | const float seluAlpha = 1.6732632423543772848170429916717f;
53 |
54 | // Loop over elements.
55 | int xi = blockIdx.x * p.loopX * blockDim.x + threadIdx.x;
56 | for (int loopIdx = 0; loopIdx < p.loopX && xi < p.sizeX; loopIdx++, xi += blockDim.x)
57 | {
58 | // Load and apply bias.
59 | float x = (float)p.x[xi];
60 | if (p.b)
61 | x += (float)p.b[(xi / p.stepB) % p.sizeB];
62 | float xref = (p.xref) ? (float)p.xref[xi] : 0.0f;
63 | float yref = (p.yref) ? (float)p.yref[xi] : 0.0f;
64 | float yy = (p.gain != 0.0f) ? yref / p.gain : 0.0f;
65 |
66 | // Evaluate activation func.
67 | float y;
68 | switch (p.act * 10 + p.grad)
69 | {
70 | // linear
71 | default:
72 | case 10: y = x; break;
73 | case 11: y = x; break;
74 | case 12: y = 0.0f; break;
75 |
76 | // relu
77 | case 20: y = (x > 0.0f) ? x : 0.0f; break;
78 | case 21: y = (yy > 0.0f) ? x : 0.0f; break;
79 | case 22: y = 0.0f; break;
80 |
81 | // lrelu
82 | case 30: y = (x > 0.0f) ? x : x * p.alpha; break;
83 | case 31: y = (yy > 0.0f) ? x : x * p.alpha; break;
84 | case 32: y = 0.0f; break;
85 |
86 | // tanh
87 | case 40: { float c = expf(x); float d = 1.0f / c; y = (x < -expRange) ? -1.0f : (x > expRange) ? 1.0f : (c - d) / (c + d); } break;
88 | case 41: y = x * (1.0f - yy * yy); break;
89 | case 42: y = x * (1.0f - yy * yy) * (-2.0f * yy); break;
90 |
91 | // sigmoid
92 | case 50: y = (x < -expRange) ? 0.0f : 1.0f / (expf(-x) + 1.0f); break;
93 | case 51: y = x * yy * (1.0f - yy); break;
94 | case 52: y = x * yy * (1.0f - yy) * (1.0f - 2.0f * yy); break;
95 |
96 | // elu
97 | case 60: y = (x >= 0.0f) ? x : expf(x) - 1.0f; break;
98 | case 61: y = (yy >= 0.0f) ? x : x * (yy + 1.0f); break;
99 | case 62: y = (yy >= 0.0f) ? 0.0f : x * (yy + 1.0f); break;
100 |
101 | // selu
102 | case 70: y = (x >= 0.0f) ? seluScale * x : (seluScale * seluAlpha) * (expf(x) - 1.0f); break;
103 | case 71: y = (yy >= 0.0f) ? x * seluScale : x * (yy + seluScale * seluAlpha); break;
104 | case 72: y = (yy >= 0.0f) ? 0.0f : x * (yy + seluScale * seluAlpha); break;
105 |
106 | // softplus
107 | case 80: y = (x > expRange) ? x : logf(expf(x) + 1.0f); break;
108 | case 81: y = x * (1.0f - expf(-yy)); break;
109 | case 82: { float c = expf(-yy); y = x * c * (1.0f - c); } break;
110 |
111 | // swish
112 | case 90: y = (x < -expRange) ? 0.0f : x / (expf(-x) + 1.0f); break;
113 | case 91:
114 | case 92:
115 | {
116 | float c = expf(xref);
117 | float d = c + 1.0f;
118 | if (p.grad == 1)
119 | y = (xref > halfExpRange) ? x : x * c * (xref + d) / (d * d);
120 | else
121 | y = (xref > halfExpRange) ? 0.0f : x * c * (xref * (2.0f - d) + 2.0f * d) / (d * d * d);
122 | yref = (xref < -expRange) ? 0.0f : xref / (expf(-xref) + 1.0f) * p.gain;
123 | }
124 | break;
125 | }
126 |
127 | // Apply gain.
128 | y *= p.gain;
129 |
130 | // Clamp.
131 | if (p.clamp >= 0.0f)
132 | {
133 | if (p.grad == 0)
134 | y = (fabsf(y) < p.clamp) ? y : (y >= 0.0f) ? p.clamp : -p.clamp;
135 | else
136 | y = (fabsf(yref) < p.clamp) ? y : 0.0f;
137 | }
138 |
139 | // Store.
140 | p.y[xi] = (T)y;
141 | }
142 | }
143 |
144 | //------------------------------------------------------------------------
145 | // TensorFlow op.
146 |
147 | template <class T>
148 | struct FusedBiasActOp : public OpKernel
149 | {
150 | FusedBiasActKernelParams<T> m_attribs;
151 |
152 | FusedBiasActOp(OpKernelConstruction* ctx) : OpKernel(ctx)
153 | {
154 | memset(&m_attribs, 0, sizeof(m_attribs));
155 | OP_REQUIRES_OK(ctx, ctx->GetAttr("grad", &m_attribs.grad));
156 | OP_REQUIRES_OK(ctx, ctx->GetAttr("axis", &m_attribs.axis));
157 | OP_REQUIRES_OK(ctx, ctx->GetAttr("act", &m_attribs.act));
158 | OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &m_attribs.alpha));
159 | OP_REQUIRES_OK(ctx, ctx->GetAttr("gain", &m_attribs.gain));
160 | OP_REQUIRES_OK(ctx, ctx->GetAttr("clamp", &m_attribs.clamp));
161 | OP_REQUIRES(ctx, m_attribs.grad >= 0, errors::InvalidArgument("grad must be non-negative"));
162 | OP_REQUIRES(ctx, m_attribs.axis >= 0, errors::InvalidArgument("axis must be non-negative"));
163 | OP_REQUIRES(ctx, m_attribs.act >= 0, errors::InvalidArgument("act must be non-negative"));
164 | }
165 |
166 | void Compute(OpKernelContext* ctx)
167 | {
168 | FusedBiasActKernelParams<T> p = m_attribs;
169 | cudaStream_t stream = ctx->eigen_device<Eigen::GpuDevice>().stream();
170 |
171 | const Tensor& x = ctx->input(0); // [...]
172 | const Tensor& b = ctx->input(1); // [sizeB] or [0]
173 | const Tensor& xref = ctx->input(2); // x.shape or [0]
174 | const Tensor& yref = ctx->input(3); // x.shape or [0]
175 | p.x = x.flat<T>().data();
176 | p.b = (b.NumElements()) ? b.flat<T>().data() : NULL;
177 | p.xref = (xref.NumElements()) ? xref.flat<T>().data() : NULL;
178 | p.yref = (yref.NumElements()) ? yref.flat<T>().data() : NULL;
179 | OP_REQUIRES(ctx, b.NumElements() == 0 || m_attribs.axis < x.dims(), errors::InvalidArgument("axis out of bounds"));
180 | OP_REQUIRES(ctx, b.dims() == 1, errors::InvalidArgument("b must have rank 1"));
181 | OP_REQUIRES(ctx, b.NumElements() == 0 || b.NumElements() == x.dim_size(m_attribs.axis), errors::InvalidArgument("b has wrong number of elements"));
182 | OP_REQUIRES(ctx, xref.NumElements() == 0 || xref.NumElements() == x.NumElements(), errors::InvalidArgument("xref has wrong number of elements"));
183 | OP_REQUIRES(ctx, yref.NumElements() == 0 || yref.NumElements() == x.NumElements(), errors::InvalidArgument("yref has wrong number of elements"));
184 | OP_REQUIRES(ctx, x.NumElements() <= kint32max, errors::InvalidArgument("x is too large"));
185 |
186 | p.sizeX = (int)x.NumElements();
187 | p.sizeB = (int)b.NumElements();
188 | p.stepB = 1;
189 | for (int i = m_attribs.axis + 1; i < x.dims(); i++)
190 | p.stepB *= (int)x.dim_size(i);
191 |
192 | Tensor* y = NULL; // x.shape
193 | OP_REQUIRES_OK(ctx, ctx->allocate_output(0, x.shape(), &y));
194 | p.y = y->flat<T>().data();
195 |
196 | p.loopX = 4;
197 | int blockSize = 4 * 32;
198 | int gridSize = (p.sizeX - 1) / (p.loopX * blockSize) + 1;
199 | void* args[] = {&p};
200 | OP_CHECK_CUDA_ERROR(ctx, cudaLaunchKernel((void*)FusedBiasActKernel<T>, gridSize, blockSize, args, 0, stream));
201 | }
202 | };
203 |
204 | REGISTER_OP("FusedBiasAct")
205 | .Input ("x: T")
206 | .Input ("b: T")
207 | .Input ("xref: T")
208 | .Input ("yref: T")
209 | .Output ("y: T")
210 | .Attr ("T: {float, half}")
211 | .Attr ("grad: int = 0")
212 | .Attr ("axis: int = 1")
213 | .Attr ("act: int = 0")
214 | .Attr ("alpha: float = 0.0")
215 | .Attr ("gain: float = 1.0")
216 | .Attr ("clamp: float = -1.0");
217 | REGISTER_KERNEL_BUILDER(Name("FusedBiasAct").Device(DEVICE_GPU).TypeConstraint<float>("T"), FusedBiasActOp<float>);
218 | REGISTER_KERNEL_BUILDER(Name("FusedBiasAct").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"), FusedBiasActOp<Eigen::half>);
219 |
220 | //------------------------------------------------------------------------
221 |
--------------------------------------------------------------------------------
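The switch in FusedBiasActKernel encodes each case as act * 10 + grad, so a single kernel serves the forward pass (grad=0) and both gradient passes (grad=1, 2) for every activation. A Python sketch (not part of the repo) of the lrelu rows, matching cases 30-32 above:

def lrelu_cases(case_id, x, yy, alpha=0.2):
    if case_id == 30:  # forward: y = f(x)
        return x if x > 0.0 else x * alpha
    if case_id == 31:  # first gradient: dy scaled by f'(x), sign read from yy = yref / gain
        return x if yy > 0.0 else x * alpha
    if case_id == 32:  # second gradient: f'' == 0 for piecewise-linear activations
        return 0.0
    raise ValueError(case_id)

print(lrelu_cases(30, -1.0, 0.0))  # -0.2
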
/stylegan2-ada/dnnlib/tflib/ops/fused_bias_act.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | """Custom TensorFlow ops for efficient bias and activation."""
10 |
11 | import os
12 | import numpy as np
13 | import tensorflow as tf
14 | from .. import custom_ops
15 | from ...util import EasyDict
16 |
17 | def _get_plugin():
18 | return custom_ops.get_plugin(os.path.splitext(__file__)[0] + '.cu')
19 |
20 | #----------------------------------------------------------------------------
21 |
22 | activation_funcs = {
23 | 'linear': EasyDict(func=lambda x, **_: x, def_alpha=None, def_gain=1.0, cuda_idx=1, ref='y', zero_2nd_grad=True),
24 | 'relu': EasyDict(func=lambda x, **_: tf.nn.relu(x), def_alpha=None, def_gain=np.sqrt(2), cuda_idx=2, ref='y', zero_2nd_grad=True),
25 | 'lrelu': EasyDict(func=lambda x, alpha, **_: tf.nn.leaky_relu(x, alpha), def_alpha=0.2, def_gain=np.sqrt(2), cuda_idx=3, ref='y', zero_2nd_grad=True),
26 | 'tanh': EasyDict(func=lambda x, **_: tf.nn.tanh(x), def_alpha=None, def_gain=1.0, cuda_idx=4, ref='y', zero_2nd_grad=False),
27 | 'sigmoid': EasyDict(func=lambda x, **_: tf.nn.sigmoid(x), def_alpha=None, def_gain=1.0, cuda_idx=5, ref='y', zero_2nd_grad=False),
28 | 'elu': EasyDict(func=lambda x, **_: tf.nn.elu(x), def_alpha=None, def_gain=1.0, cuda_idx=6, ref='y', zero_2nd_grad=False),
29 | 'selu': EasyDict(func=lambda x, **_: tf.nn.selu(x), def_alpha=None, def_gain=1.0, cuda_idx=7, ref='y', zero_2nd_grad=False),
30 | 'softplus': EasyDict(func=lambda x, **_: tf.nn.softplus(x), def_alpha=None, def_gain=1.0, cuda_idx=8, ref='y', zero_2nd_grad=False),
31 | 'swish': EasyDict(func=lambda x, **_: tf.nn.sigmoid(x) * x, def_alpha=None, def_gain=np.sqrt(2), cuda_idx=9, ref='x', zero_2nd_grad=False),
32 | }
33 |
34 | #----------------------------------------------------------------------------
35 |
36 | def fused_bias_act(x, b=None, axis=1, act='linear', alpha=None, gain=None, clamp=None, impl='cuda'):
37 | r"""Fused bias and activation function.
38 |
39 | Adds bias `b` to activation tensor `x`, evaluates activation function `act`,
40 | and scales the result by `gain`. Each of the steps is optional. In most cases,
41 | the fused op is considerably more efficient than performing the same calculation
42 | using standard TensorFlow ops. It supports first and second order gradients,
43 | but not third order gradients.
44 |
45 | Args:
46 | x: Input activation tensor. Can have any shape, but if `b` is defined, the
47 | dimension corresponding to `axis`, as well as the rank, must be known.
48 | b: Bias vector, or `None` to disable. Must be a 1D tensor of the same type
49 | as `x`. The shape must be known, and it must match the dimension of `x`
50 | corresponding to `axis`.
51 | axis: The dimension in `x` corresponding to the elements of `b`.
52 | The value of `axis` is ignored if `b` is not specified.
53 | act: Name of the activation function to evaluate, or `"linear"` to disable.
54 | Can be e.g. `"relu"`, `"lrelu"`, `"tanh"`, `"sigmoid"`, `"swish"`, etc.
55 | See `activation_funcs` for a full list. `None` is not allowed.
56 | alpha: Shape parameter for the activation function, or `None` to use the default.
57 | gain: Scaling factor for the output tensor, or `None` to use default.
58 | See `activation_funcs` for the default scaling of each activation function.
59 | If unsure, consider specifying `1.0`.
60 | clamp: Clamp the output values to `[-clamp, +clamp]`, or `None` to disable
61 | the clamping (default).
62 | impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).
63 |
64 | Returns:
65 | Tensor of the same shape and datatype as `x`.
66 | """
67 |
68 | impl_dict = {
69 | 'ref': _fused_bias_act_ref,
70 | 'cuda': _fused_bias_act_cuda,
71 | }
72 | return impl_dict[impl](x=x, b=b, axis=axis, act=act, alpha=alpha, gain=gain, clamp=clamp)
73 |
74 | #----------------------------------------------------------------------------
75 |
76 | def _fused_bias_act_ref(x, b, axis, act, alpha, gain, clamp):
77 | """Slow reference implementation of `fused_bias_act()` using standard TensorFlow ops."""
78 |
79 | # Validate arguments.
80 | x = tf.convert_to_tensor(x)
81 | b = tf.convert_to_tensor(b) if b is not None else tf.constant([], dtype=x.dtype)
82 | act_spec = activation_funcs[act]
83 | assert b.shape.rank == 1 and (b.shape[0] == 0 or b.shape[0] == x.shape[axis])
84 | assert b.shape[0] == 0 or 0 <= axis < x.shape.rank
85 | if alpha is None:
86 | alpha = act_spec.def_alpha
87 | if gain is None:
88 | gain = act_spec.def_gain
89 |
90 | # Add bias.
91 | if b.shape[0] != 0:
92 | x += tf.reshape(b, [-1 if i == axis else 1 for i in range(x.shape.rank)])
93 |
94 | # Evaluate activation function.
95 | x = act_spec.func(x, alpha=alpha)
96 |
97 | # Scale by gain.
98 | if gain != 1:
99 | x *= gain
100 |
101 | # Clamp.
102 | if clamp is not None:
103 | clamp = np.asarray(clamp, dtype=x.dtype.name)
104 | assert clamp.shape == () and clamp >= 0
105 | x = tf.clip_by_value(x, -clamp, clamp)
106 | return x
107 |
108 | #----------------------------------------------------------------------------
109 |
110 | def _fused_bias_act_cuda(x, b, axis, act, alpha, gain, clamp):
111 | """Fast CUDA implementation of `fused_bias_act()` using custom ops."""
112 |
113 | # Validate arguments.
114 | x = tf.convert_to_tensor(x)
115 | empty_tensor = tf.constant([], dtype=x.dtype)
116 | b = tf.convert_to_tensor(b) if b is not None else empty_tensor
117 | act_spec = activation_funcs[act]
118 | assert b.shape.rank == 1 and (b.shape[0] == 0 or b.shape[0] == x.shape[axis])
119 | assert b.shape[0] == 0 or 0 <= axis < x.shape.rank
120 | if alpha is None:
121 | alpha = act_spec.def_alpha
122 | if gain is None:
123 | gain = act_spec.def_gain
124 |
125 | # Special cases.
126 | if act == 'linear' and b is None and gain == 1.0:
127 | return x
128 | if act_spec.cuda_idx is None:
129 | return _fused_bias_act_ref(x=x, b=b, axis=axis, act=act, alpha=alpha, gain=gain, clamp=clamp)
130 |
131 | # CUDA op.
132 | cuda_op = _get_plugin().fused_bias_act
133 | cuda_kwargs = dict(axis=int(axis), act=int(act_spec.cuda_idx), gain=float(gain))
134 | if alpha is not None:
135 | cuda_kwargs['alpha'] = float(alpha)
136 | if clamp is not None:
137 | clamp = np.asarray(clamp, dtype=x.dtype.name)
138 | assert clamp.shape == () and clamp >= 0
139 | cuda_kwargs['clamp'] = float(clamp.astype(np.float32))
140 | def ref(tensor, name):
141 | return tensor if act_spec.ref == name else empty_tensor
142 |
143 | # Forward pass: y = func(x, b).
144 | def func_y(x, b):
145 | y = cuda_op(x=x, b=b, xref=empty_tensor, yref=empty_tensor, grad=0, **cuda_kwargs)
146 | y.set_shape(x.shape)
147 | return y
148 |
149 | # Backward pass: dx, db = grad(dy, x, y)
150 | def grad_dx(dy, x, y):
151 | dx = cuda_op(x=dy, b=empty_tensor, xref=ref(x,'x'), yref=ref(y,'y'), grad=1, **cuda_kwargs)
152 | dx.set_shape(x.shape)
153 | return dx
154 | def grad_db(dx):
155 | if b.shape[0] == 0:
156 | return empty_tensor
157 | db = dx
158 | if axis < x.shape.rank - 1:
159 | db = tf.reduce_sum(db, list(range(axis + 1, x.shape.rank)))
160 | if axis > 0:
161 | db = tf.reduce_sum(db, list(range(axis)))
162 | db.set_shape(b.shape)
163 | return db
164 |
165 | # Second order gradients: d_dy, d_x = grad2(d_dx, d_db, x, y)
166 | def grad2_d_dy(d_dx, d_db, x, y):
167 | d_dy = cuda_op(x=d_dx, b=d_db, xref=ref(x,'x'), yref=ref(y,'y'), grad=1, **cuda_kwargs)
168 | d_dy.set_shape(x.shape)
169 | return d_dy
170 | def grad2_d_x(d_dx, d_db, x, y):
171 | d_x = cuda_op(x=d_dx, b=d_db, xref=ref(x,'x'), yref=ref(y,'y'), grad=2, **cuda_kwargs)
172 | d_x.set_shape(x.shape)
173 | return d_x
174 |
175 | # Fast version for piecewise-linear activation funcs.
176 | @tf.custom_gradient
177 | def func_zero_2nd_grad(x, b):
178 | y = func_y(x, b)
179 | @tf.custom_gradient
180 | def grad(dy):
181 | dx = grad_dx(dy, x, y)
182 | db = grad_db(dx)
183 | def grad2(d_dx, d_db):
184 | d_dy = grad2_d_dy(d_dx, d_db, x, y)
185 | return d_dy
186 | return (dx, db), grad2
187 | return y, grad
188 |
189 | # Slow version for general activation funcs.
190 | @tf.custom_gradient
191 | def func_nonzero_2nd_grad(x, b):
192 | y = func_y(x, b)
193 | def grad_wrap(dy):
194 | @tf.custom_gradient
195 | def grad_impl(dy, x):
196 | dx = grad_dx(dy, x, y)
197 | db = grad_db(dx)
198 | def grad2(d_dx, d_db):
199 | d_dy = grad2_d_dy(d_dx, d_db, x, y)
200 | d_x = grad2_d_x(d_dx, d_db, x, y)
201 | return d_dy, d_x
202 | return (dx, db), grad2
203 | return grad_impl(dy, x)
204 | return y, grad_wrap
205 |
206 | # Which version to use?
207 | if act_spec.zero_2nd_grad:
208 | return func_zero_2nd_grad(x, b)
209 | return func_nonzero_2nd_grad(x, b)
210 |
211 | #----------------------------------------------------------------------------
212 |
--------------------------------------------------------------------------------
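A hedged usage sketch for fused_bias_act() above: with impl='ref' the call should reduce to plain TensorFlow ops, so the lrelu defaults (alpha=0.2, gain=sqrt(2)) can be checked against a hand-written expression. The shapes below are illustrative, and the snippet assumes it runs inside this module's TF1 graph context.

import numpy as np
import tensorflow as tf

x = tf.constant(np.random.randn(4, 8).astype(np.float32))  # [minibatch, channels]
b = tf.constant(np.random.randn(8).astype(np.float32))     # per-channel bias along axis=1

y_fused = fused_bias_act(x, b, axis=1, act='lrelu', impl='ref')
y_plain = tf.nn.leaky_relu(x + b, alpha=0.2) * np.sqrt(2)  # same defaults, written out
# tflib.run(tf.reduce_max(tf.abs(y_fused - y_plain))) should be ~0.
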
/stylegan2-ada/dnnlib/tflib/tfutil.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | """Miscellaneous helper utils for Tensorflow."""
10 |
11 | import os
12 | import numpy as np
13 | import tensorflow as tf
14 |
15 | # Silence deprecation warnings from TensorFlow 1.13 onwards
16 | import logging
17 | logging.getLogger('tensorflow').setLevel(logging.ERROR)
18 | import tensorflow.contrib # requires TensorFlow 1.x!
19 | tf.contrib = tensorflow.contrib
20 |
21 | from typing import Any, Iterable, List, Union
22 |
23 | TfExpression = Union[tf.Tensor, tf.Variable, tf.Operation]
24 | """A type that represents a valid Tensorflow expression."""
25 |
26 | TfExpressionEx = Union[TfExpression, int, float, np.ndarray]
27 | """A type that can be converted to a valid Tensorflow expression."""
28 |
29 |
30 | def run(*args, **kwargs) -> Any:
31 | """Run the specified ops in the default session."""
32 | assert_tf_initialized()
33 | return tf.get_default_session().run(*args, **kwargs)
34 |
35 |
36 | def is_tf_expression(x: Any) -> bool:
37 | """Check whether the input is a valid Tensorflow expression, i.e., Tensorflow Tensor, Variable, or Operation."""
38 | return isinstance(x, (tf.Tensor, tf.Variable, tf.Operation))
39 |
40 |
41 | def shape_to_list(shape: Iterable[tf.Dimension]) -> List[Union[int, None]]:
42 | """Convert a Tensorflow shape to a list of ints. Retained for backwards compatibility -- use TensorShape.as_list() in new code."""
43 | return [dim.value for dim in shape]
44 |
45 |
46 | def flatten(x: TfExpressionEx) -> TfExpression:
47 | """Shortcut function for flattening a tensor."""
48 | with tf.name_scope("Flatten"):
49 | return tf.reshape(x, [-1])
50 |
51 |
52 | def log2(x: TfExpressionEx) -> TfExpression:
53 | """Logarithm in base 2."""
54 | with tf.name_scope("Log2"):
55 | return tf.log(x) * np.float32(1.0 / np.log(2.0))
56 |
57 |
58 | def exp2(x: TfExpressionEx) -> TfExpression:
59 | """Exponent in base 2."""
60 | with tf.name_scope("Exp2"):
61 | return tf.exp(x * np.float32(np.log(2.0)))
62 |
63 |
64 | def erfinv(y: TfExpressionEx) -> TfExpression:
65 | """Inverse of the error function."""
66 | # pylint: disable=no-name-in-module
67 | from tensorflow.python.ops.distributions import special_math
68 | return special_math.erfinv(y)
69 |
70 |
71 | def lerp(a: TfExpressionEx, b: TfExpressionEx, t: TfExpressionEx) -> TfExpressionEx:
72 | """Linear interpolation."""
73 | with tf.name_scope("Lerp"):
74 | return a + (b - a) * t
75 |
76 |
77 | def lerp_clip(a: TfExpressionEx, b: TfExpressionEx, t: TfExpressionEx) -> TfExpression:
78 | """Linear interpolation with clip."""
79 | with tf.name_scope("LerpClip"):
80 | return a + (b - a) * tf.clip_by_value(t, 0.0, 1.0)
81 |
82 |
83 | def absolute_name_scope(scope: str) -> tf.name_scope:
84 | """Forcefully enter the specified name scope, ignoring any surrounding scopes."""
85 | return tf.name_scope(scope + "/")
86 |
87 |
88 | def absolute_variable_scope(scope: str, **kwargs) -> tf.variable_scope:
89 | """Forcefully enter the specified variable scope, ignoring any surrounding scopes."""
90 | return tf.variable_scope(tf.VariableScope(name=scope, **kwargs), auxiliary_name_scope=False)
91 |
92 |
93 | def _sanitize_tf_config(config_dict: dict = None) -> dict:
94 | # Defaults.
95 | cfg = dict()
96 | cfg["rnd.np_random_seed"] = None # Random seed for NumPy. None = keep as is.
97 | cfg["rnd.tf_random_seed"] = "auto" # Random seed for TensorFlow. 'auto' = derive from NumPy random state. None = keep as is.
98 | cfg["env.TF_CPP_MIN_LOG_LEVEL"] = "1" # 0 = Print all available debug info from TensorFlow. 1 = Print warnings and errors, but disable debug info.
99 | cfg["env.HDF5_USE_FILE_LOCKING"] = "FALSE" # Disable HDF5 file locking to avoid concurrency issues with network shares.
100 | cfg["graph_options.place_pruned_graph"] = True # False = Check that all ops are available on the designated device. True = Skip the check for ops that are not used.
101 | cfg["gpu_options.allow_growth"] = True # False = Allocate all GPU memory at the beginning. True = Allocate only as much GPU memory as needed.
102 |
103 | # Remove defaults for environment variables that are already set.
104 | for key in list(cfg):
105 | fields = key.split(".")
106 | if fields[0] == "env":
107 | assert len(fields) == 2
108 | if fields[1] in os.environ:
109 | del cfg[key]
110 |
111 | # User overrides.
112 | if config_dict is not None:
113 | cfg.update(config_dict)
114 | return cfg
115 |
116 |
117 | def init_tf(config_dict: dict = None) -> None:
118 | """Initialize TensorFlow session using good default settings."""
119 | # Skip if already initialized.
120 | if tf.get_default_session() is not None:
121 | return
122 |
123 | # Setup config dict and random seeds.
124 | cfg = _sanitize_tf_config(config_dict)
125 | np_random_seed = cfg["rnd.np_random_seed"]
126 | if np_random_seed is not None:
127 | np.random.seed(np_random_seed)
128 | tf_random_seed = cfg["rnd.tf_random_seed"]
129 | if tf_random_seed == "auto":
130 | tf_random_seed = np.random.randint(1 << 31)
131 | if tf_random_seed is not None:
132 | tf.set_random_seed(tf_random_seed)
133 |
134 | # Setup environment variables.
135 | for key, value in cfg.items():
136 | fields = key.split(".")
137 | if fields[0] == "env":
138 | assert len(fields) == 2
139 | os.environ[fields[1]] = str(value)
140 |
141 | # Create default TensorFlow session.
142 | create_session(cfg, force_as_default=True)
143 |
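# Usage sketch (illustrative): initialize the default session once per process.
# Pinning 'rnd.np_random_seed' also fixes the TF seed, since 'auto' derives it
# from the NumPy random state (see _sanitize_tf_config above).
#
#   import dnnlib.tflib as tflib
#   tflib.init_tf({'rnd.np_random_seed': 1000})  # seed value is an arbitrary example
#   tflib.assert_tf_initialized()                # raises RuntimeError if skipped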
144 |
145 | def assert_tf_initialized():
146 | """Check that TensorFlow session has been initialized."""
147 | if tf.get_default_session() is None:
148 | raise RuntimeError("No default TensorFlow session found. Please call dnnlib.tflib.init_tf().")
149 |
150 |
151 | def create_session(config_dict: dict = None, force_as_default: bool = False) -> tf.Session:
152 | """Create tf.Session based on config dict."""
153 | # Setup TensorFlow config proto.
154 | cfg = _sanitize_tf_config(config_dict)
155 | config_proto = tf.ConfigProto()
156 | for key, value in cfg.items():
157 | fields = key.split(".")
158 | if fields[0] not in ["rnd", "env"]:
159 | obj = config_proto
160 | for field in fields[:-1]:
161 | obj = getattr(obj, field)
162 | setattr(obj, fields[-1], value)
163 |
164 | # Create session.
165 | session = tf.Session(config=config_proto)
166 | if force_as_default:
167 | # pylint: disable=protected-access
168 | session._default_session = session.as_default()
169 | session._default_session.enforce_nesting = False
170 | session._default_session.__enter__()
171 | return session
172 |
173 |
174 | def init_uninitialized_vars(target_vars: List[tf.Variable] = None) -> None:
175 | """Initialize all tf.Variables that have not already been initialized.
176 |
177 | Equivalent to the following, but more efficient and does not bloat the tf graph:
178 | tf.variables_initializer(tf.report_uninitialized_variables()).run()
179 | """
180 | assert_tf_initialized()
181 | if target_vars is None:
182 | target_vars = tf.global_variables()
183 |
184 | test_vars = []
185 | test_ops = []
186 |
187 | with tf.control_dependencies(None): # ignore surrounding control_dependencies
188 | for var in target_vars:
189 | assert is_tf_expression(var)
190 |
191 | try:
192 | tf.get_default_graph().get_tensor_by_name(var.name.replace(":0", "/IsVariableInitialized:0"))
193 | except KeyError:
194 | # Op does not exist => variable may be uninitialized.
195 | test_vars.append(var)
196 |
197 | with absolute_name_scope(var.name.split(":")[0]):
198 | test_ops.append(tf.is_variable_initialized(var))
199 |
200 | init_vars = [var for var, inited in zip(test_vars, run(test_ops)) if not inited]
201 | run([var.initializer for var in init_vars])
202 |
203 |
204 | def set_vars(var_to_value_dict: dict) -> None:
205 | """Set the values of given tf.Variables.
206 |
207 | Equivalent to the following, but more efficient and does not bloat the tf graph:
208 | tflib.run([tf.assign(var, value) for var, value in var_to_value_dict.items()])
209 | """
210 | assert_tf_initialized()
211 | ops = []
212 | feed_dict = {}
213 |
214 | for var, value in var_to_value_dict.items():
215 | assert is_tf_expression(var)
216 |
217 | try:
218 | setter = tf.get_default_graph().get_tensor_by_name(var.name.replace(":0", "/setter:0")) # look for existing op
219 | except KeyError:
220 | with absolute_name_scope(var.name.split(":")[0]):
221 | with tf.control_dependencies(None): # ignore surrounding control_dependencies
222 | setter = tf.assign(var, tf.placeholder(var.dtype, var.shape, "new_value"), name="setter") # create new setter
223 |
224 | ops.append(setter)
225 | feed_dict[setter.op.inputs[1]] = value
226 |
227 | run(ops, feed_dict)
228 |
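# Usage sketch (illustrative): upload NumPy values into existing variables.
# The per-variable 'setter' op is created on first use and reused afterwards,
# so repeated calls do not grow the graph.
#
#   w = tf.get_variable('w', shape=[4], initializer=tf.zeros_initializer())
#   init_uninitialized_vars([w])
#   set_vars({w: np.arange(4, dtype=np.float32)})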
229 |
230 | def create_var_with_large_initial_value(initial_value: np.ndarray, *args, **kwargs):
231 | """Create tf.Variable with large initial value without bloating the tf graph."""
232 | assert_tf_initialized()
233 | assert isinstance(initial_value, np.ndarray)
234 | zeros = tf.zeros(initial_value.shape, initial_value.dtype)
235 | var = tf.Variable(zeros, *args, **kwargs)
236 | set_vars({var: initial_value})
237 | return var
238 |
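# Usage sketch (illustrative): passing a large array directly as the initial
# value would serialize it into the graph definition; this helper initializes
# to zeros and uploads the data through set_vars() instead.
#
#   big = np.random.randn(1000000, 16).astype(np.float32)  # arbitrary example data
#   big_var = create_var_with_large_initial_value(big, name='big_var')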
239 |
240 | def convert_images_from_uint8(images, drange=[-1,1], nhwc_to_nchw=False):
241 | """Convert a minibatch of images from uint8 to float32 with configurable dynamic range.
242 | Can be used as an input transformation for Network.run().
243 | """
244 | images = tf.cast(images, tf.float32)
245 | if nhwc_to_nchw:
246 | images = tf.transpose(images, [0, 3, 1, 2])
247 | return images * ((drange[1] - drange[0]) / 255) + drange[0]
248 |
249 |
250 | def convert_images_to_uint8(images, drange=[-1,1], nchw_to_nhwc=False, shrink=1):
251 | """Convert a minibatch of images from float32 to uint8 with configurable dynamic range.
252 | Can be used as an output transformation for Network.run().
253 | """
254 | images = tf.cast(images, tf.float32)
255 | if shrink > 1:
256 | ksize = [1, 1, shrink, shrink]
257 | images = tf.nn.avg_pool(images, ksize=ksize, strides=ksize, padding="VALID", data_format="NCHW")
258 | if nchw_to_nhwc:
259 | images = tf.transpose(images, [0, 2, 3, 1])
260 | scale = 255 / (drange[1] - drange[0])
261 | images = images * scale + (0.5 - drange[0] * scale)
262 | return tf.saturate_cast(images, tf.uint8)
263 |
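# Usage sketch (illustrative): typical output transformation for generator
# images produced as NCHW float32 in dynamic range [-1, 1].
#
#   fakes = Gs.get_output_for(latents, labels)  # [N, C, H, W], float32 in [-1, 1]
#   fakes_uint8 = convert_images_to_uint8(fakes, drange=[-1, 1], nchw_to_nhwc=True)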
--------------------------------------------------------------------------------
/stylegan2-ada/metrics/linear_separability.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | """Linear Separability (LS) from the paper
10 | "A Style-Based Generator Architecture for Generative Adversarial Networks"."""
11 |
12 | import pickle
13 | from collections import defaultdict
14 | import numpy as np
15 | import sklearn.svm
16 | import tensorflow as tf
17 | import dnnlib
18 | import dnnlib.tflib as tflib
19 |
20 | from metrics import metric_base
21 |
22 | #----------------------------------------------------------------------------
23 |
24 | classifier_urls = [
25 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-00-male.pkl',
26 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-01-smiling.pkl',
27 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-02-attractive.pkl',
28 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-03-wavy-hair.pkl',
29 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-04-young.pkl',
30 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-05-5-o-clock-shadow.pkl',
31 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-06-arched-eyebrows.pkl',
32 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-07-bags-under-eyes.pkl',
33 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-08-bald.pkl',
34 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-09-bangs.pkl',
35 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-10-big-lips.pkl',
36 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-11-big-nose.pkl',
37 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-12-black-hair.pkl',
38 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-13-blond-hair.pkl',
39 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-14-blurry.pkl',
40 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-15-brown-hair.pkl',
41 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-16-bushy-eyebrows.pkl',
42 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-17-chubby.pkl',
43 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-18-double-chin.pkl',
44 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-19-eyeglasses.pkl',
45 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-20-goatee.pkl',
46 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-21-gray-hair.pkl',
47 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-22-heavy-makeup.pkl',
48 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-23-high-cheekbones.pkl',
49 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-24-mouth-slightly-open.pkl',
50 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-25-mustache.pkl',
51 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-26-narrow-eyes.pkl',
52 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-27-no-beard.pkl',
53 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-28-oval-face.pkl',
54 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-29-pale-skin.pkl',
55 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-30-pointy-nose.pkl',
56 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-31-receding-hairline.pkl',
57 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-32-rosy-cheeks.pkl',
58 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-33-sideburns.pkl',
59 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-34-straight-hair.pkl',
60 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-35-wearing-earrings.pkl',
61 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-36-wearing-hat.pkl',
62 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-37-wearing-lipstick.pkl',
63 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-38-wearing-necklace.pkl',
64 | 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-39-wearing-necktie.pkl',
65 | ]
66 |
67 | #----------------------------------------------------------------------------
68 |
69 | def prob_normalize(p):
70 | p = np.asarray(p).astype(np.float32)
71 | assert len(p.shape) == 2
72 | return p / np.sum(p)
73 |
74 | def mutual_information(p):
75 | p = prob_normalize(p)
76 | px = np.sum(p, axis=1)
77 | py = np.sum(p, axis=0)
78 | result = 0.0
79 | for x in range(p.shape[0]):
80 | p_x = px[x]
81 | for y in range(p.shape[1]):
82 | p_xy = p[x][y]
83 | p_y = py[y]
84 | if p_xy > 0.0:
85 | result += p_xy * np.log2(p_xy / (p_x * p_y)) # get bits as output
86 | return result
87 |
88 | def entropy(p):
89 | p = prob_normalize(p)
90 | result = 0.0
91 | for x in range(p.shape[0]):
92 | for y in range(p.shape[1]):
93 | p_xy = p[x][y]
94 | if p_xy > 0.0:
95 | result -= p_xy * np.log2(p_xy)
96 | return result
97 |
98 | def conditional_entropy(p):
99 | # H(Y|X) where X corresponds to axis 0, Y to axis 1
100 | # i.e., how many bits of additional information are needed to determine where we are on axis 1 if we know where we are on axis 0?
101 | p = prob_normalize(p)
102 | y = np.sum(p, axis=0, keepdims=True) # marginalize to calculate H(Y)
103 | return max(0.0, entropy(y) - mutual_information(p)) # can slip just below 0 due to FP inaccuracies, clean those up.
104 |
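# Worked example (illustrative): for p = [[0.5, 0.0], [0.0, 0.5]], X determines
# Y exactly, so entropy(y) = 1 bit, mutual_information(p) = 1 bit, and
# conditional_entropy(p) = 0 bits. For the independent uniform case
# p = [[0.25, 0.25], [0.25, 0.25]], mutual information is 0 and H(Y|X) = 1 bit.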
105 | #----------------------------------------------------------------------------
106 |
107 | class LS(metric_base.MetricBase):
108 | def __init__(self, num_samples, num_keep, attrib_indices, minibatch_per_gpu, **kwargs):
109 | assert num_keep <= num_samples
110 | super().__init__(**kwargs)
111 | self.num_samples = num_samples
112 | self.num_keep = num_keep
113 | self.attrib_indices = attrib_indices
114 | self.minibatch_per_gpu = minibatch_per_gpu
115 |
116 | def _evaluate(self, Gs, G_kwargs, num_gpus, **_kwargs): # pylint: disable=arguments-differ
117 | minibatch_size = num_gpus * self.minibatch_per_gpu
118 |
119 | # Construct TensorFlow graph for each GPU.
120 | result_expr = []
121 | for gpu_idx in range(num_gpus):
122 | with tf.device(f'/gpu:{gpu_idx}'):
123 | Gs_clone = Gs.clone()
124 |
125 | # Generate images.
126 | latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:])
127 | labels = self._get_random_labels_tf(self.minibatch_per_gpu)
128 | dlatents = Gs_clone.components.mapping.get_output_for(latents, labels, **G_kwargs)
129 | images = Gs_clone.get_output_for(latents, None, **G_kwargs)
130 | if images.shape[1] == 1: images = tf.tile(images, [1, 3, 1, 1])
131 |
132 | # Downsample to 256x256. The attribute classifiers were built for 256x256.
133 | if images.shape[2] > 256:
134 | factor = images.shape[2] // 256
135 | images = tf.reshape(images, [-1, images.shape[1], images.shape[2] // factor, factor, images.shape[3] // factor, factor])
136 | images = tf.reduce_mean(images, axis=[3, 5])
137 |
138 | # Run classifier for each attribute.
139 | result_dict = dict(latents=latents, dlatents=dlatents[:,-1])
140 | for attrib_idx in self.attrib_indices:
141 | with dnnlib.util.open_url(classifier_urls[attrib_idx]) as f:
142 | classifier = pickle.load(f)
143 | logits = classifier.get_output_for(images, None)
144 | predictions = tf.nn.softmax(tf.concat([logits, -logits], axis=1))
145 | result_dict[attrib_idx] = predictions
146 | result_expr.append(result_dict)
147 |
148 | # Sampling loop.
149 | results = []
150 | for begin in range(0, self.num_samples, minibatch_size):
151 | self._report_progress(begin, self.num_samples)
152 | results += tflib.run(result_expr)
153 | results = {key: np.concatenate([value[key] for value in results], axis=0) for key in results[0].keys()}
154 |
155 | # Calculate conditional entropy for each attribute.
156 | conditional_entropies = defaultdict(list)
157 | for attrib_idx in self.attrib_indices:
158 | # Prune the least confident samples.
159 | pruned_indices = list(range(self.num_samples))
160 | pruned_indices = sorted(pruned_indices, key=lambda i: -np.max(results[attrib_idx][i]))
161 | pruned_indices = pruned_indices[:self.num_keep]
162 |
163 | # Fit SVM to the remaining samples.
164 | svm_targets = np.argmax(results[attrib_idx][pruned_indices], axis=1)
165 | for space in ['latents', 'dlatents']:
166 | svm_inputs = results[space][pruned_indices]
167 | try:
168 | svm = sklearn.svm.LinearSVC()
169 | svm.fit(svm_inputs, svm_targets)
170 | svm.score(svm_inputs, svm_targets)
171 | svm_outputs = svm.predict(svm_inputs)
172 | except:
173 | svm_outputs = svm_targets # assume perfect prediction
174 |
175 | # Calculate conditional entropy.
176 | p = [[np.mean([case == (row, col) for case in zip(svm_outputs, svm_targets)]) for col in (0, 1)] for row in (0, 1)]
177 | conditional_entropies[space].append(conditional_entropy(p))
178 |
179 | # Calculate separability scores.
180 | scores = {key: 2**np.sum(values) for key, values in conditional_entropies.items()}
181 | self._report_result(scores['latents'], suffix='_z')
182 | self._report_result(scores['dlatents'], suffix='_w')
183 |
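# Configuration sketch (illustrative; the actual presets live in
# metric_defaults.py and may use different sample counts):
#
#   ls = LS(name='ls', num_samples=200000, num_keep=100000,
#           attrib_indices=range(40), minibatch_per_gpu=4)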
184 | #----------------------------------------------------------------------------
185 |
--------------------------------------------------------------------------------
/stylegan2-ada/metrics/precision_recall.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | """Precision/Recall (PR) from the paper
10 | "Improved Precision and Recall Metric for Assessing Generative Models"."""
11 |
12 | import os
13 | import pickle
14 | import numpy as np
15 | import tensorflow as tf
16 | import dnnlib
17 | import dnnlib.tflib as tflib
18 |
19 | from metrics import metric_base
20 |
21 | #----------------------------------------------------------------------------
22 |
23 | def batch_pairwise_distances(U, V):
24 | """ Compute pairwise distances between two batches of feature vectors."""
25 | with tf.variable_scope('pairwise_dist_block'):
26 | # Squared norms of each row in U and V.
27 | norm_u = tf.reduce_sum(tf.square(U), 1)
28 | norm_v = tf.reduce_sum(tf.square(V), 1)
29 |
30 | # norm_u as a row and norm_v as a column vectors.
31 | norm_u = tf.reshape(norm_u, [-1, 1])
32 | norm_v = tf.reshape(norm_v, [1, -1])
33 |
34 | # Pairwise squared Euclidean distances.
35 | D = tf.maximum(norm_u - 2*tf.matmul(U, V, False, True) + norm_v, 0.0)
36 |
37 | return D
38 |
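# Sanity check (illustrative): by the identity |u - v|^2 = |u|^2 - 2<u, v> + |v|^2,
# D[i, j] is the squared Euclidean distance between U[i] and V[j].
#
#   with tf.Session() as sess:
#       U = tf.constant([[0.0, 0.0], [1.0, 0.0]])
#       V = tf.constant([[0.0, 3.0]])
#       print(sess.run(batch_pairwise_distances(U, V)))  # [[9.], [10.]]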
39 | #----------------------------------------------------------------------------
40 |
41 | class DistanceBlock():
42 | """Distance block."""
43 | def __init__(self, num_features, num_gpus):
44 | self.num_features = num_features
45 | self.num_gpus = num_gpus
46 |
47 | # Initialize TF graph to calculate pairwise distances.
48 | with tf.device('/cpu:0'):
49 | self._features_batch1 = tf.placeholder(tf.float16, shape=[None, self.num_features])
50 | self._features_batch2 = tf.placeholder(tf.float16, shape=[None, self.num_features])
51 | features_split2 = tf.split(self._features_batch2, self.num_gpus, axis=0)
52 | distances_split = []
53 | for gpu_idx in range(self.num_gpus):
54 | with tf.device(f'/gpu:{gpu_idx}'):
55 | distances_split.append(batch_pairwise_distances(self._features_batch1, features_split2[gpu_idx]))
56 | self._distance_block = tf.concat(distances_split, axis=1)
57 |
58 | def pairwise_distances(self, U, V):
59 | """Evaluate pairwise distances between two batches of feature vectors."""
60 | return self._distance_block.eval(feed_dict={self._features_batch1: U, self._features_batch2: V})
61 |
62 | #----------------------------------------------------------------------------
63 |
64 | class ManifoldEstimator():
65 | """Finds an estimate for the manifold of given feature vectors."""
66 | def __init__(self, distance_block, features, row_batch_size, col_batch_size, nhood_sizes, clamp_to_percentile=None):
67 | """Find an estimate of the manifold of given feature vectors."""
68 | num_images = features.shape[0]
69 | self.nhood_sizes = nhood_sizes
70 | self.num_nhoods = len(nhood_sizes)
71 | self.row_batch_size = row_batch_size
72 | self.col_batch_size = col_batch_size
73 | self._ref_features = features
74 | self._distance_block = distance_block
75 |
76 | # Estimate manifold of features by calculating distances to kth nearest neighbor of each sample.
77 | self.D = np.zeros([num_images, self.num_nhoods], dtype=np.float16)
78 | distance_batch = np.zeros([row_batch_size, num_images], dtype=np.float16)
79 | seq = np.arange(max(self.nhood_sizes) + 1, dtype=np.int32)
80 |
81 | for begin1 in range(0, num_images, row_batch_size):
82 | end1 = min(begin1 + row_batch_size, num_images)
83 | row_batch = features[begin1:end1]
84 |
85 | for begin2 in range(0, num_images, col_batch_size):
86 | end2 = min(begin2 + col_batch_size, num_images)
87 | col_batch = features[begin2:end2]
88 |
89 | # Compute distances between batches.
90 | distance_batch[0:end1-begin1, begin2:end2] = self._distance_block.pairwise_distances(row_batch, col_batch)
91 |
92 | # Find the kth nearest neighbor from the current batch.
93 | self.D[begin1:end1, :] = np.partition(distance_batch[0:end1-begin1, :], seq, axis=1)[:, self.nhood_sizes]
94 |
95 | if clamp_to_percentile is not None:
96 | max_distances = np.percentile(self.D, clamp_to_percentile, axis=0)
97 | self.D[self.D > max_distances] = 0
98 |
99 | def evaluate(self, eval_features, return_realism=False, return_neighbors=False):
100 | """Evaluate if new feature vectors are in the estimated manifold."""
101 | num_eval_images = eval_features.shape[0]
102 | num_ref_images = self.D.shape[0]
103 | distance_batch = np.zeros([self.row_batch_size, num_ref_images], dtype=np.float16)
104 | batch_predictions = np.zeros([num_eval_images, self.num_nhoods], dtype=np.int32)
105 | #max_realism_score = np.zeros([num_eval_images,], dtype=np.float32)
106 | realism_score = np.zeros([num_eval_images,], dtype=np.float32)
107 | nearest_indices = np.zeros([num_eval_images,], dtype=np.int32)
108 |
109 | for begin1 in range(0, num_eval_images, self.row_batch_size):
110 | end1 = min(begin1 + self.row_batch_size, num_eval_images)
111 | feature_batch = eval_features[begin1:end1]
112 |
113 | for begin2 in range(0, num_ref_images, self.col_batch_size):
114 | end2 = min(begin2 + self.col_batch_size, num_ref_images)
115 | ref_batch = self._ref_features[begin2:end2]
116 |
117 | distance_batch[0:end1-begin1, begin2:end2] = self._distance_block.pairwise_distances(feature_batch, ref_batch)
118 |
119 | # From the minibatch of new feature vectors, determine if they are in the estimated manifold.
120 | # If a feature vector is inside a hypersphere of some reference sample, then the new sample lies on the estimated manifold.
121 | # The radii of the hyperspheres are determined from distances of neighborhood size k.
122 | samples_in_manifold = distance_batch[0:end1-begin1, :, None] <= self.D
123 | batch_predictions[begin1:end1] = np.any(samples_in_manifold, axis=1).astype(np.int32)
124 |
125 | #max_realism_score[begin1:end1] = np.max(self.D[:, 0] / (distance_batch[0:end1-begin1, :] + 1e-18), axis=1)
126 | #nearest_indices[begin1:end1] = np.argmax(self.D[:, 0] / (distance_batch[0:end1-begin1, :] + 1e-18), axis=1)
127 | nearest_indices[begin1:end1] = np.argmin(distance_batch[0:end1-begin1, :], axis=1)
128 | realism_score[begin1:end1] = self.D[nearest_indices[begin1:end1], 0] / np.min(distance_batch[0:end1-begin1, :], axis=1)
129 |
130 | if return_realism and return_neighbors:
131 | return batch_predictions, realism_score, nearest_indices
132 | elif return_realism:
133 | return batch_predictions, realism_score
134 | elif return_neighbors:
135 | return batch_predictions, nearest_indices
136 |
137 | return batch_predictions
138 |
139 | #----------------------------------------------------------------------------
140 |
141 | def knn_precision_recall_features(ref_features, eval_features, feature_net, nhood_sizes,
142 | row_batch_size, col_batch_size, num_gpus):
143 | """Calculates k-NN precision and recall for two sets of feature vectors."""
144 | state = dnnlib.EasyDict()
145 | #num_images = ref_features.shape[0]
146 | num_features = feature_net.output_shape[1]
147 | state.ref_features = ref_features
148 | state.eval_features = eval_features
149 |
150 | # Initialize DistanceBlock and ManifoldEstimators.
151 | distance_block = DistanceBlock(num_features, num_gpus)
152 | state.ref_manifold = ManifoldEstimator(distance_block, state.ref_features, row_batch_size, col_batch_size, nhood_sizes)
153 | state.eval_manifold = ManifoldEstimator(distance_block, state.eval_features, row_batch_size, col_batch_size, nhood_sizes)
154 |
155 | # Evaluate precision and recall using k-nearest neighbors.
156 | #print(f'Evaluating k-NN precision and recall with {num_images} samples...')
157 | #start = time.time()
158 |
159 | # Precision: How many points from eval_features are in ref_features manifold.
160 | state.precision, state.realism_scores, state.nearest_neighbors = state.ref_manifold.evaluate(state.eval_features, return_realism=True, return_neighbors=True)
161 | state.knn_precision = state.precision.mean(axis=0)
162 |
163 | # Recall: How many points from ref_features are in eval_features manifold.
164 | state.recall = state.eval_manifold.evaluate(state.ref_features)
165 | state.knn_recall = state.recall.mean(axis=0)
166 |
167 | #elapsed_time = time.time() - start
168 | #print(f'Done evaluation in: {elapsed_time:g}s')
169 |
170 | return state
171 |
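# Usage sketch (illustrative): both feature sets are [N, num_features] arrays,
# e.g. VGG-16 features of real and generated images; nhood_sizes=[3] matches
# the k=3 setting used by the PR metric below.
#
#   state = knn_precision_recall_features(ref_features=feat_real, eval_features=feat_fake,
#       feature_net=feature_net, nhood_sizes=[3], row_batch_size=10000,
#       col_batch_size=10000, num_gpus=1)
#   print(state.knn_precision[0], state.knn_recall[0])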
172 | #----------------------------------------------------------------------------
173 |
174 | class PR(metric_base.MetricBase):
175 | def __init__(self, max_reals, num_fakes, nhood_size, minibatch_per_gpu, row_batch_size, col_batch_size, **kwargs):
176 | super().__init__(**kwargs)
177 | self.max_reals = max_reals
178 | self.num_fakes = num_fakes
179 | self.nhood_size = nhood_size
180 | self.minibatch_per_gpu = minibatch_per_gpu
181 | self.row_batch_size = row_batch_size
182 | self.col_batch_size = col_batch_size
183 |
184 | def _evaluate(self, Gs, G_kwargs, num_gpus, **_kwargs): # pylint: disable=arguments-differ
185 | minibatch_size = num_gpus * self.minibatch_per_gpu
186 | with dnnlib.util.open_url('https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/vgg16.pkl') as f:
187 | feature_net = pickle.load(f)
188 |
189 | # Calculate features for reals.
190 | cache_file = self._get_cache_file_for_reals(max_reals=self.max_reals)
191 | os.makedirs(os.path.dirname(cache_file), exist_ok=True)
192 | if os.path.isfile(cache_file):
193 | with open(cache_file, 'rb') as f:
194 | feat_real = pickle.load(f)
195 | else:
196 | feat_real = []
197 | for images, _labels, num in self._iterate_reals(minibatch_size):
198 | if images.shape[1] == 1: images = np.tile(images, [1, 3, 1, 1])
199 | feat_real += list(feature_net.run(images, num_gpus=num_gpus, assume_frozen=True))[:num]
200 | if self.max_reals is not None and len(feat_real) >= self.max_reals:
201 | break
202 | if self.max_reals is not None and len(feat_real) > self.max_reals:
203 | feat_real = feat_real[:self.max_reals]
204 | feat_real = np.stack(feat_real)
205 | with open(cache_file, 'wb') as f:
206 | pickle.dump(feat_real, f)
207 |
208 | # Construct TensorFlow graph.
209 | result_expr = []
210 | for gpu_idx in range(num_gpus):
211 | with tf.device(f'/gpu:{gpu_idx}'):
212 | Gs_clone = Gs.clone()
213 | feature_net_clone = feature_net.clone()
214 | latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:])
215 | labels = self._get_random_labels_tf(self.minibatch_per_gpu)
216 | images = Gs_clone.get_output_for(latents, labels, **G_kwargs)
217 | if images.shape[1] == 1: images = tf.tile(images, [1, 3, 1, 1])
218 | images = tflib.convert_images_to_uint8(images)
219 | result_expr.append(feature_net_clone.get_output_for(images))
220 |
221 | # Calculate features for fakes.
222 | feat_fake = []
223 | for begin in range(0, self.num_fakes, minibatch_size):
224 | self._report_progress(begin, self.num_fakes)
225 | feat_fake += list(np.concatenate(tflib.run(result_expr), axis=0))
226 | feat_fake = np.stack(feat_fake[:self.num_fakes])
227 |
228 | # Calculate precision and recall.
229 | state = knn_precision_recall_features(ref_features=feat_real, eval_features=feat_fake, feature_net=feature_net,
230 | nhood_sizes=[self.nhood_size], row_batch_size=self.row_batch_size, col_batch_size=self.col_batch_size, num_gpus=num_gpus)
231 | self._report_result(state.knn_precision[0], suffix='_precision')
232 | self._report_result(state.knn_recall[0], suffix='_recall')
233 |
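# Configuration sketch (illustrative; the actual preset lives in
# metric_defaults.py and may differ):
#
#   pr = PR(name='pr50k3', max_reals=200000, num_fakes=50000, nhood_size=3,
#           minibatch_per_gpu=8, row_batch_size=10000, col_batch_size=10000)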
234 | #----------------------------------------------------------------------------
235 |
--------------------------------------------------------------------------------
/stylegan2-ada/training/dataset.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | """Streaming images and labels from dataset created with dataset_tool.py."""
10 |
11 | import os
12 | import glob
13 | import numpy as np
14 | import tensorflow as tf
15 | import dnnlib.tflib as tflib
16 |
17 | #----------------------------------------------------------------------------
18 | # Dataset class that loads images from tfrecords files.
19 |
20 | class TFRecordDataset:
21 | def __init__(self,
22 | tfrecord_dir, # Directory containing a collection of tfrecords files.
23 | resolution = None, # Dataset resolution, None = autodetect.
24 | label_file = None, # Relative path of the labels file, None = autodetect.
25 | max_label_size = 0, # 0 = no labels, 'full' = full labels, <int> = N first label components.
26 | max_images = None, # Maximum number of images to use, None = use all images.
27 | max_validation = 10000, # Maximum size of the validation set, None = use all available images.
28 | mirror_augment = False, # Apply mirror augment?
29 | repeat = True, # Repeat dataset indefinitely?
30 | shuffle = True, # Shuffle images?
31 | shuffle_mb = 4096, # Shuffle data within specified window (megabytes), 0 = disable shuffling.
32 | prefetch_mb = 2048, # Amount of data to prefetch (megabytes), 0 = disable prefetching.
33 | buffer_mb = 256, # Read buffer size (megabytes).
34 | num_threads = 2, # Number of concurrent threads.
35 | _is_validation = False,
36 | ):
37 | self.tfrecord_dir = tfrecord_dir
38 | self.resolution = None
39 | self.resolution_log2 = None
40 | self.shape = [] # [channels, height, width]
41 | self.dtype = 'uint8'
42 | self.label_file = label_file
43 | self.label_size = None # components
44 | self.label_dtype = None
45 | self.has_validation_set = None
46 | self.mirror_augment = mirror_augment
47 | self.repeat = repeat
48 | self.shuffle = shuffle
49 | self._max_validation = max_validation
50 | self._np_labels = None
51 | self._tf_minibatch_in = None
52 | self._tf_labels_var = None
53 | self._tf_labels_dataset = None
54 | self._tf_datasets = dict()
55 | self._tf_iterator = None
56 | self._tf_init_ops = dict()
57 | self._tf_minibatch_np = None
58 | self._cur_minibatch = -1
59 | self._cur_lod = -1
60 |
61 | # List files in the dataset directory.
62 | assert os.path.isdir(self.tfrecord_dir)
63 | all_files = sorted(glob.glob(os.path.join(self.tfrecord_dir, '*')))
64 | self.has_validation_set = (self._max_validation > 0) and any(os.path.basename(f).startswith('validation-') for f in all_files)
65 | all_files = [f for f in all_files if os.path.basename(f).startswith('validation-') == _is_validation]
66 |
67 | # Inspect tfrecords files.
68 | tfr_files = [f for f in all_files if f.endswith('.tfrecords')]
69 | assert len(tfr_files) >= 1
70 | tfr_shapes = []
71 | for tfr_file in tfr_files:
72 | tfr_opt = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.NONE)
73 | for record in tf.python_io.tf_record_iterator(tfr_file, tfr_opt):
74 | tfr_shapes.append(self.parse_tfrecord_np(record).shape)
75 | break
76 |
77 | # Autodetect label filename.
78 | if self.label_file is None:
79 | guess = [f for f in all_files if f.endswith('.labels')]
80 | if len(guess):
81 | self.label_file = guess[0]
82 | elif not os.path.isfile(self.label_file):
83 | guess = os.path.join(self.tfrecord_dir, self.label_file)
84 | if os.path.isfile(guess):
85 | self.label_file = guess
86 |
87 | # Determine shape and resolution.
88 | max_shape = max(tfr_shapes, key=np.prod)
89 | self.resolution = resolution if resolution is not None else max_shape[1]
90 | self.resolution_log2 = int(np.log2(self.resolution))
91 | self.shape = [max_shape[0], self.resolution, self.resolution]
92 | tfr_lods = [self.resolution_log2 - int(np.log2(shape[1])) for shape in tfr_shapes]
93 | assert all(shape[0] == max_shape[0] for shape in tfr_shapes)
94 | assert all(shape[1] == shape[2] for shape in tfr_shapes)
95 | assert all(shape[1] == self.resolution // (2**lod) for shape, lod in zip(tfr_shapes, tfr_lods))
96 | assert all(lod in tfr_lods for lod in range(self.resolution_log2 - 1))
97 |
98 | # Load labels.
99 | assert max_label_size == 'full' or max_label_size >= 0
100 | self._np_labels = np.zeros([1<<30, 0], dtype=np.float32)
101 | if self.label_file is not None and max_label_size != 0:
102 | self._np_labels = np.load(self.label_file)
103 | assert self._np_labels.ndim == 2
104 | if max_label_size != 'full' and self._np_labels.shape[1] > max_label_size:
105 | self._np_labels = self._np_labels[:, :max_label_size]
106 | if max_images is not None and self._np_labels.shape[0] > max_images:
107 | self._np_labels = self._np_labels[:max_images]
108 | self.label_size = self._np_labels.shape[1]
109 | self.label_dtype = self._np_labels.dtype.name
110 |
111 | # Build TF expressions.
112 | with tf.name_scope('Dataset'), tf.device('/cpu:0'), tf.control_dependencies(None):
113 | self._tf_minibatch_in = tf.placeholder(tf.int64, name='minibatch_in', shape=[])
114 | self._tf_labels_var = tflib.create_var_with_large_initial_value(self._np_labels, name='labels_var')
115 | self._tf_labels_dataset = tf.data.Dataset.from_tensor_slices(self._tf_labels_var)
116 | for tfr_file, tfr_shape, tfr_lod in zip(tfr_files, tfr_shapes, tfr_lods):
117 | if tfr_lod < 0:
118 | continue
119 | dset = tf.data.TFRecordDataset(tfr_file, compression_type='', buffer_size=buffer_mb<<20)
120 | if max_images is not None:
121 | dset = dset.take(max_images)
122 | dset = dset.map(self.parse_tfrecord_tf, num_parallel_calls=num_threads)
123 | dset = tf.data.Dataset.zip((dset, self._tf_labels_dataset))
124 | bytes_per_item = np.prod(tfr_shape) * np.dtype(self.dtype).itemsize
125 | if self.shuffle and shuffle_mb > 0:
126 | dset = dset.shuffle(((shuffle_mb << 20) - 1) // bytes_per_item + 1)
127 | if self.repeat:
128 | dset = dset.repeat()
129 | if prefetch_mb > 0:
130 | dset = dset.prefetch(((prefetch_mb << 20) - 1) // bytes_per_item + 1)
131 | dset = dset.batch(self._tf_minibatch_in)
132 | self._tf_datasets[tfr_lod] = dset
133 | self._tf_iterator = tf.data.Iterator.from_structure(self._tf_datasets[0].output_types, self._tf_datasets[0].output_shapes)
134 | self._tf_init_ops = {lod: self._tf_iterator.make_initializer(dset) for lod, dset in self._tf_datasets.items()}
135 |
136 | def close(self):
137 | pass
138 |
139 | # Use the given minibatch size and level-of-detail for the data returned by get_minibatch_tf().
140 | def configure(self, minibatch_size, lod=0):
141 | lod = int(np.floor(lod))
142 | assert minibatch_size >= 1 and lod in self._tf_datasets
143 | if self._cur_minibatch != minibatch_size or self._cur_lod != lod:
144 | self._tf_init_ops[lod].run({self._tf_minibatch_in: minibatch_size})
145 | self._cur_minibatch = minibatch_size
146 | self._cur_lod = lod
147 |
148 | # Get next minibatch as TensorFlow expressions.
149 | def get_minibatch_tf(self):
150 | images, labels = self._tf_iterator.get_next()
151 | if self.mirror_augment:
152 | images = tf.cast(images, tf.float32)
153 | images = tf.where(tf.random_uniform([tf.shape(images)[0]]) < 0.5, images, tf.reverse(images, [3]))
154 | images = tf.cast(images, self.dtype)
155 | return images, labels
156 |
157 | # Get next minibatch as NumPy arrays.
158 | def get_minibatch_np(self, minibatch_size, lod=0): # => (images, labels) or (None, None)
159 | self.configure(minibatch_size, lod)
160 | if self._tf_minibatch_np is None:
161 | with tf.name_scope('Dataset'):
162 | self._tf_minibatch_np = self.get_minibatch_tf()
163 | try:
164 | return tflib.run(self._tf_minibatch_np)
165 | except tf.errors.OutOfRangeError:
166 | return None, None
167 |
168 | # Get random labels as TensorFlow expression.
169 | def get_random_labels_tf(self, minibatch_size): # => labels
170 | with tf.name_scope('Dataset'):
171 | if self.label_size > 0:
172 | with tf.device('/cpu:0'):
173 | return tf.gather(self._tf_labels_var, tf.random_uniform([minibatch_size], 0, self._np_labels.shape[0], dtype=tf.int32))
174 | return tf.zeros([minibatch_size, 0], self.label_dtype)
175 |
176 | # Get random labels as NumPy array.
177 | def get_random_labels_np(self, minibatch_size): # => labels
178 | if self.label_size > 0:
179 | return self._np_labels[np.random.randint(self._np_labels.shape[0], size=[minibatch_size])]
180 | return np.zeros([minibatch_size, 0], self.label_dtype)
181 |
182 | # Load validation set as NumPy array.
183 | def load_validation_set_np(self):
184 | images = []
185 | labels = []
186 | if self.has_validation_set:
187 | validation_set = TFRecordDataset(
188 | tfrecord_dir=self.tfrecord_dir, resolution=self.shape[2], max_label_size=self.label_size,
189 | max_images=self._max_validation, repeat=False, shuffle=False, prefetch_mb=0, _is_validation=True)
190 | validation_set.configure(1)
191 | while True:
192 | image, label = validation_set.get_minibatch_np(1)
193 | if image is None:
194 | break
195 | images.append(image)
196 | labels.append(label)
197 | images = np.concatenate(images, axis=0) if len(images) else np.zeros([0] + self.shape, dtype=self.dtype)
198 | labels = np.concatenate(labels, axis=0) if len(labels) else np.zeros([0, self.label_size], self.label_dtype)
199 | assert list(images.shape[1:]) == self.shape
200 | assert labels.shape[1] == self.label_size
201 | assert images.shape[0] <= self._max_validation
202 | return images, labels
203 |
204 | # Parse individual image from a tfrecords file into TensorFlow expression.
205 | @staticmethod
206 | def parse_tfrecord_tf(record):
207 | features = tf.parse_single_example(record, features={
208 | 'shape': tf.FixedLenFeature([3], tf.int64),
209 | 'data': tf.FixedLenFeature([], tf.string)})
210 | data = tf.decode_raw(features['data'], tf.uint8)
211 | return tf.reshape(data, features['shape'])
212 |
213 | # Parse individual image from a tfrecords file into NumPy array.
214 | @staticmethod
215 | def parse_tfrecord_np(record):
216 | ex = tf.train.Example()
217 | ex.ParseFromString(record)
218 | shape = ex.features.feature['shape'].int64_list.value # pylint: disable=no-member
219 | data = ex.features.feature['data'].bytes_list.value[0] # pylint: disable=no-member
220 | return np.frombuffer(data, np.uint8).reshape(shape)
221 |
222 | #----------------------------------------------------------------------------
223 | # Construct a dataset object using the given options.
224 |
225 | def load_dataset(path=None, resolution=None, max_images=None, max_label_size=0, mirror_augment=False, repeat=True, shuffle=True, seed=None):
226 | _ = seed
227 | assert os.path.isdir(path)
228 | return TFRecordDataset(
229 | tfrecord_dir=path,
230 | resolution=resolution, max_images=max_images, max_label_size=max_label_size,
231 | mirror_augment=mirror_augment, repeat=repeat, shuffle=shuffle)
232 |
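# Usage sketch (illustrative): stream minibatches from a dataset prepared with
# dataset_tool.py. tflib.init_tf() must be called first, since TFRecordDataset
# builds TensorFlow ops. The directory path is an arbitrary example.
#
#   tflib.init_tf()
#   training_set = load_dataset(path='datasets/mydataset', max_label_size='full')
#   images, labels = training_set.get_minibatch_np(8)  # uint8, NCHW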
233 | #----------------------------------------------------------------------------
234 |
--------------------------------------------------------------------------------
/stylegan2-ada/projector.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | """Project given image to the latent space of pretrained network pickle."""
10 |
11 | import argparse
12 | import os
13 | import pickle
14 | import imageio
15 |
16 | import numpy as np
17 | import PIL.Image
18 | import tensorflow as tf
19 | import tqdm
20 |
21 | import dnnlib
22 | import dnnlib.tflib as tflib
23 |
24 | class Projector:
25 | def __init__(self):
26 | self.num_steps = 1000
27 | self.dlatent_avg_samples = 10000
28 | self.initial_learning_rate = 0.1
29 | self.initial_noise_factor = 0.05
30 | self.lr_rampdown_length = 0.25
31 | self.lr_rampup_length = 0.05
32 | self.noise_ramp_length = 0.75
33 | self.regularize_noise_weight = 1e5
34 | self.verbose = True
35 |
36 | self._Gs = None
37 | self._minibatch_size = None
38 | self._dlatent_avg = None
39 | self._dlatent_std = None
40 | self._noise_vars = None
41 | self._noise_init_op = None
42 | self._noise_normalize_op = None
43 | self._dlatents_var = None
44 | self._dlatent_noise_in = None
45 | self._dlatents_expr = None
46 | self._images_float_expr = None
47 | self._images_uint8_expr = None
48 | self._target_images_var = None
49 | self._lpips = None
50 | self._dist = None
51 | self._loss = None
52 | self._reg_sizes = None
53 | self._lrate_in = None
54 | self._opt = None
55 | self._opt_step = None
56 | self._cur_step = None
57 |
58 | def _info(self, *args):
59 | if self.verbose:
60 | print('Projector:', *args)
61 |
62 | def set_network(self, Gs, dtype='float16'):
63 | if Gs is None:
64 | self._Gs = None
65 | return
66 | self._Gs = Gs.clone(randomize_noise=False, dtype=dtype, num_fp16_res=0, fused_modconv=True)
67 |
68 | # Compute dlatent stats.
69 | self._info(f'Computing W midpoint and stddev using {self.dlatent_avg_samples} samples...')
70 | latent_samples = np.random.RandomState(123).randn(self.dlatent_avg_samples, *self._Gs.input_shapes[0][1:])
71 | dlatent_samples = self._Gs.components.mapping.run(latent_samples, None) # [N, L, C]
72 | dlatent_samples = dlatent_samples[:, :1, :].astype(np.float32) # [N, 1, C]
73 | self._dlatent_avg = np.mean(dlatent_samples, axis=0, keepdims=True) # [1, 1, C]
74 | self._dlatent_std = (np.sum((dlatent_samples - self._dlatent_avg) ** 2) / self.dlatent_avg_samples) ** 0.5
75 | self._info(f'std = {self._dlatent_std:g}')
76 |
77 | # Setup noise inputs.
78 | self._info('Setting up noise inputs...')
79 | self._noise_vars = []
80 | noise_init_ops = []
81 | noise_normalize_ops = []
82 | while True:
83 | n = f'G_synthesis/noise{len(self._noise_vars)}'
84 | if not n in self._Gs.vars:
85 | break
86 | v = self._Gs.vars[n]
87 | self._noise_vars.append(v)
88 | noise_init_ops.append(tf.assign(v, tf.random_normal(tf.shape(v), dtype=tf.float32)))
89 | noise_mean = tf.reduce_mean(v)
90 | noise_std = tf.reduce_mean((v - noise_mean)**2)**0.5
91 | noise_normalize_ops.append(tf.assign(v, (v - noise_mean) / noise_std))
92 | self._noise_init_op = tf.group(*noise_init_ops)
93 | self._noise_normalize_op = tf.group(*noise_normalize_ops)
94 |
95 | # Build image output graph.
96 | self._info('Building image output graph...')
97 | self._minibatch_size = 1
98 | self._dlatents_var = tf.Variable(tf.zeros([self._minibatch_size] + list(self._dlatent_avg.shape[1:])), name='dlatents_var')
99 | self._dlatent_noise_in = tf.placeholder(tf.float32, [], name='noise_in')
100 | dlatents_noise = tf.random.normal(shape=self._dlatents_var.shape) * self._dlatent_noise_in
101 | self._dlatents_expr = tf.tile(self._dlatents_var + dlatents_noise, [1, self._Gs.components.synthesis.input_shape[1], 1])
102 | self._images_float_expr = tf.cast(self._Gs.components.synthesis.get_output_for(self._dlatents_expr), tf.float32)
103 | self._images_uint8_expr = tflib.convert_images_to_uint8(self._images_float_expr, nchw_to_nhwc=True)
104 |
105 | # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images.
106 | proc_images_expr = (self._images_float_expr + 1) * (255 / 2)
107 | sh = proc_images_expr.shape.as_list()
108 | if sh[2] > 256:
109 | factor = sh[2] // 256
110 | proc_images_expr = tf.reduce_mean(tf.reshape(proc_images_expr, [-1, sh[1], sh[2] // factor, factor, sh[2] // factor, factor]), axis=[3,5])
111 |
112 | # Build loss graph.
113 | self._info('Building loss graph...')
114 | self._target_images_var = tf.Variable(tf.zeros(proc_images_expr.shape), name='target_images_var')
115 | if self._lpips is None:
116 | with dnnlib.util.open_url('https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/vgg16_zhang_perceptual.pkl') as f:
117 | self._lpips = pickle.load(f)
118 | self._dist = self._lpips.get_output_for(proc_images_expr, self._target_images_var)
119 | self._loss = tf.reduce_sum(self._dist)
120 |
121 | # Build noise regularization graph.
122 | self._info('Building noise regularization graph...')
123 | reg_loss = 0.0
124 | for v in self._noise_vars:
125 | sz = v.shape[2]
126 | while True:
127 | reg_loss += tf.reduce_mean(v * tf.roll(v, shift=1, axis=3))**2 + tf.reduce_mean(v * tf.roll(v, shift=1, axis=2))**2
128 | if sz <= 8:
129 | break # Small enough already
130 | v = tf.reshape(v, [1, 1, sz//2, 2, sz//2, 2]) # Downscale
131 | v = tf.reduce_mean(v, axis=[3, 5])
132 | sz = sz // 2
133 | self._loss += reg_loss * self.regularize_noise_weight
134 |
135 | # Setup optimizer.
136 | self._info('Setting up optimizer...')
137 | self._lrate_in = tf.placeholder(tf.float32, [], name='lrate_in')
138 | self._opt = tflib.Optimizer(learning_rate=self._lrate_in)
139 | self._opt.register_gradients(self._loss, [self._dlatents_var] + self._noise_vars)
140 | self._opt_step = self._opt.apply_updates()
141 |
142 | def start(self, target_images):
143 | assert self._Gs is not None
144 |
145 | # Prepare target images.
146 | self._info('Preparing target images...')
147 | target_images = np.asarray(target_images, dtype='float32')
148 | target_images = (target_images + 1) * (255 / 2)
149 | sh = target_images.shape
150 | assert sh[0] == self._minibatch_size
151 | if sh[2] > self._target_images_var.shape[2]:
152 | factor = sh[2] // self._target_images_var.shape[2]
153 | target_images = np.reshape(target_images, [-1, sh[1], sh[2] // factor, factor, sh[3] // factor, factor]).mean((3, 5))
154 |
155 | # Initialize optimization state.
156 | self._info('Initializing optimization state...')
157 | dlatents = np.tile(self._dlatent_avg, [self._minibatch_size, 1, 1])
158 | tflib.set_vars({self._target_images_var: target_images, self._dlatents_var: dlatents})
159 | tflib.run(self._noise_init_op)
160 | self._opt.reset_optimizer_state()
161 | self._cur_step = 0
162 |
163 | def step(self):
164 | assert self._cur_step is not None
165 | if self._cur_step >= self.num_steps:
166 | return 0, 0
167 |
168 | # Choose hyperparameters.
169 | t = self._cur_step / self.num_steps
170 | dlatent_noise = self._dlatent_std * self.initial_noise_factor * max(0.0, 1.0 - t / self.noise_ramp_length) ** 2
171 | lr_ramp = min(1.0, (1.0 - t) / self.lr_rampdown_length)
172 | lr_ramp = 0.5 - 0.5 * np.cos(lr_ramp * np.pi)
173 | lr_ramp = lr_ramp * min(1.0, t / self.lr_rampup_length)
174 | learning_rate = self.initial_learning_rate * lr_ramp
175 |
176 | # Execute optimization step.
177 | feed_dict = {self._dlatent_noise_in: dlatent_noise, self._lrate_in: learning_rate}
178 | _, dist_value, loss_value = tflib.run([self._opt_step, self._dist, self._loss], feed_dict)
179 | tflib.run(self._noise_normalize_op)
180 | self._cur_step += 1
181 | return dist_value, loss_value
182 |
183 | @property
184 | def cur_step(self):
185 | return self._cur_step
186 |
187 | @property
188 | def dlatents(self):
189 | return tflib.run(self._dlatents_expr, {self._dlatent_noise_in: 0})
190 |
191 | @property
192 | def noises(self):
193 | return tflib.run(self._noise_vars)
194 |
195 | @property
196 | def images_float(self):
197 | return tflib.run(self._images_float_expr, {self._dlatent_noise_in: 0})
198 |
199 | @property
200 | def images_uint8(self):
201 | return tflib.run(self._images_uint8_expr, {self._dlatent_noise_in: 0})
202 |
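# Usage sketch (illustrative): the project() helper below wraps this loop; the
# class can also be driven manually once a network pickle has been loaded.
#
#   proj = Projector()
#   proj.set_network(Gs)
#   proj.start([target_float])  # one NCHW float32 image in [-1, 1]
#   while proj.cur_step < proj.num_steps:
#       dist, loss = proj.step()
#   np.savez('dlatents.npz', dlatents=proj.dlatents)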
203 | #----------------------------------------------------------------------------
204 |
205 | def project(network_pkl: str, target_fname: str, outdir: str, save_video: bool, seed: int):
206 | # Load networks.
207 | tflib.init_tf({'rnd.np_random_seed': seed})
208 | print('Loading networks from "%s"...' % network_pkl)
209 | with dnnlib.util.open_url(network_pkl) as fp:
210 | _G, _D, Gs = pickle.load(fp)
211 |
212 | # Load target image.
213 | target_pil = PIL.Image.open(target_fname)
214 | w, h = target_pil.size
215 | s = min(w, h)
216 | target_pil = target_pil.crop(((w - s) // 2, (h - s) // 2, (w + s) // 2, (h + s) // 2))
217 | target_pil = target_pil.convert('RGB')
218 | target_pil = target_pil.resize((Gs.output_shape[3], Gs.output_shape[2]), PIL.Image.ANTIALIAS)
219 | target_uint8 = np.array(target_pil, dtype=np.uint8)
220 | target_float = target_uint8.astype(np.float32).transpose([2, 0, 1]) * (2 / 255) - 1
221 |
222 | # Initialize projector.
223 | proj = Projector()
224 | proj.set_network(Gs)
225 | proj.start([target_float])
226 |
227 | # Setup output directory.
228 | os.makedirs(outdir, exist_ok=True)
229 | target_pil.save(f'{outdir}/target.png')
230 | writer = None
231 | if save_video:
232 | writer = imageio.get_writer(f'{outdir}/proj.mp4', mode='I', fps=60, codec='libx264', bitrate='16M')
233 |
234 | # Run projector.
235 | with tqdm.trange(proj.num_steps) as t:
236 | for step in t:
237 | assert step == proj.cur_step
238 | if writer is not None:
239 | writer.append_data(np.concatenate([target_uint8, proj.images_uint8[0]], axis=1))
240 | dist, loss = proj.step()
241 | t.set_postfix(dist=f'{dist[0]:.4f}', loss=f'{loss:.2f}')
242 |
243 | # Save results.
244 | PIL.Image.fromarray(proj.images_uint8[0], 'RGB').save(f'{outdir}/proj.png')
245 | np.savez(f'{outdir}/dlatents.npz', dlatents=proj.dlatents)
246 | if writer is not None:
247 | writer.close()
248 |
249 | #----------------------------------------------------------------------------
250 |
251 | def _str_to_bool(v):
252 | if isinstance(v, bool):
253 | return v
254 | if v.lower() in ('yes', 'true', 't', 'y', '1'):
255 | return True
256 | if v.lower() in ('no', 'false', 'f', 'n', '0'):
257 | return False
258 | raise argparse.ArgumentTypeError('Boolean value expected.')
259 |
260 | #----------------------------------------------------------------------------
261 |
262 | _examples = '''examples:
263 |
264 | python %(prog)s --outdir=out --target=targetimg.png \\
265 | --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/ffhq.pkl
266 | '''
267 |
268 | #----------------------------------------------------------------------------
269 |
270 | def main():
271 | parser = argparse.ArgumentParser(
272 | description='Project given image to the latent space of pretrained network pickle.',
273 | epilog=_examples,
274 | formatter_class=argparse.RawDescriptionHelpFormatter
275 | )
276 |
277 | parser.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True)
278 | parser.add_argument('--target', help='Target image file to project to', dest='target_fname', required=True)
279 | parser.add_argument('--save-video', help='Save an mp4 video of optimization progress (default: true)', type=_str_to_bool, default=True)
280 | parser.add_argument('--seed', help='Random seed', type=int, default=303)
281 | parser.add_argument('--outdir', help='Where to save the output images', required=True, metavar='DIR')
282 | project(**vars(parser.parse_args()))
283 |
284 | #----------------------------------------------------------------------------
285 |
286 | if __name__ == "__main__":
287 | main()
288 |
289 | #----------------------------------------------------------------------------
290 |
--------------------------------------------------------------------------------
/stylegan2-ada/training/training_loop.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | """Main training loop."""
10 |
11 | import os
12 | import pickle
13 | import time
14 | import PIL.Image
15 | import numpy as np
16 | import tensorflow as tf
17 | import dnnlib
18 | import dnnlib.tflib as tflib
19 | from dnnlib.tflib.autosummary import autosummary
20 |
21 | from training import dataset
22 |
23 | #----------------------------------------------------------------------------
24 | # Select size and contents of the image snapshot grids that are exported
25 | # periodically during training.
26 |
27 | def setup_snapshot_image_grid(training_set):
28 | gw = np.clip(7680 // training_set.shape[2], 7, 32)
29 | gh = np.clip(4320 // training_set.shape[1], 4, 32)
30 |
31 | # Unconditional.
32 | if training_set.label_size == 0:
33 | reals, labels = training_set.get_minibatch_np(gw * gh)
34 | return (gw, gh), reals, labels
35 |
36 | # Row per class.
37 | cw, ch = (gw, 1)
38 | nw = (gw - 1) // cw + 1
39 | nh = (gh - 1) // ch + 1
40 |
41 | # Collect images.
42 | blocks = [[] for _i in range(nw * nh)]
43 | for _iter in range(1000000):
44 | real, label = training_set.get_minibatch_np(1)
45 | idx = np.argmax(label[0])
46 | while idx < len(blocks) and len(blocks[idx]) >= cw * ch:
47 | idx += training_set.label_size
48 | if idx < len(blocks):
49 | blocks[idx].append((real, label))
50 | if all(len(block) >= cw * ch for block in blocks):
51 | break
52 |
53 | # Layout grid.
54 | reals = np.zeros([gw * gh] + training_set.shape, dtype=training_set.dtype)
55 | labels = np.zeros([gw * gh, training_set.label_size], dtype=training_set.label_dtype)
56 | for i, block in enumerate(blocks):
57 | for j, (real, label) in enumerate(block):
58 | x = (i % nw) * cw + j % cw
59 | y = (i // nw) * ch + j // cw
60 | if x < gw and y < gh:
61 | reals[x + y * gw] = real[0]
62 | labels[x + y * gw] = label[0]
63 | return (gw, gh), reals, labels
64 |
65 | #----------------------------------------------------------------------------
66 |
67 | def save_image_grid(images, filename, drange, grid_size):
68 | lo, hi = drange
69 | gw, gh = grid_size
70 | images = np.asarray(images, dtype=np.float32)
71 | images = (images - lo) * (255 / (hi - lo))
72 | images = np.rint(images).clip(0, 255).astype(np.uint8)
73 | _N, C, H, W = images.shape
74 | images = images.reshape(gh, gw, C, H, W)
75 | images = images.transpose(0, 3, 1, 4, 2)
76 | images = images.reshape(gh * H, gw * W, C)
77 | PIL.Image.fromarray(images, {3: 'RGB', 1: 'L'}[C]).save(filename)
78 |
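# Shape walk-through (illustrative): for N = gw*gh images,
# [N, C, H, W] -> [gh, gw, C, H, W] -> transpose(0, 3, 1, 4, 2) -> [gh, H, gw, W, C]
# -> reshape to [gh*H, gw*W, C], i.e. grid rows stacked vertically.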
79 | #----------------------------------------------------------------------------
80 | # Main training script.
81 |
82 | def training_loop(
83 | run_dir = '.', # Output directory.
84 | G_args = {}, # Options for generator network.
85 | D_args = {}, # Options for discriminator network.
86 | G_opt_args = {}, # Options for generator optimizer.
87 | D_opt_args = {}, # Options for discriminator optimizer.
88 | loss_args = {}, # Options for loss function.
89 | train_dataset_args = {}, # Options for dataset to train with.
90 | metric_dataset_args = {}, # Options for dataset to evaluate metrics against.
91 | augment_args = {}, # Options for adaptive augmentations.
92 | metric_arg_list = [], # Metrics to evaluate during training.
93 | num_gpus = 1, # Number of GPUs to use.
94 | minibatch_size = 32, # Global minibatch size.
95 | minibatch_gpu = 4, # Number of samples processed at a time by one GPU.
96 | G_smoothing_kimg = 10, # Half-life of the exponential moving average (EMA) of generator weights.
97 | G_smoothing_rampup = None, # EMA ramp-up coefficient.
98 | minibatch_repeats = 4, # Number of minibatches to run in the inner loop.
99 | lazy_regularization = True, # Perform regularization as a separate training step?
100 | G_reg_interval = 4, # How often to perform regularization for G? Ignored if lazy_regularization=False.
101 | D_reg_interval = 16, # How often to perform regularization for D? Ignored if lazy_regularization=False.
102 | total_kimg = 25000, # Total length of the training, measured in thousands of real images.
103 | kimg_per_tick = 4, # Progress snapshot interval.
104 | image_snapshot_ticks = 50, # How often to save image snapshots? None = only save 'reals.png' and 'fakes-init.png'.
105 | network_snapshot_ticks = 50, # How often to save network snapshots? None = only save 'networks-final.pkl'.
106 | resume_pkl = None, # Network pickle to resume training from.
107 | abort_fn = None, # Callback function for determining whether to abort training.
108 | progress_fn = None, # Callback function for updating training progress.
109 | ):
110 | assert minibatch_size % (num_gpus * minibatch_gpu) == 0
111 | start_time = time.time()
112 |
113 | print('Loading training set...')
114 | training_set = dataset.load_dataset(**train_dataset_args)
115 | print('Image shape:', np.int32(training_set.shape).tolist())
116 | print('Label shape:', [training_set.label_size])
117 | print()
118 |
119 | print('Constructing networks...')
120 | with tf.device('/gpu:0'):
121 | G = tflib.Network('G', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **G_args)
122 | D = tflib.Network('D', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **D_args)
123 | Gs = G.clone('Gs')
124 | if resume_pkl is not None:
125 | print(f'Resuming from "{resume_pkl}"')
126 | with dnnlib.util.open_url(resume_pkl) as f:
127 | rG, rD, rGs = pickle.load(f)
128 | G.copy_vars_from(rG)
129 | D.copy_vars_from(rD)
130 | Gs.copy_vars_from(rGs)
131 | G.print_layers()
132 | D.print_layers()
133 |
134 | print('Exporting sample images...')
135 | grid_size, grid_reals, grid_labels = setup_snapshot_image_grid(training_set)
136 | save_image_grid(grid_reals, os.path.join(run_dir, 'reals.png'), drange=[0,255], grid_size=grid_size)
137 | grid_latents = np.random.randn(np.prod(grid_size), *G.input_shape[1:])
138 | grid_fakes = Gs.run(grid_latents, grid_labels, is_validation=True, minibatch_size=minibatch_gpu)
139 | save_image_grid(grid_fakes, os.path.join(run_dir, 'fakes_init.png'), drange=[-1,1], grid_size=grid_size)
140 |
141 | print(f'Replicating networks across {num_gpus} GPUs...')
142 | G_gpus = [G]
143 | D_gpus = [D]
144 | for gpu in range(1, num_gpus):
145 | with tf.device(f'/gpu:{gpu}'):
146 | G_gpus.append(G.clone(f'{G.name}_gpu{gpu}'))
147 | D_gpus.append(D.clone(f'{D.name}_gpu{gpu}'))
148 |
149 | print('Initializing augmentations...')
150 | aug = None
151 | if augment_args.get('class_name', None) is not None:
152 | aug = dnnlib.util.construct_class_by_name(**augment_args)
153 | aug.init_validation_set(D_gpus=D_gpus, training_set=training_set)
154 |
155 | print('Setting up optimizers...')
156 | G_opt_args = dict(G_opt_args)
157 | D_opt_args = dict(D_opt_args)
158 | for args, reg_interval in [(G_opt_args, G_reg_interval), (D_opt_args, D_reg_interval)]:
159 | args['minibatch_multiplier'] = minibatch_size // num_gpus // minibatch_gpu
160 | if lazy_regularization:
161 | mb_ratio = reg_interval / (reg_interval + 1)
162 | args['learning_rate'] *= mb_ratio
163 | if 'beta1' in args: args['beta1'] **= mb_ratio
164 | if 'beta2' in args: args['beta2'] **= mb_ratio
165 | G_opt = tflib.Optimizer(name='TrainG', **G_opt_args)
166 | D_opt = tflib.Optimizer(name='TrainD', **D_opt_args)
167 | G_reg_opt = tflib.Optimizer(name='RegG', share=G_opt, **G_opt_args)
168 | D_reg_opt = tflib.Optimizer(name='RegD', share=D_opt, **D_opt_args)
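# Worked example of the scaling above (a sketch): with D_reg_interval = 16,
# mb_ratio = 16/17, so the regularization pass runs on every 16th minibatch
# while the learning rate and Adam betas are rescaled (lr *= 16/17,
# beta **= 16/17) to keep the effective optimizer dynamics close to
# non-lazy training.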
169 |
170 | print('Constructing training graph...')
171 | data_fetch_ops = []
172 | training_set.configure(minibatch_gpu)
173 | for gpu, (G_gpu, D_gpu) in enumerate(zip(G_gpus, D_gpus)):
174 | with tf.name_scope(f'Train_gpu{gpu}'), tf.device(f'/gpu:{gpu}'):
175 |
176 | # Fetch training data via temporary variables.
177 | with tf.name_scope('DataFetch'):
178 | real_images_var = tf.Variable(name='images', trainable=False, initial_value=tf.zeros([minibatch_gpu] + training_set.shape))
179 | real_labels_var = tf.Variable(name='labels', trainable=False, initial_value=tf.zeros([minibatch_gpu, training_set.label_size]))
180 | real_images_write, real_labels_write = training_set.get_minibatch_tf()
181 | real_images_write = tflib.convert_images_from_uint8(real_images_write)
182 | data_fetch_ops += [tf.assign(real_images_var, real_images_write)]
183 | data_fetch_ops += [tf.assign(real_labels_var, real_labels_write)]
184 |
185 | # Evaluate loss function and register gradients.
186 | fake_labels = training_set.get_random_labels_tf(minibatch_gpu)
187 | terms = dnnlib.util.call_func_by_name(G=G_gpu, D=D_gpu, aug=aug, fake_labels=fake_labels, real_images=real_images_var, real_labels=real_labels_var, **loss_args)
188 | if lazy_regularization:
189 | if terms.G_reg is not None: G_reg_opt.register_gradients(tf.reduce_mean(terms.G_reg * G_reg_interval), G_gpu.trainables)
190 | if terms.D_reg is not None: D_reg_opt.register_gradients(tf.reduce_mean(terms.D_reg * D_reg_interval), D_gpu.trainables)
191 | else:
192 | if terms.G_reg is not None: terms.G_loss += terms.G_reg
193 | if terms.D_reg is not None: terms.D_loss += terms.D_reg
194 | G_opt.register_gradients(tf.reduce_mean(terms.G_loss), G_gpu.trainables)
195 | D_opt.register_gradients(tf.reduce_mean(terms.D_loss), D_gpu.trainables)
196 |
197 | print('Finalizing training ops...')
198 | data_fetch_op = tf.group(*data_fetch_ops)
199 | G_train_op = G_opt.apply_updates()
200 | D_train_op = D_opt.apply_updates()
201 | G_reg_op = G_reg_opt.apply_updates(allow_no_op=True)
202 | D_reg_op = D_reg_opt.apply_updates(allow_no_op=True)
203 | Gs_beta_in = tf.placeholder(tf.float32, name='Gs_beta_in', shape=[])
204 | Gs_update_op = Gs.setup_as_moving_average_of(G, beta=Gs_beta_in)
205 | tflib.init_uninitialized_vars()
206 | with tf.device('/gpu:0'):
207 | peak_gpu_mem_op = tf.contrib.memory_stats.MaxBytesInUse()
208 |
209 | print('Initializing metrics...')
210 | summary_log = tf.summary.FileWriter(run_dir)
211 | metrics = []
212 | for args in metric_arg_list:
213 | metric = dnnlib.util.construct_class_by_name(**args)
214 | metric.configure(dataset_args=metric_dataset_args, run_dir=run_dir)
215 | metrics.append(metric)
216 |
217 | print(f'Training for {total_kimg} kimg...')
218 | print()
219 | if progress_fn is not None:
220 | progress_fn(0, total_kimg)
221 | tick_start_time = time.time()
222 | maintenance_time = tick_start_time - start_time
223 | cur_nimg = 0
224 | cur_tick = -1
225 | tick_start_nimg = cur_nimg
226 | running_mb_counter = 0
227 |
228 | done = False
229 | while not done:
230 |
231 | # Compute EMA decay parameter.
232 | Gs_nimg = G_smoothing_kimg * 1000.0
233 | if G_smoothing_rampup is not None:
234 | Gs_nimg = min(Gs_nimg, cur_nimg * G_smoothing_rampup)
235 | Gs_beta = 0.5 ** (minibatch_size / max(Gs_nimg, 1e-8))
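# Numeric sanity check of the half-life (a sketch): with minibatch_size = 32 and
# G_smoothing_kimg = 10, Gs_beta = 0.5 ** (32 / 10000) ~= 0.99778; after 10k
# images (10000 / 32 ~= 312 updates) the old weights retain 0.99778 ** 312 ~= 0.5
# of their contribution, i.e. exactly one half-life.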
236 |
237 | # Run training ops.
238 | for _repeat_idx in range(minibatch_repeats):
239 |             rounds = range(0, minibatch_size, minibatch_gpu * num_gpus) # one entry per gradient-accumulation round
240 | run_G_reg = (lazy_regularization and running_mb_counter % G_reg_interval == 0)
241 | run_D_reg = (lazy_regularization and running_mb_counter % D_reg_interval == 0)
242 | cur_nimg += minibatch_size
243 | running_mb_counter += 1
244 |
245 | # Fast path without gradient accumulation.
246 | if len(rounds) == 1:
247 |                 tflib.run([G_train_op, data_fetch_op]) # fetch the next real batch in parallel; the G step does not read the data vars
248 | if run_G_reg:
249 | tflib.run(G_reg_op)
250 | tflib.run([D_train_op, Gs_update_op], {Gs_beta_in: Gs_beta})
251 | if run_D_reg:
252 | tflib.run(D_reg_op)
253 |
254 | # Slow path with gradient accumulation.
255 | else:
256 | for _round in rounds:
257 | tflib.run(G_train_op)
258 | if run_G_reg:
259 | tflib.run(G_reg_op)
260 | tflib.run(Gs_update_op, {Gs_beta_in: Gs_beta})
261 | for _round in rounds:
262 | tflib.run(data_fetch_op)
263 | tflib.run(D_train_op)
264 | if run_D_reg:
265 | tflib.run(D_reg_op)
266 |
267 | # Run validation.
268 | if aug is not None:
269 | aug.run_validation(minibatch_size=minibatch_size)
270 |
271 | # Tune augmentation parameters.
272 | if aug is not None:
273 | aug.tune(minibatch_size * minibatch_repeats)
274 |
275 | # Perform maintenance tasks once per tick.
276 | done = (cur_nimg >= total_kimg * 1000) or (abort_fn is not None and abort_fn())
277 | if done or cur_tick < 0 or cur_nimg >= tick_start_nimg + kimg_per_tick * 1000:
278 | cur_tick += 1
279 | tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0
280 | tick_start_nimg = cur_nimg
281 | tick_end_time = time.time()
282 | total_time = tick_end_time - start_time
283 | tick_time = tick_end_time - tick_start_time
284 |
285 | # Report progress.
286 | print(' '.join([
287 | f"tick {autosummary('Progress/tick', cur_tick):<5d}",
288 | f"kimg {autosummary('Progress/kimg', cur_nimg / 1000.0):<8.1f}",
289 | f"time {dnnlib.util.format_time(autosummary('Timing/total_sec', total_time)):<12s}",
290 | f"sec/tick {autosummary('Timing/sec_per_tick', tick_time):<7.1f}",
291 | f"sec/kimg {autosummary('Timing/sec_per_kimg', tick_time / tick_kimg):<7.2f}",
292 | f"maintenance {autosummary('Timing/maintenance_sec', maintenance_time):<6.1f}",
293 | f"gpumem {autosummary('Resources/peak_gpu_mem_gb', peak_gpu_mem_op.eval() / 2**30):<5.1f}",
294 | f"augment {autosummary('Progress/augment', aug.strength if aug is not None else 0):.3f}",
295 | ]))
296 | autosummary('Timing/total_hours', total_time / (60.0 * 60.0))
297 | autosummary('Timing/total_days', total_time / (24.0 * 60.0 * 60.0))
298 | if progress_fn is not None:
299 | progress_fn(cur_nimg // 1000, total_kimg)
300 |
301 | # Save snapshots.
302 | if image_snapshot_ticks is not None and (done or cur_tick % image_snapshot_ticks == 0):
303 | grid_fakes = Gs.run(grid_latents, grid_labels, is_validation=True, minibatch_size=minibatch_gpu)
304 | save_image_grid(grid_fakes, os.path.join(run_dir, f'fakes{cur_nimg // 1000:06d}.png'), drange=[-1,1], grid_size=grid_size)
305 | if network_snapshot_ticks is not None and (done or cur_tick % network_snapshot_ticks == 0):
306 | pkl = os.path.join(run_dir, f'network-snapshot-{cur_nimg // 1000:06d}.pkl')
307 | with open(pkl, 'wb') as f:
308 | pickle.dump((G, D, Gs), f)
309 | if len(metrics):
310 | print('Evaluating metrics...')
311 | for metric in metrics:
312 | metric.run(pkl, num_gpus=num_gpus)
313 |
314 | # Update summaries.
315 | for metric in metrics:
316 | metric.update_autosummaries()
317 | tflib.autosummary.save_summaries(summary_log, cur_nimg)
318 | tick_start_time = time.time()
319 | maintenance_time = tick_start_time - tick_end_time
320 |
321 | print()
322 | print('Exiting...')
323 | summary_log.close()
324 | training_set.close()
325 |
326 | #----------------------------------------------------------------------------
327 |
--------------------------------------------------------------------------------
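The loop above pickles every snapshot as a (G, D, Gs) tuple; Gs is the EMA copy
normally used for sampling. A minimal loading sketch (the snapshot filename is
hypothetical; tflib.init_tf() creates the TF session first):

    import pickle
    import numpy as np
    import dnnlib.tflib as tflib

    tflib.init_tf()                                       # create the default TF session
    with open('network-snapshot-000100.pkl', 'rb') as f:  # hypothetical snapshot name
        _G, _D, Gs = pickle.load(f)
    latents = np.random.randn(4, *Gs.input_shape[1:])     # one latent vector per sample
    labels = np.zeros([4, 0])                             # label_size = 0 for unconditional models
    images = Gs.run(latents, labels, is_validation=True, minibatch_size=4)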
/stylegan2-ada/training/loss.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | """Loss functions."""
10 |
11 | import numpy as np
12 | import tensorflow as tf
13 | import dnnlib
14 | import dnnlib.tflib as tflib
15 | from dnnlib.tflib.autosummary import autosummary
16 |
17 | #----------------------------------------------------------------------------
18 | # Report statistic for all interested parties (AdaptiveAugment and tfevents).
19 |
20 | def report_stat(aug, name, value):
21 | if aug is not None:
22 | value = aug.report_stat(name, value)
23 | value = autosummary(name, value)
24 | return value
25 |
26 | #----------------------------------------------------------------------------
27 | # Report loss terms and collect them into EasyDict.
28 |
29 | def report_loss(aug, G_loss, D_loss, G_reg=None, D_reg=None):
30 | assert G_loss is not None and D_loss is not None
31 | terms = dnnlib.EasyDict(G_reg=None, D_reg=None)
32 | terms.G_loss = report_stat(aug, 'Loss/G/loss', G_loss)
33 | terms.D_loss = report_stat(aug, 'Loss/D/loss', D_loss)
34 | if G_reg is not None: terms.G_reg = report_stat(aug, 'Loss/G/reg', G_reg)
35 | if D_reg is not None: terms.D_reg = report_stat(aug, 'Loss/D/reg', D_reg)
36 | return terms
37 |
38 | #----------------------------------------------------------------------------
39 | # Evaluate G and return results as EasyDict.
40 |
41 | def eval_G(G, latents, labels, return_dlatents=False):
42 | r = dnnlib.EasyDict()
43 | r.args = dnnlib.EasyDict()
44 | r.args.is_training = True
45 | if return_dlatents:
46 | r.args.return_dlatents = True
47 | r.images = G.get_output_for(latents, labels, **r.args)
48 |
49 | r.dlatents = None
50 | if return_dlatents:
51 | r.images, r.dlatents = r.images
52 | return r
53 |
54 | #----------------------------------------------------------------------------
55 | # Evaluate D and return results as EasyDict.
56 |
57 | def eval_D(D, aug, images, labels, report=None, augment_inputs=True, return_aux=0):
58 | r = dnnlib.EasyDict()
59 | r.images_aug = images
60 | r.labels_aug = labels
61 | if augment_inputs and aug is not None:
62 | r.images_aug, r.labels_aug = aug.apply(r.images_aug, r.labels_aug)
63 |
64 | r.args = dnnlib.EasyDict()
65 | r.args.is_training = True
66 | if aug is not None:
67 | r.args.augment_strength = aug.get_strength_var()
68 | if return_aux > 0:
69 | r.args.score_size = return_aux + 1
70 | r.scores = D.get_output_for(r.images_aug, r.labels_aug, **r.args)
71 |
72 | r.aux = None
73 | if return_aux:
74 | r.aux = r.scores[:, 1:]
75 | r.scores = r.scores[:, :1]
76 |
77 | if report is not None:
78 | report_ops = [
79 | report_stat(aug, 'Loss/scores/' + report, r.scores),
80 | report_stat(aug, 'Loss/signs/' + report, tf.sign(r.scores)),
81 | report_stat(aug, 'Loss/squares/' + report, tf.square(r.scores)),
82 | ]
83 |         with tf.control_dependencies(report_ops): # make sure the stats are reported whenever the scores are used
84 |             r.scores = tf.identity(r.scores)
85 | return r
86 |
87 | #----------------------------------------------------------------------------
88 | # Non-saturating logistic loss with R1 and path length regularizers, used
89 | # in the paper "Analyzing and Improving the Image Quality of StyleGAN".
90 |
91 | def stylegan2(G, D, aug, fake_labels, real_images, real_labels, r1_gamma=10, pl_minibatch_shrink=2, pl_decay=0.01, pl_weight=2, **_kwargs):
92 | # Evaluate networks for the main loss.
93 | minibatch_size = tf.shape(fake_labels)[0]
94 | fake_latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
95 | G_fake = eval_G(G, fake_latents, fake_labels, return_dlatents=True)
96 | D_fake = eval_D(D, aug, G_fake.images, fake_labels, report='fake')
97 | D_real = eval_D(D, aug, real_images, real_labels, report='real')
98 |
99 | # Non-saturating logistic loss from "Generative Adversarial Nets".
100 | with tf.name_scope('Loss_main'):
101 | G_loss = tf.nn.softplus(-D_fake.scores) # -log(sigmoid(D_fake.scores)), pylint: disable=invalid-unary-operand-type
102 | D_loss = tf.nn.softplus(D_fake.scores) # -log(1 - sigmoid(D_fake.scores))
103 | D_loss += tf.nn.softplus(-D_real.scores) # -log(sigmoid(D_real.scores)), pylint: disable=invalid-unary-operand-type
104 | G_reg = 0
105 | D_reg = 0
106 |
107 | # R1 regularizer from "Which Training Methods for GANs do actually Converge?".
108 | if r1_gamma != 0:
109 | with tf.name_scope('Loss_R1'):
110 | r1_grads = tf.gradients(tf.reduce_sum(D_real.scores), [real_images])[0]
111 | r1_penalty = tf.reduce_sum(tf.square(r1_grads), axis=[1,2,3])
112 | r1_penalty = report_stat(aug, 'Loss/r1_penalty', r1_penalty)
113 | D_reg += r1_penalty * (r1_gamma * 0.5)
114 |
115 | # Path length regularizer from "Analyzing and Improving the Image Quality of StyleGAN".
116 | if pl_weight != 0:
117 | with tf.name_scope('Loss_PL'):
118 |
119 | # Evaluate the regularization term using a smaller minibatch to conserve memory.
120 | G_pl = G_fake
121 | if pl_minibatch_shrink > 1:
122 | pl_minibatch_size = minibatch_size // pl_minibatch_shrink
123 | pl_latents = fake_latents[:pl_minibatch_size]
124 | pl_labels = fake_labels[:pl_minibatch_size]
125 | G_pl = eval_G(G, pl_latents, pl_labels, return_dlatents=True)
126 |
127 | # Compute |J*y|.
128 | pl_noise = tf.random_normal(tf.shape(G_pl.images)) / np.sqrt(np.prod(G.output_shape[2:]))
129 | pl_grads = tf.gradients(tf.reduce_sum(G_pl.images * pl_noise), [G_pl.dlatents])[0]
130 | pl_lengths = tf.sqrt(tf.reduce_mean(tf.reduce_sum(tf.square(pl_grads), axis=2), axis=1))
131 |
132 | # Track exponential moving average of |J*y|.
133 | with tf.control_dependencies(None):
134 | pl_mean_var = tf.Variable(name='pl_mean', trainable=False, initial_value=0, dtype=tf.float32)
135 | pl_mean = pl_mean_var + pl_decay * (tf.reduce_mean(pl_lengths) - pl_mean_var)
136 | pl_update = tf.assign(pl_mean_var, pl_mean)
137 |
138 | # Calculate (|J*y|-a)^2.
139 | with tf.control_dependencies([pl_update]):
140 | pl_penalty = tf.square(pl_lengths - pl_mean)
141 | pl_penalty = report_stat(aug, 'Loss/pl_penalty', pl_penalty)
142 |
143 | # Apply weight.
144 | #
145 | # Note: The division in pl_noise decreases the weight by num_pixels, and the reduce_mean
146 | # in pl_lengths decreases it by num_affine_layers. The effective weight then becomes:
147 | #
148 | # gamma_pl = pl_weight / num_pixels / num_affine_layers
149 | # = 2 / (r^2) / (log2(r) * 2 - 2)
150 | # = 1 / (r^2 * (log2(r) - 1))
151 |         # = ln(2) / (r^2 * (ln(r) - ln(2)))
152 | #
153 | G_reg += tf.tile(pl_penalty, [pl_minibatch_shrink]) * pl_weight
154 |
155 | return report_loss(aug, G_loss, D_loss, G_reg, D_reg)
156 |
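# Numerical check (a sketch) of the softplus identities used in Loss_main above,
# i.e. softplus(-s) == -log(sigmoid(s)) and softplus(s) == -log(1 - sigmoid(s)):
#
#   import numpy as np
#   s = np.linspace(-5.0, 5.0, 11)
#   sigmoid = 1.0 / (1.0 + np.exp(-s))
#   assert np.allclose(np.log1p(np.exp(-s)), -np.log(sigmoid))
#   assert np.allclose(np.log1p(np.exp(s)), -np.log(1.0 - sigmoid))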
157 | #----------------------------------------------------------------------------
158 | # Hybrid loss used for comparison methods used in the paper
159 | # "Training Generative Adversarial Networks with Limited Data".
160 |
161 | def cmethods(G, D, aug, fake_labels, real_images, real_labels,
162 | r1_gamma=10, r2_gamma=0,
163 | pl_minibatch_shrink=2, pl_decay=0.01, pl_weight=2,
164 | bcr_real_weight=0, bcr_fake_weight=0, bcr_augment=None,
165 | zcr_gen_weight=0, zcr_dis_weight=0, zcr_noise_std=0.1,
166 | auxrot_alpha=0, auxrot_beta=0,
167 | **_kwargs,
168 | ):
169 | # Evaluate networks for the main loss.
170 | minibatch_size = tf.shape(fake_labels)[0]
171 | fake_latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
172 | G_fake = eval_G(G, fake_latents, fake_labels)
173 | D_fake = eval_D(D, aug, G_fake.images, fake_labels, report='fake')
174 | D_real = eval_D(D, aug, real_images, real_labels, report='real')
175 |
176 | # Non-saturating logistic loss from "Generative Adversarial Nets".
177 | with tf.name_scope('Loss_main'):
178 | G_loss = tf.nn.softplus(-D_fake.scores) # -log(sigmoid(D_fake.scores)), pylint: disable=invalid-unary-operand-type
179 | D_loss = tf.nn.softplus(D_fake.scores) # -log(1 - sigmoid(D_fake.scores))
180 | D_loss += tf.nn.softplus(-D_real.scores) # -log(sigmoid(D_real.scores)), pylint: disable=invalid-unary-operand-type
181 | G_reg = 0
182 | D_reg = 0
183 |
184 | # R1 and R2 regularizers from "Which Training Methods for GANs do actually Converge?".
185 | if r1_gamma != 0 or r2_gamma != 0:
186 | with tf.name_scope('Loss_R1R2'):
187 | if r1_gamma != 0:
188 | r1_grads = tf.gradients(tf.reduce_sum(D_real.scores), [real_images])[0]
189 | r1_penalty = tf.reduce_sum(tf.square(r1_grads), axis=[1,2,3])
190 | r1_penalty = report_stat(aug, 'Loss/r1_penalty', r1_penalty)
191 | D_reg += r1_penalty * (r1_gamma * 0.5)
192 | if r2_gamma != 0:
193 | r2_grads = tf.gradients(tf.reduce_sum(D_fake.scores), [G_fake.images])[0]
194 | r2_penalty = tf.reduce_sum(tf.square(r2_grads), axis=[1,2,3])
195 | r2_penalty = report_stat(aug, 'Loss/r2_penalty', r2_penalty)
196 | D_reg += r2_penalty * (r2_gamma * 0.5)
197 |
198 | # Path length regularizer from "Analyzing and Improving the Image Quality of StyleGAN".
199 | if pl_weight != 0:
200 | with tf.name_scope('Loss_PL'):
201 | pl_minibatch_size = minibatch_size // pl_minibatch_shrink
202 | pl_latents = fake_latents[:pl_minibatch_size]
203 | pl_labels = fake_labels[:pl_minibatch_size]
204 | G_pl = eval_G(G, pl_latents, pl_labels, return_dlatents=True)
205 | pl_noise = tf.random_normal(tf.shape(G_pl.images)) / np.sqrt(np.prod(G.output_shape[2:]))
206 | pl_grads = tf.gradients(tf.reduce_sum(G_pl.images * pl_noise), [G_pl.dlatents])[0]
207 | pl_lengths = tf.sqrt(tf.reduce_mean(tf.reduce_sum(tf.square(pl_grads), axis=2), axis=1))
208 | with tf.control_dependencies(None):
209 | pl_mean_var = tf.Variable(name='pl_mean', trainable=False, initial_value=0, dtype=tf.float32)
210 | pl_mean = pl_mean_var + pl_decay * (tf.reduce_mean(pl_lengths) - pl_mean_var)
211 | pl_update = tf.assign(pl_mean_var, pl_mean)
212 | with tf.control_dependencies([pl_update]):
213 | pl_penalty = tf.square(pl_lengths - pl_mean)
214 | pl_penalty = report_stat(aug, 'Loss/pl_penalty', pl_penalty)
215 | G_reg += tf.tile(pl_penalty, [pl_minibatch_shrink]) * pl_weight
216 |
217 | # bCR regularizer from "Improved consistency regularization for GANs".
218 | if (bcr_real_weight != 0 or bcr_fake_weight != 0) and bcr_augment is not None:
219 | with tf.name_scope('Loss_bCR'):
220 | if bcr_real_weight != 0:
221 | bcr_real_images, bcr_real_labels = dnnlib.util.call_func_by_name(D_real.images_aug, D_real.labels_aug, **bcr_augment)
222 | D_bcr_real = eval_D(D, aug, bcr_real_images, bcr_real_labels, report='real_bcr', augment_inputs=False)
223 | bcr_real_penalty = tf.square(D_bcr_real.scores - D_real.scores)
224 | bcr_real_penalty = report_stat(aug, 'Loss/bcr_penalty/real', bcr_real_penalty)
225 | D_loss += bcr_real_penalty * bcr_real_weight # NOTE: Must not use lazy regularization for this term.
226 | if bcr_fake_weight != 0:
227 | bcr_fake_images, bcr_fake_labels = dnnlib.util.call_func_by_name(D_fake.images_aug, D_fake.labels_aug, **bcr_augment)
228 | D_bcr_fake = eval_D(D, aug, bcr_fake_images, bcr_fake_labels, report='fake_bcr', augment_inputs=False)
229 | bcr_fake_penalty = tf.square(D_bcr_fake.scores - D_fake.scores)
230 | bcr_fake_penalty = report_stat(aug, 'Loss/bcr_penalty/fake', bcr_fake_penalty)
231 | D_loss += bcr_fake_penalty * bcr_fake_weight # NOTE: Must not use lazy regularization for this term.
232 |
233 | # zCR regularizer from "Improved consistency regularization for GANs".
234 | if zcr_gen_weight != 0 or zcr_dis_weight != 0:
235 | with tf.name_scope('Loss_zCR'):
236 | zcr_fake_latents = fake_latents + tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) * zcr_noise_std
237 | G_zcr = eval_G(G, zcr_fake_latents, fake_labels)
238 | if zcr_gen_weight > 0:
239 | zcr_gen_penalty = -tf.reduce_mean(tf.square(G_fake.images - G_zcr.images), axis=[1,2,3])
240 | zcr_gen_penalty = report_stat(aug, 'Loss/zcr_gen_penalty', zcr_gen_penalty)
241 | G_loss += zcr_gen_penalty * zcr_gen_weight
242 | if zcr_dis_weight > 0:
243 | D_zcr = eval_D(D, aug, G_zcr.images, fake_labels, report='fake_zcr', augment_inputs=False)
244 | zcr_dis_penalty = tf.square(D_fake.scores - D_zcr.scores)
245 | zcr_dis_penalty = report_stat(aug, 'Loss/zcr_dis_penalty', zcr_dis_penalty)
246 | D_loss += zcr_dis_penalty * zcr_dis_weight
247 |
248 | # Auxiliary rotation loss from "Self-supervised GANs via auxiliary rotation loss".
249 | if auxrot_alpha != 0 or auxrot_beta != 0:
250 | with tf.name_scope('Loss_AuxRot'):
251 | idx = tf.range(minibatch_size * 4, dtype=tf.int32) // minibatch_size
252 | b0 = tf.logical_or(tf.equal(idx, 0), tf.equal(idx, 1))
253 | b1 = tf.logical_or(tf.equal(idx, 0), tf.equal(idx, 3))
254 | b2 = tf.logical_or(tf.equal(idx, 0), tf.equal(idx, 2))
255 | if auxrot_alpha != 0:
256 | auxrot_fake = tf.tile(G_fake.images, [4, 1, 1, 1])
257 | auxrot_fake = tf.where(b0, auxrot_fake, tf.reverse(auxrot_fake, [2]))
258 | auxrot_fake = tf.where(b1, auxrot_fake, tf.reverse(auxrot_fake, [3]))
259 | auxrot_fake = tf.where(b2, auxrot_fake, tf.transpose(auxrot_fake, [0, 1, 3, 2]))
260 | D_auxrot_fake = eval_D(D, aug, auxrot_fake, fake_labels, return_aux=4)
261 | G_loss += tf.nn.sparse_softmax_cross_entropy_with_logits(labels=idx, logits=D_auxrot_fake.aux) * auxrot_alpha
262 | if auxrot_beta != 0:
263 | auxrot_real = tf.tile(real_images, [4, 1, 1, 1])
264 | auxrot_real = tf.where(b0, auxrot_real, tf.reverse(auxrot_real, [2]))
265 | auxrot_real = tf.where(b1, auxrot_real, tf.reverse(auxrot_real, [3]))
266 | auxrot_real = tf.where(b2, auxrot_real, tf.transpose(auxrot_real, [0, 1, 3, 2]))
267 | D_auxrot_real = eval_D(D, aug, auxrot_real, real_labels, return_aux=4)
268 | D_loss += tf.nn.sparse_softmax_cross_entropy_with_logits(labels=idx, logits=D_auxrot_real.aux) * auxrot_beta
269 |
270 | return report_loss(aug, G_loss, D_loss, G_reg, D_reg)
271 |
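# The reverse/transpose cascade in Loss_AuxRot implements 0/90/180/270-degree
# rotations on the four tiled quarters of the batch (NCHW; axis 2 = H, axis 3 = W).
# A 2-D numpy check of the same compositions (a sketch):
#
#   import numpy as np
#   a = np.arange(4).reshape(2, 2)
#   assert np.array_equal(a[:, ::-1].T, np.rot90(a, 1))  # flip W, transpose ->  90
#   assert np.array_equal(a[::-1, ::-1], np.rot90(a, 2)) # flip H and W      -> 180
#   assert np.array_equal(a[::-1, :].T, np.rot90(a, 3))  # flip H, transpose -> 270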
272 | #----------------------------------------------------------------------------
273 | # WGAN-GP loss with epsilon penalty, used in the paper
274 | # "Progressive Growing of GANs for Improved Quality, Stability, and Variation".
275 |
276 | def wgangp(G, D, aug, fake_labels, real_images, real_labels, wgan_epsilon=0.001, wgan_lambda=10, wgan_target=1, **_kwargs):
277 | minibatch_size = tf.shape(fake_labels)[0]
278 | fake_latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
279 | G_fake = eval_G(G, fake_latents, fake_labels)
280 | D_fake = eval_D(D, aug, G_fake.images, fake_labels, report='fake')
281 | D_real = eval_D(D, aug, real_images, real_labels, report='real')
282 |
283 | # WGAN loss from "Wasserstein Generative Adversarial Networks".
284 | with tf.name_scope('Loss_main'):
285 | G_loss = -D_fake.scores # pylint: disable=invalid-unary-operand-type
286 | D_loss = D_fake.scores - D_real.scores
287 |
288 |     # Epsilon penalty from "Progressive Growing of GANs for Improved Quality, Stability, and Variation".
289 | with tf.name_scope('Loss_epsilon'):
290 | epsilon_penalty = report_stat(aug, 'Loss/epsilon_penalty', tf.square(D_real.scores))
291 | D_loss += epsilon_penalty * wgan_epsilon
292 |
293 | # Gradient penalty from "Improved Training of Wasserstein GANs".
294 | with tf.name_scope('Loss_GP'):
295 | mix_factors = tf.random_uniform([minibatch_size, 1, 1, 1], 0, 1, dtype=G_fake.images.dtype)
296 | mix_images = tflib.lerp(tf.cast(real_images, G_fake.images.dtype), G_fake.images, mix_factors)
297 | mix_labels = real_labels # NOTE: Mixing is performed without respect to fake_labels.
298 | D_mix = eval_D(D, aug, mix_images, mix_labels, report='mix')
299 | mix_grads = tf.gradients(tf.reduce_sum(D_mix.scores), [mix_images])[0]
300 | mix_norms = tf.sqrt(tf.reduce_sum(tf.square(mix_grads), axis=[1,2,3]))
301 | mix_norms = report_stat(aug, 'Loss/mix_norms', mix_norms)
302 | gradient_penalty = tf.square(mix_norms - wgan_target)
303 | D_reg = gradient_penalty * (wgan_lambda / (wgan_target**2))
304 |
305 | return report_loss(aug, G_loss, D_loss, None, D_reg)
306 |
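# Worked example of the penalty scale (a sketch): with wgan_target = 1 and
# wgan_lambda = 10, a mixed-sample gradient norm of 1.5 contributes
# (1.5 - 1)^2 * 10 / 1^2 = 2.5 to D_reg, pulling ||grad D|| toward 1 from both sides.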
307 | #----------------------------------------------------------------------------
308 |
--------------------------------------------------------------------------------
/stylegan2-ada/dnnlib/util.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # NVIDIA CORPORATION and its licensors retain all intellectual property
4 | # and proprietary rights in and to this software, related documentation
5 | # and any modifications thereto. Any use, reproduction, disclosure or
6 | # distribution of this software and related documentation without an express
7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
8 |
9 | """Miscellaneous utility classes and functions."""
10 |
11 | import ctypes
12 | import fnmatch
13 | import importlib
14 | import inspect
15 | import numpy as np
16 | import os
17 | import shutil
18 | import sys
19 | import types
20 | import io
21 | import pickle
22 | import re
23 | import requests
24 | import html
25 | import hashlib
26 | import glob
27 | import tempfile
28 | import urllib
29 | import urllib.request
30 | import uuid
31 |
32 | from distutils.util import strtobool
33 | from typing import Any, List, Tuple, Union
34 |
35 |
36 | # Util classes
37 | # ------------------------------------------------------------------------------------------
38 |
39 |
40 | class EasyDict(dict):
41 | """Convenience class that behaves like a dict but allows access with the attribute syntax."""
42 |
43 | def __getattr__(self, name: str) -> Any:
44 | try:
45 | return self[name]
46 | except KeyError:
47 | raise AttributeError(name)
48 |
49 | def __setattr__(self, name: str, value: Any) -> None:
50 | self[name] = value
51 |
52 | def __delattr__(self, name: str) -> None:
53 | del self[name]
54 |
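# A minimal usage sketch:
#
#   cfg = EasyDict(lr=0.002)
#   cfg.batch = 32 # attribute writes go through to the underlying dict
#   assert cfg['batch'] == 32 and cfg.lr == 0.002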
55 |
56 | class Logger(object):
57 | """Redirect stderr to stdout, optionally print stdout to a file, and optionally force flushing on both stdout and the file."""
58 |
59 | def __init__(self, file_name: str = None, file_mode: str = "w", should_flush: bool = True):
60 | self.file = None
61 |
62 | if file_name is not None:
63 | self.file = open(file_name, file_mode)
64 |
65 | self.should_flush = should_flush
66 | self.stdout = sys.stdout
67 | self.stderr = sys.stderr
68 |
69 | sys.stdout = self
70 | sys.stderr = self
71 |
72 | def __enter__(self) -> "Logger":
73 | return self
74 |
75 | def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
76 | self.close()
77 |
78 | def write(self, text: str) -> None:
79 | """Write text to stdout (and a file) and optionally flush."""
80 | if len(text) == 0: # workaround for a bug in VSCode debugger: sys.stdout.write(''); sys.stdout.flush() => crash
81 | return
82 |
83 | if self.file is not None:
84 | self.file.write(text)
85 |
86 | self.stdout.write(text)
87 |
88 | if self.should_flush:
89 | self.flush()
90 |
91 | def flush(self) -> None:
92 | """Flush written text to both stdout and a file, if open."""
93 | if self.file is not None:
94 | self.file.flush()
95 |
96 | self.stdout.flush()
97 |
98 | def close(self) -> None:
99 | """Flush, close possible files, and remove stdout/stderr mirroring."""
100 | self.flush()
101 |
102 | # if using multiple loggers, prevent closing in wrong order
103 | if sys.stdout is self:
104 | sys.stdout = self.stdout
105 | if sys.stderr is self:
106 | sys.stderr = self.stderr
107 |
108 | if self.file is not None:
109 | self.file.close()
110 |
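# Typical use (a sketch): mirror stdout/stderr to a log file for the duration of a run.
#
#   with Logger(file_name='log.txt'):
#       print('this line goes to both the console and log.txt')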
111 |
112 | # Cache directories
113 | # ------------------------------------------------------------------------------------------
114 |
115 | _dnnlib_cache_dir = None
116 |
117 | def set_cache_dir(path: str) -> None:
118 | global _dnnlib_cache_dir
119 | _dnnlib_cache_dir = path
120 |
121 | def make_cache_dir_path(*paths: str) -> str:
122 | if _dnnlib_cache_dir is not None:
123 | return os.path.join(_dnnlib_cache_dir, *paths)
124 | if 'DNNLIB_CACHE_DIR' in os.environ:
125 | return os.path.join(os.environ['DNNLIB_CACHE_DIR'], *paths)
126 | if 'HOME' in os.environ:
127 | return os.path.join(os.environ['HOME'], '.cache', 'dnnlib', *paths)
128 | if 'USERPROFILE' in os.environ:
129 | return os.path.join(os.environ['USERPROFILE'], '.cache', 'dnnlib', *paths)
130 | return os.path.join(tempfile.gettempdir(), '.cache', 'dnnlib', *paths)
131 |
132 | # Small util functions
133 | # ------------------------------------------------------------------------------------------
134 |
135 |
136 | def format_time(seconds: Union[int, float]) -> str:
137 |     """Convert seconds to a human-readable string with days, hours, minutes, and seconds."""
138 | s = int(np.rint(seconds))
139 |
140 | if s < 60:
141 | return "{0}s".format(s)
142 | elif s < 60 * 60:
143 | return "{0}m {1:02}s".format(s // 60, s % 60)
144 | elif s < 24 * 60 * 60:
145 | return "{0}h {1:02}m {2:02}s".format(s // (60 * 60), (s // 60) % 60, s % 60)
146 | else:
147 | return "{0}d {1:02}h {2:02}m".format(s // (24 * 60 * 60), (s // (60 * 60)) % 24, (s // 60) % 60)
148 |
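# Example outputs (a sketch): format_time(42) -> '42s',
# format_time(3661) -> '1h 01m 01s', format_time(176525) -> '2d 01h 02m'.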
149 |
150 | def ask_yes_no(question: str) -> bool:
151 | """Ask the user the question until the user inputs a valid answer."""
152 | while True:
153 | try:
154 | print("{0} [y/n]".format(question))
155 | return strtobool(input().lower())
156 | except ValueError:
157 | pass
158 |
159 |
160 | def tuple_product(t: Tuple) -> Any:
161 | """Calculate the product of the tuple elements."""
162 | result = 1
163 |
164 | for v in t:
165 | result *= v
166 |
167 | return result
168 |
169 |
170 | _str_to_ctype = {
171 | "uint8": ctypes.c_ubyte,
172 | "uint16": ctypes.c_uint16,
173 | "uint32": ctypes.c_uint32,
174 | "uint64": ctypes.c_uint64,
175 | "int8": ctypes.c_byte,
176 | "int16": ctypes.c_int16,
177 | "int32": ctypes.c_int32,
178 | "int64": ctypes.c_int64,
179 | "float32": ctypes.c_float,
180 | "float64": ctypes.c_double
181 | }
182 |
183 |
184 | def get_dtype_and_ctype(type_obj: Any) -> Tuple[np.dtype, Any]:
185 | """Given a type name string (or an object having a __name__ attribute), return matching Numpy and ctypes types that have the same size in bytes."""
186 | type_str = None
187 |
188 | if isinstance(type_obj, str):
189 | type_str = type_obj
190 | elif hasattr(type_obj, "__name__"):
191 | type_str = type_obj.__name__
192 | elif hasattr(type_obj, "name"):
193 | type_str = type_obj.name
194 | else:
195 | raise RuntimeError("Cannot infer type name from input")
196 |
197 | assert type_str in _str_to_ctype.keys()
198 |
199 | my_dtype = np.dtype(type_str)
200 | my_ctype = _str_to_ctype[type_str]
201 |
202 | assert my_dtype.itemsize == ctypes.sizeof(my_ctype)
203 |
204 | return my_dtype, my_ctype
205 |
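# Example (a sketch): get_dtype_and_ctype(np.float32) returns
# (np.dtype('float32'), ctypes.c_float); the itemsize assertion above guarantees
# the numpy and ctypes views agree byte-for-byte.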
206 |
207 | def is_pickleable(obj: Any) -> bool:
208 | try:
209 | with io.BytesIO() as stream:
210 | pickle.dump(obj, stream)
211 | return True
212 | except:
213 | return False
214 |
215 |
216 | # Functionality to import modules/objects by name, and call functions by name
217 | # ------------------------------------------------------------------------------------------
218 |
219 | def get_module_from_obj_name(obj_name: str) -> Tuple[types.ModuleType, str]:
220 |     """Searches for the underlying module behind the name of some python object.
221 | Returns the module and the object name (original name with module part removed)."""
222 |
223 | # allow convenience shorthands, substitute them by full names
224 |     obj_name = re.sub(r"^np\.", "numpy.", obj_name)
225 |     obj_name = re.sub(r"^tf\.", "tensorflow.", obj_name)
226 |
227 | # list alternatives for (module_name, local_obj_name)
228 | parts = obj_name.split(".")
229 | name_pairs = [(".".join(parts[:i]), ".".join(parts[i:])) for i in range(len(parts), 0, -1)]
230 |
231 | # try each alternative in turn
232 | for module_name, local_obj_name in name_pairs:
233 | try:
234 | module = importlib.import_module(module_name) # may raise ImportError
235 | get_obj_from_module(module, local_obj_name) # may raise AttributeError
236 | return module, local_obj_name
237 | except:
238 | pass
239 |
240 | # maybe some of the modules themselves contain errors?
241 | for module_name, _local_obj_name in name_pairs:
242 | try:
243 | importlib.import_module(module_name) # may raise ImportError
244 | except ImportError:
245 | if not str(sys.exc_info()[1]).startswith("No module named '" + module_name + "'"):
246 | raise
247 |
248 | # maybe the requested attribute is missing?
249 | for module_name, local_obj_name in name_pairs:
250 | try:
251 | module = importlib.import_module(module_name) # may raise ImportError
252 | get_obj_from_module(module, local_obj_name) # may raise AttributeError
253 | except ImportError:
254 | pass
255 |
256 | # we are out of luck, but we have no idea why
257 | raise ImportError(obj_name)
258 |
259 |
260 | def get_obj_from_module(module: types.ModuleType, obj_name: str) -> Any:
261 | """Traverses the object name and returns the last (rightmost) python object."""
262 | if obj_name == '':
263 | return module
264 | obj = module
265 | for part in obj_name.split("."):
266 | obj = getattr(obj, part)
267 | return obj
268 |
269 |
270 | def get_obj_by_name(name: str) -> Any:
271 | """Finds the python object with the given name."""
272 | module, obj_name = get_module_from_obj_name(name)
273 | return get_obj_from_module(module, obj_name)
274 |
275 |
276 | def call_func_by_name(*args, func_name: str = None, **kwargs) -> Any:
277 | """Finds the python object with the given name and calls it as a function."""
278 | assert func_name is not None
279 | func_obj = get_obj_by_name(func_name)
280 | assert callable(func_obj)
281 | return func_obj(*args, **kwargs)
282 |
283 |
284 | def construct_class_by_name(*args, class_name: str = None, **kwargs) -> Any:
285 | """Finds the python class with the given name and constructs it with the given arguments."""
286 | return call_func_by_name(*args, func_name=class_name, **kwargs)
287 |
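# Usage sketch for the helpers above:
#
#   get_obj_by_name('np.random.randn')         # 'np.' expands to 'numpy.'
#   call_func_by_name(3, func_name='np.zeros') # == numpy.zeros(3)
#   construct_class_by_name(class_name='dnnlib.EasyDict', a=1)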
288 |
289 | def get_module_dir_by_obj_name(obj_name: str) -> str:
290 | """Get the directory path of the module containing the given object name."""
291 | module, _ = get_module_from_obj_name(obj_name)
292 | return os.path.dirname(inspect.getfile(module))
293 |
294 |
295 | def is_top_level_function(obj: Any) -> bool:
296 | """Determine whether the given object is a top-level function, i.e., defined at module scope using 'def'."""
297 | return callable(obj) and obj.__name__ in sys.modules[obj.__module__].__dict__
298 |
299 |
300 | def get_top_level_function_name(obj: Any) -> str:
301 | """Return the fully-qualified name of a top-level function."""
302 | assert is_top_level_function(obj)
303 | module = obj.__module__
304 | if module == '__main__':
305 | module = os.path.splitext(os.path.basename(sys.modules[module].__file__))[0]
306 | return module + "." + obj.__name__
307 |
308 |
309 | # File system helpers
310 | # ------------------------------------------------------------------------------------------
311 |
312 | def list_dir_recursively_with_ignore(dir_path: str, ignores: List[str] = None, add_base_to_relative: bool = False) -> List[Tuple[str, str]]:
313 | """List all files recursively in a given directory while ignoring given file and directory names.
314 | Returns list of tuples containing both absolute and relative paths."""
315 | assert os.path.isdir(dir_path)
316 | base_name = os.path.basename(os.path.normpath(dir_path))
317 |
318 | if ignores is None:
319 | ignores = []
320 |
321 | result = []
322 |
323 | for root, dirs, files in os.walk(dir_path, topdown=True):
324 | for ignore_ in ignores:
325 | dirs_to_remove = [d for d in dirs if fnmatch.fnmatch(d, ignore_)]
326 |
327 | # dirs need to be edited in-place
328 | for d in dirs_to_remove:
329 | dirs.remove(d)
330 |
331 | files = [f for f in files if not fnmatch.fnmatch(f, ignore_)]
332 |
333 | absolute_paths = [os.path.join(root, f) for f in files]
334 | relative_paths = [os.path.relpath(p, dir_path) for p in absolute_paths]
335 |
336 | if add_base_to_relative:
337 | relative_paths = [os.path.join(base_name, p) for p in relative_paths]
338 |
339 | assert len(absolute_paths) == len(relative_paths)
340 | result += zip(absolute_paths, relative_paths)
341 |
342 | return result
343 |
344 |
345 | def copy_files_and_create_dirs(files: List[Tuple[str, str]]) -> None:
346 | """Takes in a list of tuples of (src, dst) paths and copies files.
347 | Will create all necessary directories."""
348 | for file in files:
349 | target_dir_name = os.path.dirname(file[1])
350 |
351 | # will create all intermediate-level directories
352 | if not os.path.exists(target_dir_name):
353 | os.makedirs(target_dir_name)
354 |
355 | shutil.copyfile(file[0], file[1])
356 |
357 |
358 | # URL helpers
359 | # ------------------------------------------------------------------------------------------
360 |
361 | def is_url(obj: Any, allow_file_urls: bool = False) -> bool:
362 | """Determine whether the given object is a valid URL string."""
363 | if not isinstance(obj, str) or not "://" in obj:
364 | return False
365 | if allow_file_urls and obj.startswith('file://'):
366 | return True
367 | try:
368 | res = requests.compat.urlparse(obj)
369 | if not res.scheme or not res.netloc or not "." in res.netloc:
370 | return False
371 | res = requests.compat.urlparse(requests.compat.urljoin(obj, "/"))
372 | if not res.scheme or not res.netloc or not "." in res.netloc:
373 | return False
374 | except:
375 | return False
376 | return True
377 |
378 |
379 | def open_url(url: str, cache_dir: str = None, num_attempts: int = 10, verbose: bool = True, return_filename: bool = False, cache: bool = True) -> Any:
380 | """Download the given URL and return a binary-mode file object to access the data."""
381 | assert num_attempts >= 1
382 | assert not (return_filename and (not cache))
383 |
384 |     # Doesn't look like a URL scheme, so interpret it as a local filename.
385 | if not re.match('^[a-z]+://', url):
386 | return url if return_filename else open(url, "rb")
387 |
388 | # Handle file URLs. This code handles unusual file:// patterns that
389 | # arise on Windows:
390 | #
391 | # file:///c:/foo.txt
392 | #
393 | # which would translate to a local '/c:/foo.txt' filename that's
394 | # invalid. Drop the forward slash for such pathnames.
395 | #
396 | # If you touch this code path, you should test it on both Linux and
397 | # Windows.
398 | #
399 |     # Some internet resources suggest using urllib.request.url2pathname(),
400 |     # but that converts forward slashes to backslashes and this causes
401 | # its own set of problems.
402 | if url.startswith('file://'):
403 | filename = urllib.parse.urlparse(url).path
404 | if re.match(r'^/[a-zA-Z]:', filename):
405 | filename = filename[1:]
406 | return filename if return_filename else open(filename, "rb")
407 |
408 | assert is_url(url)
409 |
410 | # Lookup from cache.
411 | if cache_dir is None:
412 | cache_dir = make_cache_dir_path('downloads')
413 |
414 | url_md5 = hashlib.md5(url.encode("utf-8")).hexdigest()
415 | if cache:
416 | cache_files = glob.glob(os.path.join(cache_dir, url_md5 + "_*"))
417 | if len(cache_files) == 1:
418 | filename = cache_files[0]
419 | return filename if return_filename else open(filename, "rb")
420 |
421 | # Download.
422 | url_name = None
423 | url_data = None
424 | with requests.Session() as session:
425 | if verbose:
426 | print("Downloading %s ..." % url, end="", flush=True)
427 | for attempts_left in reversed(range(num_attempts)):
428 | try:
429 | with session.get(url) as res:
430 | res.raise_for_status()
431 | if len(res.content) == 0:
432 | raise IOError("No data received")
433 |
434 | if len(res.content) < 8192:
435 | content_str = res.content.decode("utf-8")
436 | if "download_warning" in res.headers.get("Set-Cookie", ""):
437 | links = [html.unescape(link) for link in content_str.split('"') if "export=download" in link]
438 | if len(links) == 1:
439 | url = requests.compat.urljoin(url, links[0])
440 | raise IOError("Google Drive virus checker nag")
441 | if "Google Drive - Quota exceeded" in content_str:
442 | raise IOError("Google Drive download quota exceeded -- please try again later")
443 |
444 | match = re.search(r'filename="([^"]*)"', res.headers.get("Content-Disposition", ""))
445 | url_name = match[1] if match else url
446 | url_data = res.content
447 | if verbose:
448 | print(" done")
449 | break
450 | except:
451 | if not attempts_left:
452 | if verbose:
453 | print(" failed")
454 | raise
455 | if verbose:
456 | print(".", end="", flush=True)
457 |
458 | # Save to cache.
459 | if cache:
460 | safe_name = re.sub(r"[^0-9a-zA-Z-._]", "_", url_name)
461 | cache_file = os.path.join(cache_dir, url_md5 + "_" + safe_name)
462 | temp_file = os.path.join(cache_dir, "tmp_" + uuid.uuid4().hex + "_" + url_md5 + "_" + safe_name)
463 | os.makedirs(cache_dir, exist_ok=True)
464 | with open(temp_file, "wb") as f:
465 | f.write(url_data)
466 | os.replace(temp_file, cache_file) # atomic
467 | if return_filename:
468 | return cache_file
469 |
470 | # Return data as file object.
471 | assert not return_filename
472 | return io.BytesIO(url_data)
473 |
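# Usage sketch: open_url() transparently handles local paths, file:// URLs, and
# http(s) URLs with on-disk caching (the URL below is hypothetical):
#
#   with open_url('https://example.com/networks/model.pkl') as f:
#       data = f.read()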
--------------------------------------------------------------------------------