├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.md ├── cat.png ├── examples ├── evaluate_imagenet.py ├── evaluate_imagenet_all.sh ├── generate_summary.py ├── imagenet_preprocessing.py └── train_yolov2.py ├── pyproject.toml ├── pytest.ini ├── setup.py ├── summary.png ├── tensornets ├── __init__.py ├── capsulenets.py ├── contrib_framework │ ├── __init__.py │ ├── arg_scope.py │ └── variables.py ├── contrib_layers │ ├── __init__.py │ ├── initializers.py │ ├── layers.py │ ├── normalization.py │ ├── optimizers.py │ ├── regularizers.py │ ├── rev_block_lib.py │ ├── summaries.py │ └── utils.py ├── darknets.py ├── datasets │ ├── __init__.py │ ├── coco.names │ ├── coco.py │ ├── imagenet.py │ ├── voc.names │ └── voc.py ├── densenets.py ├── detections.py ├── efficientnets.py ├── inceptions.py ├── layers.py ├── middles.py ├── mobilenets.py ├── nasnets.py ├── ops.py ├── preprocess.py ├── pretrained.py ├── references │ ├── __init__.py │ ├── coco.names │ ├── darkflow_utils │ │ ├── __init__.py │ │ ├── box.py │ │ ├── get_boxes.pyx │ │ ├── nms.pxd │ │ └── nms.pyx │ ├── rcnns.py │ ├── rpn_utils.py │ ├── voc.names │ ├── yolo_utils.py │ └── yolos.py ├── resnets.py ├── squeezenets.py ├── utils.py ├── version_utils.py ├── vggs.py ├── wavenets.py └── zf.py ├── tests ├── all_imagenet_models.py ├── basics_test.py └── utils_test.py └── translations ├── mobilenetv3_tfslim.py └── tfslim.py /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | matrix: 3 | include: 4 | - python: 3.6 5 | env: TEST_MODE=PEP8 6 | - python: 3.6 7 | env: TF_VERSION1=1.4.0 TF_VERSION2=1.5.0 8 | - python: 3.6 9 | env: TF_VERSION1=1.6.0 TF_VERSION2=1.7.0 10 | - python: 3.6 11 | env: TF_VERSION1=1.8.0 TF_VERSION2=1.9.0 12 | - python: 3.6 13 | env: TF_VERSION1=1.10.0 TF_VERSION2=1.11.0 14 | - python: 3.6 15 | env: TF_VERSION1=1.12.0 TF_VERSION2=1.13.1 16 | - python: 3.6 17 | env: TF_VERSION1=1.14.0 TF_VERSION2=1.15.0 18 | - python: 3.6 19 | env: 
TF_VERSION1=2.0.0 TF_VERSION2=2.1.0 20 | - python: 3.6 21 | env: TF_VERSION1=2.2.0 TF_VERSION2=2.2.0 22 | - python: 2.7 23 | env: TF_VERSION1=1.4.0 TF_VERSION2=1.15.0 24 | - python: 2.7 25 | env: TF_VERSION1=2.0.0 TF_VERSION2=2.1.0 26 | notifications: 27 | email: false 28 | # Adapted from 29 | # https://conda.io/docs/user-guide/tasks/use-conda-with-travis-ci.html 30 | install: 31 | - sudo apt-get update 32 | # We do this conditionally because it saves us some downloading if the 33 | # version is the same. 34 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 35 | wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh; 36 | else 37 | wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 38 | fi 39 | - bash miniconda.sh -b -p $HOME/miniconda 40 | - export PATH="$HOME/miniconda/bin:$PATH" 41 | - hash -r 42 | - conda config --set always_yes yes --set changeps1 no 43 | - conda update -q conda 44 | # Useful for debugging any issues with conda 45 | - conda info -a 46 | 47 | # Replace dep1 dep2 ... with your dependencies 48 | - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION 49 | - source activate test-environment 50 | - pip install --only-binary=numpy,scipy numpy scipy cython opencv-python==4.1.0.25 51 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 52 | pip install pytest pytest-pep8; 53 | else 54 | pip install pytest==5.4.3 pytest-pep8; 55 | fi 56 | - pip install . 
57 | - export R=$(($RANDOM%2)) 58 | - if [ $R -eq 0 ]; then 59 | export TF_VERSION=$TF_VERSION1; 60 | elif [ $R -eq 1 ]; then 61 | export TF_VERSION=$TF_VERSION2; 62 | fi 63 | - if [ -z "$TF_VERSION" ]; then 64 | pip install tensorflow==1.14.0; 65 | else 66 | pip install tensorflow==$TF_VERSION; 67 | fi 68 | 69 | # Ignore warnings 70 | - export TF16=$(python -c "from tensornets.utils import tf_later_than; print(tf_later_than('1.6.0'))") 71 | - if [[ $TF16 == "False" ]]; then 72 | echo 'filterwarnings =' >> pytest.ini; 73 | echo ' ignore::DeprecationWarning' >> pytest.ini; 74 | echo ' ignore::PendingDeprecationWarning' >> pytest.ini; 75 | fi 76 | 77 | # Detect whether core files are changed or not 78 | - export CORE_CHANGED=True; 79 | - for entry in `git diff --name-only HEAD~1`; do if [[ "$entry" == "README.md" ]]; then export CORE_CHANGED=False; fi; done 80 | - export NUM_CHANGED=$(git diff --name-only HEAD~1 | wc -l); 81 | - if [ $NUM_CHANGED -gt 1 ]; then CORE_CHANGED=True; fi 82 | 83 | # Detect whether the only changed file is a markdown file 84 | - export DOC_ONLY_CHANGED=False; 85 | - if [ $NUM_CHANGED == 1 ] && [[ "$(git diff --name-only HEAD~1)" == *"md" ]]; then 86 | export DOC_ONLY_CHANGED=True; 87 | fi 88 | 89 | script: 90 | - if [[ "$DOC_ONLY_CHANGED" == "False" ]]; then 91 | if [[ "$TEST_MODE" == "PEP8" ]]; then 92 | PYTHONPATH=$PWD:$PYTHONPATH py.test --pep8 -m pep8; 93 | else 94 | PYTHONPATH=$PWD:$PYTHONPATH py.test tests/ --verbose --ignore=tests/all*; 95 | fi; 96 | fi 97 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Taehoon Lee 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, 
modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md 3 | include pyproject.toml 4 | recursive-include tensornets/references/darkflow_utils * 5 | recursive-include tensornets *.names 6 | -------------------------------------------------------------------------------- /cat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taehoonlee/tensornets/c9b1d78f806892193efdebee2789a47fd148b984/cat.png -------------------------------------------------------------------------------- /examples/evaluate_imagenet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import sys 6 | import tensorflow.compat.v1 as tf 7 | import tensornets as nets 8 | 9 | from imagenet_preprocessing import input_fn as _input_fn 10 | from tensorflow import contrib 11 | 12 | 13 | 
tf.app.flags.DEFINE_integer( 14 | 'log_every_n_steps', 50, 15 | 'The frequency with which logs are print.') 16 | 17 | tf.app.flags.DEFINE_integer( 18 | 'batch_size', 200, 'The number of samples in each batch.') 19 | 20 | tf.app.flags.DEFINE_integer( 21 | 'steps', None, 'The number of steps for evaluation.') 22 | 23 | tf.app.flags.DEFINE_string( 24 | 'checkpoint_path', None, 25 | 'The directory where the model was written to or an absolute path to a ' 26 | 'checkpoint file.') 27 | 28 | tf.app.flags.DEFINE_string( 29 | 'model_name', 'ResNet50', 'The name of the architecture to evaluate.') 30 | 31 | tf.app.flags.DEFINE_string( 32 | 'dataset_dir', '/home/taehoonlee/Data/imagenet/tfrecords', 33 | 'The directory where the dataset files are stored.') 34 | 35 | tf.app.flags.DEFINE_integer( 36 | 'eval_image_size', 224, 'The eval image size') 37 | 38 | tf.app.flags.DEFINE_integer( 39 | 'normalize', 0, 'The normalization type') 40 | 41 | FLAGS = tf.app.flags.FLAGS 42 | 43 | 44 | # Simple trick to suppress the warning 45 | # "It seems that global step has not been increased" 46 | class hook(tf.train.StepCounterHook): 47 | def __init__(self, every_n_steps): 48 | self._steps = 0 49 | super(hook, self).__init__(every_n_steps) 50 | 51 | def after_run(self, run_context, run_values): 52 | self._steps += 1 53 | if self._timer.should_trigger_for_step(self._steps): 54 | t, steps = self._timer.update_last_triggered_step(self._steps) 55 | if t is not None: 56 | tf.logging.info("%g secs per step on average", t / steps) 57 | 58 | 59 | def input_fn(): 60 | return _input_fn( 61 | is_training=False, 62 | data_dir=FLAGS.dataset_dir, 63 | batch_size=FLAGS.batch_size, 64 | eval_image_size=FLAGS.eval_image_size, 65 | normalize=FLAGS.normalize) 66 | 67 | 68 | def model_fn(features, labels, mode): 69 | models = [] 70 | logits = [] 71 | classes = [] 72 | init_op = [tf.train.get_or_create_global_step().initializer] 73 | for (i, model_name) in enumerate(FLAGS.model_name.split(',')): 74 | with 
tf.device("/gpu:%d" % i): 75 | network_fn = getattr(nets, model_name) 76 | models.append(network_fn(features, is_training=False)) 77 | logits.append(models[i].get_outputs()[-2]) 78 | classes.append(tf.argmax(logits[i], axis=1)) 79 | if FLAGS.checkpoint_path is None: 80 | init_op.extend(models[i].pretrained()) 81 | 82 | scaffold = None 83 | if FLAGS.checkpoint_path is None: 84 | scaffold = tf.train.Scaffold(init_op=init_op) 85 | 86 | loss = [] 87 | for i in range(len(models)): 88 | cross_entropy = tf.losses.sparse_softmax_cross_entropy( 89 | logits=logits[i], labels=labels) 90 | loss.append(cross_entropy) 91 | loss = tf.reduce_sum(loss) 92 | 93 | metrics = None 94 | if mode == tf.estimator.ModeKeys.EVAL: 95 | metrics = {} 96 | for i in range(len(models)): 97 | top1 = tf.metrics.accuracy(labels=labels, predictions=classes[i]) 98 | top5 = contrib.metrics.streaming_sparse_recall_at_k( 99 | logits[i], tf.cast(labels, tf.int64), k=5) 100 | size = sum([w.shape.num_elements() 101 | for w in models[i].get_weights()]) 102 | run_meta = tf.RunMetadata() 103 | opts = tf.profiler.ProfileOptionBuilder.float_operation() 104 | opts['output'] = 'none' 105 | flops = tf.profiler.profile(tf.get_default_graph(), 106 | run_meta=run_meta, options=opts) 107 | metrics.update({"%dTop1" % i: top1, 108 | "%dTop5" % i: top5, 109 | "%dMAC" % i: (tf.constant(flops.total_float_ops), tf.no_op()), 110 | "%dSize" % i: (tf.constant(size), tf.no_op())}) 111 | 112 | return tf.estimator.EstimatorSpec( 113 | mode=mode, 114 | scaffold=scaffold, 115 | predictions=None, 116 | loss=loss, 117 | train_op=None, 118 | eval_metric_ops=metrics, 119 | export_outputs=None) 120 | 121 | 122 | def main(argv=None): 123 | if not FLAGS.dataset_dir: 124 | raise ValueError('You must supply the dataset directory.') 125 | 126 | tf.logging.set_verbosity(tf.logging.INFO) 127 | 128 | classifier = tf.estimator.Estimator( 129 | model_fn=model_fn, model_dir=FLAGS.checkpoint_path) 130 | 131 | if FLAGS.steps is None: 132 | FLAGS.steps 
= 50000 // FLAGS.batch_size 133 | 134 | results = classifier.evaluate( 135 | input_fn=input_fn, steps=FLAGS.steps, 136 | hooks=[hook(every_n_steps=FLAGS.log_every_n_steps)]) 137 | 138 | print("| {:5d} Samples | Top-1 | Top-5 | MAC | Size |".format( 139 | FLAGS.batch_size * FLAGS.steps)) 140 | print("|------------------|-------------|-------------|--------|--------|") 141 | for (i, model_name) in enumerate(FLAGS.model_name.split(',')): 142 | print("| {:16s} | {:6.3f} | {:6.3f} | {:5.1f}M | {:5.1f}M |".format( 143 | model_name.split('Net')[-1] if len(model_name) > 16 else model_name, 144 | 100 * (results["%dTop1" % i]), 145 | 100 * (results["%dTop5" % i]), 146 | results["%dMAC" % i] / 10e5, 147 | results["%dSize" % i] / 10e5)) 148 | 149 | 150 | if __name__ == '__main__': 151 | main(sys.argv[1:]) 152 | -------------------------------------------------------------------------------- /examples/evaluate_imagenet_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | CUDA_VISIBLE_DEVICES=0 screen -dLm bash -c "echo 0; \ 4 | python evaluate_imagenet.py --model_name=ResNet50 --eval_image_size=224 --normalize=1; \ 5 | python evaluate_imagenet.py --model_name=ResNet101 --eval_image_size=224 --normalize=1; \ 6 | python evaluate_imagenet.py --model_name=ResNet152 --eval_image_size=224 --normalize=1; \ 7 | python evaluate_imagenet.py --model_name=ResNeXt101c64 --eval_image_size=224 --normalize=3; \ 8 | python evaluate_imagenet.py --model_name=Inception3 --eval_image_size=299 --normalize=2; \ 9 | python evaluate_imagenet.py --model_name=MobileNet100 --eval_image_size=224 --normalize=2; \ 10 | python evaluate_imagenet.py --model_name=MobileNet35v2 --eval_image_size=224 --normalize=2; \ 11 | python evaluate_imagenet.py --model_name=MobileNet50v2 --eval_image_size=224 --normalize=2; \ 12 | python evaluate_imagenet.py --model_name=MobileNet140v2 --eval_image_size=224 --normalize=2; \ 13 | python evaluate_imagenet.py 
--model_name=MobileNet75v3large --eval_image_size=224 --normalize=2; \ 14 | python evaluate_imagenet.py --model_name=MobileNet75v3small --eval_image_size=224 --normalize=2; \ 15 | python evaluate_imagenet.py --model_name=EfficientNetB0 --eval_image_size=224 --normalize=3; \ 16 | python evaluate_imagenet.py --model_name=EfficientNetB1 --eval_image_size=240 --normalize=3; \ 17 | python evaluate_imagenet.py --model_name=EfficientNetB5 --eval_image_size=456 --normalize=3 --batch_size=100; \ 18 | python evaluate_imagenet.py --model_name=DenseNet121 --eval_image_size=224 --normalize=3; \ 19 | python evaluate_imagenet.py --model_name=VGG19 --eval_image_size=224 --normalize=1; \ 20 | python evaluate_imagenet.py --model_name=NASNetAlarge --eval_image_size=331 --normalize=2 --batch_size=100" 21 | 22 | CUDA_VISIBLE_DEVICES=1 screen -dLm bash -c "echo 1; \ 23 | python evaluate_imagenet.py --model_name=ResNet50v2 --eval_image_size=299 --normalize=2; \ 24 | python evaluate_imagenet.py --model_name=ResNet101v2 --eval_image_size=299 --normalize=2; \ 25 | python evaluate_imagenet.py --model_name=ResNet152v2 --eval_image_size=299 --normalize=2; \ 26 | python evaluate_imagenet.py --model_name=Inception4 --eval_image_size=299 --normalize=2; \ 27 | python evaluate_imagenet.py --model_name=InceptionResNet2 --eval_image_size=299 --normalize=2; \ 28 | python evaluate_imagenet.py --model_name=MobileNet75 --eval_image_size=224 --normalize=2; \ 29 | python evaluate_imagenet.py --model_name=MobileNet75v2 --eval_image_size=224 --normalize=2; \ 30 | python evaluate_imagenet.py --model_name=MobileNet100v2 --eval_image_size=224 --normalize=2; \ 31 | python evaluate_imagenet.py --model_name=MobileNet130v2 --eval_image_size=224 --normalize=2; \ 32 | python evaluate_imagenet.py --model_name=MobileNet100v3large --eval_image_size=224 --normalize=2; \ 33 | python evaluate_imagenet.py --model_name=MobileNet100v3small --eval_image_size=224 --normalize=2; \ 34 | python evaluate_imagenet.py 
--model_name=EfficientNetB2 --eval_image_size=260 --normalize=3; \ 35 | python evaluate_imagenet.py --model_name=EfficientNetB3 --eval_image_size=300 --normalize=3; \ 36 | python evaluate_imagenet.py --model_name=EfficientNetB6 --eval_image_size=528 --normalize=3 --batch_size=50; \ 37 | python evaluate_imagenet.py --model_name=DenseNet169 --eval_image_size=224 --normalize=3; \ 38 | python evaluate_imagenet.py --model_name=VGG16 --eval_image_size=224 --normalize=1; \ 39 | python evaluate_imagenet.py --model_name=PNASNetlarge --eval_image_size=331 --normalize=2 --batch_size=100" 40 | 41 | CUDA_VISIBLE_DEVICES=2 screen -dLm bash -c "echo 2; \ 42 | python evaluate_imagenet.py --model_name=ResNet200v2 --eval_image_size=224 --normalize=3; \ 43 | python evaluate_imagenet.py --model_name=ResNeXt50 --eval_image_size=224 --normalize=3; \ 44 | python evaluate_imagenet.py --model_name=ResNeXt101 --eval_image_size=224 --normalize=3; \ 45 | python evaluate_imagenet.py --model_name=WideResNet50 --eval_image_size=224 --normalize=5; \ 46 | python evaluate_imagenet.py --model_name=Inception1 --eval_image_size=224 --normalize=4; \ 47 | python evaluate_imagenet.py --model_name=Inception2 --eval_image_size=224 --normalize=2; \ 48 | python evaluate_imagenet.py --model_name=MobileNet25 --eval_image_size=224 --normalize=2; \ 49 | python evaluate_imagenet.py --model_name=MobileNet50 --eval_image_size=224 --normalize=2; \ 50 | python evaluate_imagenet.py --model_name=MobileNet100v3largemini --eval_image_size=224 --normalize=2; \ 51 | python evaluate_imagenet.py --model_name=MobileNet100v3smallmini --eval_image_size=224 --normalize=2; \ 52 | python evaluate_imagenet.py --model_name=EfficientNetB4 --eval_image_size=380 --normalize=3 --batch_size=100; \ 53 | python evaluate_imagenet.py --model_name=EfficientNetB7 --eval_image_size=600 --normalize=3 --batch_size=50; \ 54 | python evaluate_imagenet.py --model_name=DenseNet201 --eval_image_size=224 --normalize=3; \ 55 | python 
evaluate_imagenet.py --model_name=NASNetAmobile --eval_image_size=224 --normalize=2; \ 56 | python evaluate_imagenet.py --model_name=SqueezeNet --eval_image_size=224 --normalize=4" 57 | 58 | cat screenlog.0 | grep "^|" | sort | uniq 59 | -------------------------------------------------------------------------------- /examples/generate_summary.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib as mpl 3 | import matplotlib.pyplot as plt 4 | 5 | from mistune import html # pip install mistune==2.0.0a4 6 | from bs4 import BeautifulSoup # pip install bs4 7 | 8 | with open('README.md') as f: 9 | contents = BeautifulSoup(html(f.read()), 'html.parser') 10 | 11 | table = [] 12 | for row in contents.select('table')[0].select('tr'): 13 | if len(row.find_all('td')) > 0: 14 | table.append([]) 15 | for col in row.find_all('td')[:-3]: 16 | table[-1].append(col.text) 17 | 18 | table = np.array(table) 19 | name = table[:, 0] 20 | top1 = np.asarray(table[:, 2], dtype=np.float) 21 | top5 = np.asarray(table[:, 3], dtype=np.float) 22 | mac = np.asarray([x[:-1] for x in table[:, 4]], dtype=np.float) 23 | size = np.asarray([x[:-1] for x in table[:, 5]], dtype=np.float) 24 | 25 | groups = [] 26 | groups.append( 27 | ('ResNet', '#EF5350', np.array([('ResNet' in x) and (len(x) < 10) for x in name]))) 28 | groups.append( 29 | ('ResNet2', '#EC407A', np.array([('ResNet' in x) and ('v2' in x) for x in name]))) 30 | groups.append( 31 | ('ResNeXt', '#AB47BC', np.array([('ResNeXt' in x) and ('c32' in x) for x in name]))) 32 | groups.append( 33 | ('Inception', '#5C6BC0', np.array([('Inception' in x) for x in name]))) 34 | groups.append( 35 | ('DenseNet', '#29B6F6', np.array([('DenseNet' in x) for x in name]))) 36 | # groups.append( 37 | # ('MobileNet', '#26A69A', np.array([('MobileNet' in x) and (len(x) < 13) for x in name]))) 38 | groups.append( 39 | ('MobileNet2', '#66BB6A', np.array([('MobileNet' in x) and ('v2' in x) 
for x in name]))) 40 | groups.append( 41 | ('MobileNet3', '#9CCC65', np.array([('v3large' in x) and ('mini' not in x) for x in name]))) 42 | groups.append( 43 | ('EfficientNet', '#FFA726', np.array([('EfficientNet' in x) for x in name]))) 44 | 45 | f, axarr = plt.subplots(2, 2, figsize=(8, 8)) 46 | for (label, color, index) in groups: 47 | kwargs = {'label': label, 'ls': '--', 'linewidth': 1, 48 | 'marker': 'o', 'markersize': 5, 'color': color} 49 | axarr[0, 0].plot(size[index], top1[index], **kwargs) 50 | axarr[0, 1].plot(size[index], top5[index], **kwargs) 51 | axarr[1, 0].plot(mac[index], top1[index], **kwargs) 52 | axarr[1, 1].plot(mac[index], top5[index], **kwargs) 53 | 54 | axarr[0, 0].legend() 55 | axarr[0, 0].set_xlabel('Size (M)') 56 | axarr[0, 0].set_xscale('log') 57 | axarr[0, 0].set_ylabel('Top-1 (%)') 58 | 59 | axarr[0, 1].set_xlabel('Size (M)') 60 | axarr[0, 1].set_xscale('log') 61 | axarr[0, 1].set_ylabel('Top-5 (%)') 62 | 63 | axarr[1, 0].set_xlabel('MAC (M)') 64 | axarr[1, 0].set_xscale('log') 65 | axarr[1, 0].set_ylabel('Top-1 (%)') 66 | 67 | axarr[1, 1].set_xlabel('MAC (M)') 68 | axarr[1, 1].set_xscale('log') 69 | axarr[1, 1].set_ylabel('Top-5 (%)') 70 | 71 | for i in range(2): 72 | for j in range(2): 73 | axarr[i, j].grid(linestyle=':') 74 | axarr[i, j].minorticks_on() 75 | axarr[i, j].tick_params(axis='both', which='both', direction='in') 76 | 77 | plt.tight_layout() 78 | plt.savefig('summary.png', dpi=200) 79 | -------------------------------------------------------------------------------- /examples/train_yolov2.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | import tensorflow as tf 4 | import tensornets as nets 5 | 6 | from tensornets.datasets import voc 7 | 8 | data_dir = "/home/taehoonlee/Data/VOCdevkit/VOC%d" 9 | trains = voc.load_train([data_dir % 2007, data_dir % 2012], 10 | 'trainval', batch_size=48) 11 | 12 | # Define a model 13 | inputs = 
tf.placeholder(tf.float32, [None, 416, 416, 3]) 14 | is_training = tf.placeholder(tf.bool) 15 | model = nets.YOLOv2(inputs, nets.Darknet19, is_training=is_training) 16 | 17 | # Define an optimizer 18 | step = tf.Variable(0, trainable=False) 19 | lr = tf.train.piecewise_constant( 20 | step, [100, 180, 320, 570, 1000, 40000, 60000], 21 | [1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-4, 1e-5]) 22 | train = tf.train.MomentumOptimizer(lr, 0.9).minimize(model.loss, 23 | global_step=step) 24 | 25 | with tf.Session() as sess: 26 | 27 | # Load Darknet19 28 | sess.run(tf.global_variables_initializer()) 29 | sess.run(model.stem.pretrained()) 30 | 31 | # Note that there are 16551 images (5011 in VOC07 + 11540 in VOC12). 32 | # When the mini-batch size is 48, 1 epoch consists of 344(=16551/48) steps. 33 | # Thus, 233 epochs will cover 80152 steps. 34 | losses = [] 35 | for i in range(233): 36 | 37 | # Iterate on VOC07+12 trainval once 38 | _t = time.time() 39 | for (imgs, metas) in trains: 40 | # `trains` returns None when it covers the full batch once 41 | if imgs is None: 42 | break 43 | metas.insert(0, model.preprocess(imgs)) # for `inputs` 44 | metas.append(True) # for `is_training` 45 | outs = sess.run([train, model.loss], 46 | dict(zip(model.inputs, metas))) 47 | losses.append(outs[1]) 48 | 49 | # Report step, learning rate, loss, weight decay, runtime 50 | print('***** %d %.5f %.5f %.5f %.5f *****' % 51 | (sess.run(step), sess.run(lr), 52 | losses[-1], sess.run(tf.losses.get_regularization_loss()), 53 | time.time() - _t)) 54 | 55 | # Report with VOC07 test 56 | results = [] 57 | tests = voc.load(data_dir % 2007, 'test', total_num=100) 58 | for (img, scale) in tests: 59 | outs = sess.run(model, {inputs: model.preprocess(img), 60 | is_training: False}) 61 | results.append(model.get_boxes(outs, img.shape[1:3])) 62 | print(voc.evaluate(results, data_dir % 2007, 'test')) 63 | -------------------------------------------------------------------------------- /pyproject.toml: 
-------------------------------------------------------------------------------- 1 | [build-system] 2 | # Minimum requirements for the build system to execute. 3 | requires = ["setuptools", "cython", "numpy"] # PEP 508 specification 4 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | pep8ignore= 3 | *.py E226 E701 E111 E114 4 | tensornets/contrib_framework/*.py ALL 5 | tensornets/contrib_layers/*.py ALL 6 | pep8maxlinelength=119 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | from sys import platform 4 | from setuptools import setup 5 | from setuptools.extension import Extension 6 | from Cython.Build import cythonize 7 | 8 | ext = 'tensornets.references.darkflow_utils' 9 | ext_modules = [Extension("%s.%s" % (ext, n), 10 | sources=["%s/%s.pyx" % (ext.replace('.', '/'), n)], 11 | libraries=[] if platform.startswith("win") else ['m'], 12 | include_dirs=[numpy.get_include()]) 13 | for n in ['nms', 'get_boxes']] 14 | 15 | setup(name='tensornets', 16 | version='0.4.6', 17 | description='high level network definitions in tensorflow', 18 | author='Taehoon Lee', 19 | author_email='me@taehoonlee.com', 20 | url='https://github.com/taehoonlee/tensornets', 21 | download_url='https://github.com/taehoonlee/tensornets/tarball/0.4.6', 22 | license='MIT', 23 | packages=['tensornets', 'tensornets.datasets', 24 | 'tensornets.contrib_framework', 'tensornets.contrib_layers', 25 | 'tensornets.references', ext], 26 | include_package_data=True, 27 | ext_modules=cythonize(ext_modules)) 28 | -------------------------------------------------------------------------------- /summary.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/taehoonlee/tensornets/c9b1d78f806892193efdebee2789a47fd148b984/summary.png -------------------------------------------------------------------------------- /tensornets/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .inceptions import GoogLeNet 4 | from .inceptions import Inception1 5 | from .inceptions import Inception2 6 | from .inceptions import Inception3 7 | from .inceptions import Inception4 8 | from .inceptions import InceptionResNet2 9 | 10 | from .resnets import ResNet50 11 | from .resnets import ResNet101 12 | from .resnets import ResNet152 13 | from .resnets import ResNet50v2 14 | from .resnets import ResNet101v2 15 | from .resnets import ResNet152v2 16 | from .resnets import ResNet200v2 17 | from .resnets import ResNeXt50 18 | from .resnets import ResNeXt101 19 | from .resnets import ResNeXt50c32 20 | from .resnets import ResNeXt101c32 21 | from .resnets import ResNeXt101c64 22 | from .resnets import WideResNet50 23 | 24 | from .nasnets import NASNetAlarge 25 | from .nasnets import NASNetAmobile 26 | from .nasnets import PNASNetlarge 27 | 28 | from .vggs import VGG16 29 | from .vggs import VGG19 30 | 31 | from .densenets import DenseNet121 32 | from .densenets import DenseNet169 33 | from .densenets import DenseNet201 34 | 35 | from .mobilenets import MobileNet25 36 | from .mobilenets import MobileNet50 37 | from .mobilenets import MobileNet75 38 | from .mobilenets import MobileNet100 39 | 40 | from .mobilenets import MobileNet35v2 41 | from .mobilenets import MobileNet50v2 42 | from .mobilenets import MobileNet75v2 43 | from .mobilenets import MobileNet100v2 44 | from .mobilenets import MobileNet130v2 45 | from .mobilenets import MobileNet140v2 46 | 47 | from .mobilenets import MobileNet75v3 48 | from .mobilenets import MobileNet100v3 49 | from .mobilenets import MobileNet75v3large 50 | from .mobilenets import 
MobileNet100v3large 51 | from .mobilenets import MobileNet100v3largemini 52 | from .mobilenets import MobileNet75v3small 53 | from .mobilenets import MobileNet100v3small 54 | from .mobilenets import MobileNet100v3smallmini 55 | 56 | from .efficientnets import EfficientNetB0 57 | from .efficientnets import EfficientNetB1 58 | from .efficientnets import EfficientNetB2 59 | from .efficientnets import EfficientNetB3 60 | from .efficientnets import EfficientNetB4 61 | from .efficientnets import EfficientNetB5 62 | from .efficientnets import EfficientNetB6 63 | from .efficientnets import EfficientNetB7 64 | 65 | from .squeezenets import SqueezeNet 66 | 67 | from .capsulenets import CapsuleNet 68 | 69 | from .wavenets import WaveNet 70 | 71 | from .references import YOLOv3COCO 72 | from .references import YOLOv3VOC 73 | from .references import YOLOv2COCO 74 | from .references import YOLOv2VOC 75 | from .references import TinyYOLOv2COCO 76 | from .references import TinyYOLOv2VOC 77 | 78 | from .references import FasterRCNN_ZF_VOC 79 | from .references import FasterRCNN_VGG16_VOC 80 | 81 | from .darknets import Darknet19 82 | from .darknets import TinyDarknet19 83 | 84 | from .zf import ZF 85 | 86 | from .detections import YOLOv2 87 | from .detections import TinyYOLOv2 88 | from .detections import FasterRCNN 89 | 90 | from .preprocess import preprocess 91 | from .pretrained import assign as pretrained 92 | 93 | from .utils import * 94 | 95 | __version__ = '0.4.6' 96 | 97 | remove_utils(__name__, ['init']) 98 | -------------------------------------------------------------------------------- /tensornets/capsulenets.py: -------------------------------------------------------------------------------- 1 | """Collection of CapsuleNet variants 2 | 3 | The reference paper: 4 | 5 | - Dynamic Routing Between Capsules 6 | - Sara Sabour, Nicholas Frosst, Geoffrey E. Hinton 7 | - https://arxiv.org/abs/1710.09829 8 | 9 | The reference implementations: 10 | 11 | 1. 
TensorFlow CapsNet 12 | - https://github.com/naturomics/CapsNet-Tensorflow 13 | 2. Keras CapsNet 14 | - https://github.com/XifengGuo/CapsNet-Keras 15 | """ 16 | from __future__ import absolute_import 17 | 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | from .layers import batch_norm 22 | from .layers import conv2d 23 | from .layers import convrelu as conv 24 | 25 | from .ops import * 26 | from .utils import ops_to_outputs 27 | from .utils import set_args 28 | from .utils import var_scope 29 | 30 | 31 | def __args__(is_training): 32 | return [([batch_norm], {'scale': True, 'is_training': is_training, 33 | 'epsilon': 1e-5, 'scope': 'bn'}), 34 | ([conv2d], {'padding': 'VALID', 'activation_fn': None, 35 | 'biases_initializer': None, 'scope': 'conv'})] 36 | 37 | 38 | @ops_to_outputs 39 | def squash(x, epsilon=1e-9, name=None): 40 | norm = tf.reduce_sum(tf.square(x), axis=-1, keep_dims=True) 41 | scale = norm / (1. + norm) / tf.sqrt(norm + epsilon) 42 | return tf.multiply(x, scale, name=name) 43 | 44 | 45 | @var_scope('primary') 46 | def primary(x, filters, length, kernel_size, stride, scope=None): 47 | x = conv(x, filters * length, kernel_size, stride=stride, scope='conv') 48 | pixels = np.prod(x.shape[1:-1].as_list()) 49 | x = reshape(x, (-1, pixels * filters, length), name='out') 50 | return x 51 | 52 | 53 | @var_scope('digit') 54 | def digit(x, filters, length, iters=3, scope=None): 55 | filters0 = int(x.shape[1]) if tf_later_than('2') else x.shape[1].value 56 | length0 = int(x.shape[2]) if tf_later_than('2') else x.shape[2].value 57 | 58 | # fully-connected weights between capsules: [1152, 8, 10 * 16] 59 | w = tf.get_variable('weights', shape=(filters0, length0, filters * length), 60 | dtype=tf.float32) 61 | 62 | # coupling logits: [1152, 10] 63 | b = tf.zeros((filters0, filters)) 64 | 65 | # prediction vectors: [None, 1152, 10, 16] 66 | uhat = tf.scan(lambda a, b: tf.matmul(b, w), tf.expand_dims(x, 2), 67 | initializer=tf.zeros([filters0, 1, filters 
* length])) 68 | uhat = reshape(uhat, (-1, filters0, filters, length), name='predvec') 69 | 70 | for r in range(iters): 71 | with tf.variable_scope("iter%d" % r): 72 | # coupling coefficients: [1152, 10] 73 | c = softmax(b, name='softmax') 74 | # activity vector: [None, 10, 16] 75 | v = squash(tf.reduce_sum(uhat * tf.expand_dims(c, -1), axis=1), 76 | name='out') 77 | # agreement: [None, 1152, 10] 78 | a = reduce_sum(tf.multiply(uhat, tf.expand_dims(v, 1)), axis=-1, 79 | name='agreement') 80 | # updates coupling logits 81 | b = b + reduce_sum(a, axis=0, name='delta') 82 | return v 83 | 84 | 85 | @var_scope('capsulenet') 86 | @set_args(__args__) 87 | def capsulenet_mnist(x, is_training=False, classes=10, scope=None, reuse=None): 88 | x = conv(x, 256, 9, stride=1, scope='conv1') 89 | x = primary(x, 32, 8, 9, stride=2, scope='primary') 90 | x = digit(x, 10, 16, scope='digit') 91 | return x 92 | 93 | 94 | # Simple alias. 95 | CapsuleNet = capsulenet_mnist 96 | -------------------------------------------------------------------------------- /tensornets/contrib_framework/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """A module containing TensorFlow ops whose API may change in the future.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | # TODO(ptucker): Add these to tf.contrib.variables? 22 | # pylint: disable=wildcard-import 23 | from .arg_scope import * 24 | #from .checkpoint_ops import * 25 | #from .ops import * 26 | #from .prettyprint_ops import * 27 | #from .script_ops import * 28 | #from .sort_ops import * 29 | from .variables import * 30 | # pylint: enable=wildcard-import 31 | -------------------------------------------------------------------------------- /tensornets/contrib_framework/arg_scope.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains the arg_scope used for scoping layers arguments. 16 | 17 | Allows one to define models much more compactly by eliminating boilerplate 18 | code. This is accomplished through the use of argument scoping (arg_scope). 
19 | 20 | Example of how to use tf.contrib.framework.arg_scope: 21 | 22 | ``` 23 | from third_party.tensorflow.contrib.layers.python import layers 24 | 25 | arg_scope = tf.contrib.framework.arg_scope 26 | 27 | with arg_scope([layers.conv2d], padding='SAME', 28 | initializer=layers.variance_scaling_initializer(), 29 | regularizer=layers.l2_regularizer(0.05)): 30 | net = layers.conv2d(inputs, 64, [11, 11], 4, padding='VALID', scope='conv1') 31 | net = layers.conv2d(net, 256, [5, 5], scope='conv2') 32 | ``` 33 | The first call to conv2d will behave as follows: 34 | layers.conv2d(inputs, 64, [11, 11], 4, padding='VALID', 35 | initializer=layers.variance_scaling_initializer(), 36 | regularizer=layers.l2_regularizer(0.05), scope='conv1') 37 | 38 | The second call to conv2d will also use the arg_scope's default for padding: 39 | layers.conv2d(inputs, 256, [5, 5], padding='SAME', 40 | initializer=layers.variance_scaling_initializer(), 41 | regularizer=layers.l2_regularizer(0.05), scope='conv2') 42 | 43 | Example of how to reuse an arg_scope: 44 | 45 | ``` 46 | with arg_scope([layers.conv2d], padding='SAME', 47 | initializer=layers.variance_scaling_initializer(), 48 | regularizer=layers.l2_regularizer(0.05)) as sc: 49 | net = layers.conv2d(net, 256, [5, 5], scope='conv1') 50 | .... 
__all__ = [
    'arg_scope', 'add_arg_scope', 'current_arg_scope', 'has_arg_scope',
    'arg_scoped_arguments', 'arg_scope_func_key'
]

# Stack of active scopes; the last entry is the innermost one. Each entry
# maps an op key to the dict of default keyword arguments for that op.
_ARGSTACK = [{}]

# Maps the key of every op decorated with @add_arg_scope to the names of its
# keyword arguments.
_DECORATED_OPS = {}


def _get_arg_stack():
  """Returns the scope stack, re-seeding it with an empty scope if needed."""
  if not _ARGSTACK:
    _ARGSTACK.append({})
  return _ARGSTACK


def current_arg_scope():
  """Returns the innermost active scope dictionary."""
  return _get_arg_stack()[-1]


def arg_scope_func_key(op):
  """Returns the key identifying `op` in scopes.

  Decorated functions carry the key of the function they wrap in `_key_op`;
  anything else is keyed by `str(op)`.
  """
  return getattr(op, '_key_op', str(op))


def _name_op(op):
  """Returns `(module, name)` of `op`, used in error messages."""
  return (op.__module__, op.__name__)


def _kwarg_names(func):
  """Returns the names of the positional parameters of `func` with defaults.

  Parameters with defaults are always the trailing positional parameters, so
  they are the last `len(__defaults__)` names among the first `co_argcount`
  entries of `co_varnames`. The slice is anchored at `co_argcount` rather
  than at the end of `co_varnames`, because `co_varnames` also lists
  `*args`, `**kwargs` and local variables after the parameters.
  """
  code = func.__code__
  kwargs_length = len(func.__defaults__) if func.__defaults__ else 0
  return code.co_varnames[code.co_argcount - kwargs_length:code.co_argcount]


def _add_op(op):
  """Registers `op` as usable inside an arg_scope."""
  key_op = arg_scope_func_key(op)
  _DECORATED_OPS[key_op] = _kwarg_names(op)


@tf_contextlib.contextmanager
def arg_scope(list_ops_or_scope, **kwargs):
  """Stores the default arguments for the given set of list_ops.

  For usage, please see examples at top of the file.

  Args:
    list_ops_or_scope: List or tuple of operations to set argument scope for or
      a dictionary containing the current scope. When list_ops_or_scope is a
      dict, kwargs must be empty. When list_ops_or_scope is a list or tuple,
      then every op in it need to be decorated with @add_arg_scope to work.
    **kwargs: keyword=value that will define the defaults for each op in
      list_ops. All the ops need to accept the given set of arguments.

  Yields:
    the current_scope, which is a dictionary of {op: {arg: value}}
  Raises:
    TypeError: if list_ops is not a list or a tuple.
    ValueError: if any op in list_ops has not be decorated with @add_arg_scope,
      or if kwargs are given together with a scope dictionary.
  """
  # The new scope is fully built and validated *before* it is pushed, so a
  # validation error can never pop an entry that was not pushed (which would
  # silently discard the enclosing scope).
  if isinstance(list_ops_or_scope, dict):
    # Assumes that list_ops_or_scope is a scope that is being reused.
    if kwargs:
      raise ValueError('When attempting to re-use a scope by supplying a '
                       'dictionary, kwargs must be empty.')
    current_scope = list_ops_or_scope.copy()
  else:
    # Assumes that list_ops_or_scope is a list/tuple of ops with kwargs.
    if not isinstance(list_ops_or_scope, (list, tuple)):
      raise TypeError('list_ops_or_scope must either be a list/tuple or reused '
                      'scope (i.e. dict)')
    current_scope = current_arg_scope().copy()
    for op in list_ops_or_scope:
      key = arg_scope_func_key(op)
      if not has_arg_scope(op):
        raise ValueError('%s.%s is not decorated with @add_arg_scope' %
                         _name_op(op))
      if key in current_scope:
        # Copy before updating so the enclosing scope's dict is not mutated.
        current_kwargs = current_scope[key].copy()
        current_kwargs.update(kwargs)
        current_scope[key] = current_kwargs
      else:
        current_scope[key] = kwargs.copy()

  _get_arg_stack().append(current_scope)
  try:
    yield current_scope
  finally:
    _get_arg_stack().pop()


def add_arg_scope(func):
  """Decorates a function with args so it can be used within an arg_scope.

  Args:
    func: function to decorate.

  Returns:
    The decorated function: a wrapper around `func` that fills in the
    defaults of the innermost active arg_scope. Keyword arguments passed
    explicitly by the caller take precedence over the scope's defaults.
  """

  def func_with_args(*args, **kwargs):
    current_scope = current_arg_scope()
    current_args = kwargs
    key_func = arg_scope_func_key(func)
    if key_func in current_scope:
      current_args = current_scope[key_func].copy()
      current_args.update(kwargs)
    return func(*args, **current_args)

  _add_op(func)
  # The wrapper shares the key of the wrapped function so that both resolve
  # to the same scope entry.
  setattr(func_with_args, '_key_op', arg_scope_func_key(func))
  return tf_decorator.make_decorator(func, func_with_args)


def has_arg_scope(func):
  """Checks whether a func has been decorated with @add_arg_scope or not.

  Args:
    func: function to check.

  Returns:
    a boolean.
  """
  return arg_scope_func_key(func) in _DECORATED_OPS


def arg_scoped_arguments(func):
  """Returns the list kwargs that arg_scope can set for a func.

  Args:
    func: function which has been decorated with @add_arg_scope.

  Returns:
    a list of kwargs names.
  """
  assert has_arg_scope(func)
  return _DECORATED_OPS[arg_scope_func_key(func)]
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """layers module with higher level NN primitives.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | # pylint: disable=wildcard-import 22 | #from .embedding_ops import * 23 | #from .encoders import * 24 | #from .feature_column import * 25 | #from .feature_column_ops import * 26 | from .initializers import * 27 | from .layers import * 28 | from .normalization import * 29 | from .optimizers import * 30 | from .regularizers import * 31 | from .rev_block_lib import * 32 | from .summaries import * 33 | #from .target_column import * 34 | #from tensorflow.contrib.layers.python.ops.bucketization_op import * 35 | #from tensorflow.contrib.layers.python.ops.sparse_feature_cross_op import * 36 | # pylint: enable=wildcard-import 37 | -------------------------------------------------------------------------------- /tensornets/contrib_layers/initializers.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
__all__ = ['xavier_initializer', 'xavier_initializer_conv2d',
           'variance_scaling_initializer']


def xavier_initializer(uniform=True, seed=None, dtype=dtypes.float32):
  """Returns an initializer performing "Xavier" initialization for weights.

  This function implements the weight initialization from:

  Xavier Glorot and Yoshua Bengio (2010):
           [Understanding the difficulty of training deep feedforward neural
           networks. International conference on artificial intelligence and
           statistics.](
           http://www.jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf)

  This initializer is designed to keep the scale of the gradients roughly the
  same in all layers. In uniform distribution this ends up being the range:
  `x = sqrt(6. / (in + out)); [-x, x]` and for normal distribution a standard
  deviation of `sqrt(2. / (in + out))` is used.

  Args:
    uniform: Whether to use uniform or normal distributed random initialization.
    seed: A Python integer. Used to create random seeds. See
          `tf.compat.v1.set_random_seed` for behavior.
    dtype: The data type. Only floating point types are supported.

  Returns:
    An initializer for a weight matrix.
  """
  return variance_scaling_initializer(factor=1.0, mode='FAN_AVG',
                                      uniform=uniform, seed=seed, dtype=dtype)

xavier_initializer_conv2d = xavier_initializer


def variance_scaling_initializer(factor=2.0, mode='FAN_IN', uniform=False,
                                 seed=None, dtype=dtypes.float32):
  """Returns an initializer that generates tensors without scaling variance.

  When initializing a deep network, it is in principle advantageous to keep
  the scale of the input variance constant, so it does not explode or diminish
  by reaching the final layer. This initializer use the following formula:

  ```python
    if mode='FAN_IN': # Count only number of input connections.
      n = fan_in
    elif mode='FAN_OUT': # Count only number of output connections.
      n = fan_out
    elif mode='FAN_AVG': # Average number of inputs and output connections.
      n = (fan_in + fan_out)/2.0

      truncated_normal(shape, 0.0, stddev=sqrt(factor / n))
  ```

  * To get [Delving Deep into Rectifiers](
     http://arxiv.org/pdf/1502.01852v1.pdf) (also know as the "MSRA
     initialization"), use (Default):
    `factor=2.0 mode='FAN_IN' uniform=False`
  * To get [Convolutional Architecture for Fast Feature Embedding](
     http://arxiv.org/abs/1408.5093), use:
    `factor=1.0 mode='FAN_IN' uniform=True`
  * To get [Understanding the difficulty of training deep feedforward neural
    networks](http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf),
    use:
    `factor=1.0 mode='FAN_AVG' uniform=True.`
  * To get `xavier_initializer` use either:
    `factor=1.0 mode='FAN_AVG' uniform=True`, or
    `factor=1.0 mode='FAN_AVG' uniform=False`.

  Args:
    factor: Float.  A multiplicative factor.
    mode: String.  'FAN_IN', 'FAN_OUT', 'FAN_AVG'.
    uniform: Whether to use uniform or normal distributed random initialization.
    seed: A Python integer. Used to create random seeds. See
          `tf.compat.v1.set_random_seed` for behavior.
    dtype: The data type. Only floating point types are supported.

  Returns:
    An initializer that generates tensors with unit variance.

  Raises:
    TypeError: if `dtype` is not a floating point type, or if `mode` is not
      in ['FAN_IN', 'FAN_OUT', 'FAN_AVG'].
  """
  if not dtype.is_floating:
    raise TypeError('Cannot create initializer for non-floating point type.')
  if mode not in ['FAN_IN', 'FAN_OUT', 'FAN_AVG']:
    # The mode is interpolated into the message; passing it as a second
    # exception argument would leave the "%s" placeholder unformatted.
    raise TypeError('Unknown mode %s [FAN_IN, FAN_OUT, FAN_AVG]' % (mode,))

  # pylint: disable=unused-argument
  def _initializer(shape, dtype=dtype, partition_info=None):
    """Initializer function."""
    if not dtype.is_floating:
      raise TypeError('Cannot create initializer for non-floating point type.')
    # Estimating fan_in and fan_out is not possible to do perfectly, but we try.
    # This is the right thing for matrix multiply and convolutions.
    if shape:
      fan_in = float(shape[-2]) if len(shape) > 1 else float(shape[-1])
      fan_out = float(shape[-1])
    else:
      fan_in = 1.0
      fan_out = 1.0
    # Every leading dimension (e.g. the kernel size of a convolution)
    # multiplies both fans.
    for dim in shape[:-2]:
      fan_in *= float(dim)
      fan_out *= float(dim)
    if mode == 'FAN_IN':
      # Count only number of input connections.
      n = fan_in
    elif mode == 'FAN_OUT':
      # Count only number of output connections.
      n = fan_out
    elif mode == 'FAN_AVG':
      # Average number of inputs and output connections.
      n = (fan_in + fan_out) / 2.0
    if uniform:
      # To get stddev = math.sqrt(factor / n) need to adjust for uniform.
      limit = math.sqrt(3.0 * factor / n)
      return random_ops.random_uniform(shape, -limit, limit,
                                       dtype, seed=seed)
    else:
      # To get stddev = math.sqrt(factor / n) need to adjust for truncated.
      trunc_stddev = math.sqrt(1.3 * factor / n)
      return random_ops.truncated_normal(shape, 0.0, trunc_stddev, dtype,
                                         seed=seed)
  # pylint: enable=unused-argument

  return _initializer
__all__ = ['l1_regularizer',
           'l2_regularizer',
           'l1_l2_regularizer',
           'sum_regularizer',
           'apply_regularization']


def l1_regularizer(scale, scope=None):
  """Returns a function that can be used to apply L1 regularization to weights.

  L1 regularization encourages sparsity.

  Args:
    scale: A scalar multiplier `Tensor`. 0.0 disables the regularizer.
    scope: An optional scope name.

  Returns:
    A function with signature `l1(weights)` that applies L1 regularization.
    When `scale` is the number 0.0, the returned function ignores its
    argument and returns `None`.

  Raises:
    ValueError: If scale is negative or if scale is an integer.
  """
  if isinstance(scale, numbers.Integral):
    raise ValueError('scale cannot be an integer: %s' % scale)
  # The numeric checks only apply to Python numbers; anything else (e.g. a
  # tensor) is passed through to `convert_to_tensor` below unchecked.
  if isinstance(scale, numbers.Real):
    if scale < 0.:
      raise ValueError('Setting a scale less than 0 on a regularizer: %g' %
                       scale)
    if scale == 0.:
      logging.info('Scale of 0 disables regularizer.')
      return lambda _: None

  def l1(weights, name=None):
    """Applies L1 regularization to weights."""
    # NOTE(review): the `name` argument is overwritten by the name scope
    # below, so a value passed by the caller has no effect.
    with ops.name_scope(scope, 'l1_regularizer', [weights]) as name:
      my_scale = ops.convert_to_tensor(scale,
                                       dtype=weights.dtype.base_dtype,
                                       name='scale')
      return standard_ops.multiply(
          my_scale,
          standard_ops.reduce_sum(standard_ops.abs(weights)),
          name=name)

  return l1


def l2_regularizer(scale, scope=None):
  """Returns a function that can be used to apply L2 regularization to weights.

  Small values of L2 can help prevent overfitting the training data.

  Args:
    scale: A scalar multiplier `Tensor`. 0.0 disables the regularizer.
    scope: An optional scope name.

  Returns:
    A function with signature `l2(weights)` that applies L2 regularization.
    When `scale` is the number 0.0, the returned function ignores its
    argument and returns `None`.

  Raises:
    ValueError: If scale is negative or if scale is an integer.
  """
  if isinstance(scale, numbers.Integral):
    raise ValueError('scale cannot be an integer: %s' % (scale,))
  # As in `l1_regularizer`, only Python numbers are range-checked.
  if isinstance(scale, numbers.Real):
    if scale < 0.:
      raise ValueError('Setting a scale less than 0 on a regularizer: %g.' %
                       scale)
    if scale == 0.:
      logging.info('Scale of 0 disables regularizer.')
      return lambda _: None

  def l2(weights):
    """Applies l2 regularization to weights."""
    with ops.name_scope(scope, 'l2_regularizer', [weights]) as name:
      my_scale = ops.convert_to_tensor(scale,
                                       dtype=weights.dtype.base_dtype,
                                       name='scale')
      return standard_ops.multiply(my_scale, nn.l2_loss(weights), name=name)

  return l2


def l1_l2_regularizer(scale_l1=1.0, scale_l2=1.0, scope=None):
  """Returns a function that can be used to apply L1 L2 regularizations.

  Args:
    scale_l1: A scalar multiplier `Tensor` for L1 regularization.
    scale_l2: A scalar multiplier `Tensor` for L2 regularization.
    scope: An optional scope name. Defaults to 'l1_l2_regularizer'.

  Returns:
    A function with signature `l1_l2(weights)` that applies a weighted sum of
    L1 L2 regularization. If one of the two scales is 0.0, the regularizer of
    the other kind alone is returned.

  Raises:
    ValueError: If a scale is negative or if a scale is an integer.
  """
  if isinstance(scale_l1, numbers.Integral):
    raise ValueError('scale_l1 cannot be an integer: %s' % (scale_l1,))
  if isinstance(scale_l2, numbers.Integral):
    raise ValueError('scale_l2 cannot be an integer: %s' % (scale_l2,))
  scope = scope or 'l1_l2_regularizer'
  # A zero scale makes the corresponding term vanish, so only the other
  # regularizer is built.
  if scale_l1 == 0.:
    return l2_regularizer(scale_l2, scope)
  if scale_l2 == 0.:
    return l1_regularizer(scale_l1, scope)
  return sum_regularizer([l1_regularizer(scale_l1),
                          l2_regularizer(scale_l2)],
                         scope=scope)


def sum_regularizer(regularizer_list, scope=None):
  """Returns a function that applies the sum of multiple regularizers.

  Args:
    regularizer_list: A list of regularizers to apply. `None` entries are
      ignored.
    scope: An optional scope name

  Returns:
    A function with signature `sum_reg(weights)` that applies the
    sum of all the input regularizers, or `None` if the list contains no
    regularizer.
  """
  regularizer_list = [reg for reg in regularizer_list if reg is not None]
  if not regularizer_list:
    return None

  def sum_reg(weights):
    """Applies the sum of all the input regularizers."""
    with ops.name_scope(scope, 'sum_regularizer', [weights]) as name:
      regularizer_tensors = []
      for reg in regularizer_list:
        tensor = reg(weights)
        # Disabled regularizers return None and contribute nothing.
        if tensor is not None:
          regularizer_tensors.append(tensor)
      return math_ops.add_n(
          regularizer_tensors, name=name) if regularizer_tensors else None

  return sum_reg


def apply_regularization(regularizer, weights_list=None):
  """Returns the summed penalty by applying `regularizer` to the `weights_list`.

  Adding a regularization penalty over the layer weights and embedding weights
  can help prevent overfitting the training data. Regularization over layer
  biases is less common/useful, but assuming proper data preprocessing/mean
  subtraction, it usually shouldn't hurt much either.

  The summed penalty is also added to the
  `GraphKeys.REGULARIZATION_LOSSES` collection.

  Args:
    regularizer: A function that takes a single `Tensor` argument and returns
      a scalar `Tensor` output.
    weights_list: List of weights `Tensors` or `Variables` to apply
      `regularizer` over. Defaults to the `GraphKeys.WEIGHTS` collection if
      `None` or empty.

  Returns:
    A scalar representing the overall regularization penalty.

  Raises:
    ValueError: If `regularizer` does not return a scalar output, or if we find
      no weights.
  """
  if not weights_list:
    weights_list = ops.get_collection(ops.GraphKeys.WEIGHTS)
  if not weights_list:
    raise ValueError('No weights to regularize.')
  with ops.name_scope('get_regularization_penalty',
                      values=weights_list) as scope:
    penalties = [regularizer(w) for w in weights_list]
    # A regularizer may return None (e.g. when disabled); count it as zero.
    penalties = [
        p if p is not None else constant_op.constant(0.0) for p in penalties
    ]
    for p in penalties:
      if p.get_shape().ndims != 0:
        raise ValueError('regularizer must return a scalar Tensor instead of a '
                         'Tensor with rank %d.' % p.get_shape().ndims)

    summed_penalty = math_ops.add_n(penalties, name=scope)
    ops.add_to_collection(ops.GraphKeys.REGULARIZATION_LOSSES, summed_penalty)
    return summed_penalty
__all__ = [
    'summarize_tensor',
    'summarize_activation',
    'summarize_tensors',
    'summarize_collection',
    'summarize_variables',
    'summarize_weights',
    'summarize_biases',
    'summarize_activations',
]

# TODO(wicke): add more unit tests for summarization functions.


def _add_scalar_summary(tensor, tag=None):
  """Add a scalar summary operation for the tensor.

  Args:
    tensor: The tensor to summarize.
    tag: The tag to use, if None then use tensor's op's name.

  Returns:
    The created scalar summary.

  Raises:
    ValueError: If the tag is already in use or the rank is not 0.
  """
  tensor.get_shape().assert_has_rank(0)
  tag = tag or '%s_summary' % tensor.op.name
  return summary.scalar(tag, tensor)


def _add_histogram_summary(tensor, tag=None):
  """Add a summary operation for the histogram of a tensor.

  Args:
    tensor: The tensor to summarize.
    tag: The tag to use, if None then use tensor's op's name.

  Returns:
    The created histogram summary.

  Raises:
    ValueError: If the tag is already in use.
  """
  tag = tag or '%s_summary' % tensor.op.name
  return summary.histogram(tag, tensor)


def summarize_activation(op):
  """Summarize an activation.

  Adds a histogram summary of `op` and, depending on the type of the op that
  produced it, extra scalar summaries:

  * 'Relu', 'Softplus' and 'Relu6': the fraction of inputs below zero,
    tagged '<name>/zeros'.
  * 'Relu6': additionally the fraction of inputs above six, tagged
    '<name>/sixes'.

  Args:
    op: The tensor to summarize (assumed to be a layer activation).
  Returns:
    The histogram summary op created to summarize `op`.
  """
  if op.op.type in ('Relu', 'Softplus', 'Relu6'):
    # Using inputs to avoid floating point equality and/or epsilons.
    _add_scalar_summary(
        standard_ops.reduce_mean(
            standard_ops.to_float(
                standard_ops.less(op.op.inputs[
                    0], standard_ops.cast(0.0, op.op.inputs[0].dtype)))),
        '%s/zeros' % op.op.name)
  if op.op.type == 'Relu6':
    _add_scalar_summary(
        standard_ops.reduce_mean(
            standard_ops.to_float(
                standard_ops.greater(op.op.inputs[
                    0], standard_ops.cast(6.0, op.op.inputs[0].dtype)))),
        '%s/sixes' % op.op.name)
  return _add_histogram_summary(op, '%s/activation' % op.op.name)


def summarize_tensor(tensor, tag=None):
  """Summarize a tensor using a suitable summary type.

  This function adds a summary op for `tensor`. The type of summary depends on
  the shape of `tensor`. For scalars, a `scalar_summary` is created, for all
  other tensors, `histogram_summary` is used.

  Args:
    tensor: The tensor to summarize
    tag: The tag to use, if None then use tensor's op's name.

  Returns:
    The summary op created or None for string and boolean tensors.
  """
  # Skips string tensors and boolean tensors (not handled by the summaries).
  if (tensor.dtype.is_compatible_with(dtypes.string) or
      tensor.dtype.base_dtype == dtypes.bool):
    return None

  if tensor.get_shape().ndims == 0:
    # For scalars, use a scalar summary.
    return _add_scalar_summary(tensor, tag)
  else:
    # We may land in here if the rank is still unknown. The histogram won't
    # hurt if this ends up being a scalar.
    return _add_histogram_summary(tensor, tag)


def summarize_tensors(tensors, summarizer=summarize_tensor):
  """Summarize a set of tensors.

  Returns the list of results of calling `summarizer` on each tensor, in
  order; entries may be None when the summarizer skips a tensor.
  """
  return [summarizer(tensor) for tensor in tensors]


def summarize_collection(collection,
                         name_filter=None,
                         summarizer=summarize_tensor):
  """Summarize a graph collection of tensors, possibly filtered by name.

  `name_filter` is a regular expression applied with `re.match`, i.e. it is
  anchored at the start of the op name; None keeps every tensor.
  """
  tensors = []
  for op in ops.get_collection(collection):
    if name_filter is None or re.match(name_filter, op.op.name):
      tensors.append(op)
  return summarize_tensors(tensors, summarizer)


# Utility functions for commonly used collections
summarize_variables = functools.partial(summarize_collection,
                                        ops.GraphKeys.GLOBAL_VARIABLES)

summarize_weights = functools.partial(summarize_collection,
                                      ops.GraphKeys.WEIGHTS)

summarize_biases = functools.partial(summarize_collection, ops.GraphKeys.BIASES)


def summarize_activations(name_filter=None, summarizer=summarize_activation):
  """Summarize activations, using `summarize_activation` to summarize."""
  return summarize_collection(ops.GraphKeys.ACTIVATIONS, name_filter,
                              summarizer)
def __args__(is_training):
    """Return the default keyword arguments for the layers used below.

    Each entry pairs a list of layer functions with the keyword defaults to
    apply to them.
    NOTE(review): presumably consumed by the `set_args` decorator -- confirm
    in `utils.py`.
    """
    return [([batch_norm], {'is_training': is_training}),
            ([bias_add, conv2d], {}),
            ([pool], {'padding': 'SAME'})]


@var_scope('stack')
def _stack(x, filters, blocks, scope=None):
    """Apply `blocks` convolutions, alternating two layer shapes.

    Odd-numbered layers (1, 3, ...) use kernel size 3 with `filters`
    outputs; even-numbered layers use kernel size 1 with `filters // 2`
    outputs. Each layer's scope is its 1-based index.
    """
    for i in range(1, blocks+1):
        if i % 2 > 0:
            x = conv(x, filters, 3, scope=str(i))
        else:
            x = conv(x, filters // 2, 1, scope=str(i))
    return x


@var_scope('darknet19')
@set_args(__args__)
def darknet19(x, is_training=False, classes=1000,
              stem=False, scope=None, reuse=None):
    """Build Darknet19: six conv stacks separated by 2x2 stride-2 pooling.

    Args:
        x: input tensor.
        classes: number of outputs of the final fully-connected layer.
        stem: if True, return the feature map of the last conv stack and
            skip the classification head.
        is_training, scope, reuse: not referenced in the body.
            NOTE(review): presumably consumed by the decorators -- confirm.

    Returns:
        The last feature map when `stem` is True; otherwise the softmax
        output of the head (mean over axes 1 and 2, then `fc`).
    """
    x = _stack(x, 32, 1, scope='conv1')
    x = pool(x, 2, stride=2, scope='pool1')
    x = _stack(x, 64, 1, scope='conv2')
    x = pool(x, 2, stride=2, scope='pool2')
    x = _stack(x, 128, 3, scope='conv3')
    x = pool(x, 2, stride=2, scope='pool3')
    x = _stack(x, 256, 3, scope='conv4')
    x = pool(x, 2, stride=2, scope='pool4')
    x = _stack(x, 512, 5, scope='conv5')
    x = pool(x, 2, stride=2, scope='pool5')
    x = _stack(x, 1024, 5, scope='conv6')
    if stem: return x

    x = reduce_mean(x, [1, 2], name='avgpool')
    x = fc(x, classes, scope='logits')
    x = softmax(x, name='probs')
    return x


@var_scope('tinydarknet19')
@set_args(__args__)
def tinydarknet19(x, is_training=False, classes=1000,
                  stem=False, scope=None, reuse=None):
    """Build the tiny variant: six single convs separated by pooling.

    Same interface as `darknet19`; each stage is one kernel-size-3
    convolution, with the output count doubling from 16 to 512.
    """
    x = conv(x, 16, 3, scope='conv1')
    x = pool(x, 2, stride=2, scope='pool1')
    x = conv(x, 32, 3, scope='conv2')
    x = pool(x, 2, stride=2, scope='pool2')
    x = conv(x, 64, 3, scope='conv3')
    x = pool(x, 2, stride=2, scope='pool3')
    x = conv(x, 128, 3, scope='conv4')
    x = pool(x, 2, stride=2, scope='pool4')
    x = conv(x, 256, 3, scope='conv5')
    x = pool(x, 2, stride=2, scope='pool5')
    x = conv(x, 512, 3, scope='conv6')
    if stem: return x

    x = reduce_mean(x, [1, 2], name='avgpool')
    x = fc(x, classes, scope='logits')
    x = softmax(x, name='probs')
    return x


# Simple alias.
Darknet19 = darknet19
TinyDarknet19 = tinydarknet19
def classidx(classname):
    """Return the zero-based position of `classname` in `classnames`.

    Raises:
        KeyError: if `classname` is not a known class name.
    """
    return dict((k, i) for (i, k) in enumerate(classnames))[classname]


def area(box):
    """Return the area of one box (1-D input) or of every row (2-D input).

    Boxes are `[x1, y1, x2, y2]`; the `+ 1.` counts both corner
    coordinates as part of the box.
    """
    if box.ndim == 1:
        return (box[2] - box[0] + 1.) * (box[3] - box[1] + 1.)
    else:
        return (box[:, 2] - box[:, 0] + 1.) * (box[:, 3] - box[:, 1] + 1.)


def get_files(data_dir, data_name, total_num=None):
    """Return `(fileids, files)` for the annotation set `data_name`.

    The `COCO` index is built from
    `<data_dir>/annotations/instances_<data_name>.json` on first use and
    cached in the module-level `metas` dict.

    Args:
        data_dir: dataset root directory.
        data_name: annotation split name (part of the json file name).
        total_num: optional cap on the number of images returned.

    Returns:
        A list of image ids and the list of matching file names.
    """
    assert COCO is not None, '`datasets.coco` requires `pycocotools`.'
    if data_name not in metas:
        metas[data_name] = COCO("%s/annotations/instances_%s.json" %
                                (data_dir, data_name))
    images = metas[data_name].imgs
    # Materialise the keys: a dict view cannot be sliced on Python 3, and
    # both this function and `evaluate` slice the returned ids.
    fileids = list(images.keys())
    if total_num is not None:
        fileids = fileids[:total_num]
    files = [images[i]['file_name'] for i in fileids]
    return fileids, files


def get_annotations(data_dir, data_name, ids):
    """Collect clipped ground-truth boxes for the image ids in `ids`.

    Each annotation's `bbox` is read as `[x, y, width, height]`, turned
    into `[x1, y1, x2, y2]` and clipped to the image size. Annotations with
    zero area or an empty clipped box are dropped.

    Returns:
        A dict mapping image id to a list of 80 per-class lists of
        `{'bbox': [x1, y1, x2, y2]}` dicts. The class index is the position
        of the annotation's `category_id` in `getCatIds()`.
    """
    assert COCO is not None, '`datasets.coco` requires `pycocotools`.'
    if data_name not in metas:
        metas[data_name] = COCO("%s/annotations/instances_%s.json" %
                                (data_dir, data_name))
    coco = metas[data_name]
    cmap = dict([(b, a) for (a, b) in enumerate(coco.getCatIds())])
    annotations = {}
    for i in ids:
        annids = coco.getAnnIds(imgIds=i, iscrowd=None)
        objs = coco.loadAnns(annids)
        annotations[i] = [[] for _ in range(80)]
        width = coco.imgs[i]['width']
        height = coco.imgs[i]['height']
        for obj in objs:
            x1 = np.max((0, obj['bbox'][0]))
            y1 = np.max((0, obj['bbox'][1]))
            x2 = np.min((width - 1, x1 + np.max((0, obj['bbox'][2] - 1))))
            y2 = np.min((height - 1, y1 + np.max((0, obj['bbox'][3] - 1))))
            if obj['area'] > 0 and x2 >= x1 and y2 >= y1:
                obj_struct = {'bbox': [x1, y1, x2, y2]}
                cidx = cmap[obj['category_id']]
                annotations[i][cidx].append(obj_struct)
    return annotations


def load(data_dir, data_name, min_shorter_side=None, max_longer_side=1000,
         batch_size=1, total_num=None):
    """Yield `(image, scale)` pairs read with OpenCV.

    The image is optionally rescaled so its shorter side equals
    `min_shorter_side`, then capped so its longer side does not exceed
    `max_longer_side`. `image` has shape `(1, H, W, C)` float32 and `scale`
    is a one-element float32 array.

    NOTE(review): only `files[batch_start]` is read per step, so with
    `batch_size > 1` the other files of each stride are skipped.
    """
    assert cv2 is not None, '`load` requires `cv2`.'
    _, files = get_files(data_dir, data_name, total_num)
    total_num = len(files)

    for batch_start in range(0, total_num, batch_size):
        x = cv2.imread("%s/%s/%s" % (data_dir, data_name, files[batch_start]))
        if min_shorter_side is not None:
            scale = float(min_shorter_side) / np.min(x.shape[:2])
        else:
            scale = 1.0
        if round(scale * np.max(x.shape[:2])) > max_longer_side:
            scale = float(max_longer_side) / np.max(x.shape[:2])
        x = cv2.resize(x, None, None, fx=scale, fy=scale,
                       interpolation=cv2.INTER_LINEAR)
        x = np.array([x], dtype=np.float32)
        scale = np.array([scale], dtype=np.float32)
        yield x, scale
        del x


def evaluate_class(ids, scores, boxes, annotations, files, ovthresh):
    """Compute 11-point interpolated AP for a single class.

    Args:
        ids: array with the image id of every detection.
        scores: array of detection confidences.
        boxes: `(n, 4)` array of detected `[x1, y1, x2, y2]` boxes.
        annotations: dict mapping image id to that image's ground-truth
            objects for this class (dicts with a `'bbox'` entry).
        files: the image ids taking part in the evaluation.
        ovthresh: IoU a detection must exceed to count as a match.

    Returns:
        `(ap, precision, recall)`; precision and recall are cumulative
        arrays ordered by decreasing score.
    """
    if scores.shape[0] == 0:
        return 0.0, np.zeros(len(ids)), np.zeros(len(ids))

    # COCO has no "difficult" flag, so every ground-truth object counts.
    total = sum(len(annotations[filename]) for filename in files)
    detected = dict((filename, [False] * len(annotations[filename]))
                    for filename in files)

    # sort by confidence
    sorted_ind = np.argsort(-scores)
    ids = ids[sorted_ind]
    boxes = boxes[sorted_ind, :]

    # go down dets and mark TPs and FPs
    tp_list = []
    fp_list = []
    for d in range(len(ids)):
        actual = np.array([x['bbox'] for x in annotations[ids[d]]])

        if actual.size > 0:
            iw = np.maximum(np.minimum(actual[:, 2], boxes[d, 2]) -
                            np.maximum(actual[:, 0], boxes[d, 0]) + 1, 0)
            ih = np.maximum(np.minimum(actual[:, 3], boxes[d, 3]) -
                            np.maximum(actual[:, 1], boxes[d, 1]) + 1, 0)
            inters = iw * ih
            overlaps = inters / (area(actual) + area(boxes[d, :]) - inters)
            jmax = np.argmax(overlaps)
            ovmax = overlaps[jmax]
        else:
            ovmax = -np.inf

        tp = 0.
        fp = 0.
        if ovmax > ovthresh:
            # Each ground-truth box may be claimed once; repeats are FPs.
            if not detected[ids[d]][jmax]:
                tp = 1.
                detected[ids[d]][jmax] = True
            else:
                fp = 1.
        else:
            fp = 1.
        tp_list.append(tp)
        fp_list.append(fp)

    tp = np.cumsum(tp_list)
    fp = np.cumsum(fp_list)
    # With no ground truth `tp` is all zeros; dividing by 1 instead of 0
    # yields zero recall rather than NaN plus a RuntimeWarning.
    recall = tp / float(max(total, 1))
    precision = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = np.mean([0 if np.sum(recall >= t) == 0
                  else np.max(precision[recall >= t])
                  for t in np.linspace(0, 1, 11)])

    return ap, precision, recall


def evaluate(results, data_dir, data_name, ovthresh=0.5, verbose=True):
    """Evaluate detections on the first `len(results)` images.

    Args:
        results: `results[i][c]` is a 2-D array of detections of class `c`
            in image `i`; the first four columns are the box and the last
            column is the score.
        data_dir: dataset root directory.
        data_name: annotation split name.
        ovthresh: IoU threshold forwarded to `evaluate_class`.
        verbose: unused; kept for interface compatibility.

    Returns:
        A text table with per-class AP followed by the mean AP.
    """
    fileids, _ = get_files(data_dir, data_name)
    fileids = fileids[:len(results)]
    annotations = get_annotations(data_dir, data_name, fileids)
    aps = []

    for c in range(80):
        ids = []
        scores = []
        boxes = []
        for (i, fileid) in enumerate(fileids):
            pred = results[i][c]
            if pred.shape[0] > 0:
                ids.extend([fileid] * pred.shape[0])
                scores.extend(pred[:, -1])
                # Shift predictions by one pixel, as the per-row loop did.
                boxes.extend(pred[:, :4] + 1)
        ids = np.array(ids)
        scores = np.array(scores)
        boxes = np.array(boxes)
        _annotations = dict((k, v[c]) for (k, v) in annotations.items())
        ap, _, _ = evaluate_class(ids, scores, boxes, _annotations,
                                  fileids, ovthresh)
        aps += [ap]

    header = ''.join("| %6s " % classnames[c][:6] for c in range(80))
    rule = '|--------' * len(aps)
    values = ''.join("| %.4f " % ap for ap in aps)
    return "%s|\n%s|\n%s|\nMean = %.4f" % (header, rule, values, np.mean(aps))
""" 3 | from __future__ import absolute_import 4 | 5 | import os 6 | import numpy as np 7 | import concurrent.futures as cf 8 | 9 | from os.path import isfile, join 10 | from ..utils import crop, load_img 11 | 12 | 13 | def get_files(data_dir, data_name, max_rows=None): 14 | """Reads a \`data_name.txt\` (e.g., \`val.txt\`) from 15 | http://www.image-net.org/challenges/LSVRC/2012/ 16 | """ 17 | files, labels = np.split( 18 | np.genfromtxt("%s/%s.txt" % (data_dir, data_name), 19 | dtype=np.str, max_rows=max_rows), 20 | [1], axis=1) 21 | files = files.flatten() 22 | labels = np.asarray(labels.flatten(), dtype=np.int) 23 | return files, labels 24 | 25 | 26 | def get_labels(data_dir, data_name, max_rows=None): 27 | _, labels = get_files(data_dir, data_name, max_rows) 28 | return labels 29 | 30 | 31 | def load_single(filename, resize_wh, crop_wh, crop_locs): 32 | img = load_img(filename, target_size=resize_wh) 33 | return crop(img, crop_wh, crop_locs) 34 | 35 | 36 | def load(data_dir, data_name, batch_size, resize_wh, 37 | crop_locs, crop_wh, total_num=None): 38 | 39 | files, labels = get_files(data_dir, data_name, total_num) 40 | total_num = len(labels) 41 | 42 | for batch_start in range(0, total_num, batch_size): 43 | 44 | data_spec = [batch_size, 1, crop_wh, crop_wh, 3] 45 | if isinstance(crop_locs, list): 46 | data_spec[1] = len(crop_locs) 47 | elif crop_locs == 10: 48 | data_spec[1] = 10 49 | X = np.zeros(data_spec, np.float32) 50 | 51 | jobs = [] 52 | with cf.ThreadPoolExecutor(max_workers=48) as executor: 53 | for (k, f) in enumerate(files[batch_start:batch_start+batch_size]): 54 | filename = os.path.join("%s/ILSVRC2012_img_val" % data_dir, f) 55 | if os.path.isfile(filename): 56 | jobs.append(executor.submit( 57 | load_single, (*(filename, resize_wh, crop_wh, crop_locs)))) 58 | 59 | cf.wait(jobs) 60 | 61 | for (k, out) in enumerate(jobs): 62 | X[k] = out.result() 63 | 64 | yield X.reshape((-1, crop_wh, crop_wh, 3)), \ 65 | 
labels[batch_start:batch_start+batch_size] 66 | 67 | del X 68 | -------------------------------------------------------------------------------- /tensornets/datasets/voc.names: -------------------------------------------------------------------------------- 1 | aeroplane 2 | bicycle 3 | bird 4 | boat 5 | bottle 6 | bus 7 | car 8 | cat 9 | chair 10 | cow 11 | diningtable 12 | dog 13 | horse 14 | motorbike 15 | person 16 | pottedplant 17 | sheep 18 | sofa 19 | train 20 | tvmonitor 21 | -------------------------------------------------------------------------------- /tensornets/datasets/voc.py: -------------------------------------------------------------------------------- 1 | """Collection of PASCAL VOC utils 2 | 3 | The codes were refactored from [py-faster-rcnn](https://github.com/ 4 | rbgirshick/py-faster-rcnn/blob/master/lib/datasets/voc_eval.py) and 5 | [darkflow](https://github.com/thtrieu/darkflow/blob/master/darkflow/ 6 | net/yolov2/data.py). Especially, each part was from the following: 7 | 8 | 1. get_annotations: parse_rec in py-faster-rcnn 9 | 2. evaluate_class: voc_ap in py-faster-rcnn 10 | 3. evaluate: voc_eval in py-faster-rcnn 11 | 4. load_train: _batch in darkflow 12 | """ 13 | from __future__ import division 14 | 15 | import os 16 | import numpy as np 17 | import xml.etree.ElementTree as ET 18 | 19 | try: 20 | import cv2 21 | except ImportError: 22 | cv2 = None 23 | 24 | try: 25 | xrange # Python 2 26 | except NameError: 27 | xrange = range # Python 3 28 | 29 | try: 30 | reduce 31 | except NameError: 32 | from functools import reduce 33 | 34 | 35 | with open(os.path.join(os.path.dirname(__file__), 'voc.names'), 'r') as f: 36 | classnames = [line.rstrip() for line in f.readlines()] 37 | 38 | 39 | def classidx(classname): 40 | return dict((k, i) for (i, k) in enumerate(classnames))[classname] 41 | 42 | 43 | def area(box): 44 | if box.ndim == 1: 45 | return (box[2] - box[0] + 1.) * (box[3] - box[1] + 1.) 
def classidx(classname):
    """Return the zero-based position of `classname` in `classnames`.

    Raises:
        KeyError: if `classname` is not a known class name.
    """
    return dict((k, i) for (i, k) in enumerate(classnames))[classname]


def area(box):
    """Return the area of one box (1-D input) or of every row (2-D input).

    Boxes are `[x1, y1, x2, y2]`; the `+ 1.` counts both corner
    coordinates as part of the box.
    """
    if box.ndim == 1:
        return (box[2] - box[0] + 1.) * (box[3] - box[1] + 1.)
    else:
        return (box[:, 2] - box[:, 0] + 1.) * (box[:, 3] - box[:, 1] + 1.)


def get_files(data_dir, data_name, total_num=None):
    """Return the image names listed in `ImageSets/Main/<data_name>.txt`.

    Args:
        data_dir: dataset root directory.
        data_name: image-set name, e.g. the split.
        total_num: optional cap on the number of names returned.
    """
    with open("%s/ImageSets/Main/%s.txt" % (data_dir, data_name)) as f:
        files = [x.strip() for x in f]
    if total_num is not None:
        files = files[:total_num]
    return files


def get_annotations(data_dir, files):
    """Parse `Annotations/<name>.xml` for every name in `files`.

    Returns:
        A dict mapping file name to a list of 20 per-class lists. Each
        object is a dict with `name`, `pose`, `truncated`, `difficult` and
        `bbox` (`[xmin, ymin, xmax, ymax]` as integers).
    """
    annotations = {}
    for filename in files:
        tree = ET.parse("%s/Annotations/%s.xml" % (data_dir, filename))
        annotations[filename] = [[] for _ in range(20)]
        for obj in tree.findall('object'):
            obj_struct = {}
            obj_struct['name'] = obj.find('name').text
            obj_struct['pose'] = obj.find('pose').text
            obj_struct['truncated'] = int(obj.find('truncated').text)
            obj_struct['difficult'] = int(obj.find('difficult').text)
            bbox = obj.find('bndbox')
            obj_struct['bbox'] = [int(bbox.find('xmin').text),
                                  int(bbox.find('ymin').text),
                                  int(bbox.find('xmax').text),
                                  int(bbox.find('ymax').text)]
            cidx = classidx(obj_struct['name'])
            annotations[filename][cidx].append(obj_struct)
    return annotations


def load(data_dir, data_name, min_shorter_side=None, max_longer_side=1000,
         batch_size=1, total_num=None):
    """Yield `(image, scale)` pairs read from `JPEGImages/`.

    The image is optionally rescaled so its shorter side equals
    `min_shorter_side`, then capped so its longer side does not exceed
    `max_longer_side`. `image` has shape `(1, H, W, C)` float32 and `scale`
    is a one-element float32 array.

    NOTE(review): only `files[batch_start]` is read per step, so with
    `batch_size > 1` the other files of each stride are skipped.
    """
    assert cv2 is not None, '`load` requires `cv2`.'
    files = get_files(data_dir, data_name, total_num)
    total_num = len(files)

    for batch_start in range(0, total_num, batch_size):
        x = cv2.imread("%s/JPEGImages/%s.jpg" % (data_dir, files[batch_start]))
        if min_shorter_side is not None:
            scale = float(min_shorter_side) / np.min(x.shape[:2])
        else:
            scale = 1.0
        if round(scale * np.max(x.shape[:2])) > max_longer_side:
            scale = float(max_longer_side) / np.max(x.shape[:2])
        x = cv2.resize(x, None, None, fx=scale, fy=scale,
                       interpolation=cv2.INTER_LINEAR)
        x = np.array([x], dtype=np.float32)
        scale = np.array([scale], dtype=np.float32)
        yield x, scale
        del x


def evaluate_class(ids, scores, boxes, annotations, files, ovthresh):
    """Compute 11-point interpolated AP for a single class.

    Args:
        ids: array with the file name of every detection.
        scores: array of detection confidences.
        boxes: `(n, 4)` array of detected `[x1, y1, x2, y2]` boxes.
        annotations: dict mapping file name to that image's ground-truth
            objects for this class (dicts with `'bbox'` and `'difficult'`).
        files: the file names taking part in the evaluation.
        ovthresh: IoU a detection must exceed to count as a match.

    Returns:
        `(ap, precision, recall)`; precision and recall are cumulative
        arrays ordered by decreasing score. Matches against objects flagged
        `difficult` count neither as true nor as false positives.
    """
    if scores.shape[0] == 0:
        return 0.0, np.zeros(len(ids)), np.zeros(len(ids))

    # extract gt objects for this class
    diff = [np.array([obj['difficult'] for obj in annotations[filename]])
            for filename in files]
    total = sum(int(np.sum(x == 0)) for x in diff)
    detected = dict(zip(files, [[False] * len(x) for x in diff]))

    # sort by confidence
    sorted_ind = np.argsort(-scores)
    ids = ids[sorted_ind]
    boxes = boxes[sorted_ind, :]

    # go down dets and mark TPs and FPs
    tp_list = []
    fp_list = []
    for d in range(len(ids)):
        actual = np.array([x['bbox'] for x in annotations[ids[d]]])
        difficult = np.array([x['difficult'] for x in annotations[ids[d]]])

        if actual.size > 0:
            iw = np.maximum(np.minimum(actual[:, 2], boxes[d, 2]) -
                            np.maximum(actual[:, 0], boxes[d, 0]) + 1, 0)
            ih = np.maximum(np.minimum(actual[:, 3], boxes[d, 3]) -
                            np.maximum(actual[:, 1], boxes[d, 1]) + 1, 0)
            inters = iw * ih
            overlaps = inters / (area(actual) + area(boxes[d, :]) - inters)
            jmax = np.argmax(overlaps)
            ovmax = overlaps[jmax]
        else:
            ovmax = -np.inf

        tp = 0.
        fp = 0.
        if ovmax > ovthresh:
            if difficult[jmax] == 0:
                # Each ground-truth box may be claimed once.
                if not detected[ids[d]][jmax]:
                    tp = 1.
                    detected[ids[d]][jmax] = True
                else:
                    fp = 1.
        else:
            fp = 1.
        tp_list.append(tp)
        fp_list.append(fp)

    tp = np.cumsum(tp_list)
    fp = np.cumsum(fp_list)
    # With no countable ground truth `tp` is all zeros; dividing by 1
    # instead of 0 yields zero recall rather than NaN plus a warning.
    recall = tp / float(max(total, 1))
    precision = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = np.mean([0 if np.sum(recall >= t) == 0
                  else np.max(precision[recall >= t])
                  for t in np.linspace(0, 1, 11)])

    return ap, precision, recall


def evaluate(results, data_dir, data_name, ovthresh=0.5, verbose=True):
    """Evaluate detections on the first `len(results)` listed images.

    Args:
        results: `results[i][c]` is a 2-D array of detections of class `c`
            in image `i`; the first four columns are the box and the last
            column is the score.
        data_dir: dataset root directory.
        data_name: image-set name.
        ovthresh: IoU threshold forwarded to `evaluate_class`.
        verbose: unused; kept for interface compatibility.

    Returns:
        A text table with per-class AP followed by the mean AP.
    """
    files = get_files(data_dir, data_name)
    files = files[:len(results)]
    annotations = get_annotations(data_dir, files)
    aps = []

    for c in range(20):
        ids = []
        scores = []
        boxes = []
        for (i, filename) in enumerate(files):
            pred = results[i][c]
            if pred.shape[0] > 0:
                ids.extend([filename] * pred.shape[0])
                scores.extend(pred[:, -1])
                # Shift predictions by one pixel, as the per-row loop did.
                boxes.extend(pred[:, :4] + 1)
        ids = np.array(ids)
        scores = np.array(scores)
        boxes = np.array(boxes)
        _annotations = dict((k, v[c]) for (k, v) in annotations.items())
        ap, _, _ = evaluate_class(ids, scores, boxes, _annotations,
                                  files, ovthresh)
        aps += [ap]

    header = ''.join("| %6s " % classnames[c][:6] for c in range(20))
    rule = '|--------' * len(aps)
    values = ''.join("| %.4f " % ap for ap in aps)
    return "%s|\n%s|\n%s|\nMean = %.4f" % (header, rule, values, np.mean(aps))


def load_train(data_dir, data_name,
               batch_size=64, shuffle=True,
               target_size=416, anchors=5, classes=20,
               total_num=None, dtype=np.float32):
    """Endlessly generate YOLOv2 training batches.

    Args:
        data_dir: dataset root, or a list of roots whose listings are
            concatenated.
        data_name: image-set name read from every root.
        batch_size: images per batch.
        shuffle: when exactly `True`, draw a new random order every epoch.
        target_size: int or `(height, width)` of the network input; the
            target grid is `target_size // 32`.
        anchors: number of anchor slots every target is repeated over.
        classes: number of classes in the one-hot targets.
        total_num: optional cap on the files taken from each root.
        dtype: dtype of all generated arrays.

    Yields:
        `(imgs, [probs, confs, coord, proid, areas, upleft, botright])` for
        each full batch, and `(None, None)` once per pass over the data
        when fewer than `batch_size` files remain.

    Raises:
        ValueError: if `batch_size` exceeds the number of files, in which
            case no batch could ever be produced.
    """
    assert cv2 is not None, '`load_train` requires `cv2`.'
    if isinstance(data_dir, list):
        files = []
        annotations = {}
        for d in data_dir:
            files.append(get_files(d, data_name, total_num))
            annotations.update(get_annotations(d, files[-1]))
        # dirs[k] is the index in `data_dir` that the k-th file came from.
        # Builtin `int` replaces the `np.int` alias that NumPy removed.
        dirs = np.concatenate([i * np.ones(len(f), dtype=int)
                               for (i, f) in enumerate(files)])
        files = [f for per_dir in files for f in per_dir]
    else:
        files = get_files(data_dir, data_name, total_num)
        annotations = get_annotations(data_dir, files)
        dirs = np.zeros(len(files), dtype=int)
        data_dir = [data_dir]  # put in list for consistent further processing

    total_num = len(files)
    if batch_size > total_num:
        raise ValueError('`batch_size` (%d) exceeds the number of files (%d).'
                         % (batch_size, total_num))

    # Merge the per-class lists into one flat object list per file.
    annotations = dict(
        (f, [obj for per_class in per_file for obj in per_class])
        for (f, per_file) in annotations.items())

    if isinstance(target_size, int):
        target_size = (target_size, target_size)
    else:
        target_size = tuple(target_size)
    feature_size = [x // 32 for x in target_size]
    cells = feature_size[0] * feature_size[1]

    b = 0
    while True:
        if b + batch_size > total_num:
            # Not enough files left: signal the end of the pass and restart
            # so that the next pass draws a fresh order below.
            b = 0
            yield None, None
            continue
        if b == 0:
            if shuffle is True:
                idx = np.random.permutation(total_num)
            else:
                idx = np.arange(total_num)

        slots = (batch_size, cells, anchors)
        imgs = np.zeros((batch_size,) + target_size + (3,), dtype=dtype)
        probs = np.zeros(slots + (classes,), dtype=dtype)
        confs = np.zeros(slots, dtype=dtype)
        coord = np.zeros(slots + (4,), dtype=dtype)
        proid = np.zeros(slots + (classes,), dtype=dtype)
        prear = np.zeros((batch_size, cells, 4), dtype=dtype)
        areas = np.zeros(slots, dtype=dtype)
        upleft = np.zeros(slots + (2,), dtype=dtype)
        botright = np.zeros(slots + (2,), dtype=dtype)

        for i in range(batch_size):
            k = idx[b + i]
            d = data_dir[dirs[k]]
            f = files[k]
            x = cv2.imread("%s/JPEGImages/%s.jpg" % (d, f))
            h, w = x.shape[:2]
            cellx = 1. * w / feature_size[1]
            celly = 1. * h / feature_size[0]

            # Each entry: [class, x offset in cell, y offset in cell,
            #              sqrt(rel. width), sqrt(rel. height), cell index]
            processed_objs = []
            for obj in annotations[f]:
                bbox = obj['bbox']
                centerx = .5 * (bbox[0] + bbox[2])  # xmin, xmax
                centery = .5 * (bbox[1] + bbox[3])  # ymin, ymax
                cx = centerx / cellx
                cy = centery / celly
                if cx >= feature_size[1] or cy >= feature_size[0]:
                    continue
                processed_objs += [[
                    classidx(obj['name']),
                    cx - np.floor(cx),  # centerx
                    cy - np.floor(cy),  # centery
                    np.sqrt(float(bbox[2] - bbox[0]) / w),
                    np.sqrt(float(bbox[3] - bbox[1]) / h),
                    int(np.floor(cy) * feature_size[1] + np.floor(cx))
                ]]

            # Calculate placeholders' values
            for obj in processed_objs:
                cell = obj[5]
                probs[i, cell, :, :] = [[0.] * classes] * anchors
                probs[i, cell, :, obj[0]] = 1.
                proid[i, cell, :, :] = [[1.] * classes] * anchors
                coord[i, cell, :, :] = [obj[1:5]] * anchors
                prear[i, cell, 0] = obj[1] - obj[3]**2 * .5 * feature_size[1]
                prear[i, cell, 1] = obj[2] - obj[4]**2 * .5 * feature_size[0]
                prear[i, cell, 2] = obj[1] + obj[3]**2 * .5 * feature_size[1]
                prear[i, cell, 3] = obj[2] + obj[4]**2 * .5 * feature_size[0]
                confs[i, cell, :] = [1.] * anchors

            # Finalise the placeholders' values
            ul = np.expand_dims(prear[i, :, 0:2], 1)
            br = np.expand_dims(prear[i, :, 2:4], 1)
            wh = br - ul
            box_area = wh[:, :, 0] * wh[:, :, 1]
            upleft[i, :, :, :] = np.concatenate([ul] * anchors, 1)
            botright[i, :, :, :] = np.concatenate([br] * anchors, 1)
            areas[i, :, :] = np.concatenate([box_area] * anchors, 1)

            # OpenCV takes the size as (width, height), whereas
            # `target_size` is (height, width) like the `imgs` buffer.
            imgs[i] = cv2.resize(x, (target_size[1], target_size[0]),
                                 interpolation=cv2.INTER_LINEAR)
        yield imgs, [probs, confs, coord, proid, areas, upleft, botright]
        b += batch_size
def __args__(is_training):
    """Build the (layers, kwargs) pairs handed to the `set_args` decorator.

    NOTE(review): presumably these become default layer arguments inside the
    decorated model functions; confirm against `utils.set_args`.
    """
    pool_defaults = ([avg_pool2d, max_pool2d], {'scope': 'pool'})
    bn_defaults = ([batch_norm], {'scale': True, 'is_training': is_training,
                                  'epsilon': 1e-5, 'scope': 'bn'})
    conv_defaults = ([conv2d], {'padding': 'VALID', 'activation_fn': None,
                                'biases_initializer': None, 'scope': 'conv'})
    fc_defaults = ([fc], {'activation_fn': None, 'scope': 'fc'})
    return [pool_defaults, bn_defaults, conv_defaults, fc_defaults]


def densenet(x, blocks, is_training, classes, stem, scope=None, reuse=None):
    """Assemble a DenseNet from four dense stages.

    Args:
        x: input tensor.
        blocks: four block counts, one per dense stage.
        is_training: not read here (the variants' `set_args` decorator
            receives `__args__`).
        classes: width of the final `fc` layer.
        stem: when true, return the features after the last batch norm and
            ReLU instead of class probabilities.
        scope, reuse: not read here.

    Returns:
        The feature tensor (`stem` true) or the softmax output.
    """
    # Entry: padded 7x7 stride-2 conv, then padded 3x3 stride-2 max pool.
    x = pad(x, pad_info(7), name='conv1/pad')
    x = conv(x, 64, 7, stride=2, scope='conv1')
    x = pad(x, pad_info(3), name='pool1/pad')
    x = max_pool2d(x, 3, stride=2, scope='pool1')

    # The first three dense stages are each followed by a transition.
    staged = [(blocks[0], 'conv2', 'pool2'),
              (blocks[1], 'conv3', 'pool3'),
              (blocks[2], 'conv4', 'pool4')]
    for depth, dense_scope, transition_scope in staged:
        x = dense(x, depth, scope=dense_scope)
        x = transition(x, scope=transition_scope)
    x = dense(x, blocks[3], scope='conv5')

    x = relu(batch_norm(x))
    if stem:
        return x

    pooled = reduce_mean(x, [1, 2], name='avgpool')
    logits = fc(pooled, classes, scope='logits')
    return softmax(logits, name='probs')


@var_scope('densenet121')
@set_args(__args__)
def densenet121(x, is_training=False, classes=1000,
                stem=False, scope=None, reuse=None):
    """DenseNet with block counts [6, 12, 24, 16]."""
    depths = [6, 12, 24, 16]
    return densenet(x, depths, is_training, classes, stem, scope, reuse)


@var_scope('densenet169')
@set_args(__args__)
def densenet169(x, is_training=False, classes=1000,
                stem=False, scope=None, reuse=None):
    """DenseNet with block counts [6, 12, 32, 32]."""
    depths = [6, 12, 32, 32]
    return densenet(x, depths, is_training, classes, stem, scope, reuse)


@var_scope('densenet201')
@set_args(__args__)
def densenet201(x, is_training=False, classes=1000,
                stem=False, scope=None, reuse=None):
    """DenseNet with block counts [6, 12, 48, 32]."""
    depths = [6, 12, 48, 32]
    return densenet(x, depths, is_training, classes, stem, scope, reuse)


@var_scope('dense')
def dense(x, blocks, scope=None):
    """Apply `blocks` dense blocks, scoped 'block1', 'block2', ..."""
    for number in range(1, blocks + 1):
        x = block(x, scope="block%d" % number)
    return x


@var_scope('transition')
def transition(x, reduction=0.5, scope=None):
    """Batch norm, ReLU, channel-reducing 1x1 conv, then 2x2 average pool.

    The 1x1 conv outputs `int(channels * reduction)` filters.
    """
    x = relu(batch_norm(x))
    # The static channel count is read differently depending on
    # `tf_later_than('2')`.
    if tf_later_than('2'):
        infilters = int(x.shape[-1])
    else:
        infilters = x.shape[-1].value
    outfilters = int(infilters * reduction)
    x = conv2d(x, outfilters, 1, stride=1)
    return avg_pool2d(x, 2, stride=2, scope='pool')


@var_scope('block')
def block(x, growth_rate=32, scope=None):
    """Concatenate `growth_rate` newly computed channels onto `x`.

    The new branch is batch norm, ReLU, a 1x1 `conv` with
    `4 * growth_rate` filters and a 3x3 SAME-padded `conv2d` with
    `growth_rate` filters.
    """
    branch = relu(batch_norm(x))
    branch = conv(branch, 4 * growth_rate, 1, stride=1, scope='1')
    branch = conv2d(branch, growth_rate, 3, stride=1,
                    padding='SAME', scope='2/conv')
    return concat([x, branch], axis=3, name='out')


# Simple alias.
DenseNet121 = densenet121
DenseNet169 = densenet169
DenseNet201 = densenet201
def __args_yolo__(is_training):
    """Return the (layers, kwargs) pairs passed to `set_args` for YOLO models.

    NOTE(review): presumably applied as default layer arguments inside the
    decorated functions; confirm against `utils.set_args`.
    """
    return [([batch_norm], {'is_training': is_training}),
            ([bias_add, conv2d], {}),
            ([max_pool2d], {'padding': 'SAME'})]


def __args_rcnn__(is_training):
    """Return the (layers, kwargs) pairs passed to `set_args` for Faster R-CNN.

    NOTE(review): presumably applied as default layer arguments inside the
    decorated function; confirm against `utils.set_args`.
    """
    return [([conv2d], {'activation_fn': None, 'scope': 'conv'}),
            ([dropout], {'is_training': is_training}),
            ([fc], {'activation_fn': None, 'scope': 'fc'})]


@var_scope('genYOLOv2')
@set_args(__args_yolo__)
def yolov2(x, stem_fn, stem_out=None, is_training=False, classes=20,
           scope=None, reuse=None):
    """Build a YOLOv2 detection head on top of the backbone `stem_fn`.

    Args:
        x: input tensor.
        stem_fn: backbone builder, called as
            `stem_fn(x, is_training, stem=True, scope='stem')`; its result
            must expose a `p` attribute used for the passthrough branch.
        stem_out: optional argument for `remove_head`, applied to the
            backbone output when given.
        is_training: forwarded to the backbone; appended to `inputs` when
            it is a `tf.Tensor`.
        classes: number of classes; 20 selects the 'yolov2voc' options.
        scope, reuse: not read in this body.

    Returns:
        The output tensor with `aliases`, `get_boxes`, `stem`, `inputs` and
        `loss` attributes attached.
    """
    inputs = x
    opt = opts('yolov2' + data_name(classes))
    stem = x = stem_fn(x, is_training, stem=True, scope='stem')
    # Read the passthrough source before `x` is replaced below.
    p = x.p

    if stem_out is not None:
        stem = x = remove_head(x, stem_out)

    x = darkconv(x, 1024, 3, scope='conv7')
    x = darkconv(x, 1024, 3, scope='conv8')

    # Passthrough branch, concatenated with the main path below.
    p = darkconv(p, 64, 1, scope='conv5a')
    p = local_flatten(p, 2, name='flat5a')

    x = concat([p, x], axis=3, name='concat')
    x = darkconv(x, 1024, 3, scope='conv9')
    # (classes + 5) outputs for each of 5 boxes per cell.
    x = darkconv(x, (classes + 5) * 5, 1, onlyconv=True, scope='linear')
    x.aliases = []

    def get_boxes(*args, **kwargs):
        """Call `yolo_boxes` with this model's options prepended."""
        return yolo_boxes(opt, *args, **kwargs)
    x.get_boxes = get_boxes
    x.stem = stem
    x.inputs = [inputs]
    x.inputs += v2_inputs(x.shape[1:3], opt['num'], classes, x.dtype)
    if isinstance(is_training, tf.Tensor):
        x.inputs.append(is_training)
    x.loss = v2_loss(x, opt['anchors'], classes)
    return x


def data_name(classes):
    """Return the option-name suffix: 'voc' for 20 classes, '' otherwise."""
    return 'voc' if classes == 20 else ''


@var_scope('genTinyYOLOv2')
@set_args(__args_yolo__)
def tinyyolov2(x, stem_fn, stem_out=None, is_training=False, classes=20,
               scope=None, reuse=None):
    """Build a Tiny YOLOv2 detection head on top of the backbone `stem_fn`.

    Arguments and the attributes attached to the result match `yolov2`,
    except that no passthrough branch is built and the 'tinyyolov2' options
    are used. The second head convolution has 1024 filters for 20 classes
    and 512 otherwise.
    """
    inputs = x
    opt = opts('tinyyolov2' + data_name(classes))
    stem = x = stem_fn(x, is_training, stem=True, scope='stem')

    if stem_out is not None:
        stem = x = remove_head(x, stem_out)

    x = max_pool2d(x, 2, stride=1, scope='pool6')
    x = darkconv(x, 1024, 3, scope='conv7')
    x = darkconv(x, 1024 if classes == 20 else 512, 3, scope='conv8')
    # (classes + 5) outputs for each of 5 boxes per cell.
    x = darkconv(x, (classes + 5) * 5, 1, onlyconv=True, scope='linear')
    x.aliases = []

    def get_boxes(*args, **kwargs):
        """Call `yolo_boxes` with this model's options prepended."""
        return yolo_boxes(opt, *args, **kwargs)
    x.get_boxes = get_boxes
    x.stem = stem
    x.inputs = [inputs]
    x.inputs += v2_inputs(x.shape[1:3], opt['num'], classes, x.dtype)
    if isinstance(is_training, tf.Tensor):
        x.inputs.append(is_training)
    x.loss = v2_loss(x, opt['anchors'], classes)
    return x


@var_scope('genFasterRCNN')
@set_args(__args_rcnn__)
def fasterrcnn(x, stem_fn, stem_out=None, is_training=False, classes=21,
               scope=None, reuse=None):
    """Build a Faster R-CNN detector on top of the backbone `stem_fn`.

    Args:
        x: input tensor; its dynamic height and width feed the region
            proposal network.
        stem_fn: backbone builder, called as
            `stem_fn(x, is_training, stem=True, scope='stem')`; the result
            must expose `model_name`.
        stem_out: optional argument for `remove_head`.
        is_training: forwarded to the backbone.
        classes: number of output classes.
        scope, reuse: not read in this body.

    Returns:
        A tensor concatenating class probabilities, `4 * classes` box
        outputs and the rescaled RoIs along axis 1, with `get_boxes`,
        `scales` and `stem` attributes attached.
    """
    def roi_pool_fn(x, filters, kernel_size):
        """Run the RPN and RoI pooling; return pooled features and RoIs.

        `height`, `width` and `scales` are taken from the enclosing
        function when this is called.
        """
        rois = rp_net(x, filters, height, width, scales)
        x = roi_pool2d(x, kernel_size, rois)
        return x, rois[0] / scales

    # A new placeholder; callers reach it through the `scales` attribute.
    scales = tf.placeholder(tf.float32, [None])
    height = tf.cast(tf.shape(x)[1], dtype=tf.float32)
    width = tf.cast(tf.shape(x)[2], dtype=tf.float32)

    stem = x = stem_fn(x, is_training, stem=True, scope='stem')

    if stem_out is not None:
        stem = x = remove_head(x, stem_out)

    # Backbones whose name contains 'zf' use 256 filters and 6x6 pooling;
    # all others use 512 filters and 7x7 pooling.
    if 'zf' in stem.model_name:
        x, rois = roi_pool_fn(x, 256, 6)
    else:
        x, rois = roi_pool_fn(x, 512, 7)

    x = flatten(x)
    x = fc(x, 4096, scope='fc6')
    x = relu(x, name='relu6')
    x = dropout(x, keep_prob=0.5, scope='drop6')
    x = fc(x, 4096, scope='fc7')
    x = relu(x, name='relu7')
    x = dropout(x, keep_prob=0.5, scope='drop7')
    x = concat([softmax(fc(x, classes, scope='logits'), name='probs'),
                fc(x, 4 * classes, scope='boxes'),
                rois], axis=1, name='out')
    x.get_boxes = rcnn_boxes
    x.scales = scales
    x.stem = stem
    return x


# Simple alias.
YOLOv2 = yolov2
TinyYOLOv2 = tinyyolov2
FasterRCNN = fasterrcnn
def __args__(is_training):
    """Return the (functions, default kwargs) pairs handed to `set_args`.

    `is_training` is forwarded to the `batch_norm` and `dropout` defaults.
    """
    return [([batch_norm], {'decay': 0.99, 'scale': True,
                            'is_training': is_training, 'scope': 'bn'}),
            ([conv2d], {'padding': 'SAME', 'activation_fn': None,
                        'biases_initializer': None, 'scope': 'conv'}),
            ([dropout], {'is_training': is_training, 'scope': 'dropout'}),
            ([fc], {'activation_fn': None, 'scope': 'fc'}),
            ([sconv2d], {'padding': 'SAME', 'activation_fn': None,
                         'biases_initializer': None, 'scope': 'sconv'})]


def blocks_args():
    """Return the seven base stage configurations (before scaling).

    Each dict gives the number of repeated blocks, the input/output filter
    counts, the depthwise kernel size, the stride of the first block, the
    expansion ratio and the squeeze-and-excitation ratio of one stage.
    """
    return [
        {'blocks': 1, 'filters_in': 32, 'filters_out': 16, 'kernel_size': 3,
         'stride': 1, 'expand_ratio': 1, 'se_ratio': 0.25},
        {'blocks': 2, 'filters_in': 16, 'filters_out': 24, 'kernel_size': 3,
         'stride': 2, 'expand_ratio': 6, 'se_ratio': 0.25},
        {'blocks': 2, 'filters_in': 24, 'filters_out': 40, 'kernel_size': 5,
         'stride': 2, 'expand_ratio': 6, 'se_ratio': 0.25},
        {'blocks': 3, 'filters_in': 40, 'filters_out': 80, 'kernel_size': 3,
         'stride': 2, 'expand_ratio': 6, 'se_ratio': 0.25},
        {'blocks': 3, 'filters_in': 80, 'filters_out': 112, 'kernel_size': 5,
         'stride': 1, 'expand_ratio': 6, 'se_ratio': 0.25},
        {'blocks': 4, 'filters_in': 112, 'filters_out': 192, 'kernel_size': 5,
         'stride': 2, 'expand_ratio': 6, 'se_ratio': 0.25},
        {'blocks': 1, 'filters_in': 192, 'filters_out': 320, 'kernel_size': 3,
         'stride': 1, 'expand_ratio': 6, 'se_ratio': 0.25}
    ]


def efficientnet(x, width_coefficient, depth_coefficient,
                 default_size, is_training, classes, stem,
                 keep_prob=0.8, drop_rate=0.2, width_divisor=8,
                 scope=None, reuse=None):
    """Build an EfficientNet scaled by the given width/depth coefficients.

    Args:
        x: input tensor.
        width_coefficient: multiplier applied to every filter count.
        depth_coefficient: multiplier applied to every stage's block count.
        default_size: not read in this function.
        is_training: not read in this function (the layer defaults come
            from `__args__` on the public wrappers).
        classes: number of outputs of the final `fc` layer.
        stem: when truthy, return the features after the `head` convolution
            and skip pooling, dropout and the classifier.
        keep_prob: keep probability of the dropout before the classifier.
        drop_rate: maximum per-block drop rate; block ``b`` receives
            ``drop_rate * b / blocks_total``.
        width_divisor: filter counts are rounded to multiples of this value.
        scope, reuse: not read in this function.

    Returns:
        Softmax probabilities, or the stem features when `stem` is truthy.
    """
    def width(w, coefficient=width_coefficient, divisor=width_divisor):
        # Scale, then round to the nearest multiple of `divisor` (at least
        # `divisor`); bump up one step if rounding lost more than 10%.
        w *= coefficient
        new_w = max(divisor, int(w + divisor / 2) // divisor * divisor)
        if new_w < 0.9 * w:
            new_w += divisor
        return int(new_w)

    def depth(d, coefficient=depth_coefficient):
        # Scale the repeat count and round up.
        return int(math.ceil(d * coefficient))

    b = 0  # running block index; names the scope and scales the drop rate
    x = conv(x, width(32), 3, stride=2, scope='stem')
    # NOTE: the denominator uses the unscaled base block counts, while `b`
    # counts the depth-scaled blocks actually built.
    blocks_total = float(sum(args['blocks'] for args in blocks_args()))
    for args in blocks_args():
        filters_in = width(args['filters_in'])
        filters_out = width(args['filters_out'])
        for j in range(depth(args['blocks'])):
            # Only the first block of a stage changes the channel count and
            # applies the stage stride; the rest use stride 1.
            x = block(x, filters_in if j == 0 else filters_out, filters_out,
                      args['kernel_size'], 1 if j > 0 else args['stride'],
                      args['expand_ratio'], args['se_ratio'],
                      drop_rate * b / blocks_total,
                      scope="block{}".format(b))
            b += 1
    x = conv(x, width(1280), 1, scope='head')
    if stem: return x
    x = reduce_mean(x, [1, 2], name='avgpool')
    x = dropout(x, keep_prob=keep_prob, scope='dropout')
    x = fc(x, classes, scope='logits')
    x = softmax(x, name='probs')
    return x


@var_scope('se')
def se(i, filters_se, filters_out, scope=None):
    """Squeeze-and-excitation: rescale `i` by a learned per-channel gate.

    The input is averaged over axes 1 and 2, passed through a 1x1
    convolution with `filters_se` outputs (swish) and a 1x1 convolution
    with `filters_out` outputs (sigmoid), and multiplied back onto `i`.
    """
    x = reduce_mean(i, [1, 2], keepdims=True, name='squeeze')
    x = conv2d(x, filters_se, 1, activation_fn=_swish,
               biases_initializer=tf.zeros_initializer(), scope='reduce')
    x = conv2d(x, filters_out, 1, activation_fn=tf.sigmoid,
               biases_initializer=tf.zeros_initializer(), scope='expand')
    x = multiply(i, x, name='excite')
    return x


@var_scope('block')
def block(i, filters_in=32, filters_out=16, kernel_size=3, stride=1,
          expand_ratio=1, se_ratio=0., drop_rate=0., scope=None):
    """Build one inverted-residual block.

    Steps: optional 1x1 expansion (`econv`, skipped when `expand_ratio` is
    1), depthwise convolution (`sconv`), optional squeeze-and-excitation
    (only when ``0 < se_ratio <= 1``), 1x1 projection (`pconv`), and a
    residual add when the stride is 1 and input/output filters match.
    """
    filters = filters_in * expand_ratio
    x = conv(i, filters, 1, scope='econv') if expand_ratio != 1 else i
    # `None` output filters: presumably depthwise-only in `sconv2d` -- TODO
    # confirm against the separable_conv2d implementation in use.
    x = sconv(x, None, kernel_size, 1, stride=stride, scope='sconv')
    if 0 < se_ratio <= 1:
        # The bottleneck width is derived from the block's input filters.
        x = se(x, max(1, int(filters_in * se_ratio)), filters, scope='se')
    x = convbn(x, filters_out, 1, scope='pconv')
    if (stride == 1) and (filters_in == filters_out):
        if drop_rate > 0:
            x = dropout(x, keep_prob=1 - drop_rate, scope='dropout')
        x = add(i, x, name='add')
    return x


@var_scope('efficientnetb0')
@set_args(__args__)
def efficientnetb0(x, is_training=False, classes=1000,
                   stem=False, scope=None, reuse=None):
    """EfficientNet-B0: width 1.0, depth 1.0, size 224, keep_prob 0.8."""
    return efficientnet(x, 1.0, 1.0, 224, is_training, classes, stem,
                        keep_prob=0.8, scope=scope, reuse=reuse)


@var_scope('efficientnetb1')
@set_args(__args__)
def efficientnetb1(x, is_training=False, classes=1000,
                   stem=False, scope=None, reuse=None):
    """EfficientNet-B1: width 1.0, depth 1.1, size 240, keep_prob 0.8."""
    return efficientnet(x, 1.0, 1.1, 240, is_training, classes, stem,
                        keep_prob=0.8, scope=scope, reuse=reuse)


@var_scope('efficientnetb2')
@set_args(__args__)
def efficientnetb2(x, is_training=False, classes=1000,
                   stem=False, scope=None, reuse=None):
    """EfficientNet-B2: width 1.1, depth 1.2, size 260, keep_prob 0.7."""
    return efficientnet(x, 1.1, 1.2, 260, is_training, classes, stem,
                        keep_prob=0.7, scope=scope, reuse=reuse)


@var_scope('efficientnetb3')
@set_args(__args__)
def efficientnetb3(x, is_training=False, classes=1000,
                   stem=False, scope=None, reuse=None):
    """EfficientNet-B3: width 1.2, depth 1.4, size 300, keep_prob 0.7."""
    return efficientnet(x, 1.2, 1.4, 300, is_training, classes, stem,
                        keep_prob=0.7, scope=scope, reuse=reuse)


@var_scope('efficientnetb4')
@set_args(__args__)
def efficientnetb4(x, is_training=False, classes=1000,
                   stem=False, scope=None, reuse=None):
    """EfficientNet-B4: width 1.4, depth 1.8, size 380, keep_prob 0.6."""
    return efficientnet(x, 1.4, 1.8, 380, is_training, classes, stem,
                        keep_prob=0.6, scope=scope, reuse=reuse)


@var_scope('efficientnetb5')
@set_args(__args__)
def efficientnetb5(x, is_training=False, classes=1000,
                   stem=False, scope=None, reuse=None):
    """EfficientNet-B5: width 1.6, depth 2.2, size 456, keep_prob 0.6."""
    return efficientnet(x, 1.6, 2.2, 456, is_training, classes, stem,
                        keep_prob=0.6, scope=scope, reuse=reuse)


@var_scope('efficientnetb6')
@set_args(__args__)
def efficientnetb6(x, is_training=False, classes=1000,
                   stem=False, scope=None, reuse=None):
    """EfficientNet-B6: width 1.8, depth 2.6, size 528, keep_prob 0.5."""
    return efficientnet(x, 1.8, 2.6, 528, is_training, classes, stem,
                        keep_prob=0.5, scope=scope, reuse=reuse)


@var_scope('efficientnetb7')
@set_args(__args__)
def efficientnetb7(x, is_training=False, classes=1000,
                   stem=False, scope=None, reuse=None):
    """EfficientNet-B7: width 2.0, depth 3.1, size 600, keep_prob 0.5."""
    return efficientnet(x, 2.0, 3.1, 600, is_training, classes, stem,
                        keep_prob=0.5, scope=scope, reuse=reuse)


# Simple alias.
EfficientNetB0 = efficientnetb0
EfficientNetB1 = efficientnetb1
EfficientNetB2 = efficientnetb2
EfficientNetB3 = efficientnetb3
EfficientNetB4 = efficientnetb4
EfficientNetB5 = efficientnetb5
EfficientNetB6 = efficientnetb6
EfficientNetB7 = efficientnetb7
# 1-D convolutions reuse the 2-D implementation.
conv1d = conv2d


def convbn(*args, **kwargs):
    """Apply `conv2d` followed by `batch_norm` under variable scope `scope`.

    `scope` is taken from the keyword arguments; everything else is
    forwarded to `conv2d`.
    """
    scope = kwargs.pop('scope', None)
    with tf.variable_scope(scope):
        return batch_norm(conv2d(*args, **kwargs))


def convbnact(*args, **kwargs):
    """Apply `conv2d`, `batch_norm` and then `activation_fn` under `scope`.

    `scope` and `activation_fn` are taken from the keyword arguments; the
    rest is forwarded to `conv2d`. When `activation_fn` is omitted or None
    the batch-normalised output is returned as is (previously this raised
    ``TypeError`` because None was called).
    """
    scope = kwargs.pop('scope', None)
    activation_fn = kwargs.pop('activation_fn', None)
    with tf.variable_scope(scope):
        x = batch_norm(conv2d(*args, **kwargs))
        # None means "linear", the same convention the layer functions use.
        return x if activation_fn is None else activation_fn(x)


def convrelu(*args, **kwargs):
    """Apply `conv2d` followed by `relu` under variable scope `scope`."""
    scope = kwargs.pop('scope', None)
    with tf.variable_scope(scope):
        return relu(conv2d(*args, **kwargs))


def convrelu0(*args, **kwargs):
    """Like `convrelu`, but force a zero-initialised bias on the conv."""
    scope = kwargs.pop('scope', None)
    # Overrides any `biases_initializer` given by the caller or arg scope.
    kwargs['biases_initializer'] = tf.zeros_initializer()
    with tf.variable_scope(scope):
        return relu(conv2d(*args, **kwargs))


def convbnrelu(*args, **kwargs):
    """Apply `conv2d`, `batch_norm` and `relu` under variable scope `scope`."""
    scope = kwargs.pop('scope', None)
    with tf.variable_scope(scope):
        return relu(batch_norm(conv2d(*args, **kwargs)))


def convbnrelu6(*args, **kwargs):
    """Apply `conv2d`, `batch_norm` and `relu6` under variable scope `scope`."""
    scope = kwargs.pop('scope', None)
    with tf.variable_scope(scope):
        return relu6(batch_norm(conv2d(*args, **kwargs)))


def convbnswish(*args, **kwargs):
    """Apply `conv2d`, `batch_norm` and `swish` under variable scope `scope`."""
    scope = kwargs.pop('scope', None)
    with tf.variable_scope(scope):
        return swish(batch_norm(conv2d(*args, **kwargs)))


def gconvbn(*args, **kwargs):
    """Grouped convolution emulated with `sconv2d` plus a fixed 1x1 kernel.

    The positional arguments are forwarded to `sconv2d`; the LAST positional
    argument is also read as `c` (presumably the depth multiplier / group
    width -- TODO confirm against callers). The `sconv2d` output channels
    are then summed in strided groups of `c` by a constant 0/1 kernel, and
    the result is batch-normalised.
    """
    scope = kwargs.pop('scope', None)
    with tf.variable_scope(scope):
        x = sconv2d(*args, **kwargs)
        c = args[-1]
        infilters = int(x.shape[-1]) if tf_later_than('2') else x.shape[-1].value
        f = infilters // c
        # Constant (non-trainable) kernel: output channel i sums the c input
        # channels start, start + c, ..., start + c * (c - 1).
        kernel = np.zeros((1, 1, f * c, f), np.float32)
        for i in range(f):
            start = (i // c) * c * c + i % c
            end = start + c * c
            kernel[:, :, start:end:c, i] = 1.
        x = conv2d_primitive(x, tf.constant(kernel), strides=[1, 1, 1, 1],
                             padding='VALID', name='gconv')
        return batch_norm(x)


def sconvbn(*args, **kwargs):
    """Apply `sconv2d` followed by `batch_norm` under variable scope `scope`."""
    scope = kwargs.pop('scope', None)
    with tf.variable_scope(scope):
        return batch_norm(sconv2d(*args, **kwargs))


def sconvbnact(*args, **kwargs):
    """Apply `sconv2d`, `batch_norm` and then `activation_fn` under `scope`.

    When `activation_fn` is omitted or None the batch-normalised output is
    returned as is (previously this raised ``TypeError``).
    """
    scope = kwargs.pop('scope', None)
    activation_fn = kwargs.pop('activation_fn', None)
    with tf.variable_scope(scope):
        x = batch_norm(sconv2d(*args, **kwargs))
        return x if activation_fn is None else activation_fn(x)


def sconvbnrelu(*args, **kwargs):
    """Apply `sconv2d`, `batch_norm` and `relu` under variable scope `scope`."""
    scope = kwargs.pop('scope', None)
    with tf.variable_scope(scope):
        return relu(batch_norm(sconv2d(*args, **kwargs)))


def sconvbnrelu6(*args, **kwargs):
    """Apply `sconv2d`, `batch_norm` and `relu6` under variable scope `scope`."""
    scope = kwargs.pop('scope', None)
    with tf.variable_scope(scope):
        return relu6(batch_norm(sconv2d(*args, **kwargs)))


def sconvbnswish(*args, **kwargs):
    """Apply `sconv2d`, `batch_norm` and `swish` under variable scope `scope`."""
    scope = kwargs.pop('scope', None)
    with tf.variable_scope(scope):
        return swish(batch_norm(sconv2d(*args, **kwargs)))


def darkconv(*args, **kwargs):
    """Darknet-style convolution block.

    Keyword arguments `scope` and `onlyconv` are consumed here; the rest is
    forwarded to `conv2d`. With ``onlyconv=True`` only the convolution is
    applied (keeping the default bias of `conv2d`); otherwise the conv has
    no bias and is followed by batch norm (no centering), an explicit
    `bias_add` and a leaky ReLU with ``alpha=0.1``.
    """
    scope = kwargs.pop('scope', None)
    onlyconv = kwargs.pop('onlyconv', False)
    with tf.variable_scope(scope):
        conv_kwargs = {
            'padding': 'SAME',
            'activation_fn': None,
            'weights_initializer': variance_scaling_initializer(1.53846),
            'weights_regularizer': l2(5e-4),
            'biases_initializer': None,
            'scope': 'conv'}
        if onlyconv:
            # Drop the "no bias" default so the plain conv keeps its bias.
            conv_kwargs.pop('biases_initializer')
        with arg_scope([conv2d], **conv_kwargs):
            x = conv2d(*args, **kwargs)
        if onlyconv:
            return x
        x = batch_norm(x, decay=0.99, center=False, scale=True,
                       epsilon=1e-5, scope='bn')
        # The shift is a separate op because batch_norm runs with
        # center=False.
        x = bias_add(x, scope='bias')
        x = leaky_relu(x, alpha=0.1, name='lrelu')
        return x


remove_commons(__name__)
def __args__(is_training):
    """Return the (functions, default kwargs) pairs handed to `set_args`.

    `is_training` is forwarded to the `batch_norm` and `dropout` defaults.
    """
    return [([avg_pool2d, max_pool2d], {'padding': 'SAME', 'scope': 'pool'}),
            ([batch_norm], {'decay': 0.9997, 'scale': True, 'epsilon': 0.001,
                            'is_training': is_training, 'fused': True,
                            'scope': 'bn'}),
            ([conv2d], {'padding': 'SAME', 'activation_fn': None,
                        'biases_initializer': None, 'scope': 'conv'}),
            ([dropout], {'is_training': is_training, 'scope': 'dropout'}),
            ([fc], {'activation_fn': None, 'scope': 'fc'}),
            ([sconv2d], {'padding': 'SAME', 'activation_fn': None,
                         'biases_initializer': None,
                         'scope': 'sconv'})]


@var_scope('sconv')
def sconv(x, filters, kernel_size, stride=1, scope=None):
    """Two stacked (relu -> separable conv -> batch norm) units.

    Only the first unit (scope ``'0'``) applies `stride`; the second
    (scope ``'1'``) uses the default stride of `sconvbn`.
    """
    x = relu(x)
    x = sconvbn(x, filters, kernel_size, stride=stride,
                depth_multiplier=1, scope='0')
    x = relu(x)
    x = sconvbn(x, filters, kernel_size,
                depth_multiplier=1, scope='1')
    return x


def nasnet(x, stem_filters, normals, filters, skip_reduction, use_aux,
           scaling, is_training, classes, stem, scope=None, reuse=None):
    """Build a NASNet-A network.

    Args:
        x: input tensor.
        stem_filters: filters of the first convolution (`conv0`).
        normals: number of normal cells per stage (three stages are built).
        filters: base filter count of the first stage.
        skip_reduction: when truthy, the "previous" tensor `p` returned by
            a reduction cell is discarded and the earlier `p` is kept.
        use_aux: the auxiliary head is built only when this is exactly True.
        scaling: filter multiplier applied at each reduction.
        is_training: not read in this function.
        classes: number of outputs of the classifier(s).
        stem: when truthy, return the features after the final relu.
        scope, reuse: not read in this function.

    Returns:
        Softmax probabilities, or the stem features when `stem` is truthy.
    """
    x = conv(x, stem_filters, 3, stride=2, padding='VALID', scope='conv0')

    # Two stem reduction cells at 1/scaling**2 and 1/scaling of `filters`.
    x, p = reductionA(x, None, filters * scaling ** (-2), scope='stem1')
    x, p = reductionA(x, p, filters * scaling ** (-1), scope='stem2')

    for i in range(1, normals + 1):
        x, p = normalA(x, p, filters, scope="normal%d" % i)

    x, p0 = reductionA(x, p, filters * scaling,
                       scope="reduction%d" % normals)
    p = p0 if not skip_reduction else p

    for i in range(normals + 1, normals * 2 + 1):
        x, p = normalA(x, p, filters * scaling, scope="normal%d" % i)

    if use_aux is True:
        # The result is not used below; the call still adds the 'aux' ops
        # and variables to the graph.
        a = aux(x, classes, scope='aux')

    x, p0 = reductionA(x, p, filters * scaling ** 2,
                       scope="reduction%d" % (normals * 2))
    p = p0 if not skip_reduction else p

    for i in range(normals * 2 + 1, normals * 3 + 1):
        x, p = normalA(x, p, filters * scaling ** 2, scope="normal%d" % i)

    x = relu(x, name='relu')
    if stem: return x
    x = reduce_mean(x, [1, 2], name='avgpool')
    x = dropout(x, keep_prob=0.5, scope='dropout')
    x = fc(x, classes, scope='logits')
    x = softmax(x, name='probs')
    return x


@var_scope('nasnetAlarge')
@set_args(__args__)
def nasnetAlarge(x, is_training=False, classes=1000,
                 stem=False, scope=None, reuse=None):
    """NASNet-A large: 96 stem filters, 6 normal cells, 168 filters."""
    return nasnet(x, 96, 6, 168, True, True, 2,
                  is_training, classes, stem, scope, reuse)


@var_scope('nasnetAmobile')
@set_args(__args__)
def nasnetAmobile(x, is_training=False, classes=1000,
                  stem=False, scope=None, reuse=None):
    """NASNet-A mobile: 32 stem filters, 4 normal cells, 44 filters."""
    return nasnet(x, 32, 4, 44, False, True, 2,
                  is_training, classes, stem, scope, reuse)


def pnasnet(x, stem_filters, blocks, filters,
            scaling, is_training, classes, stem, scope=None, reuse=None):
    """Build a PNASNet network from `normalP` cells.

    Reductions reuse `normalP` with ``stride=2``. Cell scopes are numbered
    consecutively, with the reductions named ``reduction<blocks>`` and
    ``reduction<2 * blocks - 1>``. `is_training`, `scope` and `reuse` are
    not read in this function.

    Returns:
        Softmax probabilities, or the features after the final relu when
        `stem` is truthy.
    """
    x = conv(x, stem_filters, 3, stride=2, padding='VALID', scope='conv0')

    x, p = normalP(x, None, filters * scaling ** (-2), stride=2, scope='stem1')
    x, p = normalP(x, p, filters * scaling ** (-1), stride=2, scope='stem2')

    for i in range(1, blocks + 1):
        x, p = normalP(x, p, filters, scope="normal%d" % i)

    x, p = normalP(x, p, filters * scaling, stride=2,
                   scope="reduction%d" % blocks)

    for i in range(blocks + 1, blocks * 2):
        x, p = normalP(x, p, filters * scaling, scope="normal%d" % i)

    x, p = normalP(x, p, filters * scaling ** 2, stride=2,
                   scope="reduction%d" % (blocks * 2 - 1))

    for i in range(blocks * 2, blocks * 3 - 1):
        x, p = normalP(x, p, filters * scaling ** 2, scope="normal%d" % i)

    x = relu(x, name='relu')
    if stem: return x
    x = reduce_mean(x, [1, 2], name='avgpool')
    x = dropout(x, keep_prob=0.5, scope='dropout')
    x = fc(x, classes, scope='logits')
    x = softmax(x, name='probs')
    return x


@var_scope('pnasnetlarge')
@set_args(__args__)
def pnasnetlarge(x, is_training=False, classes=1000,
                 stem=False, scope=None, reuse=None):
    """PNASNet large: 96 stem filters, 4 blocks per stage, 216 filters."""
    return pnasnet(x, 96, 4, 216, 2, is_training, classes, stem, scope, reuse)


@var_scope('adjust')
def adjust(p, x, filters, scope=None):
    """Make the previous-cell tensor `p` compatible with `x`.

    - `p` is None: `x` itself is used.
    - dimension 1 of `p` and `x` differ: `p` is reduced by two stride-2
      paths (the second shifted by one pixel), each projected to
      ``filters / 2`` channels, concatenated and batch-normalised.
    - only the channel count differs from `filters`: relu + 1x1 conv.
    Otherwise `p` is returned unchanged.
    """
    if p is None:
        p = x
    elif int(p.shape[1]) != int(x.shape[1]):
        p = relu(p, name='relu')
        p1 = avg_pool2d(p, 1, stride=2, padding='VALID', scope='pool1')
        p1 = conv2d(p1, int(filters / 2), 1, scope='conv1')
        # Pad bottom/right then crop top/left: shifts `p` by one pixel so
        # the second path samples the positions the first one skips.
        p2 = pad(p, [[0, 0], [0, 1], [0, 1], [0, 0]])[:, 1:, 1:, :]
        p2 = avg_pool2d(p2, 1, stride=2, padding='VALID', scope='pool2')
        p2 = conv2d(p2, int(filters / 2), 1, scope='conv2')
        p = concat([p1, p2], axis=3, name='concat')
        p = batch_norm(p, scope='bn')
    elif int(p.shape[-1]) != filters:
        p = relu(p, name='relu')
        p = conv(p, filters, 1, scope='prev_1x1')
    return p


@var_scope('normalA')
def normalA(x, p, filters, scope=None):
    """Normal cell for NASNet-A (Fig. 4 in the paper)

    Returns a pair: the concatenated cell output and the unmodified input
    `x` (which the caller passes on as the next cell's `p`).
    """
    p = adjust(p, x, filters)

    h = relu(x)
    h = conv(h, filters, 1, scope='1x1')

    x1 = add(sconv(h, filters, 3, scope='left1'), h, name='add1')
    x2 = add(sconv(p, filters, 3, scope='left2'),
             sconv(h, filters, 5, scope='right2'), name='add2')
    x3 = add(avg_pool2d(h, 3, stride=1, scope='left3'), p, name='add3')
    x4 = add(avg_pool2d(p, 3, stride=1, scope='left4'),
             avg_pool2d(p, 3, stride=1, scope='right4'), name='add4')
    x5 = add(sconv(p, filters, 5, scope='left5'),
             sconv(p, filters, 3, scope='right5'), name='add5')

    return concat([p, x2, x5, x3, x4, x1], axis=3, name='concat'), x


@var_scope('reductionA')
def reductionA(x, p, filters, scope=None):
    """Reduction cell for NASNet-A (Fig. 4 in the paper)

    `filters` is truncated to an int first (callers pass products with
    negative powers of `scaling`). Returns the concatenated cell output and
    the unmodified input `x`.
    """
    filters = int(filters)
    p = adjust(p, x, filters)

    h = relu(x)
    h = conv(h, filters, 1, scope='1x1')

    x1 = add(sconv(p, filters, 7, stride=2, scope='left1'),
             sconv(h, filters, 5, stride=2, scope='right1'), name='add1')
    x2 = add(max_pool2d(h, 3, scope='left2'),
             sconv(p, filters, 7, stride=2, scope='right2'), name='add2')
    x3 = add(avg_pool2d(h, 3, scope='left3'),
             sconv(p, filters, 5, stride=2, scope='right3'), name='add3')
    x4 = add(max_pool2d(h, 3, scope='left4'),
             sconv(x1, filters, 3, scope='right4'), name='add4')
    x5 = add(avg_pool2d(x1, 3, stride=1, scope='left5'), x2, name='add5')

    return concat([x2, x3, x5, x4], axis=3, name='concat'), x


@var_scope('pool')
def pool(x, filters, stride, scope=None):
    """3x3 max pooling, followed by a 1x1 conv if the channels differ.

    The projection is added only when the input's last dimension is not
    already `filters`.
    """
    y = max_pool2d(x, 3, stride=stride)
    # NOTE(review): `tf_later_than` is not imported explicitly in this
    # file; presumably it arrives through `from .ops import *` -- confirm.
    infilters = int(x.shape[-1]) if tf_later_than('2') else x.shape[-1].value
    if infilters != filters:
        y = conv(y, filters, 1, scope='1x1')
    return y


@var_scope('normalP')
def normalP(x, p, filters, stride=1, scope=None):
    """PNASNet cell; also used as the reduction cell with ``stride=2``.

    Returns the concatenation of the five branch sums and the unmodified
    input `x`.
    """
    filters = int(filters)
    p = adjust(p, x, filters)

    h = relu(x)
    h = conv(h, filters, 1, scope='1x1')

    x1 = add(sconv(p, filters, 5, stride, scope='left1'),
             pool(p, filters, stride, scope='right1'), name='add1')
    x2 = add(sconv(h, filters, 7, stride, scope='left2'),
             pool(h, filters, stride, scope='right2'), name='add2')
    x3 = add(sconv(h, filters, 5, stride, scope='left3'),
             sconv(h, filters, 3, stride, scope='right3'), name='add3')
    x4 = add(sconv(x3, filters, 3, scope='left4'),
             pool(h, filters, stride, scope='right4'), name='add4')
    # The right operand is `h` itself unless the cell strides, in which
    # case it is a strided relu + 1x1 conv of `h`.
    x5 = add(
        sconv(p, filters, 3, stride, scope='left5'),
        conv(relu(h), filters, 1, stride, scope='right5') if stride > 1 else h,
        name='add5')

    return concat([x1, x2, x3, x4, x5], axis=3, name='concat'), x


@var_scope('aux')
def aux(x, classes, scope=None):
    """Auxiliary classifier head.

    relu -> 5x5 average pool (stride 3) -> 1x1 conv to 128 -> relu ->
    VALID conv to 768 whose kernel equals the remaining spatial size ->
    relu -> squeeze -> fc -> softmax.
    """
    x = relu(x, name='relu1')
    x = avg_pool2d(x, 5, stride=3, padding='VALID', scope='pool')
    x = conv(x, 128, 1, scope='proj')
    x = relu(x, name='relu2')
    # Kernel size int(x.shape[1]) with VALID padding collapses dimension 1
    # to size 1 (and dimension 2 too when the map is square).
    x = conv(x, 768, int(x.shape[1]), padding='VALID', scope='conv')
    x = relu(x, name='relu3')
    x = squeeze(x, [1, 2], name='squeeze')
    x = fc(x, classes, scope='logits')
    x = softmax(x, name='probs')
    return x


# Simple alias.
NASNetAlarge = nasnetAlarge
NASNetAmobile = nasnetAmobile
PNASNetlarge = pnasnetlarge
if tf_later_than('1.6'):
    # Note that `tf.nn.leaky_relu` has existed since 1.4.0,
    # but 1.4.0, 1.4.1, 1.5.0, 1.5.1 do not support float16.
    _leaky_relu = tf.nn.leaky_relu
else:
    def _leaky_relu(x, alpha=0.2, name=None):
        """Fallback leaky ReLU: relu(x) - alpha * relu(-x)."""
        return tf.add(tf.nn.relu(x), -alpha * tf.nn.relu(-x), name=name)


# Normalise the TNETS_CUSTOM_SWISH environment switch to the strings
# 'true' / 'false': 'true' and '1' (case-insensitive) enable it, anything
# else disables it.
TNETS_CUSTOM_SWISH = os.environ.get('TNETS_CUSTOM_SWISH', 'false').lower()
if TNETS_CUSTOM_SWISH != 'true':
    if TNETS_CUSTOM_SWISH != '1':
        TNETS_CUSTOM_SWISH = 'false'
    else:
        TNETS_CUSTOM_SWISH = 'true'

if tf_later_than('1.5') and not (TNETS_CUSTOM_SWISH == 'true'):
    # Note that `tf.nn.swish` has existed since 1.5.0.
    _swish = tf.nn.swish
else:
    def _swish(x, name=None):
        """Hand-written swish: x * sigmoid(x)."""
        return tf.multiply(x, tf.sigmoid(x), name=name)


if tf_later_than('1.5'):
    # Note that `tf.reduce_mean` has existed since 1.0,
    # but the parameter name `keep_dims` has been changed to `keepdims`.
    _reduce_mean = tf.reduce_mean
else:
    def _reduce_mean(input_tensor, axis=None, keepdims=False, name=None):
        """Adapter exposing the newer `keepdims` name on old TensorFlow."""
        return tf.reduce_mean(input_tensor, axis=axis, keep_dims=keepdims,
                              name=name)


def _hard_sigmoid(x, name=None):
    """Piecewise-linear sigmoid: relu6(x + 3) / 6."""
    return tf.divide(tf.nn.relu6(x + 3.), 6., name=name)


def _hard_swish(x, name=None):
    """Piecewise-linear swish: x * relu6(x + 3) / 6."""
    return tf.multiply(x, tf.nn.relu6(x + 3.) / 6., name=name)


# Public ops: every TensorFlow function (or local fallback) is wrapped by
# `ops_to_outputs` (defined in .utils; its behavior is not visible here).
argmax = ops_to_outputs(tf.argmax)
add = ops_to_outputs(tf.add)
concat = ops_to_outputs(tf.concat)
conv2d_primitive = ops_to_outputs(tf.nn.conv2d)
expand_dims = ops_to_outputs(tf.expand_dims)
gather = ops_to_outputs(tf.gather)
hard_sigmoid = ops_to_outputs(_hard_sigmoid)
hard_swish = ops_to_outputs(_hard_swish)
leaky_relu = ops_to_outputs(_leaky_relu)
lrn = ops_to_outputs(tf.nn.lrn)
maximum = ops_to_outputs(tf.maximum)
multiply = ops_to_outputs(tf.multiply)
one_hot = ops_to_outputs(tf.one_hot)
pad = ops_to_outputs(tf.pad)
reduce_max = ops_to_outputs(tf.reduce_max)
reduce_mean = ops_to_outputs(_reduce_mean)
reduce_sum = ops_to_outputs(tf.reduce_sum)
relu = ops_to_outputs(tf.nn.relu)
relu6 = ops_to_outputs(tf.nn.relu6)
reshape = ops_to_outputs(tf.reshape)
sigmoid = ops_to_outputs(tf.sigmoid)
softmax = ops_to_outputs(tf.nn.softmax)
sqrt = ops_to_outputs(tf.sqrt)
square = ops_to_outputs(tf.square)
squeeze = ops_to_outputs(tf.squeeze)
stack = ops_to_outputs(tf.stack)
swish = ops_to_outputs(_swish)
tanh = ops_to_outputs(tf.tanh)
to_int32 = ops_to_outputs(tf.to_int32)


@ops_to_outputs
def srn(x, depth_radius, alpha=1.0, beta=0.5, name=None):
    """Normalise `x` by a local sum of squares over a spatial window.

    A depthwise convolution with an all-ones
    ``depth_radius x depth_radius`` kernel sums the squared activations
    around each position, per channel; the result is
    ``x / (1 + alpha / depth_radius**2 * squared_sum) ** beta``.
    """
    # Refer to the following code snippet
    # https://github.com/tensorflow/tensorflow/issues/1246#issuecomment-188588051
    squared_sum = tf.nn.depthwise_conv2d(
        tf.square(x),
        tf.ones([depth_radius] * 2 + [tf.shape(x)[3], 1], dtype=tf.float32),
        [1, 1, 1, 1],
        'SAME')
    alpha = tf.constant(alpha / (depth_radius ** 2), dtype=tf.float32)
    beta = tf.constant(beta, dtype=tf.float32)
    return tf.divide(x, (1.0 + alpha * squared_sum) ** beta, name=name)


@ops_to_outputs
def upsample(x, stride, name=None):
    """Repeat every element of `x` `stride` times along axes 1 and 2.

    `stride` is an int or a 2-tuple. The batch, height and width are read
    dynamically with `tf.shape`; the channel count must be statically
    known.
    """
    if isinstance(stride, int):
        stride = (stride, stride)
    assert isinstance(stride, tuple)
    b = tf.shape(x)[0]
    h = tf.shape(x)[1] * stride[0]
    w = tf.shape(x)[2] * stride[1]
    c = int(x.shape[-1]) if tf_later_than('2') else x.shape[-1].value
    # Insert singleton axes after height and width, tile them by the
    # strides, then fold them back into height and width.
    x = tf.expand_dims(x, 2)
    x = tf.expand_dims(x, 4)
    x = tf.tile(x, (1, 1, stride[0], 1, stride[1], 1))
    return tf.reshape(x, (b, h, w, c), name=name)


@ops_to_outputs
def local_flatten(x, kernel_size, name=None):
    """Move each `kernel_size` spatial neighbourhood into the last axis.

    For every offset (i, j) inside the kernel a strided slice of `x`
    starting at (i, j) with step `kernel_size` is taken; the slices are
    concatenated along the last axis.
    """
    if isinstance(kernel_size, int):
        kernel_size = (kernel_size, kernel_size)
    assert isinstance(kernel_size, tuple)
    x = [[tf.strided_slice(x, (0, i, j), tf.shape(x)[:-1], (1,) + kernel_size)
          for j in range(kernel_size[1])] for i in range(kernel_size[0])]
    # Flatten the nested list of slices before concatenating.
    return tf.concat(reduce(lambda x, y: x + y, x), axis=-1, name=name)
def _float_copy(x):
    """Return a floating-point copy of `x` that is safe to edit in place.

    Floating inputs keep their dtype; any other dtype (e.g. ``uint8``
    images) is converted to ``float32`` so that the in-place arithmetic in
    the pre-processing functions does not raise a casting error.
    """
    x = np.asarray(x)
    if np.issubdtype(x.dtype, np.floating):
        return x.copy()
    return x.astype(np.float32)


def _find_preprocess_fn(model_name):
    """Return the pre-processing function for `model_name`, or None.

    An exact key match wins. Otherwise the LONGEST key contained in the
    name (compared case-insensitively) is used, so that e.g. a name
    containing ``wideresnet50`` is not captured by the generic ``resnet``
    key and the result does not depend on dict ordering.
    """
    try:
        return __preprocess_dict__[model_name]
    except KeyError:
        pass
    lowered = model_name.lower()
    matches = [key for key in __preprocess_dict__ if key.lower() in lowered]
    if not matches:
        return None
    return __preprocess_dict__[max(matches, key=len)]


def preprocess(scopes, inputs):
    """Apply the model-specific pre-processing for each scope.

    Args:
        scopes: a scope (or list of scopes) understood by `parse_scopes`.
        inputs: the batch handed to every pre-processing function.

    Returns:
        The pre-processed batch when a single scope was given, otherwise a
        list with one result per scope. If no pre-processing is known for
        a model, a warning is issued and `inputs` is passed through
        unchanged.
    """
    import warnings
    from .utils import parse_scopes
    if not isinstance(scopes, list):
        scopes = [scopes]
    outputs = []
    for scope in scopes:
        model_name = parse_scopes(scope)[0]
        fun = _find_preprocess_fn(model_name)
        if fun is None:
            warnings.warn('No pre-processing will be performed '
                          'because the pre-processing for ' +
                          model_name + ' are not found.')
            outputs.append(inputs)
        else:
            outputs.append(fun(inputs))
    if len(outputs) == 1:
        outputs = outputs[0]
    return outputs


def direct(model_name, target_size):
    """Return a one-argument pre-processing function for `model_name`.

    For YOLO models the returned function also forwards `target_size`.
    The dictionary lookup happens when the returned function is called, so
    an unknown `model_name` raises ``KeyError`` at that point.
    """
    if 'yolo' in model_name.lower():
        def _direct(inputs):
            return __preprocess_dict__[model_name](inputs, target_size)
    else:
        def _direct(inputs):
            return __preprocess_dict__[model_name](inputs)
    return _direct


def bair_preprocess(x):
    """Reverse the channel order and subtract the BAIR/Caffe channel means."""
    # Refer to the following BAIR Caffe Model Zoo
    # https://github.com/BVLC/caffe/blob/master/models/bvlc_googlenet/train_val.prototxt
    x = _float_copy(x)
    x = x[:, :, :, ::-1]
    x[:, :, :, 0] -= 104.
    x[:, :, :, 1] -= 117.
    x[:, :, :, 2] -= 123.
    return x


def tfslim_preprocess(x):
    """Scale values from [0, 255] to [-1, 1]."""
    # Copied from keras (equivalent to the same as in TF Slim)
    x = _float_copy(x)
    x /= 255.
    x -= 0.5
    x *= 2.
    return x


def keras_resnet_preprocess(x):
    """Reverse the channel order and subtract the Keras ResNet means."""
    # Copied from keras
    x = _float_copy(x)
    x = x[:, :, :, ::-1]
    x[:, :, :, 0] -= 103.939
    x[:, :, :, 1] -= 116.779
    x[:, :, :, 2] -= 123.68
    return x


def fb_preprocess(x):
    """Scale to [0, 1], then standardise each channel (Torch ResNet stats)."""
    # Refer to the following Torch ResNets
    # https://github.com/facebook/fb.resnet.torch/blob/master/pretrained/classify.lua
    x = _float_copy(x)
    x /= 255.
    x[:, :, :, 0] -= 0.485
    x[:, :, :, 1] -= 0.456
    x[:, :, :, 2] -= 0.406
    x[:, :, :, 0] /= 0.229
    x[:, :, :, 1] /= 0.224
    x[:, :, :, 2] /= 0.225
    return x


def wrn_preprocess(x):
    """Scale to [0, 1], then standardise each channel (WideResNet stats)."""
    # Refer to the following Torch WideResNets
    # https://github.com/szagoruyko/wide-residual-networks/blob/master/pytorch/main.py
    x = _float_copy(x)
    x /= 255.
    x[:, :, :, 0] -= 0.491
    x[:, :, :, 1] -= 0.482
    x[:, :, :, 2] -= 0.447
    x[:, :, :, 0] /= 0.247
    x[:, :, :, 1] /= 0.244
    x[:, :, :, 2] /= 0.262
    return x


def darknet_preprocess(x, target_size=None):
    """Optionally resize, reverse the channel order and scale to [0, 1].

    Args:
        x: image batch indexed as (image, row, column, channel).
        target_size: ``(height, width)``; when it is None or contains a
            None entry the images are not resized. Resizing requires
            `cv2`.
    """
    # Refer to the following darkflow
    # https://github.com/thtrieu/darkflow/blob/master/darkflow/net/yolo/predict.py
    if target_size is None or target_size[0] is None or target_size[1] is None:
        # Integer batches are accepted here too, as in the resize branch.
        y = _float_copy(x)
    else:
        h, w = target_size
        assert cv2 is not None, 'resizing requires `cv2`.'
        y = np.zeros((len(x), h, w, x.shape[3]))
        for i, image in enumerate(x):
            y[i] = cv2.resize(image, (w, h), interpolation=cv2.INTER_CUBIC)
    y = y[:, :, :, ::-1]
    y /= 255.
    return y


def faster_rcnn_preprocess(x):
    """Subtract the py-faster-rcnn pixel means (channel order unchanged)."""
    # Refer to the following py-faster-rcnn
    # https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/test.py#L22
    # https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/fast_rcnn/config.py#L181
    y = _float_copy(x)
    y[:, :, :, 0] -= 102.9801
    y[:, :, :, 1] -= 115.9465
    y[:, :, :, 2] -= 122.7717
    return y


# Dictionary for pre-processing functions.
# Keys are model names; the un-numbered family keys ('inception', 'resnet',
# 'mobilenet', ...) act as fallbacks for names that only contain them.
__preprocess_dict__ = {
    'inception': tfslim_preprocess,
    'inception1': bair_preprocess,
    'inception2': tfslim_preprocess,
    'inception3': tfslim_preprocess,
    'inception4': tfslim_preprocess,
    'inceptionresnet2_tfslim': tfslim_preprocess,
    'resnet': keras_resnet_preprocess,
    'resnet50': keras_resnet_preprocess,
    'resnet101': keras_resnet_preprocess,
    'resnet152': keras_resnet_preprocess,
    'resnetv2': tfslim_preprocess,
    'resnet50v2': tfslim_preprocess,
    'resnet101v2': tfslim_preprocess,
    'resnet152v2': tfslim_preprocess,
    'resnet200v2': fb_preprocess,
    'resnext': fb_preprocess,
    'resnext50': fb_preprocess,
    'resnext101': fb_preprocess,
    'resnext50c32': fb_preprocess,
    'resnext101c32': fb_preprocess,
    'resnext101c64': fb_preprocess,
    'wideresnet50': wrn_preprocess,
    'nasnetAlarge': tfslim_preprocess,
    'nasnetAmobile': tfslim_preprocess,
    'pnasnetlarge': tfslim_preprocess,
    'vgg16': keras_resnet_preprocess,
    'vgg19': keras_resnet_preprocess,
    'densenet': fb_preprocess,
    'densenet121': fb_preprocess,
    'densenet169': fb_preprocess,
    'densenet201': fb_preprocess,
    'mobilenet': tfslim_preprocess,
    'mobilenet25': tfslim_preprocess,
    'mobilenet50': tfslim_preprocess,
    'mobilenet75': tfslim_preprocess,
    'mobilenet100': tfslim_preprocess,
    'mobilenetv2': tfslim_preprocess,
    'mobilenet35v2': tfslim_preprocess,
    'mobilenet50v2': tfslim_preprocess,
    'mobilenet75v2': tfslim_preprocess,
    'mobilenet100v2': tfslim_preprocess,
    'mobilenet130v2': tfslim_preprocess,
    'mobilenet140v2': tfslim_preprocess,
    'mobilenet75v3large': tfslim_preprocess,
    'mobilenet100v3large': tfslim_preprocess,
    'mobilenet100v3largemini': tfslim_preprocess,
    'mobilenet75v3small': tfslim_preprocess,
    'mobilenet100v3small': tfslim_preprocess,
    'mobilenet100v3smallmini': tfslim_preprocess,
    'efficientnet': fb_preprocess,
    'efficientnetb0': fb_preprocess,
    'efficientnetb1': fb_preprocess,
    'efficientnetb2': fb_preprocess,
    'efficientnetb3': fb_preprocess,
    'efficientnetb4': fb_preprocess,
    'efficientnetb5': fb_preprocess,
    'efficientnetb6': fb_preprocess,
    'efficientnetb7': fb_preprocess,
    'squeezenet': bair_preprocess,
    'zf': faster_rcnn_preprocess,
    'darknet19': darknet_preprocess,
    'tinydarknet19': darknet_preprocess,
    'REFyolov3coco': darknet_preprocess,
    'REFyolov3voc': darknet_preprocess,
    'REFyolov2coco': darknet_preprocess,
    'REFyolov2voc': darknet_preprocess,
    'REFtinyyolov2voc': darknet_preprocess,
    'REFfasterrcnnZFvoc': faster_rcnn_preprocess,
    'REFfasterrcnnVGG16voc': faster_rcnn_preprocess,
    'genYOLOv2': darknet_preprocess,
    'genTinyYOLOv2': darknet_preprocess,
    'genFasterRCNN': faster_rcnn_preprocess,
}
21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /tensornets/references/darkflow_utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Collection of darkflow utils 2 | 3 | The codes were copied without modification from the original darkflow 4 | (https://github.com/thtrieu/darkflow), and each module was from the following: 5 | 6 | 1. nms 7 | - ${darkflow}/darkflow/cython_utils/nms.pyx 8 | 2. get_boxes 9 | - ${darkflow}/darkflow/cython_utils/cy_yolo2_findboxes.pyx 10 | 11 | Additionally, `yolov3_box` was adapted from `yolov2_box` by taehoonlee. 12 | """ 13 | from __future__ import absolute_import 14 | 15 | try: 16 | from . 
import numpy as np


class BoundBox:
    """Plain container for one predicted box.

    Attributes set by the constructor:
        x, y, w, h: box coordinates, initialised to 0.0. The helpers in this
            module treat (x, y) as the centre and (w, h) as the full extent.
        c: a float initialised to 0.0 (presumably the objectness confidence).
        class_num: initialised to the number of classes.
        probs: zero vector with one entry per class.
    """

    def __init__(self, classes):
        self.x, self.y = float(), float()
        self.w, self.h = float(), float()
        self.c = float()
        self.class_num = classes
        self.probs = np.zeros((classes,))


def overlap(x1, w1, x2, w2):
    """Return the signed 1-D overlap of two centred intervals.

    Each interval is given by its centre and width. The result is negative
    when the intervals are disjoint.
    """
    l1 = x1 - w1 / 2.
    l2 = x2 - w2 / 2.
    left = max(l1, l2)
    r1 = x1 + w1 / 2.
    r2 = x2 + w2 / 2.
    right = min(r1, r2)
    return right - left


def box_intersection(a, b):
    """Return the intersection area of boxes `a` and `b` (0 if disjoint)."""
    w = overlap(a.x, a.w, b.x, b.w)
    h = overlap(a.y, a.h, b.y, b.h)
    if w < 0 or h < 0:
        return 0
    return w * h


def box_union(a, b):
    """Return the area covered by `a` or `b` (sum of areas minus overlap)."""
    i = box_intersection(a, b)
    return a.w * a.h + b.w * b.h - i


def box_iou(a, b):
    """Return the intersection-over-union of boxes `a` and `b`.

    Degenerate pairs whose union area is not positive (e.g. two zero-sized
    boxes) yield 0.0 instead of dividing by zero.
    """
    union = box_union(a, b)
    if union <= 0:
        return 0.
    return box_intersection(a, b) / union


def prob_compare(box):
    """Sort key: the probability stored at index `box.class_num`.

    NOTE(review): `BoundBox.__init__` sets `class_num` to the number of
    classes, which is one past the last valid index of `probs`; callers are
    presumably expected to reassign `class_num` first - confirm.
    """
    return box.probs[box.class_num]


def prob_compare2(boxa, boxb):
    """Old-style comparator on the `pi` attribute, in descending order.

    Returns 1 when `boxa.pi` is smaller, 0 when equal, -1 otherwise.

    NOTE(review): `BoundBox` as defined here never sets `pi`; the attribute
    must be attached by the caller - confirm against usage.
    """
    if (boxa.pi < boxb.pi):
        return 1
    elif(boxa.pi == boxb.pi):
        return 0
    else:
        return -1
for entire function 14 | @cython.wraparound(False) # turn off negative index wrapping for entire function 15 | @cython.cdivision(True) 16 | cdef float expit_c(float x): 17 | cdef float y= 1/(1+exp(-x)) 18 | return y 19 | 20 | #MAX 21 | @cython.boundscheck(False) # turn off bounds-checking for entire function 22 | @cython.wraparound(False) # turn off negative index wrapping for entire function 23 | @cython.cdivision(True) 24 | cdef float max_c(float a, float b): 25 | if(a>b): 26 | return a 27 | return b 28 | 29 | """ 30 | #SOFTMAX! 31 | @cython.cdivision(True) 32 | @cython.boundscheck(False) # turn off bounds-checking for entire function 33 | @cython.wraparound(False) # turn off negative index wrapping for entire function 34 | cdef void _softmax_c(float* x, int classes): 35 | cdef: 36 | float sum = 0 37 | np.intp_t k 38 | float arr_max = 0 39 | for k in range(classes): 40 | arr_max = max(arr_max,x[k]) 41 | 42 | for k in range(classes): 43 | x[k] = exp(x[k]-arr_max) 44 | sum += x[k] 45 | 46 | for k in range(classes): 47 | x[k] = x[k]/sum 48 | """ 49 | 50 | 51 | 52 | #BOX CONSTRUCTOR 53 | @cython.cdivision(True) 54 | @cython.boundscheck(False) # turn off bounds-checking for entire function 55 | @cython.wraparound(False) # turn off negative index wrapping for entire function 56 | cdef _yolov3_box(meta,np.ndarray[float,ndim=3] net_out_in,scale_idx): 57 | cdef: 58 | np.intp_t H, W, _, C, B, row, col, box_loop, class_loop, anchor_idx 59 | np.intp_t row1, col1, box_loop1,index,index2 60 | float threshold = meta['thresh'] 61 | float tempc,arr_max=0,sum=0 62 | double[:] anchors = np.asarray(meta['anchors']) 63 | list boxes = list() 64 | 65 | H, W = net_out_in.shape[:2] 66 | C = meta['classes'] 67 | B = 3 # meta['num'] 68 | anchor_idx = 6 - 3 * scale_idx 69 | Hin = H * pow(2, 5 - scale_idx) 70 | Win = W * pow(2, 5 - scale_idx) 71 | 72 | cdef: 73 | float[:, :, :, ::1] net_out = net_out_in.reshape([H, W, B, net_out_in.shape[2]/B]) 74 | float[:, :, :, ::1] Classes = net_out[:, 
:, :, 5:] 75 | float[:, :, :, ::1] Bbox_pred = net_out[:, :, :, :5] 76 | float[:, :, :, ::1] probs = np.zeros((H, W, B, C), dtype=np.float32) 77 | 78 | for row in range(H): 79 | for col in range(W): 80 | for box_loop in range(B): 81 | arr_max=0 82 | sum=0; 83 | Bbox_pred[row, col, box_loop, 4] = expit_c(Bbox_pred[row, col, box_loop, 4]) 84 | Bbox_pred[row, col, box_loop, 0] = (col + expit_c(Bbox_pred[row, col, box_loop, 0])) / W 85 | Bbox_pred[row, col, box_loop, 1] = (row + expit_c(Bbox_pred[row, col, box_loop, 1])) / H 86 | Bbox_pred[row, col, box_loop, 2] = exp(Bbox_pred[row, col, box_loop, 2]) * anchors[2 * (box_loop + anchor_idx) + 0] / Win 87 | Bbox_pred[row, col, box_loop, 3] = exp(Bbox_pred[row, col, box_loop, 3]) * anchors[2 * (box_loop + anchor_idx) + 1] / Hin 88 | #SOFTMAX BLOCK, no more pointer juggling 89 | for class_loop in range(C): 90 | arr_max=max_c(arr_max,Classes[row,col,box_loop,class_loop]) 91 | 92 | for class_loop in range(C): 93 | Classes[row,col,box_loop,class_loop]=exp(Classes[row,col,box_loop,class_loop]-arr_max) 94 | sum+=Classes[row,col,box_loop,class_loop] 95 | 96 | for class_loop in range(C): 97 | tempc = Classes[row, col, box_loop, class_loop] * Bbox_pred[row, col, box_loop, 4]/sum 98 | if(tempc > threshold): 99 | probs[row, col, box_loop, class_loop] = tempc 100 | 101 | 102 | #NMS 103 | return np.ascontiguousarray(probs).reshape(H*W*B,C), np.ascontiguousarray(Bbox_pred).reshape(H*B*W,5) 104 | 105 | 106 | #BOX CONSTRUCTOR 107 | @cython.cdivision(True) 108 | @cython.boundscheck(False) # turn off bounds-checking for entire function 109 | @cython.wraparound(False) # turn off negative index wrapping for entire function 110 | def yolov3_box(meta,np.ndarray[float,ndim=3] out0,np.ndarray[float,ndim=3] out1,np.ndarray[float,ndim=3] out2): 111 | a0, b0 = _yolov3_box(meta, out0, 0) 112 | a1, b1 = _yolov3_box(meta, out1, 1) 113 | a2, b2 = _yolov3_box(meta, out2, 2) 114 | return NMS(np.concatenate([a2, a1, a0], axis=0), np.concatenate([b2, b1, 
b0], axis=0)) 115 | 116 | 117 | #BOX CONSTRUCTOR 118 | @cython.cdivision(True) 119 | @cython.boundscheck(False) # turn off bounds-checking for entire function 120 | @cython.wraparound(False) # turn off negative index wrapping for entire function 121 | def yolov2_box(meta,np.ndarray[float,ndim=3] net_out_in): 122 | cdef: 123 | np.intp_t H, W, _, C, B, row, col, box_loop, class_loop 124 | np.intp_t row1, col1, box_loop1,index,index2 125 | float threshold = meta['thresh'] 126 | float tempc,arr_max=0,sum=0 127 | double[:] anchors = np.asarray(meta['anchors']) 128 | list boxes = list() 129 | 130 | H, W = net_out_in.shape[:2] 131 | C = meta['classes'] 132 | B = meta['num'] 133 | 134 | cdef: 135 | float[:, :, :, ::1] net_out = net_out_in.reshape([H, W, B, net_out_in.shape[2]/B]) 136 | float[:, :, :, ::1] Classes = net_out[:, :, :, 5:] 137 | float[:, :, :, ::1] Bbox_pred = net_out[:, :, :, :5] 138 | float[:, :, :, ::1] probs = np.zeros((H, W, B, C), dtype=np.float32) 139 | 140 | for row in range(H): 141 | for col in range(W): 142 | for box_loop in range(B): 143 | arr_max=0 144 | sum=0; 145 | Bbox_pred[row, col, box_loop, 4] = expit_c(Bbox_pred[row, col, box_loop, 4]) 146 | Bbox_pred[row, col, box_loop, 0] = (col + expit_c(Bbox_pred[row, col, box_loop, 0])) / W 147 | Bbox_pred[row, col, box_loop, 1] = (row + expit_c(Bbox_pred[row, col, box_loop, 1])) / H 148 | Bbox_pred[row, col, box_loop, 2] = exp(Bbox_pred[row, col, box_loop, 2]) * anchors[2 * box_loop + 0] / W 149 | Bbox_pred[row, col, box_loop, 3] = exp(Bbox_pred[row, col, box_loop, 3]) * anchors[2 * box_loop + 1] / H 150 | #SOFTMAX BLOCK, no more pointer juggling 151 | for class_loop in range(C): 152 | arr_max=max_c(arr_max,Classes[row,col,box_loop,class_loop]) 153 | 154 | for class_loop in range(C): 155 | Classes[row,col,box_loop,class_loop]=exp(Classes[row,col,box_loop,class_loop]-arr_max) 156 | sum+=Classes[row,col,box_loop,class_loop] 157 | 158 | for class_loop in range(C): 159 | tempc = Classes[row, col, box_loop, 
class_loop] * Bbox_pred[row, col, box_loop, 4]/sum 160 | if(tempc > threshold): 161 | probs[row, col, box_loop, class_loop] = tempc 162 | 163 | 164 | #NMS 165 | return NMS(np.ascontiguousarray(probs).reshape(H*W*B,C), np.ascontiguousarray(Bbox_pred).reshape(H*B*W,5)) 166 | -------------------------------------------------------------------------------- /tensornets/references/darkflow_utils/nms.pxd: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | cimport numpy as np 5 | cimport cython 6 | ctypedef np.float_t DTYPE_t 7 | from libc.math cimport exp 8 | from .box import BoundBox 9 | 10 | 11 | cdef NMS(float[:, ::1] , float[:, ::1] ) 12 | -------------------------------------------------------------------------------- /tensornets/references/darkflow_utils/nms.pyx: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | cimport numpy as np 5 | cimport cython 6 | from libc.math cimport exp 7 | from .box import BoundBox 8 | 9 | 10 | 11 | #OVERLAP 12 | @cython.boundscheck(False) # turn off bounds-checking for entire function 13 | @cython.wraparound(False) # turn off negative index wrapping for entire function 14 | @cython.cdivision(True) 15 | cdef float overlap_c(float x1, float w1 , float x2 , float w2): 16 | cdef: 17 | float l1,l2,left,right 18 | l1 = x1 - w1 /2. 19 | l2 = x2 - w2 /2. 20 | left = max(l1,l2) 21 | r1 = x1 + w1 /2. 22 | r2 = x2 + w2 /2. 
23 | right = min(r1, r2) 24 | return right - left; 25 | 26 | #BOX INTERSECTION 27 | @cython.boundscheck(False) # turn off bounds-checking for entire function 28 | @cython.wraparound(False) # turn off negative index wrapping for entire function 29 | @cython.cdivision(True) 30 | cdef float box_intersection_c(float ax, float ay, float aw, float ah, float bx, float by, float bw, float bh): 31 | cdef: 32 | float w,h,area 33 | w = overlap_c(ax, aw, bx, bw) 34 | h = overlap_c(ay, ah, by, bh) 35 | if w < 0 or h < 0: return 0 36 | area = w * h 37 | return area 38 | 39 | #BOX UNION 40 | @cython.boundscheck(False) # turn off bounds-checking for entire function 41 | @cython.wraparound(False) # turn off negative index wrapping for entire function 42 | @cython.cdivision(True) 43 | cdef float box_union_c(float ax, float ay, float aw, float ah, float bx, float by, float bw, float bh): 44 | cdef: 45 | float i,u 46 | i = box_intersection_c(ax, ay, aw, ah, bx, by, bw, bh) 47 | u = aw * ah + bw * bh -i 48 | return u 49 | 50 | 51 | #BOX IOU 52 | @cython.boundscheck(False) # turn off bounds-checking for entire function 53 | @cython.wraparound(False) # turn off negative index wrapping for entire function 54 | @cython.cdivision(True) 55 | cdef float box_iou_c(float ax, float ay, float aw, float ah, float bx, float by, float bw, float bh): 56 | return box_intersection_c(ax, ay, aw, ah, bx, by, bw, bh) / box_union_c(ax, ay, aw, ah, bx, by, bw, bh); 57 | 58 | 59 | 60 | 61 | #NMS 62 | @cython.boundscheck(False) # turn off bounds-checking for entire function 63 | @cython.wraparound(False) # turn off negative index wrapping for entire function 64 | @cython.cdivision(True) 65 | cdef NMS(float[:, ::1] final_probs , float[:, ::1] final_bbox): 66 | cdef list boxes = list() 67 | cdef set indices = set() 68 | cdef: 69 | np.intp_t pred_length,class_length,class_loop,index,index2 70 | 71 | 72 | pred_length = final_bbox.shape[0] 73 | class_length = final_probs.shape[1] 74 | for class_loop in 
range(class_length): 75 | for index in range(pred_length): 76 | if final_probs[index,class_loop] == 0: continue 77 | for index2 in range(index+1,pred_length): 78 | if final_probs[index2,class_loop] == 0: continue 79 | if index==index2 : continue 80 | if box_iou_c(final_bbox[index,0],final_bbox[index,1],final_bbox[index,2],final_bbox[index,3],final_bbox[index2,0],final_bbox[index2,1],final_bbox[index2,2],final_bbox[index2,3]) >= 0.4: 81 | if final_probs[index2,class_loop] > final_probs[index, class_loop] : 82 | final_probs[index, class_loop] =0 83 | break 84 | final_probs[index2,class_loop]=0 85 | 86 | if index not in indices: 87 | bb=BoundBox(class_length) 88 | bb.x = final_bbox[index, 0] 89 | bb.y = final_bbox[index, 1] 90 | bb.w = final_bbox[index, 2] 91 | bb.h = final_bbox[index, 3] 92 | bb.c = final_bbox[index, 4] 93 | bb.probs = np.asarray(final_probs[index,:]) 94 | boxes.append(bb) 95 | indices.add(index) 96 | return boxes 97 | 98 | # cdef NMS(float[:, ::1] final_probs , float[:, ::1] final_bbox): 99 | # cdef list boxes = list() 100 | # cdef: 101 | # np.intp_t pred_length,class_length,class_loop,index,index2, i, j 102 | 103 | 104 | # pred_length = final_bbox.shape[0] 105 | # class_length = final_probs.shape[1] 106 | 107 | # for class_loop in range(class_length): 108 | # order = np.argsort(final_probs[:,class_loop])[::-1] 109 | # # First box 110 | # for i in range(pred_length): 111 | # index = order[i] 112 | # if final_probs[index, class_loop] == 0.: 113 | # continue 114 | # # Second box 115 | # for j in range(i+1, pred_length): 116 | # index2 = order[j] 117 | # if box_iou_c( 118 | # final_bbox[index,0],final_bbox[index,1], 119 | # final_bbox[index,2],final_bbox[index,3], 120 | # final_bbox[index2,0],final_bbox[index2,1], 121 | # final_bbox[index2,2],final_bbox[index2,3]) >= 0.4: 122 | # final_probs[index2, class_loop] = 0. 
123 | 124 | # bb = BoundBox(class_length) 125 | # bb.x = final_bbox[index, 0] 126 | # bb.y = final_bbox[index, 1] 127 | # bb.w = final_bbox[index, 2] 128 | # bb.h = final_bbox[index, 3] 129 | # bb.c = final_bbox[index, 4] 130 | # bb.probs = np.asarray(final_probs[index,:]) 131 | # boxes.append(bb) 132 | 133 | # return boxes 134 | -------------------------------------------------------------------------------- /tensornets/references/rcnns.py: -------------------------------------------------------------------------------- 1 | """Collection of RCNN variants 2 | 3 | The reference paper: 4 | 5 | - Faster R-CNN: Towards Real-Time Object Detection 6 | with Region Proposal Networks, NIPS 2015 7 | - Shaoqing Ren, Kaiming He, Ross Girshick, Jian Sun 8 | - https://arxiv.org/abs/1506.01497 9 | 10 | The reference implementation: 11 | 12 | 1. Caffe and Python utils 13 | - https://github.com/rbgirshick/py-faster-rcnn 14 | 2. RoI pooling in TensorFlow 15 | - https://github.com/deepsense-ai/roi-pooling 16 | """ 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | 20 | import tensorflow as tf 21 | 22 | from ..layers import conv2d 23 | from ..layers import dropout 24 | from ..layers import flatten 25 | from ..layers import fc 26 | from ..layers import max_pool2d 27 | from ..layers import convrelu as conv 28 | 29 | from ..ops import * 30 | from ..utils import pad_info 31 | from ..utils import set_args 32 | from ..utils import var_scope 33 | 34 | from .rpn_utils import filter_boxes 35 | from .rpn_utils import get_anchors 36 | from .rpn_utils import get_boxes 37 | from .rpn_utils import get_shifts 38 | from .rpn_utils import inv_boxes 39 | from .rpn_utils import nms 40 | from .rpn_utils import roi_pooling 41 | 42 | 43 | def __args__(is_training): 44 | return [([conv2d], {'padding': 'SAME', 'activation_fn': None, 45 | 'scope': 'conv'}), 46 | ([dropout], {'is_training': is_training}), 47 | ([fc], {'activation_fn': None, 'scope': 'fc'}), 48 | 
@var_scope('stack')
def _stack(x, filters, blocks, pool_fn=max_pool2d, scope=None):
    """Apply `blocks` 3x3 `conv` layers, then an optional 2x2/stride-2 pool.

    The convolutions are scoped '1' ... str(blocks). Pass `pool_fn=None` to
    skip the pooling step.
    """
    for i in range(1, blocks+1):
        x = conv(x, filters, 3, scope=str(i))
    if pool_fn is not None:
        x = pool_fn(x, 2, stride=2)
    return x


@var_scope('rp_net')
def rp_net(x, filters, original_height, original_width, scales,
           anchors=9, feat_stride=16,
           nms_thresh=0.7,  # NMS threshold used on RPN proposals
           pre_nms_topN=6000,  # Number of top scoring boxes to keep before NMS
           post_nms_topN=300,  # Number of top scoring boxes to keep after NMS
           min_size=16,  # Minimum of box sizes at original scale
           scope=None):
    """Region proposal network: score anchors and return filtered proposals.

    A 3x3 conv feeds two 1x1 heads ('logits' with 2 * anchors channels and
    'boxes' with 4 * anchors channels). Shifted anchors are decoded with the
    predicted deltas by `inv_boxes`, then filtered by minimum size, truncated
    to the `pre_nms_topN` best scores, passed through `nms` and truncated to
    `post_nms_topN`.

    Only `scales[0]`, `scores[0]` and `proposals[0]` drive the filtering and
    ordering, so this presumably assumes a batch of one image - TODO confirm.

    Returns:
        The proposals tensor that survives all filtering steps, with the
        (x1, y1, x2, y2) last axis produced by `inv_boxes`.
    """
    x = conv(x, filters, 3, padding='SAME', scope='0')

    # Dynamic spatial size of the feature map.
    height = tf.shape(x)[1]
    width = tf.shape(x)[2]

    # Softmax runs over the axis of size 2 of the reshaped logits, then the
    # tensor is folded back to 2 * anchors channels.
    x1 = conv2d(x, 2 * anchors, 1, scope='logits')
    x1 = tf.reshape(x1, (-1, height, width, 2, anchors))
    x1 = tf.nn.softmax(x1, dim=3)
    x1 = reshape(x1, (-1, height, width, 2 * anchors), name='probs')

    x2 = conv2d(x, 4 * anchors, 1, scope='boxes')

    # Force the following operations to use CPU
    # Note that inference time may increase up to 10x without this designation
    with tf.device('cpu:0'):
        # Enumerate all shifts
        shifts = get_shifts(width, height, feat_stride)

        # Enumerate all shifted anchors
        shifted_anchors = tf.expand_dims(get_anchors(), 0) + \
            tf.expand_dims(shifts, 1)
        shifted_anchors = tf.reshape(shifted_anchors, (-1, 4))

        # Same story for the scores
        # (only the second half of the channels, `anchors:`, is kept)
        scores = tf.reshape(x1[:, :, :, anchors:],
                            (-1, height * width * anchors))
        bbox_deltas = tf.reshape(x2, (-1, height * width * anchors, 4))

        # Convert anchors into proposals via bbox transformations
        # 2. clip predicted boxes to image
        proposals = inv_boxes(shifted_anchors, bbox_deltas,
                              original_height, original_width)

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = filter_boxes(proposals, min_size * scales[0])
        scores = gather(scores, keep, axis=1, name='filtered/probs')
        proposals = gather(proposals, keep, axis=1, name='filtered/boxes')

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        _, order = tf.nn.top_k(scores[0], k=tf.shape(scores)[1])
        order = order[:pre_nms_topN]
        scores = gather(scores, order, axis=1, name='topk/probs')
        proposals = gather(proposals, order, axis=1, name='topk/boxes')

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(proposals[0], scores[0], nms_thresh)
        keep = keep[:post_nms_topN]
        scores = gather(scores, keep, axis=1, name='nms/probs')
        proposals = gather(proposals, keep, axis=1, name='nms/boxes')

    return proposals


@var_scope('roi_pool')
def roi_pool2d(x, kernel_size, rois, spatial_scale=0.0625, scope=None):
    """RoI-pool `x` to `kernel_size` x `kernel_size` for every box in `rois`.

    `rois` is multiplied by `spatial_scale`, rounded and cast to int32. Only
    `rois[0]` is used, and one zero column is prepended to it before calling
    `roi_pooling` (presumably the batch index expected by that op - TODO
    confirm against the roi_pooling package).
    """
    rois = tf.cast(tf.round(rois * spatial_scale), dtype=tf.int32)
    rois = tf.pad(rois[0], [[0, 0], [1, 0]])
    return roi_pooling(x, rois, kernel_size, kernel_size)


def rcnn(x, stem_fn, roi_pool_fn, is_training, classes,
         scope=None, reuse=None):
    """Assemble the detection head shared by the Faster R-CNN models.

    Args:
        x: input tensor passed to `stem_fn`.
        stem_fn: callable mapping the input to a feature map.
        roi_pool_fn: callable mapping the feature map to
            `(pooled_features, rois)`.
        is_training, scope, reuse: not used inside this function body.
        classes: number of outputs of the 'logits' layer; the 'boxes' layer
            has `4 * classes` outputs.

    Returns:
        The 'out' tensor: class probabilities, box outputs and `rois`
        concatenated along axis 1, with `get_boxes` attached as the
        `get_boxes` attribute for decoding.
    """
    x = stem_fn(x)
    x, rois = roi_pool_fn(x)
    x = flatten(x)
    x = fc(x, 4096, scope='fc6')
    x = relu(x, name='relu6')
    x = dropout(x, keep_prob=0.5, scope='drop6')
    x = fc(x, 4096, scope='fc7')
    x = relu(x, name='relu7')
    x = dropout(x, keep_prob=0.5, scope='drop7')
    # Column layout of 'out': [probs | boxes | rois]; `get_boxes` in
    # rpn_utils splits the array back in this order.
    x = concat([softmax(fc(x, classes, scope='logits'), name='probs'),
                fc(x, 4 * classes, scope='boxes'),
                rois], axis=1, name='out')
    x.get_boxes = get_boxes
    return x
@var_scope('REFfasterrcnnZFvoc')
@set_args(__args__)
def faster_rcnn_zf_voc(x, is_training=False, classes=21,
                       scope=None, reuse=None):
    """Build the reference Faster R-CNN with the ZF stem defined below.

    A float32 `scales` placeholder is created and attached to the returned
    tensor as `.scales`; it must be fed at run time. The default `classes=21`
    presumably corresponds to the 20 VOC labels plus background.

    NOTE(review): `tf.placeholder` is the TF1 API and this module imports
    `tensorflow` without the `tf.compat.v1` switch that yolo_utils.py
    applies - confirm behaviour under TF 2.x.
    """
    scales = tf.placeholder(tf.float32, [None])
    # Input height / width as float tensors; passed to `rp_net` as the
    # original image size.
    height = tf.cast(tf.shape(x)[1], dtype=tf.float32)
    width = tf.cast(tf.shape(x)[2], dtype=tf.float32)

    def stem_fn(x):
        # Two explicit pad -> conv -> srn -> pad -> pool stages, followed by
        # three 3x3 convolutions.
        x = pad(x, pad_info(7), name='pad1')
        x = conv(x, 96, 7, stride=2, padding='VALID', scope='conv1')
        x = srn(x, depth_radius=3, alpha=0.00005, beta=0.75, name='srn1')
        x = pad(x, pad_info(3, symmetry=False), name='pad2')
        x = max_pool2d(x, 3, stride=2, padding='VALID', scope='pool1')

        x = pad(x, pad_info(5), name='pad3')
        x = conv(x, 256, 5, stride=2, padding='VALID', scope='conv2')
        x = srn(x, depth_radius=3, alpha=0.00005, beta=0.75, name='srn2')
        x = pad(x, pad_info(3, symmetry=False), name='pad4')
        x = max_pool2d(x, 3, stride=2, padding='VALID', scope='pool2')

        x = conv(x, 384, 3, scope='conv3')
        x = conv(x, 384, 3, scope='conv4')
        x = conv(x, 256, 3, scope='conv5')
        return x

    def roi_pool_fn(x):
        # Proposals come from the RPN; the returned rois are divided by
        # `scales` before being concatenated into the output.
        rois = rp_net(x, 256, height, width, scales)
        x = roi_pool2d(x, 6, rois)
        return x, rois[0] / scales

    x = rcnn(x, stem_fn, roi_pool_fn, is_training, classes, scope, reuse)
    x.scales = scales
    return x


@var_scope('REFfasterrcnnVGG16voc')
@set_args(__args__)
def faster_rcnn_vgg16_voc(x, is_training=False, classes=21,
                          scope=None, reuse=None):
    """Build the reference Faster R-CNN with a five-stack conv stem.

    Same wiring as `faster_rcnn_zf_voc`, but the stem is built from `_stack`
    (the last stack has no pooling), the RPN uses 512 filters and RoI pooling
    produces 7x7 outputs. A float32 `scales` placeholder is attached to the
    result as `.scales` and must be fed at run time.
    """
    scales = tf.placeholder(tf.float32, [None])
    height = tf.cast(tf.shape(x)[1], dtype=tf.float32)
    width = tf.cast(tf.shape(x)[2], dtype=tf.float32)

    def stem_fn(x):
        x = _stack(x, 64, 2, scope='conv1')
        x = _stack(x, 128, 2, scope='conv2')
        x = _stack(x, 256, 3, scope='conv3')
        x = _stack(x, 512, 3, scope='conv4')
        # No pooling after the last stack.
        x = _stack(x, 512, 3, pool_fn=None, scope='conv5')
        return x

    def roi_pool_fn(x):
        rois = rp_net(x, 512, height, width, scales)
        x = roi_pool2d(x, 7, rois)
        return x, rois[0] / scales

    x = rcnn(x, stem_fn, roi_pool_fn, is_training, classes, scope, reuse)
    x.scales = scales
    return x


# Simple alias.
FasterRCNN_ZF_VOC = faster_rcnn_zf_voc
FasterRCNN_VGG16_VOC = faster_rcnn_vgg16_voc
try:
    # installation guide:
    # $ git clone git@github.com:deepsense-io/roi-pooling.git
    # $ cd roi-pooling
    # $ vi roi_pooling/Makefile
    #   (edit according to https://github.com/tensorflow/tensorflow/
    #    issues/13607#issuecomment-335530430)
    # $ python setup.py install
    from roi_pooling.roi_pooling_ops import roi_pooling
except Exception:
    # The op is optional, so any error raised while importing it falls back
    # to a stub. `Exception` is used instead of a bare `except` so that
    # KeyboardInterrupt / SystemExit are no longer swallowed at import time.
    def roi_pooling(x, rois, w, h):
        raise AssertionError('`roi_pooling` requires deepsense-ai\'s package.')

try:
    xrange  # Python 2
except NameError:
    xrange = range  # Python 3


def _whctrs(anchor):
    """
    Return width, height, x center, and y center for an anchor (window).
    """

    w = anchor[2] - anchor[0] + 1
    h = anchor[3] - anchor[1] + 1
    x_ctr = anchor[0] + (w - 1) / 2
    y_ctr = anchor[1] + (h - 1) / 2
    return w, h, x_ctr, y_ctr


def _mkanchors(ws, hs, x_ctr, y_ctr):
    """
    Given a vector of widths (ws) and heights (hs) around a center
    (x_ctr, y_ctr), output a set of anchors (windows).

    The result stacks (x1, y1, x2, y2) along the last axis.
    """

    ws = (ws - 1) / 2
    hs = (hs - 1) / 2
    return tf.stack([
        x_ctr - ws,
        y_ctr - hs,
        x_ctr + ws,
        y_ctr + hs],
        axis=-1)


def _ratio_enum(anchor, ratios):
    """
    Enumerate a set of anchors for each aspect ratio wrt an anchor.
    """

    w, h, x_ctr, y_ctr = _whctrs(anchor)
    size = w * h
    size_ratios = size / ratios
    # Widths are rounded first; heights are derived from the rounded widths.
    ws = tf.round(tf.sqrt(size_ratios))
    hs = tf.round(ws * ratios)
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors


def _scale_enum(anchor, scales):
    """
    Enumerate a set of anchors for each scale wrt an anchor.
    """

    w, h, x_ctr, y_ctr = _whctrs(anchor)
    ws = w * scales
    hs = h * scales
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors


def get_anchors(base_size=16, ratios=[0.5, 1, 2], scales=2**np.arange(3, 6)):
    """
    Generate anchor (reference) windows by enumerating aspect ratios X
    scales wrt a reference (0, 0, 15, 15) window.

    The default arguments are only read, never modified.
    """

    base_anchor = tf.constant(
        [0, 0, base_size - 1, base_size - 1], dtype=tf.float32)
    ratio_anchors = _ratio_enum(base_anchor, ratios)
    anchors = tf.concat(
        [_scale_enum(ratio_anchors[i, :], scales)
         for i in xrange(ratio_anchors.shape[0])],
        axis=0)
    return anchors


def get_shifts(width, height, feat_stride):
    """Return one (dx, dy, dx, dy) float32 row per feature-map cell.

    The offsets are the cell indices multiplied by `feat_stride`, enumerated
    over the `tf.meshgrid` of `width` x `height`.
    """
    shift_x = tf.range(width) * feat_stride
    shift_y = tf.range(height) * feat_stride
    shift_x, shift_y = tf.meshgrid(shift_x, shift_y)
    shift_x = tf.reshape(shift_x, (-1,))
    shift_y = tf.reshape(shift_y, (-1,))
    shifts = tf.stack([shift_x, shift_y, shift_x, shift_y], axis=0)
    shifts = tf.transpose(shifts)
    return tf.cast(shifts, dtype=tf.float32)


def inv_boxes(boxes, deltas, height, width):
    """Decode box deltas against reference boxes and clip to the image.

    TensorFlow counterpart of `inv_boxes_np`.

    Args:
        boxes: 2-D tensor of (x1, y1, x2, y2) reference boxes.
        deltas: 3-D tensor whose last axis holds (dx, dy, dw, dh).
        height, width: image size; coordinates are clipped to
            [0, width - 1] and [0, height - 1].

    Returns:
        Tensor with (x1, y1, x2, y2) stacked along the last axis.
    """
    w = boxes[:, 2] - boxes[:, 0] + 1.0
    h = boxes[:, 3] - boxes[:, 1] + 1.0
    x = boxes[:, 0] + 0.5 * w
    y = boxes[:, 1] + 0.5 * h

    pred_x = deltas[:, :, 0] * w + x
    pred_y = deltas[:, :, 1] * h + y
    pred_w = tf.exp(deltas[:, :, 2]) * w
    pred_h = tf.exp(deltas[:, :, 3]) * h

    x1 = tf.maximum(tf.minimum(pred_x - 0.5 * pred_w, width - 1), 0)
    y1 = tf.maximum(tf.minimum(pred_y - 0.5 * pred_h, height - 1), 0)
    x2 = tf.maximum(tf.minimum(pred_x + 0.5 * pred_w, width - 1), 0)
    y2 = tf.maximum(tf.minimum(pred_y + 0.5 * pred_h, height - 1), 0)

    return tf.stack([x1, y1, x2, y2], axis=-1)


def inv_boxes_np(boxes, deltas, im_shape):
    """NumPy version of `inv_boxes` for per-class deltas.

    Args:
        boxes: (N, 4) array of (x1, y1, x2, y2) reference boxes.
        deltas: (N, 4 * K) array; columns k*4 .. k*4+3 hold (dx, dy, dw, dh)
            for class k.
        im_shape: (height, width, ...) used for clipping.

    Returns:
        (N, K, 4) array of clipped (x1, y1, x2, y2) boxes.
    """
    w = boxes[:, 2] - boxes[:, 0] + 1
    h = boxes[:, 3] - boxes[:, 1] + 1
    x = boxes[:, 0] + 0.5 * w
    y = boxes[:, 1] + 0.5 * h

    pred_x = deltas[:, 0::4] * w[:, np.newaxis] + x[:, np.newaxis]
    pred_y = deltas[:, 1::4] * h[:, np.newaxis] + y[:, np.newaxis]
    pred_w = np.exp(deltas[:, 2::4]) * w[:, np.newaxis]
    pred_h = np.exp(deltas[:, 3::4]) * h[:, np.newaxis]

    x1 = np.maximum(np.minimum(pred_x - 0.5 * pred_w, im_shape[1] - 1), 0)
    y1 = np.maximum(np.minimum(pred_y - 0.5 * pred_h, im_shape[0] - 1), 0)
    x2 = np.maximum(np.minimum(pred_x + 0.5 * pred_w, im_shape[1] - 1), 0)
    y2 = np.maximum(np.minimum(pred_y + 0.5 * pred_h, im_shape[0] - 1), 0)

    return np.stack([x1, y1, x2, y2], axis=-1)


def filter_boxes(boxes, min_size):
    """Remove all boxes with any side smaller than min_size.

    Only `boxes[0]` is inspected; the result is the vector of kept indices.
    """
    ws = boxes[0, :, 2] - boxes[0, :, 0] + 1
    hs = boxes[0, :, 3] - boxes[0, :, 1] + 1
    keep = tf.where((ws >= min_size) & (hs >= min_size))[:, 0]
    return keep


def nms(proposals, scores, thresh):
    """Greedy non-maximum suppression as a `tf.while_loop`.

    Boxes are visited in index order starting from index 0, so `proposals`
    is presumably expected to be sorted by descending score already (the
    values of `scores` are not read, only its length).

    Args:
        proposals: 2-D tensor of (x1, y1, x2, y2) boxes.
        scores: 1-D tensor with one entry per proposal.
        thresh: boxes whose IoU with a kept box exceeds this are dropped.

    Returns:
        1-D tensor with the indices of the kept proposals.
    """
    x1 = proposals[:, 0]
    y1 = proposals[:, 1]
    x2 = proposals[:, 2]
    y2 = proposals[:, 3]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    num = tf.range(tf.shape(scores)[0])

    def body(i, keep, screen):
        # IoU of box `i` against every box.
        xx1 = tf.maximum(x1[i], x1)
        yy1 = tf.maximum(y1[i], y1)
        xx2 = tf.minimum(x2[i], x2)
        yy2 = tf.minimum(y2[i], y2)

        w = tf.maximum(0.0, xx2 - xx1 + 1)
        h = tf.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas - inter)

        # Survivors: not overlapping box `i` too much, not before `i`, and
        # not suppressed in an earlier iteration.
        bools = (ovr <= thresh) & (num >= i) & (screen)
        # Next box is the first survivor; if there is none, jump to the end
        # value, which terminates the loop.
        i = tf.cond(tf.count_nonzero(bools) > 0,
                    lambda: tf.cast(tf.where(bools)[0, 0], tf.int32),
                    lambda: tf.shape(scores)[0])

        return [i, tf.concat([keep, tf.stack([i])], axis=0), bools]

    def condition(i, keep, screen):
        return i < tf.shape(scores)[0]

    i = tf.constant(0)
    i, keep, screen = tf.while_loop(
        condition, body, [i, tf.stack([i]), num >= 0],
        shape_invariants=[tf.TensorShape([]),
                          tf.TensorShape([None, ]),
                          tf.TensorShape([None, ])],
        back_prop=False)

    # The last appended entry is the terminating end value, not a box index.
    return keep[:-1]


def nms_np(dets, thresh):
    """Pure Python NMS baseline.

    Args:
        dets: (N, 5) array with rows (x1, y1, x2, y2, score).
        thresh: IoU above which a lower-scored box is suppressed.

    Returns:
        List of kept row indices, ordered by descending score.
    """
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)

        # `inds` indexes into order[1:], hence the +1 when re-indexing.
        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]

    return keep


def get_boxes(outs, im_shape, max_per_image=100, thresh=0.05, nmsth=0.3):
    """Decode the concatenated network output into per-class detections.

    Args:
        outs: (N, 5 * (classes + 1) + 4) array laid out as
            [class scores | per-class box deltas | rois].
        im_shape: (height, width, ...) used to clip the decoded boxes.
        max_per_image: if positive, keep at most about this many detections
            over all classes (ties at the cut-off score are all kept).
        thresh: minimum class score for a detection to be considered.
        nmsth: IoU threshold for the per-class NMS.

    Returns:
        A list with one (n_k, 5) array of (x1, y1, x2, y2, score) rows per
        class; column 0 of the scores is skipped.
    """
    classes = (outs.shape[1] - 4) // 5 - 1
    scores, boxes, rois = np.split(outs, [classes + 1, -4], axis=1)
    pred_boxes = inv_boxes_np(rois, boxes, im_shape)
    objs = []
    total_boxes = 0
    for j in xrange(1, classes + 1):
        inds = np.where(scores[:, j] > thresh)[0]
        cls_scores = scores[inds, j]
        cls_boxes = pred_boxes[inds, j]
        cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis]))
        keep = nms_np(cls_dets, nmsth)
        cls_dets = cls_dets[keep, :]
        objs.append(cls_dets)
        total_boxes += cls_dets.shape[0]

    if max_per_image > 0 and total_boxes > max_per_image:
        # Find the score of the max_per_image-th best detection and drop
        # everything below it.
        image_scores = np.hstack([objs[j][:, -1] for j in xrange(classes)])
        image_thresh = np.sort(image_scores)[-max_per_image]
        for j in xrange(classes):
            keep = np.where(objs[j][:, -1] >= image_thresh)[0]
            objs[j] = objs[j][keep, :]

    return objs
def parse_box(b, t, w, h):
    """Turn a relative box into clipped integer pixel corners.

    Args:
        b: box object exposing `probs` (indexable per-class scores) and the
            attributes `x`, `y`, `w`, `h`. The center and size are scaled by
            the image width and height below, so they are treated as
            fractions of the image.
        t: score threshold; the box is rejected unless its best class score
            is strictly greater than `t`.
        w: image width in pixels.
        h: image height in pixels.

    Returns:
        `(class_index, (x1, y1, x2, y2, score))` when the box passes the
        threshold and has finite coordinates, otherwise `(None, None)`.
    """
    idx = np.argmax(b.probs)
    score = b.probs[idx]
    if not score > t:
        return None, None

    try:
        x1 = int((b.x - b.w / 2) * w)
        y1 = int((b.y - b.h / 2) * h)
        x2 = int((b.x + b.w / 2) * w)
        y2 = int((b.y + b.h / 2) * h)
    except (ValueError, OverflowError):
        # int() raises ValueError for NaN and OverflowError for +/-inf.
        # Such degenerate predictions are dropped; any other exception is a
        # programming error and is allowed to propagate instead of being
        # silently swallowed.
        return None, None

    # Clip the top-left corner at 0 and the bottom-right corner at the last
    # valid pixel index.
    x1 = max(x1, 0)
    y1 = max(y1, 0)
    x2 = min(x2, w - 1)
    y2 = min(y2, h - 1)
    return idx, (x1, y1, x2, y2, score)
def v2_loss(outs, anchorcoords, classes):
    """Build the YOLOv2 training loss, following the darkflow implementation.

    Args:
        outs: network output tensor. Its static dimensions 1 and 2 are read
            as the grid height and width, and it must carry an `inputs` list
            whose entries 1..7 are unpacked below as the probs, confs, coord,
            proid, areas, upleft and botright targets (the same order in
            which `v2_inputs` creates its placeholders).
        anchorcoords: flat sequence of anchor values, reshaped below into
            `anchors` pairs -- presumably (width, height) per anchor, since
            they are divided by `[W, H]`; confirm against the anchor tables.
        classes: number of object classes.

    Returns:
        A scalar tensor: half of the batch mean of the weighted squared
        error, plus the collected regularization loss.
    """
    # Refer to the following darkflow loss
    # https://github.com/thtrieu/darkflow/blob/master/darkflow/net/yolov2/train.py
    # Loss weights: class probabilities, confidence where the masked target
    # confidence is 1, confidence where it is 0, and box coordinates.
    sprob = 1.
    sconf = 5.
    snoob = 1.
    scoor = 1.
    # Static grid size; older TensorFlow exposes dimensions as objects with a
    # `.value` attribute, newer versions can be converted with int().
    H = int(outs.shape[1]) if tf_later_than('2') else outs.shape[1].value
    W = int(outs.shape[2]) if tf_later_than('2') else outs.shape[2].value
    cells = H * W
    sizes = np.array([[[[W, H]]]], dtype=np.float32)
    anchors = len(anchorcoords) // 2
    anchorcoords = np.reshape(anchorcoords, [1, 1, anchors, 2])
    # outs.inputs[0] is skipped; `_proid` is unpacked but not used below.
    _, _probs, _confs, _coord, _proid, _areas, _ul, _br = outs.inputs[:8]

    # Extract the coordinate prediction from net.out
    outs = tf.reshape(outs, [-1, H, W, anchors, (5 + classes)])
    coords = tf.reshape(outs[:, :, :, :, :4], [-1, cells, anchors, 4])
    # Sigmoid of the raw x/y outputs.
    adj_xy = 1. / (1. + tf.exp(-coords[:, :, :, 0:2]))
    # Square root of the anchor-scaled size divided by the grid size.
    adj_wh = tf.sqrt(tf.exp(coords[:, :, :, 2:4]) * anchorcoords / sizes)
    # Sigmoid of the raw confidence output.
    adj_c = 1. / (1. + tf.exp(-outs[:, :, :, :, 4]))
    adj_c = tf.reshape(adj_c, [-1, cells, anchors, 1])
    adj_prob = tf.reshape(tf.nn.softmax(outs[:, :, :, :, 5:]),
                          [-1, cells, anchors, classes])
    adj_outs = tf.concat([adj_xy, adj_wh, adj_c, adj_prob], 3)

    coords = tf.concat([adj_xy, adj_wh], 3)
    # Undo the sqrt and the division by the grid size to get box sizes.
    wh = tf.pow(coords[:, :, :, 2:4], 2) * sizes
    area_pred = wh[:, :, :, 0] * wh[:, :, :, 1]
    centers = coords[:, :, :, 0:2]
    floor = centers - (wh * .5)
    ceil = centers + (wh * .5)

    # calculate the intersection areas
    intersect_upleft = tf.maximum(floor, _ul)
    intersect_botright = tf.minimum(ceil, _br)
    intersect_wh = intersect_botright - intersect_upleft
    intersect_wh = tf.maximum(intersect_wh, 0.0)
    intersect = tf.multiply(intersect_wh[:, :, :, 0], intersect_wh[:, :, :, 1])

    # calculate the best IOU, set 0.0 confidence for worse boxes
    iou = tf.truediv(intersect, _areas + area_pred - intersect)
    # Axis 2 is the anchor axis, so the maximum is taken per cell.
    best_box = tf.equal(iou, tf.reduce_max(iou, [2], True))
    best_box = tf.to_float(best_box)
    confs = tf.multiply(best_box, _confs)

    # take care of the weight terms
    conid = snoob * (1. - confs) + sconf * confs
    # Repeat the confidence mask once per coordinate / per class so that it
    # lines up with the concatenated prediction layout.
    weight_coo = tf.concat(4 * [tf.expand_dims(confs, -1)], 3)
    cooid = scoor * weight_coo
    weight_pro = tf.concat(classes * [tf.expand_dims(confs, -1)], 3)
    proid = sprob * weight_pro

    # Targets and weights in the same (coords, confidence, classes) order as
    # adj_outs.
    true = tf.concat([_coord, tf.expand_dims(confs, 3), _probs], 3)
    wght = tf.concat([cooid, tf.expand_dims(conid, 3), proid], 3)

    loss = tf.pow(adj_outs - true, 2)
    loss = tf.multiply(loss, wght)
    loss = tf.reshape(loss, [-1, cells * anchors * (5 + classes)])
    loss = tf.reduce_sum(loss, 1)
    return .5 * tf.reduce_mean(loss) + tf.losses.get_regularization_loss()
@var_scope('stackv3')
def stackv3(x, filters, blocks, kernel_size=3,
            conv_shortcut=True, scope=None):
    """Stack `blocks` pairs of 1x1 and `kernel_size` convolutions.

    Each pair first applies a 1x1 convolution with `filters // 2` channels
    and then a `kernel_size` convolution with `filters` channels. When
    `conv_shortcut` is exactly `True`, the pair's input is added back to its
    output.

    Returns:
        `x` when `conv_shortcut` is `True`; otherwise the tuple `(x, p)`
        where `p` is the output of the last 1x1 convolution.
    """
    residual = conv_shortcut is True
    for index in range(blocks):
        block_id = index + 1
        block_input = x
        p = conv(block_input, filters // 2, 1, scope="%d/1" % block_id)
        x = conv(p, filters, kernel_size, scope="%d/2" % block_id)
        if residual:
            x = add(block_input, x, name="%d/out" % block_id)
    return x if residual else (x, p)
def yolov3(x, blocks, is_training, classes, scope=None, reuse=None):
    """Assemble the YOLOv3 graph and return its last prediction tensor.

    Five downsampling stages are followed by three prediction heads. The
    second and third heads consume the upsampled intermediate feature of the
    previous head concatenated with the output of an earlier stage.

    Returns:
        The third head's output tensor, with `aliases` set to an empty list
        and `preds` set to the list of all three head outputs.
    """
    head_channels = (classes + 5) * 3

    x = conv(x, 32, 3, scope='conv1')

    # Backbone: down1/conv2 ... down5/conv6.
    stage_outputs = []
    for stage, width in enumerate([64, 128, 256, 512, 1024], 1):
        x = down(x, width, scope='down%d' % stage)
        x = stackv3(x, width, blocks[stage - 1], scope='conv%d' % (stage + 1))
        stage_outputs.append(x)
    # Outputs of 'conv4' and 'conv5' are reused by the later heads.
    p0, p1 = stage_outputs[2], stage_outputs[3]

    # First head.
    x, p = stackv3(x, 1024, blocks[5], conv_shortcut=False, scope='conv7')
    out0 = conv(x, head_channels, 1, onlyconv=True, scope='linear7')

    # Second head.
    p = up(p, 256, 2, scope='up7')
    x = concat([p, p1], axis=3, name='concat7')
    x, p = stackv3(x, 512, blocks[5], conv_shortcut=False, scope='conv8')
    out1 = conv(x, head_channels, 1, onlyconv=True, scope='linear8')

    # Third head.
    p = up(p, 128, 2, scope='up8')
    x = concat([p, p0], axis=3, name='concat8')
    x, _ = stackv3(x, 256, blocks[5], conv_shortcut=False, scope='conv9')
    out2 = conv(x, head_channels, 1, onlyconv=True, scope='linear9')

    out2.aliases = []
    out2.preds = [out0, out1, out2]
    return out2
def tinyyolo(x, is_training, classes, scope=None, reuse=None):
    """Assemble the Tiny YOLOv2 graph and return its output tensor.

    Five conv + stride-2 pooling pairs are followed by a sixth convolution,
    a stride-1 pooling, two more convolutions and a 1x1 linear layer with
    `(classes + 5) * 5` channels. The returned tensor gets an empty
    `aliases` list.
    """
    for level, width in enumerate([16, 32, 64, 128, 256], 1):
        x = conv(x, width, 3, scope='conv%d' % level)
        x = max_pool2d(x, 2, stride=2, scope='pool%d' % level)

    x = conv(x, 512, 3, scope='conv6')
    x = max_pool2d(x, 2, stride=1, scope='pool6')
    x = conv(x, 1024, 3, scope='conv7')

    # The 20-class variant uses a wider 'conv8' than the others.
    conv8_width = 1024 if classes == 20 else 512
    x = conv(x, conv8_width, 3, scope='conv8')

    x = conv(x, (classes + 5) * 5, 1, onlyconv=True, scope='linear')
    x.aliases = []
    return x
@var_scope('REFtinyyolov2coco')
@set_args(__args__)
def tinyyolov2coco(x, is_training=False, classes=80, scope=None, reuse=None):
    """Build Tiny YOLOv2 for COCO and attach its boxes, inputs and loss.

    Args:
        x: input image tensor.
        is_training: Python bool or a boolean tensor; a tensor is appended
            to the returned model's `inputs`.
        classes: number of object classes.
        scope, reuse: forwarded to `tinyyolo`.

    Returns:
        The network output tensor with `get_boxes`, `inputs` and `loss`
        attached.
    """
    inputs = x
    # The anchor table in yolo_utils has no 'tinyyolov2' entry, so
    # opts('tinyyolov2') raised KeyError. Darknet's tiny YOLOv2 COCO config
    # uses the same anchors as the full YOLOv2 COCO model, and 'yolov2' also
    # selects the COCO label set, so that entry is used here.
    opt = opts('yolov2')
    x = tinyyolo(x, is_training, classes, scope, reuse)

    def _get_boxes(*args, **kwargs):
        return get_v2_boxes(opt, *args, **kwargs)
    x.get_boxes = _get_boxes
    # Image tensor first, then the training placeholders read by v2_loss.
    x.inputs = [inputs]
    x.inputs += v2_inputs(x.shape[1:3], opt['num'], classes, x.dtype)
    if isinstance(is_training, tf.Tensor):
        x.inputs.append(is_training)
    x.loss = v2_loss(x, opt['anchors'], classes)
    return x
scope=None, reuse=None): 244 | inputs = x 245 | opt = opts('tinyyolov2voc') 246 | x = tinyyolo(x, is_training, classes, scope, reuse) 247 | 248 | def _get_boxes(*args, **kwargs): 249 | return get_v2_boxes(opt, *args, **kwargs) 250 | x.get_boxes = _get_boxes 251 | x.inputs = [inputs] 252 | x.inputs += v2_inputs(x.shape[1:3], opt['num'], classes, x.dtype) 253 | if isinstance(is_training, tf.Tensor): 254 | x.inputs.append(is_training) 255 | x.loss = v2_loss(x, opt['anchors'], classes) 256 | return x 257 | 258 | 259 | # Simple alias. 260 | YOLOv3COCO = yolov3coco 261 | YOLOv3VOC = yolov3voc 262 | YOLOv2COCO = yolov2coco 263 | YOLOv2VOC = yolov2voc 264 | TinyYOLOv2COCO = tinyyolov2coco 265 | TinyYOLOv2VOC = tinyyolov2voc 266 | -------------------------------------------------------------------------------- /tensornets/squeezenets.py: -------------------------------------------------------------------------------- 1 | """Collection of SqueezeNet variants 2 | 3 | The reference paper: 4 | 5 | - SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size, arXiv 2016 6 | - Forrest N. Iandola et al. 7 | - https://arxiv.org/abs/1602.07360 8 | 9 | The reference implementation: 10 | 11 | 1. 
@var_scope('fire')
def fire(x, squeeze, expand, scope=None):
    """Fire module: a 1x1 squeeze followed by parallel 1x1 and 3x3 expands.

    The input is reduced to `squeeze` channels, expanded by two separate
    convolutions with `expand` channels each, and the two results are
    concatenated along axis 3.
    """
    squeezed = conv(x, squeeze, 1, scope='squeeze1x1')
    branches = [conv(squeezed, expand, size, scope='expand%dx%d' % (size, size))
                for size in (1, 3)]
    return concat(branches, axis=3, name='concat')
def tf_later_than(v):
    """Return True if the installed TensorFlow is strictly newer than `v`.

    Both version strings are compared as `LooseVersion` objects.
    """
    installed = LooseVersion(tf.__version__)
    reference = LooseVersion(v)
    return installed > reference
def vgg(x, blocks, is_training, classes, stem, scope=None, reuse=None):
    """Assemble a VGG network from five convolutional stages.

    Args:
        x: input tensor.
        blocks: number of convolutions in each of the five stages.
        is_training: unused here; accepted for a uniform builder signature.
        classes: number of output classes.
        stem: when truthy, return the feature map after the fifth stage.

    Returns:
        The stage-5 feature map if `stem` is truthy, otherwise the softmax
        class probabilities.
    """
    stage_widths = (64, 128, 256, 512, 512)
    for stage, width in enumerate(stage_widths):
        x = _stack(x, width, blocks[stage], scope='conv%d' % (stage + 1))
    if stem:
        return x

    x = flatten(x)
    # Two identical fully-connected blocks: fc6/relu6/drop6, fc7/relu7/drop7.
    for layer in (6, 7):
        x = fc(x, 4096, scope='fc%d' % layer)
        x = relu(x, name='relu%d' % layer)
        x = dropout(x, keep_prob=0.5, scope='drop%d' % layer)

    logits = fc(x, classes, scope='logits')
    return softmax(logits, name='probs')
@var_scope('wavenet')
@set_args(__args__)
def wavenet(x, filters=32, skipfilters=512,
            quantization=256, blocks=10, repeats=5,
            is_training=False, scope=None, reuse=None):
    """Assemble a WaveNet and return per-step softmax probabilities.

    The input is one-hot encoded with `quantization` levels, padded by one
    step on the left of axis 1 and embedded by a width-2 convolution. It
    then passes through `blocks * repeats` dilated blocks whose dilation
    cycles through `2 ** 0 ... 2 ** (blocks - 1)`. The skip outputs of all
    blocks are summed and fed to two 1x1 convolutions.
    """
    x = one_hot(x, quantization, name='one_hot')
    x = tf.pad(x, [[0, 0], [1, 0], [0, 0]])
    x = conv1d(x, filters, 2, biases_initializer=None, scope='embedding')

    skips = []
    for repeat in range(repeats):
        for level in range(blocks):
            # Blocks are numbered consecutively across repeats.
            block_id = repeat * blocks + level
            x, skip = block(x, filters, skipfilters, 2 ** level,
                            scope=str(block_id))
            skips.append(skip)

    x = relu(sum(skips), name='skips')
    x = conv1d(x, skipfilters, 1, scope='fc')
    x = relu(x, name='fc/relu')
    logits = conv1d(x, quantization, 1, scope='logits')
    return softmax(logits, name='probs')
@var_scope('zf')
@set_args(__args__)
def zf(x, is_training=False, classes=1000, stem=False, scope=None, reuse=None):
    """Assemble the ZF network.

    Two padded stride-2 convolution stages, each followed by `srn`
    normalization and a padded stride-2 max pooling, are followed by three
    3x3 convolutions.

    Returns:
        The 'conv5' feature map when `stem` is truthy, otherwise the softmax
        class probabilities computed from the spatially averaged features.
    """
    # (filters, kernel size) of the two strided stages.
    for stage, (width, kernel) in enumerate([(96, 7), (256, 5)], 1):
        x = pad(x, pad_info(kernel), name='pad%d' % (2 * stage - 1))
        x = conv(x, width, kernel, stride=2, padding='VALID',
                 scope='conv%d' % stage)
        x = srn(x, depth_radius=3, alpha=0.00005, beta=0.75,
                name='srn%d' % stage)
        x = pad(x, pad_info(3, symmetry=False), name='pad%d' % (2 * stage))
        x = max_pool2d(x, 3, stride=2, padding='VALID',
                       scope='pool%d' % stage)

    for layer, width in enumerate([384, 384, 256], 3):
        x = conv(x, width, 3, scope='conv%d' % layer)
    if stem:
        return x

    x = reduce_mean(x, [1, 2], name='avgpool')
    logits = fc(x, classes, scope='logits')
    return softmax(logits, name='probs')
def test(models_list, crops=1, verbose=False):
    """Evaluate several ImageNet models side by side and print a table.

    Args:
        models_list: iterable of `(net_fn, input_shape, gpu_index)` tuples.
            `input_shape[0]` selects which of the three validation streams
            (224, 299, or otherwise the 331 crop) feeds that model.
        crops: number of crops per image; when greater than 1 the
            predictions are averaged over the crops.
        verbose: print a progress mark after every batch.
    """
    batches1 = imagenet_load(data_dir, 256, 224, crops)
    batches2 = imagenet_load(data_dir, 341, 299, crops)
    batches3 = imagenet_load(data_dir, 378, 331, crops)
    inputs, models, shapes, params = [], [], [], []
    labels, preds_list = [], []
    if verbose:
        print("")

    with tf.Graph().as_default():
        for (_net, _shape, _gpu) in models_list:
            with tf.device("gpu:%d" % _gpu):
                _input = tf.placeholder(tf.float32, [None] + list(_shape))
                _model = _net(_input, is_training=False)
                _weights = _model.get_weights()
                inputs.append(_input)
                models.append(_model)
                shapes.append(_shape)
                params.append(sum([w.shape.num_elements() for w in _weights]))

        with tf.Session() as sess:
            nets.pretrained(models)
            while True:
                # The builtin next() works on Python 2 and 3 alike, whereas
                # the `.next()` method only exists on Python 2 iterators.
                # Only exhaustion ends the loop; any other error (bad path,
                # decoding failure, ...) now surfaces instead of silently
                # truncating the evaluation.
                try:
                    batch1, label1 = next(batches1)
                    batch2, label2 = next(batches2)
                    batch3, label3 = next(batches3)
                except StopIteration:
                    break
                feed_dict = dict((i, m.preprocess(batch1 if s[0] == 224 else
                                                  batch2 if s[0] == 299 else
                                                  batch3))
                                 for (i, m, s) in zip(inputs, models, shapes))
                preds = sess.run(models, feed_dict)
                if crops > 1:
                    preds = [np.mean(pred.reshape(-1, crops, 1000), axis=1)
                             for pred in preds]
                # All three streams read the same split, so the labels of
                # the first stream are used for every model.
                labels.append(label1)
                preds_list.append(preds)
                if verbose:
                    print('.'),
        labels = np.concatenate(labels)

    if verbose:
        print("")

    def err(x):
        # Error rate in percent from a sequence of hit indicators.
        return 100 * (1 - sum(x) / float(len(x)))

    print("Crops: %d" % crops)
    print("Samples: %d" % len(labels))
    print("| | Top-1 | Top-5 | Top-1 | Top-5 | Size |")
    print("|------------------|-------|-------|--------|--------|-------|")

    for i in range(len(models)):
        # Five highest-scoring classes per sample; the last column is top-1.
        preds = np.concatenate([np.argsort(pred[i], axis=1)[:, -5:]
                                for pred in preds_list], axis=0)
        actuals = labels[:preds.shape[0]]
        top1 = (actuals == preds[:, -1])
        top5 = [1 if actual in pred else 0
                for (actual, pred) in zip(actuals, preds)]
        print("| %16s | %5d | %5d | %2.3f | %2.3f | %.1fM |" %
              (models[i].aliases[0][:16],
               sum(top1), sum(top5),
               err(top1), err(top5),
               params[i] / 1e6))
| (nets.MobileNet25, (224, 224, 3), 0), 115 | (nets.MobileNet50, (224, 224, 3), 1), 116 | (nets.MobileNet75, (224, 224, 3), 1), 117 | (nets.MobileNet100, (224, 224, 3), 0)]) 118 | 119 | 120 | test([(nets.ResNet50, (224, 224, 3), 0), 121 | (nets.ResNet101, (224, 224, 3), 0), 122 | (nets.ResNet152, (224, 224, 3), 0), 123 | (nets.ResNeXt50, (224, 224, 3), 0), 124 | (nets.ResNeXt101, (224, 224, 3), 1), 125 | (nets.ResNeXt101c64, (224, 224, 3), 1), 126 | (nets.WideResNet50, (224, 224, 3), 1)], 10) 127 | 128 | test([(nets.ResNet50v2, (299, 299, 3), 0), 129 | (nets.ResNet101v2, (299, 299, 3), 1), 130 | (nets.ResNet152v2, (299, 299, 3), 1), 131 | (nets.ResNet200v2, (224, 224, 3), 0)], 10) 132 | 133 | test([(nets.Inception1, (224, 224, 3), 0), 134 | (nets.Inception2, (224, 224, 3), 1), 135 | (nets.Inception3, (299, 299, 3), 0), 136 | (nets.Inception4, (299, 299, 3), 0), 137 | (nets.InceptionResNet2, (299, 299, 3), 1)], 10) 138 | 139 | test([(nets.NASNetAlarge, (331, 331, 3), 0)], 10) 140 | 141 | test([(nets.NASNetAmobile, (224, 224, 3), 0), 142 | (nets.VGG16, (224, 224, 3), 0), 143 | (nets.VGG19, (224, 224, 3), 1), 144 | (nets.SqueezeNet, (224, 224, 3), 1)], 10) 145 | 146 | test([(nets.DenseNet121, (224, 224, 3), 0), 147 | (nets.DenseNet169, (224, 224, 3), 0), 148 | (nets.DenseNet201, (224, 224, 3), 1), 149 | (nets.MobileNet25, (224, 224, 3), 0), 150 | (nets.MobileNet50, (224, 224, 3), 1), 151 | (nets.MobileNet75, (224, 224, 3), 1), 152 | (nets.MobileNet100, (224, 224, 3), 0)], 10) 153 | -------------------------------------------------------------------------------- /tests/basics_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | import tensornets as nets 6 | import os 7 | import pytest 8 | import random 9 | 10 | from tensornets.middles import direct as middle_names 11 | 12 | from distutils.version import LooseVersion 13 | 14 | 15 | 
pytestmark = pytest.mark.skipif(
    os.environ.get('CORE_CHANGED', 'True') == 'False',
    reason='Runs only when the relevant files have been modified.')


if LooseVersion(tf.__version__) > LooseVersion('1.14'):
    tf = tf.compat.v1


def _xfail_before_tf13(name):
    """Build the xfail mark shared by the NASNet-family entries."""
    return pytest.mark.xfail(
        LooseVersion(tf.__version__) < LooseVersion('1.3.0'),
        reason='%s requires TensorFlow >= 1.3.0' % name)


def _assert_list_sizes(model, weights, outputs, middles):
    """Assert the lengths of the model's weight/output/middle lists."""
    assert len(model.get_weights()) == weights
    assert len(model.get_outputs()) == outputs
    assert len(model.get_middles()) == middles


# Each `random.choice` picks one representative of a model family at
# collection time, so every run exercises one variant per family.
@pytest.mark.parametrize('net,shape,weights,outputs,middles', [
    random.choice([
        (nets.ResNet50, (224, 224, 3), 320, 161, 16),
        (nets.ResNet101, (224, 224, 3), 626, 314, 33),
        (nets.ResNet152, (224, 224, 3), 932, 467, 50),
    ]),
    random.choice([
        (nets.ResNet50v2, (299, 299, 3), 272, 192, 16),
        (nets.ResNet101v2, (299, 299, 3), 544, 379, 33),
        (nets.ResNet152v2, (299, 299, 3), 816, 566, 50),
    ]),
    (nets.ResNet200v2, (224, 224, 3), 1224, 745, 66),
    random.choice([
        (nets.ResNeXt50, (224, 224, 3), 267, 193, 16),
        (nets.ResNeXt101, (224, 224, 3), 522, 380, 33),
        # (nets.ResNeXt101c64, (224, 224, 3), 522, 380, 33),  # too heavy on Travis
    ]),
    (nets.WideResNet50, (224, 224, 3), 267, 177, 16),
    (nets.Inception1, (224, 224, 3), 116, 143, 11),
    (nets.Inception2, (224, 224, 3), 277, 231, 10),
    (nets.Inception3, (299, 299, 3), 378, 313, 11),
    (nets.Inception4, (299, 299, 3), 598, 494, 17),
    (nets.InceptionResNet2, (299, 299, 3), 898, 744, 43),
    pytest.param(
        nets.NASNetAlarge, (331, 331, 3), 1558, 1029, 20,
        marks=_xfail_before_tf13('NASNetAlarge')),
    pytest.param(
        nets.NASNetAmobile, (224, 224, 3), 1138, 759, 14,
        marks=_xfail_before_tf13('NASNetAmobile')),
    pytest.param(
        nets.PNASNetlarge, (331, 331, 3), 1179, 752, 12,
        marks=_xfail_before_tf13('PNASNetlarge')),
    pytest.param(
        *random.choice([
            (nets.VGG16, (224, 224, 3), 32, 40, 9),
            (nets.VGG19, (224, 224, 3), 38, 46, 12),
        ]),
        marks=pytest.mark.skipif(
            LooseVersion(tf.__version__) == LooseVersion('1.2.0'),
            reason='Deployments of VGGs on local are OK. But there is '
                   'something wrong in those tests on Travis with TF 1.2.0.')),
    random.choice([
        (nets.DenseNet121, (224, 224, 3), 606, 429, 61),
        (nets.DenseNet169, (224, 224, 3), 846, 597, 85),
        (nets.DenseNet201, (224, 224, 3), 1006, 709, 101),
    ]),
    random.choice([
        (nets.MobileNet25, (224, 224, 3), 137, 85, 11),
        (nets.MobileNet50, (224, 224, 3), 137, 85, 11),
        (nets.MobileNet75, (224, 224, 3), 137, 85, 11),
        (nets.MobileNet100, (224, 224, 3), 137, 85, 11),
    ]),
    random.choice([
        (nets.MobileNet35v2, (224, 224, 3), 262, 152, 62),
        (nets.MobileNet50v2, (224, 224, 3), 262, 152, 62),
        (nets.MobileNet75v2, (224, 224, 3), 262, 152, 62),
        (nets.MobileNet100v2, (224, 224, 3), 262, 152, 62),
        (nets.MobileNet130v2, (224, 224, 3), 262, 152, 62),
        (nets.MobileNet140v2, (224, 224, 3), 262, 152, 62),
    ]),
    random.choice([
        (nets.MobileNet75v3, (224, 224, 3), 266, 187, 19),
        (nets.MobileNet75v3small, (224, 224, 3), 210, 157, 15),
        (nets.MobileNet100v3, (224, 224, 3), 266, 187, 19),
        (nets.MobileNet100v3small, (224, 224, 3), 210, 157, 15),
        (nets.MobileNet100v3largemini, (224, 224, 3), 234, 139, 19),
        (nets.MobileNet100v3smallmini, (224, 224, 3), 174, 103, 15),
    ]),
    random.choice([
        (nets.EfficientNetB0, (224, 224, 3), 311, 217, 25),
        (nets.EfficientNetB1, (240, 240, 3), 439, 312, 39),
        (nets.EfficientNetB2, (260, 260, 3), 439, 312, 39),
        (nets.EfficientNetB3, (300, 300, 3), 496, 354, 45),
        # (nets.EfficientNetB4, (380, 380, 3), 610, 438, 57),  # too heavy on Travis
        # (nets.EfficientNetB5, (456, 456, 3), 738, 533, 71),  # too heavy on Travis
        # (nets.EfficientNetB6, (528, 528, 3), 852, 617, 83),  # too heavy on Travis
        # (nets.EfficientNetB7, (600, 600, 3), 1037, 754, 103),  # too heavy on Travis
    ]),
    (nets.SqueezeNet, (224, 224, 3), 52, 65, 10),
], ids=[
    'ResNet',
    'ResNetv2',
    'ResNet200v2',
    'ResNeXt',
    'WideResNet50',
    'Inception1',
    'Inception2',
    'Inception3',
    'Inception4',
    'InceptionResNet2',
    'NASNetAlarge',
    'NASNetAmobile',
    'PNASNetlarge',
    'VGG',
    'DenseNet',
    'MobileNet',
    'MobileNetv2',
    'MobileNetv3',
    'EfficientNet',
    'SqueezeNet',
])
def test_classification_basics(net, shape, weights, outputs, middles):
    """Build a classifier, run one forward pass and check its bookkeeping.

    `weights`, `outputs` and `middles` are the expected lengths of the
    lists returned by `get_weights()`, `get_outputs()` and `get_middles()`.
    """
    input_shape = [None] + list(shape)

    with tf.Graph().as_default():
        inputs = tf.placeholder(tf.float32, input_shape)
        model = net(inputs, is_training=False)
        assert isinstance(model, tf.Tensor)

        image = np.random.random((1,) + shape).astype(np.float32) * 255

        with tf.Session():
            model.init()
            probs = model.eval({inputs: model.preprocess(image)})

        assert probs.shape == (1, 1000)

        # Check whether the tensor names match the desired ones
        assert 'probs' in model.name  # for `model`
        assert 'logits' in model.logits.name  # for `model.logits`
        expected_suffixes = middle_names(model.aliases[0])[1]
        for (middle, suffix) in zip(model.get_middles(), expected_suffixes):
            assert middle.name.endswith(suffix)  # for `model.get_middles()`

        # Disable the following tests for TF==1.1.0
        if LooseVersion(tf.__version__) == LooseVersion('1.1.0'):
            return

        # Check whether the desired list is returned
        _assert_list_sizes(model, weights, outputs, middles)

    # Clear GraphDef to avoid `GraphDef cannot be larger than 2GB`
    with tf.Graph().as_default():
        inputs = tf.placeholder(tf.float32, input_shape)

        # Check whether the desired list is returned under scope functions
        with tf.name_scope('a'), tf.variable_scope('b'), tf.name_scope('c'):
            model = net(inputs, is_training=False)
            _assert_list_sizes(model, weights, outputs, middles)

        with tf.variable_scope('d'), tf.name_scope('e'), \
                tf.variable_scope('f'):
            model = net(inputs, is_training=False)
            _assert_list_sizes(model, weights, outputs, middles)


@pytest.mark.parametrize('net,shape,stem', [
    (nets.YOLOv2, (416, 416, 3), nets.Darknet19),
    (nets.TinyYOLOv2, (416, 416, 3), nets.TinyDarknet19),
], ids=[
    'YOLOv2',
    'TinyYOLOv2',
])
def test_detection_basics(net, shape, stem):
    """Build a detector on top of `stem` and run one forward pass."""
    # TODO: Once the roi-pooling dependency is removed,
    # FasterRCNN-related tests should be added.
    with tf.Graph().as_default():
        inputs = tf.placeholder(tf.float32, [None] + list(shape))
        model = net(inputs, stem, is_training=False)
        assert isinstance(model, tf.Tensor)

        # Same size as cat.png; `model.preprocess` is applied before feeding.
        x = np.random.random((1, 733, 490, 3)).astype(np.float32) * 255

        with tf.Session():
            model.init()
            y = model.eval({inputs: model.preprocess(x)})

        # TODO: Once the get_boxes's are translated from cython,
        # get_boxes tests should be enabled.
# boxes = model.get_boxes(y, x.shape[1:3])

# assert len(boxes) == 20


@pytest.mark.parametrize('net,shape', [
    (nets.MobileNet25, (224, 224, 3)),
    (nets.SqueezeNet, (224, 224, 3)),
], ids=[
    'MobileNet',
    'SqueezeNet',
])
def test_load_save(net, shape):
    """Round-trip model weights through `save`/`load`.

    Covers both calling conventions: relying on the default session and
    passing a session explicitly.  Without a default session, the calls
    that omit the session argument are expected to raise AssertionError.

    The explicit session and the `.npz` files are released in `finally`
    blocks so a failing assertion does not leak them into later tests.
    """
    saved_files = ('test.npz', 'test2.npz')

    try:
        with tf.Graph().as_default():
            inputs = tf.placeholder(tf.float32, [None] + list(shape))
            model = net(inputs, is_training=False)

            # usages with the default session

            with tf.Session() as sess:
                model.init()
                model.save('test.npz')
                values0 = sess.run(model.weights())

                sess.run(model.pretrained())
                values1 = sess.run(model.weights())

                # Pretrained weights must differ from the initial ones.
                for (v0, v1) in zip(values0, values1):
                    assert not np.allclose(v0, v1)

            with tf.Session() as sess:
                model.load('test.npz')
                values2 = sess.run(model.weights())

                # Loading must restore exactly what was saved.
                for (v0, v2) in zip(values0, values2):
                    assert np.allclose(v0, v2)

            # usages without the default session

            sess = tf.Session()
            try:
                model.init(sess)
                model.save('test2.npz', sess)
                values0 = sess.run(model.weights())

                sess.run(model.pretrained())
                values1 = sess.run(model.weights())

                for (v0, v1) in zip(values0, values1):
                    assert not np.allclose(v0, v1)

                model.load('test2.npz', sess)
                values2 = sess.run(model.weights())

                for (v0, v2) in zip(values0, values2):
                    assert np.allclose(v0, v2)

                # No default session is active here, so omitting `sess`
                # must be rejected.
                with pytest.raises(AssertionError):
                    model.init()

                with pytest.raises(AssertionError):
                    model.save('test2.npz')

                with pytest.raises(AssertionError):
                    model.load('test2.npz')
            finally:
                sess.close()
    finally:
        for path in saved_files:
            if os.path.exists(path):
                os.remove(path)
# --------------------------------------------------------------------------------
# /tests/utils_test.py:
# --------------------------------------------------------------------------------
from __future__ import absolute_import

import numpy as np
import tensornets as nets
from tensornets.utils import load_img
import os
import pytest


pytestmark = pytest.mark.skipif(
    os.environ.get('CORE_CHANGED', 'True') == 'False',
    reason='Runs only when the relevant files have been modified.')


def test_load_img():
    """Check output shapes of `load_img` and its ValueError cases."""
    x = load_img('cat.png')
    assert x.shape == (1, 733, 490, 3)

    x = load_img(['cat.png', 'cat.png'], target_size=(100, 200))
    assert x.shape == (2, 100, 200, 3)

    x = load_img(['cat.png'] * 3, target_size=(100, 200), crop_size=50)
    assert x.shape == (3, 50, 50, 3)

    # Several images without a target size are rejected.
    with pytest.raises(ValueError):
        x = load_img(['cat.png', 'cat.png'])

    # Several images with a scalar target size are rejected as well.
    with pytest.raises(ValueError):
        x = load_img(['cat.png'] * 3, target_size=100)
# --------------------------------------------------------------------------------
# /translations/mobilenetv3_tfslim.py:
# --------------------------------------------------------------------------------
"""Weight translation of MobileNetv3 variants
(tested with tensornets: 0.4.3 and tensorflow: 1.15.0)

The codes are executable on the path "research/slim/"
in the "tensorflow/models" repository.

For the 0.75 variants, the following modifications are necessary.

In the line 116 of "research/slim/nets/mobilenet/mobilenet.py",
 def op(opfunc, multiplier_func=depth_multiplier, **params):
   multiplier = params.pop('multiplier_transform', multiplier_func)
-  return _Op(opfunc, params=params, multiplier_func=multiplier)
+  if params.get('normalizer_fn', True) is not None:
+    return _Op(opfunc, params=params, multiplier_func=multiplier)
+  else:
+    return _Op(opfunc, params=params, multiplier_func=lambda x, y: x)
"""
import numpy as np
import tensorflow as tf
import tensornets as nets

from datasets import imagenet
from nets.mobilenet import mobilenet_v3

# (tensornets model, input shape, output file name,
#  TF-slim builder, depth multiplier, TF-slim checkpoint prefix)
models_list = [
    (nets.MobileNet75v3large, (224, 224, 3), 'mobilenet_75_v3_large',
     mobilenet_v3.large, 0.75, 'v3-large_224_0.75_float/ema/model-220000'),
    (nets.MobileNet75v3small, (224, 224, 3), 'mobilenet_75_v3_small',
     mobilenet_v3.small, 0.75, 'v3-small_224_0.75_float/ema/model-497500'),
    (nets.MobileNet100v3large, (224, 224, 3), 'mobilenet_100_v3_large',
     mobilenet_v3.large, 1.0, 'v3-large_224_1.0_float/ema/model-540000'),
    (nets.MobileNet100v3small, (224, 224, 3), 'mobilenet_100_v3_small',
     mobilenet_v3.small, 1.0, 'v3-small_224_1.0_float/ema/model-388500'),
    (nets.MobileNet100v3largemini, (224, 224, 3),
     'mobilenet_100_v3_large_mini',
     mobilenet_v3.large_minimalistic, 1.0,
     'v3-large-minimalistic_224_1.0_float/ema/model-342500'),
    (nets.MobileNet100v3smallmini, (224, 224, 3),
     'mobilenet_100_v3_small_mini',
     mobilenet_v3.small_minimalistic, 1.0,
     'v3-small-minimalistic_224_1.0_float/ema/model-498000'),
]


for (net, shape, model_name, net_slim, alpha, checkpoint) in models_list:

    with tf.Graph().as_default():

        inputs = tf.compat.v1.placeholder(tf.float32, [None] + list(shape))

        with tf.contrib.slim.arg_scope(
                mobilenet_v3.training_scope(is_training=False)):
            logits, endpoints = net_slim(inputs, depth_multiplier=alpha)

        # The saver and the variable list are created before the tensornets
        # model is built, so they only see the TF-slim variables.
        saver = tf.compat.v1.train.Saver()

        weights_tfslim = tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)

        model = net(inputs, scope='a')

        img = nets.utils.load_img('/home/taehoonlee/tensornets/cat.png',
                                  target_size=int(shape[0] * 8 / 7),
                                  crop_size=shape[0])

        with tf.compat.v1.Session() as sess:

            # Retrieve values
            sess.run(tf.compat.v1.global_variables_initializer())
            saver.restore(sess, checkpoint)
            # Drop the two-character scope prefix ('a/') from each name.
            names = [w.name[2:] for w in model.weights()]
            values = sess.run(weights_tfslim)

            # Trim the background class (1001 -> 1000)
            for i in range(-2, 0):
                values[i] = np.delete(np.squeeze(values[i]), 0, axis=-1)

            # Save the values as the TensorNets format
            np.savez(model_name, names=names, values=values)

            # Load and set the values
            weights = model.weights()
            values = nets.utils.parse_weights(model_name + '.npz')
            sess.run([w.assign(v) for (w, v) in zip(weights, values)])

            # Run equivalence tests
            preds, preds_tfslim = sess.run([model, endpoints['Predictions']],
                                           {inputs: model.preprocess(img)})
            preds_tfslim = preds_tfslim[:, 1:]
            np.testing.assert_allclose(preds, preds_tfslim, atol=2e-4)
            print(model_name, 'ok')
# --------------------------------------------------------------------------------
# /translations/tfslim.py:
# --------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf
import tensornets as nets
import tensorflow_hub as hub

from tensornets.utils import tf_later_than

if tf_later_than('1.14'):
    tf = tf.compat.v1

# (tensornets model, input shape, TF-Hub module name)
models_list = [
    (nets.Inception2, (224, 224, 3), 'inception_v2'),
    (nets.Inception3, (299, 299, 3), 'inception_v3'),
    (nets.MobileNet35v2, (224, 224, 3), 'mobilenet_v2_035_224'),
    (nets.MobileNet50v2, (224, 224, 3), 'mobilenet_v2_050_224'),
    (nets.MobileNet75v2, (224, 224, 3), 'mobilenet_v2_075_224'),
    (nets.MobileNet100v2, (224, 224, 3), 'mobilenet_v2_100_224'),
    (nets.MobileNet130v2, (224, 224, 3), 'mobilenet_v2_130_224'),
    (nets.MobileNet140v2, (224, 224, 3), 'mobilenet_v2_140_224'),
    (nets.PNASNetlarge, (331, 331, 3), 'pnasnet_large'),
    (nets.EfficientNetB0, (224, 224, 3), 'efficientnet/b0'),
    (nets.EfficientNetB1, (240, 240, 3), 'efficientnet/b1'),
    (nets.EfficientNetB2, (260, 260, 3), 'efficientnet/b2'),
    (nets.EfficientNetB3, (300, 300, 3), 'efficientnet/b3'),
    (nets.EfficientNetB4, (380, 380, 3), 'efficientnet/b4'),
    (nets.EfficientNetB5, (456, 456, 3), 'efficientnet/b5'),
    (nets.EfficientNetB6, (528, 528, 3), 'efficientnet/b6'),
    (nets.EfficientNetB7, (600, 600, 3), 'efficientnet/b7'),
]

url = 'https://tfhub.dev/google'


for (net, shape, model_name) in models_list:

    # EfficientNet modules use a different URL layout and are the only
    # ones handled here without the background-class trimming below.
    is_efficientnet = model_name.startswith('efficientnet')

    with tf.Graph().as_default():

        inputs = tf.placeholder(tf.float32, [None] + list(shape))
        model = net(inputs, scope='a')

        if is_efficientnet:
            tfhub = hub.Module("%s/%s/classification/1" % (url, model_name))
        else:
            tfhub = hub.Module("%s/imagenet/%s/classification/1" % (url, model_name))
        features = tfhub(inputs, signature="image_classification",
                         as_dict=True)
        model_tfhub = tf.nn.softmax(features['default'])

        img = nets.utils.load_img('cat.png',
                                  target_size=int(shape[0] * 8 / 7),
                                  crop_size=shape[0])

        with tf.Session() as sess:

            # Retrieve values
            sess.run(tf.global_variables_initializer())
            weights = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                        scope='module')
            values = sess.run(weights)

            # Trim the background class (1001 -> 1000)
            if not is_efficientnet:
                for i in range(-2, 0):
                    values[i] = np.delete(np.squeeze(values[i]), 0, axis=-1)

            # Drop the two-character scope prefix ('a/') from each name.
            names = [w.name[2:] for w in model.get_weights()]
            if not tf_later_than('1.4.0'):
                # Adjust the order of the values to cover TF < 1.4.0
                for i in range(len(names) - 1):
                    if 'gamma:0' in names[i] and 'beta:0' in names[i + 1]:
                        names[i], names[i + 1] = names[i + 1], names[i]
                        values[i], values[i + 1] = values[i + 1], values[i]

            # Save the values as the TensorNets format
            np.savez(model_name, names=names, values=values)

            # Load and set the values
            weights = model.get_weights()
            values = nets.utils.parse_weights(model_name + '.npz')
            sess.run([w.assign(v) for (w, v) in zip(weights, values)])

            # Run equivalence tests
            preds = sess.run(model, {inputs: model.preprocess(img)})
            preds_tfhub = sess.run(model_tfhub, {inputs: img / 255.})
            if not is_efficientnet:
                preds_tfhub = preds_tfhub[:, 1:]
            np.testing.assert_allclose(preds, preds_tfhub, atol=1e-4)
# --------------------------------------------------------------------------------