├── .github └── workflows │ └── main.yml ├── .gitignore ├── LICENSE ├── README.md ├── efficientnet_pytorch ├── __init__.py ├── model.py └── utils.py ├── examples ├── imagenet │ ├── README.md │ ├── data │ │ └── README.md │ └── main.py └── simple │ ├── check.ipynb │ ├── example.ipynb │ ├── img.jpg │ ├── img2.jpg │ └── labels_map.txt ├── hubconf.py ├── setup.py ├── sotabench.py ├── sotabench_setup.sh ├── tests └── test_model.py └── tf_to_pytorch ├── README.md ├── convert_tf_to_pt ├── download.sh ├── load_tf_weights.py ├── load_tf_weights_tf1.py ├── original_tf │ ├── __init__.py │ ├── efficientnet_builder.py │ ├── efficientnet_model.py │ ├── eval_ckpt_main.py │ ├── eval_ckpt_main_tf1.py │ ├── preprocessing.py │ └── utils.py ├── rename.sh └── run.sh └── pretrained_tensorflow └── download.sh /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Workflow 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | 8 | jobs: 9 | pypi-job: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | - name: Install twine 14 | run: pip install twine 15 | - name: Build package 16 | run: python setup.py sdist 17 | - name: Publish a Python distribution to PyPI 18 | uses: pypa/gh-action-pypi-publish@release/v1 19 | with: 20 | user: __token__ 21 | password: ${{ secrets.PYPI_API_TOKEN }} 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Custom 2 | tmp 3 | *.pkl 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # pyenv 80 | .python-version 81 | 82 | # celery beat schedule file 83 | celerybeat-schedule 84 | 85 | # SageMath parsed files 86 | *.sage.py 87 | 88 | # Environments 89 | .env 90 | .venv 91 | env/ 92 | venv/ 93 | ENV/ 94 | env.bak/ 95 | venv.bak/ 96 | 97 | # Spyder project settings 98 | .spyderproject 99 | .spyproject 100 | 101 | # Rope project settings 102 | .ropeproject 103 | 104 | # mkdocs documentation 105 | /site 106 | 107 | # mypy 108 | .mypy_cache/ 109 | .DS_STORE 110 | 111 | # PyCharm 112 | .idea* 113 | *.xml 114 | 115 | # Custom 116 | tensorflow/ 117 | example/test* 118 | *.pth* 119 | examples/imagenet/data/ 120 | !examples/imagenet/data/README.md 121 | tmp 122 | tf_to_pytorch/pretrained_tensorflow 123 | !tf_to_pytorch/pretrained_tensorflow/download.sh 124 | examples/imagenet/run.sh 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # EfficientNet PyTorch 2 | 3 | ### Quickstart 4 | 5 | Install with `pip install efficientnet_pytorch` and load a pretrained EfficientNet with: 6 | ```python 7 | from efficientnet_pytorch import EfficientNet 8 | model = EfficientNet.from_pretrained('efficientnet-b0') 9 | ``` 10 | 11 | ### Updates 12 | 13 | #### Update (April 2, 2021) 14 | 15 | The [EfficientNetV2 paper](https://arxiv.org/abs/2104.00298) has been released! I am working on implementing it as you read this :) 16 | 17 | About EfficientNetV2: 18 | > EfficientNetV2 is a new family of convolutional networks that have faster training speed and better parameter efficiency than previous models. To develop this family of models, we use a combination of training-aware neural architecture search and scaling, to jointly optimize training speed and parameter efficiency. The models were searched from the search space enriched with new ops such as Fused-MBConv. 19 | 20 | Here is a comparison: 21 | > 22 | 23 | 24 | #### Update (Aug 25, 2020) 25 | 26 | This update adds: 27 | * A new `include_top` (default: `True`) option ([#208](https://github.com/lukemelas/EfficientNet-PyTorch/pull/208)) 28 | * Continuous testing with [sotabench](https://sotabench.com/) 29 | * Code quality improvements and fixes ([#215](https://github.com/lukemelas/EfficientNet-PyTorch/pull/215) [#223](https://github.com/lukemelas/EfficientNet-PyTorch/pull/223)) 30 | 31 | #### Update (May 14, 2020) 32 | 33 | This update adds comprehensive comments and documentation (thanks to @workingcoder). 34 | 35 | #### Update (January 23, 2020) 36 | 37 | This update adds a new category of pre-trained model based on adversarial training, called _advprop_. It is important to note that the preprocessing required for the advprop pretrained models is slightly different from normal ImageNet preprocessing. As a result, by default, advprop models are not used. To load a model with advprop, use: 38 | ```python 39 | model = EfficientNet.from_pretrained("efficientnet-b0", advprop=True) 40 | ``` 41 | There is also a new, large `efficientnet-b8` pretrained model that is only available in advprop form. When using these models, replace ImageNet preprocessing code as follows: 42 | ```python 43 | if advprop: # for models using advprop pretrained weights 44 | normalize = transforms.Lambda(lambda img: img * 2.0 - 1.0) 45 | else: 46 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 47 | std=[0.229, 0.224, 0.225]) 48 | ``` 49 | This update also addresses multiple other issues ([#115](https://github.com/lukemelas/EfficientNet-PyTorch/issues/115), [#128](https://github.com/lukemelas/EfficientNet-PyTorch/issues/128)). 50 | 51 | #### Update (October 15, 2019) 52 | 53 | This update allows you to choose whether to use a memory-efficient Swish activation. The memory-efficient version is chosen by default, but it cannot be used when exporting using PyTorch JIT. For this purpose, we have also included a standard (export-friendly) swish activation function. To switch to the export-friendly version, simply call `model.set_swish(memory_efficient=False)` after loading your desired model. This update addresses issues [#88](https://github.com/lukemelas/EfficientNet-PyTorch/pull/88) and [#89](https://github.com/lukemelas/EfficientNet-PyTorch/pull/89). 
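For reference, a minimal sketch of the export-friendly path with TorchScript (the 224x224 dummy input matches `efficientnet-b0`'s native resolution; the output filename is arbitrary):
```python
import torch
from efficientnet_pytorch import EfficientNet

model = EfficientNet.from_pretrained('efficientnet-b0')
model.set_swish(memory_efficient=False)  # switch to the export-friendly Swish
model.eval()

# Trace with a dummy input at the model's native resolution (224 for b0)
dummy_input = torch.randn(1, 3, 224, 224)
traced_model = torch.jit.trace(model, dummy_input)
traced_model.save('efficientnet-b0-traced.pt')
```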
54 | 55 | #### Update (October 12, 2019) 56 | 57 | This update makes the Swish activation function more memory-efficient. It also addresses pull requests [#72](https://github.com/lukemelas/EfficientNet-PyTorch/pull/72), [#73](https://github.com/lukemelas/EfficientNet-PyTorch/pull/73), [#85](https://github.com/lukemelas/EfficientNet-PyTorch/pull/85), and [#86](https://github.com/lukemelas/EfficientNet-PyTorch/pull/86). Thanks to the authors of all the pull requests! 58 | 59 | #### Update (July 31, 2019) 60 | 61 | _Upgrade the pip package with_ `pip install --upgrade efficientnet-pytorch` 62 | 63 | The B6 and B7 models are now available. Additionally, _all_ pretrained models have been updated to use AutoAugment preprocessing, which translates to better performance across the board. Usage is the same as before: 64 | ```python 65 | from efficientnet_pytorch import EfficientNet 66 | model = EfficientNet.from_pretrained('efficientnet-b7') 67 | ``` 68 | 69 | #### Update (June 29, 2019) 70 | 71 | This update adds easy model exporting ([#20](https://github.com/lukemelas/EfficientNet-PyTorch/issues/20)) and feature extraction ([#38](https://github.com/lukemelas/EfficientNet-PyTorch/issues/38)). 72 | 73 | * [Example: Export to ONNX](#example-export) 74 | * [Example: Extract features](#example-feature-extraction) 75 | * Also: fixed a CUDA/CPU bug ([#32](https://github.com/lukemelas/EfficientNet-PyTorch/issues/32)) 76 | 77 | It is also now incredibly simple to load a pretrained model with a new number of classes for transfer learning: 78 | ```python 79 | model = EfficientNet.from_pretrained('efficientnet-b1', num_classes=23) 80 | ``` 81 | 82 | 83 | #### Update (June 23, 2019) 84 | 85 | The B4 and B5 models are now available. Their usage is identical to the other models: 86 | ```python 87 | from efficientnet_pytorch import EfficientNet 88 | model = EfficientNet.from_pretrained('efficientnet-b4') 89 | ``` 90 | 91 | ### Overview 92 | This repository contains an op-for-op PyTorch reimplementation of [EfficientNet](https://arxiv.org/abs/1905.11946), along with pre-trained models and examples. 93 | 94 | The goal of this implementation is to be simple, highly extensible, and easy to integrate into your own projects. This implementation is a work in progress -- new features are currently being implemented. 95 | 96 | At the moment, you can easily: 97 | * Load pretrained EfficientNet models 98 | * Use EfficientNet models for classification or feature extraction 99 | * Evaluate EfficientNet models on ImageNet or your own images 100 | 101 | _Upcoming features_: In the next few days, you will be able to: 102 | * Train new models from scratch on ImageNet with a simple command 103 | * Quickly finetune an EfficientNet on your own dataset 104 | * Export EfficientNet models for production 105 | 106 | ### Table of contents 107 | 1. [About EfficientNet](#about-efficientnet) 108 | 2. [About EfficientNet-PyTorch](#about-efficientnet-pytorch) 109 | 3. [Installation](#installation) 110 | 4. [Usage](#usage) 111 | * [Load pretrained models](#loading-pretrained-models) 112 | * [Example: Classify](#example-classification) 113 | * [Example: Extract features](#example-feature-extraction) 114 | * [Example: Export to ONNX](#example-export) 115 | 6. 
[Contributing](#contributing) 116 | 117 | ### About EfficientNet 118 | 119 | If you're new to EfficientNets, here is an explanation straight from the official TensorFlow implementation: 120 | 121 | EfficientNets are a family of image classification models, which achieve state-of-the-art accuracy, yet being an order-of-magnitude smaller and faster than previous models. We develop EfficientNets based on AutoML and Compound Scaling. In particular, we first use [AutoML Mobile framework](https://ai.googleblog.com/2018/08/mnasnet-towards-automating-design-of.html) to develop a mobile-size baseline network, named as EfficientNet-B0; Then, we use the compound scaling method to scale up this baseline to obtain EfficientNet-B1 to B7. 122 | 123 | 124 | 125 | 128 | 131 | 132 |
133 | 134 | EfficientNets achieve state-of-the-art accuracy on ImageNet with an order of magnitude better efficiency: 135 | 136 | 137 | * In high-accuracy regime, our EfficientNet-B7 achieves state-of-the-art 84.4% top-1 / 97.1% top-5 accuracy on ImageNet with 66M parameters and 37B FLOPS, being 8.4x smaller and 6.1x faster on CPU inference than previous best [Gpipe](https://arxiv.org/abs/1811.06965). 138 | 139 | * In middle-accuracy regime, our EfficientNet-B1 is 7.6x smaller and 5.7x faster on CPU inference than [ResNet-152](https://arxiv.org/abs/1512.03385), with similar ImageNet accuracy. 140 | 141 | * Compared with the widely used [ResNet-50](https://arxiv.org/abs/1512.03385), our EfficientNet-B4 improves the top-1 accuracy from 76.3% of ResNet-50 to 82.6% (+6.3%), under similar FLOPS constraint. 142 | 143 | ### About EfficientNet PyTorch 144 | 145 | EfficientNet PyTorch is a PyTorch re-implementation of EfficientNet. It is consistent with the [original TensorFlow implementation](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet), such that it is easy to load weights from a TensorFlow checkpoint. At the same time, we aim to make our PyTorch implementation as simple, flexible, and extensible as possible. 146 | 147 | If you have any feature requests or questions, feel free to leave them as GitHub issues! 148 | 149 | ### Installation 150 | 151 | Install via pip: 152 | ```bash 153 | pip install efficientnet_pytorch 154 | ``` 155 | 156 | Or install from source: 157 | ```bash 158 | git clone https://github.com/lukemelas/EfficientNet-PyTorch 159 | cd EfficientNet-Pytorch 160 | pip install -e . 161 | ``` 162 | 163 | ### Usage 164 | 165 | #### Loading pretrained models 166 | 167 | Load an EfficientNet: 168 | ```python 169 | from efficientnet_pytorch import EfficientNet 170 | model = EfficientNet.from_name('efficientnet-b0') 171 | ``` 172 | 173 | Load a pretrained EfficientNet: 174 | ```python 175 | from efficientnet_pytorch import EfficientNet 176 | model = EfficientNet.from_pretrained('efficientnet-b0') 177 | ``` 178 | 179 | Details about the models are below: 180 | 181 | | *Name* |*# Params*|*Top-1 Acc.*|*Pretrained?*| 182 | |:-----------------:|:--------:|:----------:|:-----------:| 183 | | `efficientnet-b0` | 5.3M | 76.3 | ✓ | 184 | | `efficientnet-b1` | 7.8M | 78.8 | ✓ | 185 | | `efficientnet-b2` | 9.2M | 79.8 | ✓ | 186 | | `efficientnet-b3` | 12M | 81.1 | ✓ | 187 | | `efficientnet-b4` | 19M | 82.6 | ✓ | 188 | | `efficientnet-b5` | 30M | 83.3 | ✓ | 189 | | `efficientnet-b6` | 43M | 84.0 | ✓ | 190 | | `efficientnet-b7` | 66M | 84.4 | ✓ | 191 | 192 | 193 | #### Example: Classification 194 | 195 | Below is a simple, complete example. It may also be found as a jupyter notebook in `examples/simple` or as a [Colab Notebook](https://colab.research.google.com/drive/1Jw28xZ1NJq4Cja4jLe6tJ6_F5lCzElb4). 196 | 197 | We assume that in your current directory, there is a `img.jpg` file and a `labels_map.txt` file (ImageNet class names). These are both included in `examples/simple`. 
198 | 199 | ```python 200 | import json 201 | from PIL import Image 202 | import torch 203 | from torchvision import transforms 204 | 205 | from efficientnet_pytorch import EfficientNet 206 | model = EfficientNet.from_pretrained('efficientnet-b0') 207 | 208 | # Preprocess image 209 | tfms = transforms.Compose([transforms.Resize(224), transforms.ToTensor(), 210 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),]) 211 | img = tfms(Image.open('img.jpg')).unsqueeze(0) 212 | print(img.shape) # torch.Size([1, 3, 224, 224]) 213 | 214 | # Load ImageNet class names 215 | labels_map = json.load(open('labels_map.txt')) 216 | labels_map = [labels_map[str(i)] for i in range(1000)] 217 | 218 | # Classify 219 | model.eval() 220 | with torch.no_grad(): 221 | outputs = model(img) 222 | 223 | # Print predictions 224 | print('-----') 225 | for idx in torch.topk(outputs, k=5).indices.squeeze(0).tolist(): 226 | prob = torch.softmax(outputs, dim=1)[0, idx].item() 227 | print('{label:<75} ({p:.2f}%)'.format(label=labels_map[idx], p=prob*100)) 228 | ``` 229 | 230 | #### Example: Feature Extraction 231 | 232 | You can easily extract features with `model.extract_features`: 233 | ```python 234 | from efficientnet_pytorch import EfficientNet 235 | model = EfficientNet.from_pretrained('efficientnet-b0') 236 | 237 | # ... image preprocessing as in the classification example ... 238 | print(img.shape) # torch.Size([1, 3, 224, 224]) 239 | 240 | features = model.extract_features(img) 241 | print(features.shape) # torch.Size([1, 1280, 7, 7]) 242 | ``` 243 | 244 | #### Example: Export to ONNX 245 | 246 | Exporting to ONNX for deploying to production is now simple: 247 | ```python 248 | import torch 249 | from efficientnet_pytorch import EfficientNet 250 | 251 | model = EfficientNet.from_pretrained('efficientnet-b1') 252 | dummy_input = torch.randn(10, 3, 240, 240) 253 | 254 | model.set_swish(memory_efficient=False) 255 | torch.onnx.export(model, dummy_input, "test-b1.onnx", verbose=True) 256 | ``` 257 | 258 | [Here](https://colab.research.google.com/drive/1rOAEXeXHaA8uo3aG2YcFDHItlRJMV0VP) is a Colab example. 259 | 260 | 261 | #### ImageNet 262 | 263 | See `examples/imagenet` for details about evaluating on ImageNet. 264 | 265 | ### Contributing 266 | 267 | If you find a bug, create a GitHub issue, or even better, submit a pull request. Similarly, if you have questions, simply post them as GitHub issues. 268 | 269 | I look forward to seeing what the community does with these models! 270 | -------------------------------------------------------------------------------- /efficientnet_pytorch/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.7.1" 2 | from .model import EfficientNet, VALID_MODELS 3 | from .utils import ( 4 | GlobalParams, 5 | BlockArgs, 6 | BlockDecoder, 7 | efficientnet, 8 | get_model_params, 9 | ) 10 | -------------------------------------------------------------------------------- /efficientnet_pytorch/model.py: -------------------------------------------------------------------------------- 1 | """model.py - Model and module class for EfficientNet. 2 | They are built to mirror those in the official TensorFlow implementation. 3 | """ 4 | 5 | # Author: lukemelas (github username) 6 | # Github repo: https://github.com/lukemelas/EfficientNet-PyTorch 7 | # With adjustments and added comments by workingcoder (github username). 
8 | 9 | import torch 10 | from torch import nn 11 | from torch.nn import functional as F 12 | from .utils import ( 13 | round_filters, 14 | round_repeats, 15 | drop_connect, 16 | get_same_padding_conv2d, 17 | get_model_params, 18 | efficientnet_params, 19 | load_pretrained_weights, 20 | Swish, 21 | MemoryEfficientSwish, 22 | calculate_output_image_size 23 | ) 24 | 25 | 26 | VALID_MODELS = ( 27 | 'efficientnet-b0', 'efficientnet-b1', 'efficientnet-b2', 'efficientnet-b3', 28 | 'efficientnet-b4', 'efficientnet-b5', 'efficientnet-b6', 'efficientnet-b7', 29 | 'efficientnet-b8', 30 | 31 | # Support the construction of 'efficientnet-l2' without pretrained weights 32 | 'efficientnet-l2' 33 | ) 34 | 35 | 36 | class MBConvBlock(nn.Module): 37 | """Mobile Inverted Residual Bottleneck Block. 38 | 39 | Args: 40 | block_args (namedtuple): BlockArgs, defined in utils.py. 41 | global_params (namedtuple): GlobalParam, defined in utils.py. 42 | image_size (tuple or list): [image_height, image_width]. 43 | 44 | References: 45 | [1] https://arxiv.org/abs/1704.04861 (MobileNet v1) 46 | [2] https://arxiv.org/abs/1801.04381 (MobileNet v2) 47 | [3] https://arxiv.org/abs/1905.02244 (MobileNet v3) 48 | """ 49 | 50 | def __init__(self, block_args, global_params, image_size=None): 51 | super().__init__() 52 | self._block_args = block_args 53 | self._bn_mom = 1 - global_params.batch_norm_momentum # pytorch's difference from tensorflow 54 | self._bn_eps = global_params.batch_norm_epsilon 55 | self.has_se = (self._block_args.se_ratio is not None) and (0 < self._block_args.se_ratio <= 1) 56 | self.id_skip = block_args.id_skip # whether to use skip connection and drop connect 57 | 58 | # Expansion phase (Inverted Bottleneck) 59 | inp = self._block_args.input_filters # number of input channels 60 | oup = self._block_args.input_filters * self._block_args.expand_ratio # number of output channels 61 | if self._block_args.expand_ratio != 1: 62 | Conv2d = get_same_padding_conv2d(image_size=image_size) 63 | self._expand_conv = Conv2d(in_channels=inp, out_channels=oup, kernel_size=1, bias=False) 64 | self._bn0 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps) 65 | # image_size = calculate_output_image_size(image_size, 1) <-- this wouldn't modify image_size 66 | 67 | # Depthwise convolution phase 68 | k = self._block_args.kernel_size 69 | s = self._block_args.stride 70 | Conv2d = get_same_padding_conv2d(image_size=image_size) 71 | self._depthwise_conv = Conv2d( 72 | in_channels=oup, out_channels=oup, groups=oup, # groups makes it depthwise 73 | kernel_size=k, stride=s, bias=False) 74 | self._bn1 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps) 75 | image_size = calculate_output_image_size(image_size, s) 76 | 77 | # Squeeze and Excitation layer, if desired 78 | if self.has_se: 79 | Conv2d = get_same_padding_conv2d(image_size=(1, 1)) 80 | num_squeezed_channels = max(1, int(self._block_args.input_filters * self._block_args.se_ratio)) 81 | self._se_reduce = Conv2d(in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1) 82 | self._se_expand = Conv2d(in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1) 83 | 84 | # Pointwise convolution phase 85 | final_oup = self._block_args.output_filters 86 | Conv2d = get_same_padding_conv2d(image_size=image_size) 87 | self._project_conv = Conv2d(in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False) 88 | self._bn2 = nn.BatchNorm2d(num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps) 89 | 
self._swish = MemoryEfficientSwish() 90 | 91 | def forward(self, inputs, drop_connect_rate=None): 92 | """MBConvBlock's forward function. 93 | 94 | Args: 95 | inputs (tensor): Input tensor. 96 | drop_connect_rate (bool): Drop connect rate (float, between 0 and 1). 97 | 98 | Returns: 99 | Output of this block after processing. 100 | """ 101 | 102 | # Expansion and Depthwise Convolution 103 | x = inputs 104 | if self._block_args.expand_ratio != 1: 105 | x = self._expand_conv(inputs) 106 | x = self._bn0(x) 107 | x = self._swish(x) 108 | 109 | x = self._depthwise_conv(x) 110 | x = self._bn1(x) 111 | x = self._swish(x) 112 | 113 | # Squeeze and Excitation 114 | if self.has_se: 115 | x_squeezed = F.adaptive_avg_pool2d(x, 1) 116 | x_squeezed = self._se_reduce(x_squeezed) 117 | x_squeezed = self._swish(x_squeezed) 118 | x_squeezed = self._se_expand(x_squeezed) 119 | x = torch.sigmoid(x_squeezed) * x 120 | 121 | # Pointwise Convolution 122 | x = self._project_conv(x) 123 | x = self._bn2(x) 124 | 125 | # Skip connection and drop connect 126 | input_filters, output_filters = self._block_args.input_filters, self._block_args.output_filters 127 | if self.id_skip and self._block_args.stride == 1 and input_filters == output_filters: 128 | # The combination of skip connection and drop connect brings about stochastic depth. 129 | if drop_connect_rate: 130 | x = drop_connect(x, p=drop_connect_rate, training=self.training) 131 | x = x + inputs # skip connection 132 | return x 133 | 134 | def set_swish(self, memory_efficient=True): 135 | """Sets swish function as memory efficient (for training) or standard (for export). 136 | 137 | Args: 138 | memory_efficient (bool): Whether to use memory-efficient version of swish. 139 | """ 140 | self._swish = MemoryEfficientSwish() if memory_efficient else Swish() 141 | 142 | 143 | class EfficientNet(nn.Module): 144 | """EfficientNet model. 145 | Most easily loaded with the .from_name or .from_pretrained methods. 146 | 147 | Args: 148 | blocks_args (list[namedtuple]): A list of BlockArgs to construct blocks. 149 | global_params (namedtuple): A set of GlobalParams shared between blocks. 
150 | 151 | References: 152 | [1] https://arxiv.org/abs/1905.11946 (EfficientNet) 153 | 154 | Example: 155 | >>> import torch 156 | >>> from efficientnet.model import EfficientNet 157 | >>> inputs = torch.rand(1, 3, 224, 224) 158 | >>> model = EfficientNet.from_pretrained('efficientnet-b0') 159 | >>> model.eval() 160 | >>> outputs = model(inputs) 161 | """ 162 | 163 | def __init__(self, blocks_args=None, global_params=None): 164 | super().__init__() 165 | assert isinstance(blocks_args, list), 'blocks_args should be a list' 166 | assert len(blocks_args) > 0, 'block args must be greater than 0' 167 | self._global_params = global_params 168 | self._blocks_args = blocks_args 169 | 170 | # Batch norm parameters 171 | bn_mom = 1 - self._global_params.batch_norm_momentum 172 | bn_eps = self._global_params.batch_norm_epsilon 173 | 174 | # Get stem static or dynamic convolution depending on image size 175 | image_size = global_params.image_size 176 | Conv2d = get_same_padding_conv2d(image_size=image_size) 177 | 178 | # Stem 179 | in_channels = 3 # rgb 180 | out_channels = round_filters(32, self._global_params) # number of output channels 181 | self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False) 182 | self._bn0 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps) 183 | image_size = calculate_output_image_size(image_size, 2) 184 | 185 | # Build blocks 186 | self._blocks = nn.ModuleList([]) 187 | for block_args in self._blocks_args: 188 | 189 | # Update block input and output filters based on depth multiplier. 190 | block_args = block_args._replace( 191 | input_filters=round_filters(block_args.input_filters, self._global_params), 192 | output_filters=round_filters(block_args.output_filters, self._global_params), 193 | num_repeat=round_repeats(block_args.num_repeat, self._global_params) 194 | ) 195 | 196 | # The first block needs to take care of stride and filter size increase. 197 | self._blocks.append(MBConvBlock(block_args, self._global_params, image_size=image_size)) 198 | image_size = calculate_output_image_size(image_size, block_args.stride) 199 | if block_args.num_repeat > 1: # modify block_args to keep same output size 200 | block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) 201 | for _ in range(block_args.num_repeat - 1): 202 | self._blocks.append(MBConvBlock(block_args, self._global_params, image_size=image_size)) 203 | # image_size = calculate_output_image_size(image_size, block_args.stride) # stride = 1 204 | 205 | # Head 206 | in_channels = block_args.output_filters # output of final block 207 | out_channels = round_filters(1280, self._global_params) 208 | Conv2d = get_same_padding_conv2d(image_size=image_size) 209 | self._conv_head = Conv2d(in_channels, out_channels, kernel_size=1, bias=False) 210 | self._bn1 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps) 211 | 212 | # Final linear layer 213 | self._avg_pooling = nn.AdaptiveAvgPool2d(1) 214 | if self._global_params.include_top: 215 | self._dropout = nn.Dropout(self._global_params.dropout_rate) 216 | self._fc = nn.Linear(out_channels, self._global_params.num_classes) 217 | 218 | # set activation to memory efficient swish by default 219 | self._swish = MemoryEfficientSwish() 220 | 221 | def set_swish(self, memory_efficient=True): 222 | """Sets swish function as memory efficient (for training) or standard (for export). 223 | 224 | Args: 225 | memory_efficient (bool): Whether to use memory-efficient version of swish. 
226 | """ 227 | self._swish = MemoryEfficientSwish() if memory_efficient else Swish() 228 | for block in self._blocks: 229 | block.set_swish(memory_efficient) 230 | 231 | def extract_endpoints(self, inputs): 232 | """Use convolution layer to extract features 233 | from reduction levels i in [1, 2, 3, 4, 5]. 234 | 235 | Args: 236 | inputs (tensor): Input tensor. 237 | 238 | Returns: 239 | Dictionary of last intermediate features 240 | with reduction levels i in [1, 2, 3, 4, 5]. 241 | Example: 242 | >>> import torch 243 | >>> from efficientnet.model import EfficientNet 244 | >>> inputs = torch.rand(1, 3, 224, 224) 245 | >>> model = EfficientNet.from_pretrained('efficientnet-b0') 246 | >>> endpoints = model.extract_endpoints(inputs) 247 | >>> print(endpoints['reduction_1'].shape) # torch.Size([1, 16, 112, 112]) 248 | >>> print(endpoints['reduction_2'].shape) # torch.Size([1, 24, 56, 56]) 249 | >>> print(endpoints['reduction_3'].shape) # torch.Size([1, 40, 28, 28]) 250 | >>> print(endpoints['reduction_4'].shape) # torch.Size([1, 112, 14, 14]) 251 | >>> print(endpoints['reduction_5'].shape) # torch.Size([1, 320, 7, 7]) 252 | >>> print(endpoints['reduction_6'].shape) # torch.Size([1, 1280, 7, 7]) 253 | """ 254 | endpoints = dict() 255 | 256 | # Stem 257 | x = self._swish(self._bn0(self._conv_stem(inputs))) 258 | prev_x = x 259 | 260 | # Blocks 261 | for idx, block in enumerate(self._blocks): 262 | drop_connect_rate = self._global_params.drop_connect_rate 263 | if drop_connect_rate: 264 | drop_connect_rate *= float(idx) / len(self._blocks) # scale drop connect_rate 265 | x = block(x, drop_connect_rate=drop_connect_rate) 266 | if prev_x.size(2) > x.size(2): 267 | endpoints['reduction_{}'.format(len(endpoints) + 1)] = prev_x 268 | elif idx == len(self._blocks) - 1: 269 | endpoints['reduction_{}'.format(len(endpoints) + 1)] = x 270 | prev_x = x 271 | 272 | # Head 273 | x = self._swish(self._bn1(self._conv_head(x))) 274 | endpoints['reduction_{}'.format(len(endpoints) + 1)] = x 275 | 276 | return endpoints 277 | 278 | def extract_features(self, inputs): 279 | """use convolution layer to extract feature . 280 | 281 | Args: 282 | inputs (tensor): Input tensor. 283 | 284 | Returns: 285 | Output of the final convolution 286 | layer in the efficientnet model. 287 | """ 288 | # Stem 289 | x = self._swish(self._bn0(self._conv_stem(inputs))) 290 | 291 | # Blocks 292 | for idx, block in enumerate(self._blocks): 293 | drop_connect_rate = self._global_params.drop_connect_rate 294 | if drop_connect_rate: 295 | drop_connect_rate *= float(idx) / len(self._blocks) # scale drop connect_rate 296 | x = block(x, drop_connect_rate=drop_connect_rate) 297 | 298 | # Head 299 | x = self._swish(self._bn1(self._conv_head(x))) 300 | 301 | return x 302 | 303 | def forward(self, inputs): 304 | """EfficientNet's forward function. 305 | Calls extract_features to extract features, applies final linear layer, and returns logits. 306 | 307 | Args: 308 | inputs (tensor): Input tensor. 309 | 310 | Returns: 311 | Output of this model after processing. 312 | """ 313 | # Convolution layers 314 | x = self.extract_features(inputs) 315 | # Pooling and final linear layer 316 | x = self._avg_pooling(x) 317 | if self._global_params.include_top: 318 | x = x.flatten(start_dim=1) 319 | x = self._dropout(x) 320 | x = self._fc(x) 321 | return x 322 | 323 | @classmethod 324 | def from_name(cls, model_name, in_channels=3, **override_params): 325 | """Create an efficientnet model according to name. 
326 | 327 | Args: 328 | model_name (str): Name for efficientnet. 329 | in_channels (int): Input data's channel number. 330 | override_params (other key word params): 331 | Params to override model's global_params. 332 | Optional key: 333 | 'width_coefficient', 'depth_coefficient', 334 | 'image_size', 'dropout_rate', 335 | 'num_classes', 'batch_norm_momentum', 336 | 'batch_norm_epsilon', 'drop_connect_rate', 337 | 'depth_divisor', 'min_depth' 338 | 339 | Returns: 340 | An efficientnet model. 341 | """ 342 | cls._check_model_name_is_valid(model_name) 343 | blocks_args, global_params = get_model_params(model_name, override_params) 344 | model = cls(blocks_args, global_params) 345 | model._change_in_channels(in_channels) 346 | return model 347 | 348 | @classmethod 349 | def from_pretrained(cls, model_name, weights_path=None, advprop=False, 350 | in_channels=3, num_classes=1000, **override_params): 351 | """Create an efficientnet model according to name. 352 | 353 | Args: 354 | model_name (str): Name for efficientnet. 355 | weights_path (None or str): 356 | str: path to pretrained weights file on the local disk. 357 | None: use pretrained weights downloaded from the Internet. 358 | advprop (bool): 359 | Whether to load pretrained weights 360 | trained with advprop (valid when weights_path is None). 361 | in_channels (int): Input data's channel number. 362 | num_classes (int): 363 | Number of categories for classification. 364 | It controls the output size for final linear layer. 365 | override_params (other key word params): 366 | Params to override model's global_params. 367 | Optional key: 368 | 'width_coefficient', 'depth_coefficient', 369 | 'image_size', 'dropout_rate', 370 | 'batch_norm_momentum', 371 | 'batch_norm_epsilon', 'drop_connect_rate', 372 | 'depth_divisor', 'min_depth' 373 | 374 | Returns: 375 | A pretrained efficientnet model. 376 | """ 377 | model = cls.from_name(model_name, num_classes=num_classes, **override_params) 378 | load_pretrained_weights(model, model_name, weights_path=weights_path, 379 | load_fc=(num_classes == 1000), advprop=advprop) 380 | model._change_in_channels(in_channels) 381 | return model 382 | 383 | @classmethod 384 | def get_image_size(cls, model_name): 385 | """Get the input image size for a given efficientnet model. 386 | 387 | Args: 388 | model_name (str): Name for efficientnet. 389 | 390 | Returns: 391 | Input image size (resolution). 392 | """ 393 | cls._check_model_name_is_valid(model_name) 394 | _, _, res, _ = efficientnet_params(model_name) 395 | return res 396 | 397 | @classmethod 398 | def _check_model_name_is_valid(cls, model_name): 399 | """Validates model name. 400 | 401 | Args: 402 | model_name (str): Name for efficientnet. 403 | 404 | Returns: 405 | bool: Is a valid name or not. 406 | """ 407 | if model_name not in VALID_MODELS: 408 | raise ValueError('model_name should be one of: ' + ', '.join(VALID_MODELS)) 409 | 410 | def _change_in_channels(self, in_channels): 411 | """Adjust model's first convolution layer to in_channels, if in_channels not equals 3. 412 | 413 | Args: 414 | in_channels (int): Input data's channel number. 
415 | """ 416 | if in_channels != 3: 417 | Conv2d = get_same_padding_conv2d(image_size=self._global_params.image_size) 418 | out_channels = round_filters(32, self._global_params) 419 | self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False) 420 | -------------------------------------------------------------------------------- /efficientnet_pytorch/utils.py: -------------------------------------------------------------------------------- 1 | """utils.py - Helper functions for building the model and for loading model parameters. 2 | These helper functions are built to mirror those in the official TensorFlow implementation. 3 | """ 4 | 5 | # Author: lukemelas (github username) 6 | # Github repo: https://github.com/lukemelas/EfficientNet-PyTorch 7 | # With adjustments and added comments by workingcoder (github username). 8 | 9 | import re 10 | import math 11 | import collections 12 | from functools import partial 13 | import torch 14 | from torch import nn 15 | from torch.nn import functional as F 16 | from torch.utils import model_zoo 17 | 18 | 19 | ################################################################################ 20 | # Help functions for model architecture 21 | ################################################################################ 22 | 23 | # GlobalParams and BlockArgs: Two namedtuples 24 | # Swish and MemoryEfficientSwish: Two implementations of the method 25 | # round_filters and round_repeats: 26 | # Functions to calculate params for scaling model width and depth ! ! ! 27 | # get_width_and_height_from_size and calculate_output_image_size 28 | # drop_connect: A structural design 29 | # get_same_padding_conv2d: 30 | # Conv2dDynamicSamePadding 31 | # Conv2dStaticSamePadding 32 | # get_same_padding_maxPool2d: 33 | # MaxPool2dDynamicSamePadding 34 | # MaxPool2dStaticSamePadding 35 | # It's an additional function, not used in EfficientNet, 36 | # but can be used in other model (such as EfficientDet). 
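# A quick illustration (hedged sketch) of the scaling helpers defined below,
# assuming EfficientNet-B4-style coefficients
# (width_coefficient=1.4, depth_coefficient=1.8, depth_divisor=8, min_depth=None):
#
#   >>> gp = GlobalParams(width_coefficient=1.4, depth_coefficient=1.8,
#   ...                   depth_divisor=8, min_depth=None)
#   >>> round_filters(32, gp)   # 32 * 1.4 = 44.8 -> rounded to a multiple of 8
#   48
#   >>> round_repeats(3, gp)    # ceil(3 * 1.8)
#   6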
37 | 38 | # Parameters for the entire model (stem, all blocks, and head) 39 | GlobalParams = collections.namedtuple('GlobalParams', [ 40 | 'width_coefficient', 'depth_coefficient', 'image_size', 'dropout_rate', 41 | 'num_classes', 'batch_norm_momentum', 'batch_norm_epsilon', 42 | 'drop_connect_rate', 'depth_divisor', 'min_depth', 'include_top']) 43 | 44 | # Parameters for an individual model block 45 | BlockArgs = collections.namedtuple('BlockArgs', [ 46 | 'num_repeat', 'kernel_size', 'stride', 'expand_ratio', 47 | 'input_filters', 'output_filters', 'se_ratio', 'id_skip']) 48 | 49 | # Set GlobalParams and BlockArgs's defaults 50 | GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields) 51 | BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields) 52 | 53 | # Swish activation function 54 | if hasattr(nn, 'SiLU'): 55 | Swish = nn.SiLU 56 | else: 57 | # For compatibility with old PyTorch versions 58 | class Swish(nn.Module): 59 | def forward(self, x): 60 | return x * torch.sigmoid(x) 61 | 62 | 63 | # A memory-efficient implementation of Swish function 64 | class SwishImplementation(torch.autograd.Function): 65 | @staticmethod 66 | def forward(ctx, i): 67 | result = i * torch.sigmoid(i) 68 | ctx.save_for_backward(i) 69 | return result 70 | 71 | @staticmethod 72 | def backward(ctx, grad_output): 73 | i = ctx.saved_tensors[0] 74 | sigmoid_i = torch.sigmoid(i) 75 | return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i))) 76 | 77 | 78 | class MemoryEfficientSwish(nn.Module): 79 | def forward(self, x): 80 | return SwishImplementation.apply(x) 81 | 82 | 83 | def round_filters(filters, global_params): 84 | """Calculate and round number of filters based on width multiplier. 85 | Use width_coefficient, depth_divisor and min_depth of global_params. 86 | 87 | Args: 88 | filters (int): Filters number to be calculated. 89 | global_params (namedtuple): Global params of the model. 90 | 91 | Returns: 92 | new_filters: New filters number after calculating. 93 | """ 94 | multiplier = global_params.width_coefficient 95 | if not multiplier: 96 | return filters 97 | # TODO: modify the params names. 98 | # maybe the names (width_divisor,min_width) 99 | # are more suitable than (depth_divisor,min_depth). 100 | divisor = global_params.depth_divisor 101 | min_depth = global_params.min_depth 102 | filters *= multiplier 103 | min_depth = min_depth or divisor # pay attention to this line when using min_depth 104 | # follow the formula transferred from official TensorFlow implementation 105 | new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) 106 | if new_filters < 0.9 * filters: # prevent rounding by more than 10% 107 | new_filters += divisor 108 | return int(new_filters) 109 | 110 | 111 | def round_repeats(repeats, global_params): 112 | """Calculate module's repeat number of a block based on depth multiplier. 113 | Use depth_coefficient of global_params. 114 | 115 | Args: 116 | repeats (int): num_repeat to be calculated. 117 | global_params (namedtuple): Global params of the model. 118 | 119 | Returns: 120 | new repeat: New repeat number after calculating. 121 | """ 122 | multiplier = global_params.depth_coefficient 123 | if not multiplier: 124 | return repeats 125 | # follow the formula transferred from official TensorFlow implementation 126 | return int(math.ceil(multiplier * repeats)) 127 | 128 | 129 | def drop_connect(inputs, p, training): 130 | """Drop connect. 131 | 132 | Args: 133 | input (tensor: BCWH): Input of this structure. 
134 | p (float: 0.0~1.0): Probability of drop connection. 135 | training (bool): The running mode. 136 | 137 | Returns: 138 | output: Output after drop connection. 139 | """ 140 | assert 0 <= p <= 1, 'p must be in range of [0,1]' 141 | 142 | if not training: 143 | return inputs 144 | 145 | batch_size = inputs.shape[0] 146 | keep_prob = 1 - p 147 | 148 | # generate binary_tensor mask according to probability (p for 0, 1-p for 1) 149 | random_tensor = keep_prob 150 | random_tensor += torch.rand([batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device) 151 | binary_tensor = torch.floor(random_tensor) 152 | 153 | output = inputs / keep_prob * binary_tensor 154 | return output 155 | 156 | 157 | def get_width_and_height_from_size(x): 158 | """Obtain height and width from x. 159 | 160 | Args: 161 | x (int, tuple or list): Data size. 162 | 163 | Returns: 164 | size: A tuple or list (H,W). 165 | """ 166 | if isinstance(x, int): 167 | return x, x 168 | if isinstance(x, list) or isinstance(x, tuple): 169 | return x 170 | else: 171 | raise TypeError() 172 | 173 | 174 | def calculate_output_image_size(input_image_size, stride): 175 | """Calculates the output image size when using Conv2dSamePadding with a stride. 176 | Necessary for static padding. Thanks to mannatsingh for pointing this out. 177 | 178 | Args: 179 | input_image_size (int, tuple or list): Size of input image. 180 | stride (int, tuple or list): Conv2d operation's stride. 181 | 182 | Returns: 183 | output_image_size: A list [H,W]. 184 | """ 185 | if input_image_size is None: 186 | return None 187 | image_height, image_width = get_width_and_height_from_size(input_image_size) 188 | stride = stride if isinstance(stride, int) else stride[0] 189 | image_height = int(math.ceil(image_height / stride)) 190 | image_width = int(math.ceil(image_width / stride)) 191 | return [image_height, image_width] 192 | 193 | 194 | # Note: 195 | # The following 'SamePadding' functions make output size equal ceil(input size/stride). 196 | # Only when stride equals 1, can the output size be the same as input size. 197 | # Don't be confused by their function names ! ! ! 198 | 199 | def get_same_padding_conv2d(image_size=None): 200 | """Chooses static padding if you have specified an image size, and dynamic padding otherwise. 201 | Static padding is necessary for ONNX exporting of models. 202 | 203 | Args: 204 | image_size (int or tuple): Size of the image. 205 | 206 | Returns: 207 | Conv2dDynamicSamePadding or Conv2dStaticSamePadding. 208 | """ 209 | if image_size is None: 210 | return Conv2dDynamicSamePadding 211 | else: 212 | return partial(Conv2dStaticSamePadding, image_size=image_size) 213 | 214 | 215 | class Conv2dDynamicSamePadding(nn.Conv2d): 216 | """2D Convolutions like TensorFlow, for a dynamic image size. 217 | The padding is operated in forward function by calculating dynamically. 218 | """ 219 | 220 | # Tips for 'SAME' mode padding. 
221 | # Given the following: 222 | # i: width or height 223 | # s: stride 224 | # k: kernel size 225 | # d: dilation 226 | # p: padding 227 | # Output after Conv2d: 228 | # o = floor((i+p-((k-1)*d+1))/s+1) 229 | # If o equals i, i = floor((i+p-((k-1)*d+1))/s+1), 230 | # => p = (i-1)*s+((k-1)*d+1)-i 231 | 232 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True): 233 | super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias) 234 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 235 | 236 | def forward(self, x): 237 | ih, iw = x.size()[-2:] 238 | kh, kw = self.weight.size()[-2:] 239 | sh, sw = self.stride 240 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) # change the output size according to stride ! ! ! 241 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 242 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 243 | if pad_h > 0 or pad_w > 0: 244 | x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]) 245 | return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 246 | 247 | 248 | class Conv2dStaticSamePadding(nn.Conv2d): 249 | """2D Convolutions like TensorFlow's 'SAME' mode, with the given input image size. 250 | The padding mudule is calculated in construction function, then used in forward. 251 | """ 252 | 253 | # With the same calculation as Conv2dDynamicSamePadding 254 | 255 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, image_size=None, **kwargs): 256 | super().__init__(in_channels, out_channels, kernel_size, stride, **kwargs) 257 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 258 | 259 | # Calculate padding based on image size and save it 260 | assert image_size is not None 261 | ih, iw = (image_size, image_size) if isinstance(image_size, int) else image_size 262 | kh, kw = self.weight.size()[-2:] 263 | sh, sw = self.stride 264 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) 265 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 266 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 267 | if pad_h > 0 or pad_w > 0: 268 | self.static_padding = nn.ZeroPad2d((pad_w // 2, pad_w - pad_w // 2, 269 | pad_h // 2, pad_h - pad_h // 2)) 270 | else: 271 | self.static_padding = nn.Identity() 272 | 273 | def forward(self, x): 274 | x = self.static_padding(x) 275 | x = F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 276 | return x 277 | 278 | 279 | def get_same_padding_maxPool2d(image_size=None): 280 | """Chooses static padding if you have specified an image size, and dynamic padding otherwise. 281 | Static padding is necessary for ONNX exporting of models. 282 | 283 | Args: 284 | image_size (int or tuple): Size of the image. 285 | 286 | Returns: 287 | MaxPool2dDynamicSamePadding or MaxPool2dStaticSamePadding. 288 | """ 289 | if image_size is None: 290 | return MaxPool2dDynamicSamePadding 291 | else: 292 | return partial(MaxPool2dStaticSamePadding, image_size=image_size) 293 | 294 | 295 | class MaxPool2dDynamicSamePadding(nn.MaxPool2d): 296 | """2D MaxPooling like TensorFlow's 'SAME' mode, with a dynamic image size. 297 | The padding is operated in forward function by calculating dynamically. 
298 | """ 299 | 300 | def __init__(self, kernel_size, stride, padding=0, dilation=1, return_indices=False, ceil_mode=False): 301 | super().__init__(kernel_size, stride, padding, dilation, return_indices, ceil_mode) 302 | self.stride = [self.stride] * 2 if isinstance(self.stride, int) else self.stride 303 | self.kernel_size = [self.kernel_size] * 2 if isinstance(self.kernel_size, int) else self.kernel_size 304 | self.dilation = [self.dilation] * 2 if isinstance(self.dilation, int) else self.dilation 305 | 306 | def forward(self, x): 307 | ih, iw = x.size()[-2:] 308 | kh, kw = self.kernel_size 309 | sh, sw = self.stride 310 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) 311 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 312 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 313 | if pad_h > 0 or pad_w > 0: 314 | x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]) 315 | return F.max_pool2d(x, self.kernel_size, self.stride, self.padding, 316 | self.dilation, self.ceil_mode, self.return_indices) 317 | 318 | 319 | class MaxPool2dStaticSamePadding(nn.MaxPool2d): 320 | """2D MaxPooling like TensorFlow's 'SAME' mode, with the given input image size. 321 | The padding mudule is calculated in construction function, then used in forward. 322 | """ 323 | 324 | def __init__(self, kernel_size, stride, image_size=None, **kwargs): 325 | super().__init__(kernel_size, stride, **kwargs) 326 | self.stride = [self.stride] * 2 if isinstance(self.stride, int) else self.stride 327 | self.kernel_size = [self.kernel_size] * 2 if isinstance(self.kernel_size, int) else self.kernel_size 328 | self.dilation = [self.dilation] * 2 if isinstance(self.dilation, int) else self.dilation 329 | 330 | # Calculate padding based on image size and save it 331 | assert image_size is not None 332 | ih, iw = (image_size, image_size) if isinstance(image_size, int) else image_size 333 | kh, kw = self.kernel_size 334 | sh, sw = self.stride 335 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) 336 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 337 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 338 | if pad_h > 0 or pad_w > 0: 339 | self.static_padding = nn.ZeroPad2d((pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2)) 340 | else: 341 | self.static_padding = nn.Identity() 342 | 343 | def forward(self, x): 344 | x = self.static_padding(x) 345 | x = F.max_pool2d(x, self.kernel_size, self.stride, self.padding, 346 | self.dilation, self.ceil_mode, self.return_indices) 347 | return x 348 | 349 | 350 | ################################################################################ 351 | # Helper functions for loading model params 352 | ################################################################################ 353 | 354 | # BlockDecoder: A Class for encoding and decoding BlockArgs 355 | # efficientnet_params: A function to query compound coefficient 356 | # get_model_params and efficientnet: 357 | # Functions to get BlockArgs and GlobalParams for efficientnet 358 | # url_map and url_map_advprop: Dicts of url_map for pretrained weights 359 | # load_pretrained_weights: A function to load pretrained weights 360 | 361 | class BlockDecoder(object): 362 | """Block Decoder for readability, 363 | straight from the official TensorFlow repository. 
364 | """ 365 | 366 | @staticmethod 367 | def _decode_block_string(block_string): 368 | """Get a block through a string notation of arguments. 369 | 370 | Args: 371 | block_string (str): A string notation of arguments. 372 | Examples: 'r1_k3_s11_e1_i32_o16_se0.25_noskip'. 373 | 374 | Returns: 375 | BlockArgs: The namedtuple defined at the top of this file. 376 | """ 377 | assert isinstance(block_string, str) 378 | 379 | ops = block_string.split('_') 380 | options = {} 381 | for op in ops: 382 | splits = re.split(r'(\d.*)', op) 383 | if len(splits) >= 2: 384 | key, value = splits[:2] 385 | options[key] = value 386 | 387 | # Check stride 388 | assert (('s' in options and len(options['s']) == 1) or 389 | (len(options['s']) == 2 and options['s'][0] == options['s'][1])) 390 | 391 | return BlockArgs( 392 | num_repeat=int(options['r']), 393 | kernel_size=int(options['k']), 394 | stride=[int(options['s'][0])], 395 | expand_ratio=int(options['e']), 396 | input_filters=int(options['i']), 397 | output_filters=int(options['o']), 398 | se_ratio=float(options['se']) if 'se' in options else None, 399 | id_skip=('noskip' not in block_string)) 400 | 401 | @staticmethod 402 | def _encode_block_string(block): 403 | """Encode a block to a string. 404 | 405 | Args: 406 | block (namedtuple): A BlockArgs type argument. 407 | 408 | Returns: 409 | block_string: A String form of BlockArgs. 410 | """ 411 | args = [ 412 | 'r%d' % block.num_repeat, 413 | 'k%d' % block.kernel_size, 414 | 's%d%d' % (block.strides[0], block.strides[1]), 415 | 'e%s' % block.expand_ratio, 416 | 'i%d' % block.input_filters, 417 | 'o%d' % block.output_filters 418 | ] 419 | if 0 < block.se_ratio <= 1: 420 | args.append('se%s' % block.se_ratio) 421 | if block.id_skip is False: 422 | args.append('noskip') 423 | return '_'.join(args) 424 | 425 | @staticmethod 426 | def decode(string_list): 427 | """Decode a list of string notations to specify blocks inside the network. 428 | 429 | Args: 430 | string_list (list[str]): A list of strings, each string is a notation of block. 431 | 432 | Returns: 433 | blocks_args: A list of BlockArgs namedtuples of block args. 434 | """ 435 | assert isinstance(string_list, list) 436 | blocks_args = [] 437 | for block_string in string_list: 438 | blocks_args.append(BlockDecoder._decode_block_string(block_string)) 439 | return blocks_args 440 | 441 | @staticmethod 442 | def encode(blocks_args): 443 | """Encode a list of BlockArgs to a list of strings. 444 | 445 | Args: 446 | blocks_args (list[namedtuples]): A list of BlockArgs namedtuples of block args. 447 | 448 | Returns: 449 | block_strings: A list of strings, each string is a notation of block. 450 | """ 451 | block_strings = [] 452 | for block in blocks_args: 453 | block_strings.append(BlockDecoder._encode_block_string(block)) 454 | return block_strings 455 | 456 | 457 | def efficientnet_params(model_name): 458 | """Map EfficientNet model name to parameter coefficients. 459 | 460 | Args: 461 | model_name (str): Model name to be queried. 462 | 463 | Returns: 464 | params_dict[model_name]: A (width,depth,res,dropout) tuple. 
465 | """ 466 | params_dict = { 467 | # Coefficients: width,depth,res,dropout 468 | 'efficientnet-b0': (1.0, 1.0, 224, 0.2), 469 | 'efficientnet-b1': (1.0, 1.1, 240, 0.2), 470 | 'efficientnet-b2': (1.1, 1.2, 260, 0.3), 471 | 'efficientnet-b3': (1.2, 1.4, 300, 0.3), 472 | 'efficientnet-b4': (1.4, 1.8, 380, 0.4), 473 | 'efficientnet-b5': (1.6, 2.2, 456, 0.4), 474 | 'efficientnet-b6': (1.8, 2.6, 528, 0.5), 475 | 'efficientnet-b7': (2.0, 3.1, 600, 0.5), 476 | 'efficientnet-b8': (2.2, 3.6, 672, 0.5), 477 | 'efficientnet-l2': (4.3, 5.3, 800, 0.5), 478 | } 479 | return params_dict[model_name] 480 | 481 | 482 | def efficientnet(width_coefficient=None, depth_coefficient=None, image_size=None, 483 | dropout_rate=0.2, drop_connect_rate=0.2, num_classes=1000, include_top=True): 484 | """Create BlockArgs and GlobalParams for efficientnet model. 485 | 486 | Args: 487 | width_coefficient (float) 488 | depth_coefficient (float) 489 | image_size (int) 490 | dropout_rate (float) 491 | drop_connect_rate (float) 492 | num_classes (int) 493 | 494 | Meaning as the name suggests. 495 | 496 | Returns: 497 | blocks_args, global_params. 498 | """ 499 | 500 | # Blocks args for the whole model(efficientnet-b0 by default) 501 | # It will be modified in the construction of EfficientNet Class according to model 502 | blocks_args = [ 503 | 'r1_k3_s11_e1_i32_o16_se0.25', 504 | 'r2_k3_s22_e6_i16_o24_se0.25', 505 | 'r2_k5_s22_e6_i24_o40_se0.25', 506 | 'r3_k3_s22_e6_i40_o80_se0.25', 507 | 'r3_k5_s11_e6_i80_o112_se0.25', 508 | 'r4_k5_s22_e6_i112_o192_se0.25', 509 | 'r1_k3_s11_e6_i192_o320_se0.25', 510 | ] 511 | blocks_args = BlockDecoder.decode(blocks_args) 512 | 513 | global_params = GlobalParams( 514 | width_coefficient=width_coefficient, 515 | depth_coefficient=depth_coefficient, 516 | image_size=image_size, 517 | dropout_rate=dropout_rate, 518 | 519 | num_classes=num_classes, 520 | batch_norm_momentum=0.99, 521 | batch_norm_epsilon=1e-3, 522 | drop_connect_rate=drop_connect_rate, 523 | depth_divisor=8, 524 | min_depth=None, 525 | include_top=include_top, 526 | ) 527 | 528 | return blocks_args, global_params 529 | 530 | 531 | def get_model_params(model_name, override_params): 532 | """Get the block args and global params for a given model name. 533 | 534 | Args: 535 | model_name (str): Model's name. 536 | override_params (dict): A dict to modify global_params. 537 | 538 | Returns: 539 | blocks_args, global_params 540 | """ 541 | if model_name.startswith('efficientnet'): 542 | w, d, s, p = efficientnet_params(model_name) 543 | # note: all models have drop connect rate = 0.2 544 | blocks_args, global_params = efficientnet( 545 | width_coefficient=w, depth_coefficient=d, dropout_rate=p, image_size=s) 546 | else: 547 | raise NotImplementedError('model name is not pre-defined: {}'.format(model_name)) 548 | if override_params: 549 | # ValueError will be raised here if override_params has fields not included in global_params. 
550 | global_params = global_params._replace(**override_params) 551 | return blocks_args, global_params 552 | 553 | 554 | # train with Standard methods 555 | # check more details in paper(EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks) 556 | url_map = { 557 | 'efficientnet-b0': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth', 558 | 'efficientnet-b1': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b1-f1951068.pth', 559 | 'efficientnet-b2': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b2-8bb594d6.pth', 560 | 'efficientnet-b3': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b3-5fb5a3c3.pth', 561 | 'efficientnet-b4': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b4-6ed6700e.pth', 562 | 'efficientnet-b5': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b5-b6417697.pth', 563 | 'efficientnet-b6': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b6-c76e70fd.pth', 564 | 'efficientnet-b7': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b7-dcc49843.pth', 565 | } 566 | 567 | # train with Adversarial Examples(AdvProp) 568 | # check more details in paper(Adversarial Examples Improve Image Recognition) 569 | url_map_advprop = { 570 | 'efficientnet-b0': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b0-b64d5a18.pth', 571 | 'efficientnet-b1': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b1-0f3ce85a.pth', 572 | 'efficientnet-b2': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b2-6e9d97e5.pth', 573 | 'efficientnet-b3': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b3-cdd7c0f4.pth', 574 | 'efficientnet-b4': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b4-44fb3a87.pth', 575 | 'efficientnet-b5': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b5-86493f6b.pth', 576 | 'efficientnet-b6': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b6-ac80338e.pth', 577 | 'efficientnet-b7': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b7-4652b6dd.pth', 578 | 'efficientnet-b8': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b8-22a8fe65.pth', 579 | } 580 | 581 | # TODO: add the petrained weights url map of 'efficientnet-l2' 582 | 583 | 584 | def load_pretrained_weights(model, model_name, weights_path=None, load_fc=True, advprop=False, verbose=True): 585 | """Loads pretrained weights from weights path or download using url. 586 | 587 | Args: 588 | model (Module): The whole model of efficientnet. 589 | model_name (str): Model name of efficientnet. 590 | weights_path (None or str): 591 | str: path to pretrained weights file on the local disk. 592 | None: use pretrained weights downloaded from the Internet. 593 | load_fc (bool): Whether to load pretrained weights for fc layer at the end of the model. 594 | advprop (bool): Whether to load pretrained weights 595 | trained with advprop (valid when weights_path is None). 
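        verbose (bool): Whether to print a message once the weights have been loaded.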
596 | """ 597 | if isinstance(weights_path, str): 598 | state_dict = torch.load(weights_path) 599 | else: 600 | # AutoAugment or Advprop (different preprocessing) 601 | url_map_ = url_map_advprop if advprop else url_map 602 | state_dict = model_zoo.load_url(url_map_[model_name]) 603 | 604 | if load_fc: 605 | ret = model.load_state_dict(state_dict, strict=False) 606 | assert not ret.missing_keys, 'Missing keys when loading pretrained weights: {}'.format(ret.missing_keys) 607 | else: 608 | state_dict.pop('_fc.weight') 609 | state_dict.pop('_fc.bias') 610 | ret = model.load_state_dict(state_dict, strict=False) 611 | assert set(ret.missing_keys) == set( 612 | ['_fc.weight', '_fc.bias']), 'Missing keys when loading pretrained weights: {}'.format(ret.missing_keys) 613 | assert not ret.unexpected_keys, 'Missing keys when loading pretrained weights: {}'.format(ret.unexpected_keys) 614 | 615 | if verbose: 616 | print('Loaded pretrained weights for {}'.format(model_name)) 617 | -------------------------------------------------------------------------------- /examples/imagenet/README.md: -------------------------------------------------------------------------------- 1 | ### Imagenet 2 | 3 | This is a preliminary directory for evaluating the model on ImageNet. It is adapted from the standard PyTorch Imagenet script. 4 | 5 | For now, only evaluation is supported, but I am currently building scripts to assist with training new models on Imagenet. 6 | 7 | The evaluation results are slightly different from the original TensorFlow repository, due to differences in data preprocessing. For example, with the current preprocessing, `efficientnet-b3` gives a top-1 accuracy of `80.8`, rather than `81.1` in the paper. I am working on porting the TensorFlow preprocessing into PyTorch to address this issue. 8 | 9 | To run on Imagenet, place your `train` and `val` directories in `data`. 10 | 11 | Example commands: 12 | ```bash 13 | # Evaluate small EfficientNet on CPU 14 | python main.py data -e -a 'efficientnet-b0' --pretrained 15 | ``` 16 | ```bash 17 | # Evaluate medium EfficientNet on GPU 18 | python main.py data -e -a 'efficientnet-b3' --pretrained --gpu 0 --batch-size 128 19 | ``` 20 | ```bash 21 | # Evaluate ResNet-50 for comparison 22 | python main.py data -e -a 'resnet50' --pretrained --gpu 0 23 | ``` 24 | -------------------------------------------------------------------------------- /examples/imagenet/data/README.md: -------------------------------------------------------------------------------- 1 | ### ImageNet 2 | 3 | Download ImageNet and place it into `train` and `val` folders here. 4 | 5 | More details may be found with the official PyTorch ImageNet example [here](https://github.com/pytorch/examples/blob/master/imagenet). 6 | -------------------------------------------------------------------------------- /examples/imagenet/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | Evaluate on ImageNet. Note that at the moment, training is not implemented (I am working on it). 3 | that being said, evaluation is working. 
4 | """ 5 | 6 | import argparse 7 | import os 8 | import random 9 | import shutil 10 | import time 11 | import warnings 12 | import PIL 13 | 14 | import torch 15 | import torch.nn as nn 16 | import torch.nn.parallel 17 | import torch.backends.cudnn as cudnn 18 | import torch.distributed as dist 19 | import torch.optim 20 | import torch.multiprocessing as mp 21 | import torch.utils.data 22 | import torch.utils.data.distributed 23 | import torchvision.transforms as transforms 24 | import torchvision.datasets as datasets 25 | import torchvision.models as models 26 | 27 | from efficientnet_pytorch import EfficientNet 28 | 29 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 30 | parser.add_argument('data', metavar='DIR', 31 | help='path to dataset') 32 | parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18', 33 | help='model architecture (default: resnet18)') 34 | parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', 35 | help='number of data loading workers (default: 4)') 36 | parser.add_argument('--epochs', default=90, type=int, metavar='N', 37 | help='number of total epochs to run') 38 | parser.add_argument('--start-epoch', default=0, type=int, metavar='N', 39 | help='manual epoch number (useful on restarts)') 40 | parser.add_argument('-b', '--batch-size', default=256, type=int, 41 | metavar='N', 42 | help='mini-batch size (default: 256), this is the total ' 43 | 'batch size of all GPUs on the current node when ' 44 | 'using Data Parallel or Distributed Data Parallel') 45 | parser.add_argument('--lr', '--learning-rate', default=0.1, type=float, 46 | metavar='LR', help='initial learning rate', dest='lr') 47 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M', 48 | help='momentum') 49 | parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, 50 | metavar='W', help='weight decay (default: 1e-4)', 51 | dest='weight_decay') 52 | parser.add_argument('-p', '--print-freq', default=10, type=int, 53 | metavar='N', help='print frequency (default: 10)') 54 | parser.add_argument('--resume', default='', type=str, metavar='PATH', 55 | help='path to latest checkpoint (default: none)') 56 | parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', 57 | help='evaluate model on validation set') 58 | parser.add_argument('--pretrained', dest='pretrained', action='store_true', 59 | help='use pre-trained model') 60 | parser.add_argument('--world-size', default=-1, type=int, 61 | help='number of nodes for distributed training') 62 | parser.add_argument('--rank', default=-1, type=int, 63 | help='node rank for distributed training') 64 | parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, 65 | help='url used to set up distributed training') 66 | parser.add_argument('--dist-backend', default='nccl', type=str, 67 | help='distributed backend') 68 | parser.add_argument('--seed', default=None, type=int, 69 | help='seed for initializing training. ') 70 | parser.add_argument('--gpu', default=None, type=int, 71 | help='GPU id to use.') 72 | parser.add_argument('--image_size', default=224, type=int, 73 | help='image size') 74 | parser.add_argument('--advprop', default=False, action='store_true', 75 | help='use advprop or not') 76 | parser.add_argument('--multiprocessing-distributed', action='store_true', 77 | help='Use multi-processing distributed training to launch ' 78 | 'N processes per node, which has N GPUs. 
This is the ' 79 | 'fastest way to use PyTorch for either single node or ' 80 | 'multi node data parallel training') 81 | 82 | best_acc1 = 0 83 | 84 | 85 | def main(): 86 | args = parser.parse_args() 87 | 88 | if args.seed is not None: 89 | random.seed(args.seed) 90 | torch.manual_seed(args.seed) 91 | cudnn.deterministic = True 92 | warnings.warn('You have chosen to seed training. ' 93 | 'This will turn on the CUDNN deterministic setting, ' 94 | 'which can slow down your training considerably! ' 95 | 'You may see unexpected behavior when restarting ' 96 | 'from checkpoints.') 97 | 98 | if args.gpu is not None: 99 | warnings.warn('You have chosen a specific GPU. This will completely ' 100 | 'disable data parallelism.') 101 | 102 | if args.dist_url == "env://" and args.world_size == -1: 103 | args.world_size = int(os.environ["WORLD_SIZE"]) 104 | 105 | args.distributed = args.world_size > 1 or args.multiprocessing_distributed 106 | 107 | ngpus_per_node = torch.cuda.device_count() 108 | if args.multiprocessing_distributed: 109 | # Since we have ngpus_per_node processes per node, the total world_size 110 | # needs to be adjusted accordingly 111 | args.world_size = ngpus_per_node * args.world_size 112 | # Use torch.multiprocessing.spawn to launch distributed processes: the 113 | # main_worker process function 114 | mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args)) 115 | else: 116 | # Simply call main_worker function 117 | main_worker(args.gpu, ngpus_per_node, args) 118 | 119 | 120 | def main_worker(gpu, ngpus_per_node, args): 121 | global best_acc1 122 | args.gpu = gpu 123 | 124 | if args.gpu is not None: 125 | print("Use GPU: {} for training".format(args.gpu)) 126 | 127 | if args.distributed: 128 | if args.dist_url == "env://" and args.rank == -1: 129 | args.rank = int(os.environ["RANK"]) 130 | if args.multiprocessing_distributed: 131 | # For multiprocessing distributed training, rank needs to be the 132 | # global rank among all the processes 133 | args.rank = args.rank * ngpus_per_node + gpu 134 | dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, 135 | world_size=args.world_size, rank=args.rank) 136 | # create model 137 | if 'efficientnet' in args.arch: # NEW 138 | if args.pretrained: 139 | model = EfficientNet.from_pretrained(args.arch, advprop=args.advprop) 140 | print("=> using pre-trained model '{}'".format(args.arch)) 141 | else: 142 | print("=> creating model '{}'".format(args.arch)) 143 | model = EfficientNet.from_name(args.arch) 144 | 145 | else: 146 | if args.pretrained: 147 | print("=> using pre-trained model '{}'".format(args.arch)) 148 | model = models.__dict__[args.arch](pretrained=True) 149 | else: 150 | print("=> creating model '{}'".format(args.arch)) 151 | model = models.__dict__[args.arch]() 152 | 153 | if args.distributed: 154 | # For multiprocessing distributed, DistributedDataParallel constructor 155 | # should always set the single device scope, otherwise, 156 | # DistributedDataParallel will use all available devices. 
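        # Below, the model is wrapped in DistributedDataParallel: pinned to a single GPU when args.gpu is set, otherwise spread across all visible GPUs.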
157 | if args.gpu is not None: 158 | torch.cuda.set_device(args.gpu) 159 | model.cuda(args.gpu) 160 | # When using a single GPU per process and per 161 | # DistributedDataParallel, we need to divide the batch size 162 | # ourselves based on the total number of GPUs we have 163 | args.batch_size = int(args.batch_size / ngpus_per_node) 164 | args.workers = int(args.workers / ngpus_per_node) 165 | model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) 166 | else: 167 | model.cuda() 168 | # DistributedDataParallel will divide and allocate batch_size to all 169 | # available GPUs if device_ids are not set 170 | model = torch.nn.parallel.DistributedDataParallel(model) 171 | elif args.gpu is not None: 172 | torch.cuda.set_device(args.gpu) 173 | model = model.cuda(args.gpu) 174 | else: 175 | # DataParallel will divide and allocate batch_size to all available GPUs 176 | if args.arch.startswith('alexnet') or args.arch.startswith('vgg'): 177 | model.features = torch.nn.DataParallel(model.features) 178 | model.cuda() 179 | else: 180 | model = torch.nn.DataParallel(model).cuda() 181 | 182 | # define loss function (criterion) and optimizer 183 | criterion = nn.CrossEntropyLoss().cuda(args.gpu) 184 | 185 | optimizer = torch.optim.SGD(model.parameters(), args.lr, 186 | momentum=args.momentum, 187 | weight_decay=args.weight_decay) 188 | 189 | # optionally resume from a checkpoint 190 | if args.resume: 191 | if os.path.isfile(args.resume): 192 | print("=> loading checkpoint '{}'".format(args.resume)) 193 | checkpoint = torch.load(args.resume) 194 | args.start_epoch = checkpoint['epoch'] 195 | best_acc1 = checkpoint['best_acc1'] 196 | if args.gpu is not None: 197 | # best_acc1 may be from a checkpoint from a different GPU 198 | best_acc1 = best_acc1.to(args.gpu) 199 | model.load_state_dict(checkpoint['state_dict']) 200 | optimizer.load_state_dict(checkpoint['optimizer']) 201 | print("=> loaded checkpoint '{}' (epoch {})" 202 | .format(args.resume, checkpoint['epoch'])) 203 | else: 204 | print("=> no checkpoint found at '{}'".format(args.resume)) 205 | 206 | cudnn.benchmark = True 207 | 208 | # Data loading code 209 | traindir = os.path.join(args.data, 'train') 210 | valdir = os.path.join(args.data, 'val') 211 | if args.advprop: 212 | normalize = transforms.Lambda(lambda img: img * 2.0 - 1.0) 213 | else: 214 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 215 | std=[0.229, 0.224, 0.225]) 216 | 217 | if 'efficientnet' in args.arch: 218 | image_size = EfficientNet.get_image_size(args.arch) 219 | else: 220 | image_size = args.image_size 221 | 222 | train_dataset = datasets.ImageFolder( 223 | traindir, 224 | transforms.Compose([ 225 | transforms.RandomResizedCrop(image_size), 226 | transforms.RandomHorizontalFlip(), 227 | transforms.ToTensor(), 228 | normalize, 229 | ])) 230 | 231 | if args.distributed: 232 | train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) 233 | else: 234 | train_sampler = None 235 | 236 | train_loader = torch.utils.data.DataLoader( 237 | train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), 238 | num_workers=args.workers, pin_memory=True, sampler=train_sampler) 239 | 240 | val_transforms = transforms.Compose([ 241 | transforms.Resize(image_size, interpolation=PIL.Image.BICUBIC), 242 | transforms.CenterCrop(image_size), 243 | transforms.ToTensor(), 244 | normalize, 245 | ]) 246 | print('Using image size', image_size) 247 | 248 | val_loader = torch.utils.data.DataLoader( 249 | datasets.ImageFolder(valdir, 
val_transforms), 250 | batch_size=args.batch_size, shuffle=False, 251 | num_workers=args.workers, pin_memory=True) 252 | 253 | if args.evaluate: 254 | res = validate(val_loader, model, criterion, args) 255 | with open('res.txt', 'w') as f: 256 | print(res, file=f) 257 | return 258 | 259 | for epoch in range(args.start_epoch, args.epochs): 260 | if args.distributed: 261 | train_sampler.set_epoch(epoch) 262 | adjust_learning_rate(optimizer, epoch, args) 263 | 264 | # train for one epoch 265 | train(train_loader, model, criterion, optimizer, epoch, args) 266 | 267 | # evaluate on validation set 268 | acc1 = validate(val_loader, model, criterion, args) 269 | 270 | # remember best acc@1 and save checkpoint 271 | is_best = acc1 > best_acc1 272 | best_acc1 = max(acc1, best_acc1) 273 | 274 | if not args.multiprocessing_distributed or (args.multiprocessing_distributed 275 | and args.rank % ngpus_per_node == 0): 276 | save_checkpoint({ 277 | 'epoch': epoch + 1, 278 | 'arch': args.arch, 279 | 'state_dict': model.state_dict(), 280 | 'best_acc1': best_acc1, 281 | 'optimizer' : optimizer.state_dict(), 282 | }, is_best) 283 | 284 | 285 | def train(train_loader, model, criterion, optimizer, epoch, args): 286 | batch_time = AverageMeter('Time', ':6.3f') 287 | data_time = AverageMeter('Data', ':6.3f') 288 | losses = AverageMeter('Loss', ':.4e') 289 | top1 = AverageMeter('Acc@1', ':6.2f') 290 | top5 = AverageMeter('Acc@5', ':6.2f') 291 | progress = ProgressMeter(len(train_loader), batch_time, data_time, losses, top1, 292 | top5, prefix="Epoch: [{}]".format(epoch)) 293 | 294 | # switch to train mode 295 | model.train() 296 | 297 | end = time.time() 298 | for i, (images, target) in enumerate(train_loader): 299 | # measure data loading time 300 | data_time.update(time.time() - end) 301 | 302 | if args.gpu is not None: 303 | images = images.cuda(args.gpu, non_blocking=True) 304 | target = target.cuda(args.gpu, non_blocking=True) 305 | 306 | # compute output 307 | output = model(images) 308 | loss = criterion(output, target) 309 | 310 | # measure accuracy and record loss 311 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 312 | losses.update(loss.item(), images.size(0)) 313 | top1.update(acc1[0], images.size(0)) 314 | top5.update(acc5[0], images.size(0)) 315 | 316 | # compute gradient and do SGD step 317 | optimizer.zero_grad() 318 | loss.backward() 319 | optimizer.step() 320 | 321 | # measure elapsed time 322 | batch_time.update(time.time() - end) 323 | end = time.time() 324 | 325 | if i % args.print_freq == 0: 326 | progress.print(i) 327 | 328 | 329 | def validate(val_loader, model, criterion, args): 330 | batch_time = AverageMeter('Time', ':6.3f') 331 | losses = AverageMeter('Loss', ':.4e') 332 | top1 = AverageMeter('Acc@1', ':6.2f') 333 | top5 = AverageMeter('Acc@5', ':6.2f') 334 | progress = ProgressMeter(len(val_loader), batch_time, losses, top1, top5, 335 | prefix='Test: ') 336 | 337 | # switch to evaluate mode 338 | model.eval() 339 | 340 | with torch.no_grad(): 341 | end = time.time() 342 | for i, (images, target) in enumerate(val_loader): 343 | if args.gpu is not None: 344 | images = images.cuda(args.gpu, non_blocking=True) 345 | target = target.cuda(args.gpu, non_blocking=True) 346 | 347 | # compute output 348 | output = model(images) 349 | loss = criterion(output, target) 350 | 351 | # measure accuracy and record loss 352 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 353 | losses.update(loss.item(), images.size(0)) 354 | top1.update(acc1[0], images.size(0)) 355 | top5.update(acc5[0], 
images.size(0)) 356 | 357 | # measure elapsed time 358 | batch_time.update(time.time() - end) 359 | end = time.time() 360 | 361 | if i % args.print_freq == 0: 362 | progress.print(i) 363 | 364 | # TODO: this should also be done with the ProgressMeter 365 | print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' 366 | .format(top1=top1, top5=top5)) 367 | 368 | return top1.avg 369 | 370 | 371 | def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): 372 | torch.save(state, filename) 373 | if is_best: 374 | shutil.copyfile(filename, 'model_best.pth.tar') 375 | 376 | 377 | class AverageMeter(object): 378 | """Computes and stores the average and current value""" 379 | def __init__(self, name, fmt=':f'): 380 | self.name = name 381 | self.fmt = fmt 382 | self.reset() 383 | 384 | def reset(self): 385 | self.val = 0 386 | self.avg = 0 387 | self.sum = 0 388 | self.count = 0 389 | 390 | def update(self, val, n=1): 391 | self.val = val 392 | self.sum += val * n 393 | self.count += n 394 | self.avg = self.sum / self.count 395 | 396 | def __str__(self): 397 | fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' 398 | return fmtstr.format(**self.__dict__) 399 | 400 | 401 | class ProgressMeter(object): 402 | def __init__(self, num_batches, *meters, prefix=""): 403 | self.batch_fmtstr = self._get_batch_fmtstr(num_batches) 404 | self.meters = meters 405 | self.prefix = prefix 406 | 407 | def print(self, batch): 408 | entries = [self.prefix + self.batch_fmtstr.format(batch)] 409 | entries += [str(meter) for meter in self.meters] 410 | print('\t'.join(entries)) 411 | 412 | def _get_batch_fmtstr(self, num_batches): 413 | num_digits = len(str(num_batches // 1)) 414 | fmt = '{:' + str(num_digits) + 'd}' 415 | return '[' + fmt + '/' + fmt.format(num_batches) + ']' 416 | 417 | 418 | def adjust_learning_rate(optimizer, epoch, args): 419 | """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" 420 | lr = args.lr * (0.1 ** (epoch // 30)) 421 | for param_group in optimizer.param_groups: 422 | param_group['lr'] = lr 423 | 424 | 425 | def accuracy(output, target, topk=(1,)): 426 | """Computes the accuracy over the k top predictions for the specified values of k""" 427 | with torch.no_grad(): 428 | maxk = max(topk) 429 | batch_size = target.size(0) 430 | 431 | _, pred = output.topk(maxk, 1, True, True) 432 | pred = pred.t() 433 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 434 | 435 | res = [] 436 | for k in topk: 437 | correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) 438 | res.append(correct_k.mul_(100.0 / batch_size)) 439 | return res 440 | 441 | 442 | if __name__ == '__main__': 443 | main() 444 | -------------------------------------------------------------------------------- /examples/simple/img.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lukemelas/EfficientNet-PyTorch/7e8b0d312162f335785fb5dcfa1df29a75a1783a/examples/simple/img.jpg -------------------------------------------------------------------------------- /examples/simple/img2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lukemelas/EfficientNet-PyTorch/7e8b0d312162f335785fb5dcfa1df29a75a1783a/examples/simple/img2.jpg -------------------------------------------------------------------------------- /hubconf.py: -------------------------------------------------------------------------------- 1 | from efficientnet_pytorch import EfficientNet as 
_EfficientNet 2 | 3 | dependencies = ['torch'] 4 | 5 | 6 | def _create_model_fn(model_name): 7 | def _model_fn(num_classes=1000, in_channels=3, pretrained='imagenet'): 8 | """Create Efficient Net. 9 | 10 | Described in detail here: https://arxiv.org/abs/1905.11946 11 | 12 | Args: 13 | num_classes (int, optional): Number of classes, default is 1000. 14 | in_channels (int, optional): Number of input channels, default 15 | is 3. 16 | pretrained (str, optional): One of [None, 'imagenet', 'advprop'] 17 | If None, no pretrained model is loaded. 18 | If 'imagenet', models trained on imagenet dataset are loaded. 19 | If 'advprop', models trained using adversarial training called 20 | advprop are loaded. It is important to note that the 21 | preprocessing required for the advprop pretrained models is 22 | slightly different from normal ImageNet preprocessing 23 | """ 24 | model_name_ = model_name.replace('_', '-') 25 | if pretrained is not None: 26 | model = _EfficientNet.from_pretrained( 27 | model_name=model_name_, 28 | advprop=(pretrained == 'advprop'), 29 | num_classes=num_classes, 30 | in_channels=in_channels) 31 | else: 32 | model = _EfficientNet.from_name( 33 | model_name=model_name_, 34 | override_params={'num_classes': num_classes}, 35 | ) 36 | model._change_in_channels(in_channels) 37 | 38 | return model 39 | 40 | return _model_fn 41 | 42 | for model_name in ['efficientnet_b' + str(i) for i in range(9)]: 43 | locals()[model_name] = _create_model_fn(model_name) 44 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # Note: To use the 'upload' functionality of this file, you must: 5 | # $ pipenv install twine --dev 6 | 7 | import io 8 | import os 9 | import sys 10 | from shutil import rmtree 11 | 12 | from setuptools import find_packages, setup, Command 13 | 14 | # Package meta-data. 15 | NAME = 'efficientnet_pytorch' 16 | DESCRIPTION = 'EfficientNet implemented in PyTorch.' 17 | URL = 'https://github.com/lukemelas/EfficientNet-PyTorch' 18 | EMAIL = 'lmelaskyriazi@college.harvard.edu' 19 | AUTHOR = 'Luke' 20 | REQUIRES_PYTHON = '>=3.5.0' 21 | VERSION = '0.7.1' 22 | 23 | # What packages are required for this module to be executed? 24 | REQUIRED = [ 25 | 'torch' 26 | ] 27 | 28 | # What packages are optional? 29 | EXTRAS = { 30 | # 'fancy feature': ['django'], 31 | } 32 | 33 | # The rest you shouldn't have to touch too much :) 34 | # ------------------------------------------------ 35 | # Except, perhaps the License and Trove Classifiers! 36 | # If you do change the License, remember to change the Trove Classifier for that! 37 | 38 | here = os.path.abspath(os.path.dirname(__file__)) 39 | 40 | # Import the README and use it as the long-description. 41 | # Note: this will only work if 'README.md' is present in your MANIFEST.in file! 42 | try: 43 | with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f: 44 | long_description = '\n' + f.read() 45 | except FileNotFoundError: 46 | long_description = DESCRIPTION 47 | 48 | # Load the package's __version__.py module as a dictionary. 
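# If VERSION is set above, it is used directly; otherwise __version__.py is executed to populate this dict.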
49 | about = {} 50 | if not VERSION: 51 | project_slug = NAME.lower().replace("-", "_").replace(" ", "_") 52 | with open(os.path.join(here, project_slug, '__version__.py')) as f: 53 | exec(f.read(), about) 54 | else: 55 | about['__version__'] = VERSION 56 | 57 | 58 | class UploadCommand(Command): 59 | """Support setup.py upload.""" 60 | 61 | description = 'Build and publish the package.' 62 | user_options = [] 63 | 64 | @staticmethod 65 | def status(s): 66 | """Prints things in bold.""" 67 | print('\033[1m{0}\033[0m'.format(s)) 68 | 69 | def initialize_options(self): 70 | pass 71 | 72 | def finalize_options(self): 73 | pass 74 | 75 | def run(self): 76 | try: 77 | self.status('Removing previous builds…') 78 | rmtree(os.path.join(here, 'dist')) 79 | except OSError: 80 | pass 81 | 82 | self.status('Building Source and Wheel (universal) distribution…') 83 | os.system('{0} setup.py sdist bdist_wheel --universal'.format(sys.executable)) 84 | 85 | self.status('Uploading the package to PyPI via Twine…') 86 | os.system('twine upload dist/*') 87 | 88 | self.status('Pushing git tags…') 89 | os.system('git tag v{0}'.format(about['__version__'])) 90 | os.system('git push --tags') 91 | 92 | sys.exit() 93 | 94 | 95 | # Where the magic happens: 96 | setup( 97 | name=NAME, 98 | version=about['__version__'], 99 | description=DESCRIPTION, 100 | long_description=long_description, 101 | long_description_content_type='text/markdown', 102 | author=AUTHOR, 103 | author_email=EMAIL, 104 | python_requires=REQUIRES_PYTHON, 105 | url=URL, 106 | packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]), 107 | # py_modules=['model'], # If your package is a single module, use this instead of 'packages' 108 | install_requires=REQUIRED, 109 | extras_require=EXTRAS, 110 | include_package_data=True, 111 | license='Apache', 112 | classifiers=[ 113 | # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers 114 | 'License :: OSI Approved :: Apache Software License', 115 | 'Programming Language :: Python', 116 | 'Programming Language :: Python :: 3', 117 | 'Programming Language :: Python :: 3.6', 118 | ], 119 | # $ setup.py publish support. 
120 | cmdclass={ 121 | 'upload': UploadCommand, 122 | }, 123 | ) 124 | -------------------------------------------------------------------------------- /sotabench.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import PIL 4 | import torch 5 | from torch.utils.data import DataLoader 6 | import torchvision.transforms as transforms 7 | from torchvision.datasets import ImageNet 8 | 9 | from efficientnet_pytorch import EfficientNet 10 | 11 | from sotabencheval.image_classification import ImageNetEvaluator 12 | from sotabencheval.utils import is_server 13 | 14 | if is_server(): 15 | DATA_ROOT = DATA_ROOT = os.environ.get('IMAGENET_DIR', './imagenet') # './.data/vision/imagenet' 16 | else: # local settings 17 | DATA_ROOT = os.environ['IMAGENET_DIR'] 18 | assert bool(DATA_ROOT), 'please set IMAGENET_DIR environment variable' 19 | print('Local data root: ', DATA_ROOT) 20 | 21 | model_name = 'EfficientNet-B5' 22 | model = EfficientNet.from_pretrained(model_name.lower()) 23 | image_size = EfficientNet.get_image_size(model_name.lower()) 24 | 25 | input_transform = transforms.Compose([ 26 | transforms.Resize(image_size, PIL.Image.BICUBIC), 27 | transforms.CenterCrop(image_size), 28 | transforms.ToTensor(), 29 | transforms.Normalize( 30 | mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 31 | ]) 32 | 33 | test_dataset = ImageNet( 34 | DATA_ROOT, 35 | split="val", 36 | transform=input_transform, 37 | target_transform=None, 38 | ) 39 | 40 | test_loader = DataLoader( 41 | test_dataset, 42 | batch_size=128, 43 | shuffle=False, 44 | num_workers=4, 45 | pin_memory=True, 46 | ) 47 | 48 | model = model.cuda() 49 | model.eval() 50 | 51 | evaluator = ImageNetEvaluator(model_name=model_name, 52 | paper_arxiv_id='1905.11946') 53 | 54 | def get_img_id(image_name): 55 | return image_name.split('/')[-1].replace('.JPEG', '') 56 | 57 | with torch.no_grad(): 58 | for i, (input, target) in enumerate(test_loader): 59 | input = input.to(device='cuda', non_blocking=True) 60 | target = target.to(device='cuda', non_blocking=True) 61 | output = model(input) 62 | image_ids = [get_img_id(img[0]) for img in test_loader.dataset.imgs[i*test_loader.batch_size:(i+1)*test_loader.batch_size]] 63 | evaluator.add(dict(zip(image_ids, list(output.cpu().numpy())))) 64 | if evaluator.cache_exists: 65 | break 66 | 67 | if not is_server(): 68 | print("Results:") 69 | print(evaluator.get_results()) 70 | 71 | evaluator.save() 72 | -------------------------------------------------------------------------------- /sotabench_setup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash -x 2 | source /workspace/venv/bin/activate 3 | PYTHON=${PYTHON:-"python"} 4 | $PYTHON -m pip install torch 5 | $PYTHON -m pip install torchvision 6 | $PYTHON -m pip install scipy 7 | -------------------------------------------------------------------------------- /tests/test_model.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import pytest 4 | import torch 5 | import torch.nn as nn 6 | 7 | from efficientnet_pytorch import EfficientNet 8 | 9 | 10 | # -- fixtures ------------------------------------------------------------------------------------- 11 | 12 | @pytest.fixture(scope='module', params=[x for x in range(4)]) 13 | def model(request): 14 | return 'efficientnet-b{}'.format(request.param) 15 | 16 | 17 | @pytest.fixture(scope='module', 
params=[True, False]) 18 | def pretrained(request): 19 | return request.param 20 | 21 | 22 | @pytest.fixture(scope='function') 23 | def net(model, pretrained): 24 | return EfficientNet.from_pretrained(model) if pretrained else EfficientNet.from_name(model) 25 | 26 | 27 | # -- tests ---------------------------------------------------------------------------------------- 28 | 29 | @pytest.mark.parametrize('img_size', [224, 256, 512]) 30 | def test_forward(net, img_size): 31 | """Test `.forward()` doesn't throw an error""" 32 | data = torch.zeros((1, 3, img_size, img_size)) 33 | output = net(data) 34 | assert not torch.isnan(output).any() 35 | 36 | 37 | def test_dropout_training(net): 38 | """Test dropout `.training` is set by `.train()` on parent `nn.module`""" 39 | net.train() 40 | assert net._dropout.training == True 41 | 42 | 43 | def test_dropout_eval(net): 44 | """Test dropout `.training` is set by `.eval()` on parent `nn.module`""" 45 | net.eval() 46 | assert net._dropout.training == False 47 | 48 | 49 | def test_dropout_update(net): 50 | """Test dropout `.training` is updated by `.train()` and `.eval()` on parent `nn.module`""" 51 | net.train() 52 | assert net._dropout.training == True 53 | net.eval() 54 | assert net._dropout.training == False 55 | net.train() 56 | assert net._dropout.training == True 57 | net.eval() 58 | assert net._dropout.training == False 59 | 60 | 61 | @pytest.mark.parametrize('img_size', [224, 256, 512]) 62 | def test_modify_dropout(net, img_size): 63 | """Test ability to modify dropout and fc modules of network""" 64 | dropout = nn.Sequential(OrderedDict([ 65 | ('_bn2', nn.BatchNorm1d(net._bn1.num_features)), 66 | ('_drop1', nn.Dropout(p=net._global_params.dropout_rate)), 67 | ('_linear1', nn.Linear(net._bn1.num_features, 512)), 68 | ('_relu', nn.ReLU()), 69 | ('_bn3', nn.BatchNorm1d(512)), 70 | ('_drop2', nn.Dropout(p=net._global_params.dropout_rate / 2)) 71 | ])) 72 | fc = nn.Linear(512, net._global_params.num_classes) 73 | 74 | net._dropout = dropout 75 | net._fc = fc 76 | 77 | data = torch.zeros((2, 3, img_size, img_size)) 78 | output = net(data) 79 | assert not torch.isnan(output).any() 80 | 81 | 82 | @pytest.mark.parametrize('img_size', [224, 256, 512]) 83 | def test_modify_pool(net, img_size): 84 | """Test ability to modify pooling module of network""" 85 | 86 | class AdaptiveMaxAvgPool(nn.Module): 87 | 88 | def __init__(self): 89 | super().__init__() 90 | self.ada_avgpool = nn.AdaptiveAvgPool2d(1) 91 | self.ada_maxpool = nn.AdaptiveMaxPool2d(1) 92 | 93 | def forward(self, x): 94 | avg_x = self.ada_avgpool(x) 95 | max_x = self.ada_maxpool(x) 96 | x = torch.cat((avg_x, max_x), dim=1) 97 | return x 98 | 99 | avg_pooling = AdaptiveMaxAvgPool() 100 | fc = nn.Linear(net._fc.in_features * 2, net._global_params.num_classes) 101 | 102 | net._avg_pooling = avg_pooling 103 | net._fc = fc 104 | 105 | data = torch.zeros((2, 3, img_size, img_size)) 106 | output = net(data) 107 | assert not torch.isnan(output).any() 108 | 109 | 110 | @pytest.mark.parametrize('img_size', [224, 256, 512]) 111 | def test_extract_endpoints(net, img_size): 112 | """Test `.extract_endpoints()` doesn't throw an error""" 113 | data = torch.zeros((1, 3, img_size, img_size)) 114 | endpoints = net.extract_endpoints(data) 115 | assert not torch.isnan(endpoints['reduction_1']).any() 116 | assert not torch.isnan(endpoints['reduction_2']).any() 117 | assert not torch.isnan(endpoints['reduction_3']).any() 118 | assert not torch.isnan(endpoints['reduction_4']).any() 119 | assert not 
torch.isnan(endpoints['reduction_5']).any() 120 | assert endpoints['reduction_1'].size(2) == img_size // 2 121 | assert endpoints['reduction_2'].size(2) == img_size // 4 122 | assert endpoints['reduction_3'].size(2) == img_size // 8 123 | assert endpoints['reduction_4'].size(2) == img_size // 16 124 | assert endpoints['reduction_5'].size(2) == img_size // 32 125 | -------------------------------------------------------------------------------- /tf_to_pytorch/README.md: -------------------------------------------------------------------------------- 1 | ### TensorFlow to PyTorch Conversion 2 | 3 | This directory is used to convert TensorFlow weights to PyTorch. It was hacked together fairly quickly, so the code is not the most beautiful (just a warning!), but it does the job. I will be refactoring it soon. 4 | 5 | I should also emphasize that you do *not* need to run any of this code to load pretrained weights. Simply use `EfficientNet.from_pretrained(...)`. 6 | 7 | That being said, the main script here is `convert_tf_to_pt/load_tf_weights.py`. In order to use it, you should first download the pretrained TensorFlow weights: 8 | ```bash 9 | cd pretrained_tensorflow 10 | ./download.sh efficientnet-b0 11 | cd .. 12 | ``` 13 | Then run the conversion: 14 | ```bash 15 | mkdir -p pretrained_pytorch 16 | cd convert_tf_to_pt 17 | python load_tf_weights.py \ 18 | --model_name efficientnet-b0 \ 19 | --tf_checkpoint ../pretrained_tensorflow/efficientnet-b0/ \ 20 | --output_file ../pretrained_pytorch/efficientnet-b0.pth 21 | ``` 22 | 23 | 26 | -------------------------------------------------------------------------------- /tf_to_pytorch/convert_tf_to_pt/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | mkdir original_tf 4 | cd original_tf 5 | touch __init__.py 6 | wget https://raw.githubusercontent.com/tensorflow/tpu/master/models/official/efficientnet/efficientnet_builder.py 7 | wget https://raw.githubusercontent.com/tensorflow/tpu/master/models/official/efficientnet/efficientnet_model.py 8 | wget https://raw.githubusercontent.com/tensorflow/tpu/master/models/official/efficientnet/eval_ckpt_main.py 9 | wget https://raw.githubusercontent.com/tensorflow/tpu/master/models/official/efficientnet/utils.py 10 | wget https://raw.githubusercontent.com/tensorflow/tpu/master/models/official/efficientnet/preprocessing.py 11 | cd .. 12 | mkdir -p tmp -------------------------------------------------------------------------------- /tf_to_pytorch/convert_tf_to_pt/load_tf_weights.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import torch 4 | 5 | tf.compat.v1.disable_v2_behavior() 6 | 7 | def load_param(checkpoint_file, conversion_table, model_name): 8 | """ 9 | Load parameters according to conversion_table.
10 | 11 | Args: 12 | checkpoint_file (string): pretrained checkpoint model file in tensorflow 13 | conversion_table (dict): { pytorch tensor in a model : checkpoint variable name } 14 | """ 15 | for pyt_param, tf_param_name in conversion_table.items(): 16 | tf_param_name = str(model_name) + '/' + tf_param_name 17 | tf_param = tf.train.load_variable(checkpoint_file, tf_param_name) 18 | if 'conv' in tf_param_name and 'kernel' in tf_param_name: 19 | tf_param = np.transpose(tf_param, (3, 2, 0, 1)) 20 | if 'depthwise' in tf_param_name: 21 | tf_param = np.transpose(tf_param, (1, 0, 2, 3)) 22 | elif tf_param_name.endswith('kernel'): # for weight(kernel), we should do transpose 23 | tf_param = np.transpose(tf_param) 24 | assert pyt_param.size() == tf_param.shape, \ 25 | 'Dim Mismatch: %s vs %s ; %s' % (tuple(pyt_param.size()), tf_param.shape, tf_param_name) 26 | pyt_param.data = torch.from_numpy(tf_param) 27 | 28 | 29 | def load_efficientnet(model, checkpoint_file, model_name): 30 | """ 31 | Load PyTorch EfficientNet from TensorFlow checkpoint file 32 | """ 33 | 34 | # This will store the enire conversion table 35 | conversion_table = {} 36 | merge = lambda dict1, dict2: {**dict1, **dict2} 37 | 38 | # All the weights not in the conv blocks 39 | conversion_table_for_weights_outside_blocks = { 40 | model._conv_stem.weight: 'stem/conv2d/kernel', # [3, 3, 3, 32]), 41 | model._bn0.bias: 'stem/tpu_batch_normalization/beta', # [32]), 42 | model._bn0.weight: 'stem/tpu_batch_normalization/gamma', # [32]), 43 | model._bn0.running_mean: 'stem/tpu_batch_normalization/moving_mean', # [32]), 44 | model._bn0.running_var: 'stem/tpu_batch_normalization/moving_variance', # [32]), 45 | model._conv_head.weight: 'head/conv2d/kernel', # [1, 1, 320, 1280]), 46 | model._bn1.bias: 'head/tpu_batch_normalization/beta', # [1280]), 47 | model._bn1.weight: 'head/tpu_batch_normalization/gamma', # [1280]), 48 | model._bn1.running_mean: 'head/tpu_batch_normalization/moving_mean', # [32]), 49 | model._bn1.running_var: 'head/tpu_batch_normalization/moving_variance', # [32]), 50 | model._fc.bias: 'head/dense/bias', # [1000]), 51 | model._fc.weight: 'head/dense/kernel', # [1280, 1000]), 52 | } 53 | conversion_table = merge(conversion_table, conversion_table_for_weights_outside_blocks) 54 | 55 | # The first conv block is special because it does not have _expand_conv 56 | conversion_table_for_first_block = { 57 | model._blocks[0]._project_conv.weight: 'blocks_0/conv2d/kernel', # 1, 1, 32, 16]), 58 | model._blocks[0]._depthwise_conv.weight: 'blocks_0/depthwise_conv2d/depthwise_kernel', # [3, 3, 32, 1]), 59 | model._blocks[0]._se_reduce.bias: 'blocks_0/se/conv2d/bias', # , [8]), 60 | model._blocks[0]._se_reduce.weight: 'blocks_0/se/conv2d/kernel', # , [1, 1, 32, 8]), 61 | model._blocks[0]._se_expand.bias: 'blocks_0/se/conv2d_1/bias', # , [32]), 62 | model._blocks[0]._se_expand.weight: 'blocks_0/se/conv2d_1/kernel', # , [1, 1, 8, 32]), 63 | model._blocks[0]._bn1.bias: 'blocks_0/tpu_batch_normalization/beta', # [32]), 64 | model._blocks[0]._bn1.weight: 'blocks_0/tpu_batch_normalization/gamma', # [32]), 65 | model._blocks[0]._bn1.running_mean: 'blocks_0/tpu_batch_normalization/moving_mean', 66 | model._blocks[0]._bn1.running_var: 'blocks_0/tpu_batch_normalization/moving_variance', 67 | model._blocks[0]._bn2.bias: 'blocks_0/tpu_batch_normalization_1/beta', # [16]), 68 | model._blocks[0]._bn2.weight: 'blocks_0/tpu_batch_normalization_1/gamma', # [16]), 69 | model._blocks[0]._bn2.running_mean: 'blocks_0/tpu_batch_normalization_1/moving_mean', 
70 | model._blocks[0]._bn2.running_var: 'blocks_0/tpu_batch_normalization_1/moving_variance', 71 | } 72 | conversion_table = merge(conversion_table, conversion_table_for_first_block) 73 | 74 | # Conv blocks 75 | for i in range(len(model._blocks)): 76 | 77 | is_first_block = '_expand_conv.weight' not in [n for n, p in model._blocks[i].named_parameters()] 78 | 79 | if is_first_block: 80 | conversion_table_block = { 81 | model._blocks[i]._project_conv.weight: 'blocks_' + str(i) + '/conv2d/kernel', # 1, 1, 32, 16]), 82 | model._blocks[i]._depthwise_conv.weight: 'blocks_' + str(i) + '/depthwise_conv2d/depthwise_kernel', 83 | # [3, 3, 32, 1]), 84 | model._blocks[i]._se_reduce.bias: 'blocks_' + str(i) + '/se/conv2d/bias', # , [8]), 85 | model._blocks[i]._se_reduce.weight: 'blocks_' + str(i) + '/se/conv2d/kernel', # , [1, 1, 32, 8]), 86 | model._blocks[i]._se_expand.bias: 'blocks_' + str(i) + '/se/conv2d_1/bias', # , [32]), 87 | model._blocks[i]._se_expand.weight: 'blocks_' + str(i) + '/se/conv2d_1/kernel', # , [1, 1, 8, 32]), 88 | model._blocks[i]._bn1.bias: 'blocks_' + str(i) + '/tpu_batch_normalization/beta', # [32]), 89 | model._blocks[i]._bn1.weight: 'blocks_' + str(i) + '/tpu_batch_normalization/gamma', # [32]), 90 | model._blocks[i]._bn1.running_mean: 'blocks_' + str(i) + '/tpu_batch_normalization/moving_mean', 91 | model._blocks[i]._bn1.running_var: 'blocks_' + str(i) + '/tpu_batch_normalization/moving_variance', 92 | model._blocks[i]._bn2.bias: 'blocks_' + str(i) + '/tpu_batch_normalization_1/beta', # [16]), 93 | model._blocks[i]._bn2.weight: 'blocks_' + str(i) + '/tpu_batch_normalization_1/gamma', # [16]), 94 | model._blocks[i]._bn2.running_mean: 'blocks_' + str(i) + '/tpu_batch_normalization_1/moving_mean', 95 | model._blocks[i]._bn2.running_var: 'blocks_' + str(i) + '/tpu_batch_normalization_1/moving_variance', 96 | } 97 | 98 | else: 99 | conversion_table_block = { 100 | model._blocks[i]._expand_conv.weight: 'blocks_' + str(i) + '/conv2d/kernel', 101 | model._blocks[i]._project_conv.weight: 'blocks_' + str(i) + '/conv2d_1/kernel', 102 | model._blocks[i]._depthwise_conv.weight: 'blocks_' + str(i) + '/depthwise_conv2d/depthwise_kernel', 103 | model._blocks[i]._se_reduce.bias: 'blocks_' + str(i) + '/se/conv2d/bias', 104 | model._blocks[i]._se_reduce.weight: 'blocks_' + str(i) + '/se/conv2d/kernel', 105 | model._blocks[i]._se_expand.bias: 'blocks_' + str(i) + '/se/conv2d_1/bias', 106 | model._blocks[i]._se_expand.weight: 'blocks_' + str(i) + '/se/conv2d_1/kernel', 107 | model._blocks[i]._bn0.bias: 'blocks_' + str(i) + '/tpu_batch_normalization/beta', 108 | model._blocks[i]._bn0.weight: 'blocks_' + str(i) + '/tpu_batch_normalization/gamma', 109 | model._blocks[i]._bn0.running_mean: 'blocks_' + str(i) + '/tpu_batch_normalization/moving_mean', 110 | model._blocks[i]._bn0.running_var: 'blocks_' + str(i) + '/tpu_batch_normalization/moving_variance', 111 | model._blocks[i]._bn1.bias: 'blocks_' + str(i) + '/tpu_batch_normalization_1/beta', 112 | model._blocks[i]._bn1.weight: 'blocks_' + str(i) + '/tpu_batch_normalization_1/gamma', 113 | model._blocks[i]._bn1.running_mean: 'blocks_' + str(i) + '/tpu_batch_normalization_1/moving_mean', 114 | model._blocks[i]._bn1.running_var: 'blocks_' + str(i) + '/tpu_batch_normalization_1/moving_variance', 115 | model._blocks[i]._bn2.bias: 'blocks_' + str(i) + '/tpu_batch_normalization_2/beta', 116 | model._blocks[i]._bn2.weight: 'blocks_' + str(i) + '/tpu_batch_normalization_2/gamma', 117 | model._blocks[i]._bn2.running_mean: 'blocks_' + str(i) + 
'/tpu_batch_normalization_2/moving_mean', 118 | model._blocks[i]._bn2.running_var: 'blocks_' + str(i) + '/tpu_batch_normalization_2/moving_variance', 119 | } 120 | 121 | conversion_table = merge(conversion_table, conversion_table_block) 122 | 123 | # Load TensorFlow parameters into PyTorch model 124 | load_param(checkpoint_file, conversion_table, model_name) 125 | return conversion_table 126 | 127 | 128 | def load_and_save_temporary_tensorflow_model(model_name, model_ckpt, example_img= '../../example/img.jpg'): 129 | """ Loads and saves a TensorFlow model. """ 130 | image_files = [example_img] 131 | eval_ckpt_driver = eval_ckpt_main.EvalCkptDriver(model_name) 132 | with tf.Graph().as_default(), tf.compat.v1.Session() as sess: 133 | images, labels = eval_ckpt_driver.build_dataset(image_files, [0] * len(image_files), False) 134 | probs = eval_ckpt_driver.build_model(images, is_training=False) 135 | sess.run(tf.compat.v1.global_variables_initializer()) 136 | print(model_ckpt) 137 | eval_ckpt_driver.restore_model(sess, model_ckpt) 138 | tf.compat.v1.train.Saver().save(sess, 'tmp/model.ckpt') 139 | 140 | 141 | if __name__ == '__main__': 142 | 143 | import sys 144 | import argparse 145 | 146 | sys.path.append('original_tf') 147 | import eval_ckpt_main 148 | 149 | from efficientnet_pytorch import EfficientNet 150 | 151 | parser = argparse.ArgumentParser( 152 | description='Convert TF model to PyTorch model and save for easier future loading') 153 | parser.add_argument('--model_name', type=str, default='efficientnet-b0', 154 | help='efficientnet-b{N}, where N is an integer 0 <= N <= 8') 155 | parser.add_argument('--tf_checkpoint', type=str, default='pretrained_tensorflow/efficientnet-b0/', 156 | help='checkpoint file path') 157 | parser.add_argument('--output_file', type=str, default='pretrained_pytorch/efficientnet-b0.pth', 158 | help='output PyTorch model file name') 159 | args = parser.parse_args() 160 | 161 | # Build model 162 | model = EfficientNet.from_name(args.model_name) 163 | 164 | # Load and save temporary TensorFlow file due to TF nuances 165 | print(args.tf_checkpoint) 166 | load_and_save_temporary_tensorflow_model(args.model_name, args.tf_checkpoint) 167 | 168 | # Load weights 169 | load_efficientnet(model, 'tmp/model.ckpt', model_name=args.model_name) 170 | print('Loaded TF checkpoint weights') 171 | 172 | # Save PyTorch file 173 | torch.save(model.state_dict(), args.output_file) 174 | print('Saved model to', args.output_file) 175 | -------------------------------------------------------------------------------- /tf_to_pytorch/convert_tf_to_pt/load_tf_weights_tf1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import torch 4 | 5 | def load_param(checkpoint_file, conversion_table, model_name): 6 | """ 7 | Load parameters according to conversion_table. 
8 | 9 | Args: 10 | checkpoint_file (string): pretrained checkpoint model file in tensorflow 11 | conversion_table (dict): { pytorch tensor in a model : checkpoint variable name } 12 | """ 13 | for pyt_param, tf_param_name in conversion_table.items(): 14 | tf_param_name = str(model_name) + '/' + tf_param_name 15 | tf_param = tf.train.load_variable(checkpoint_file, tf_param_name) 16 | if 'conv' in tf_param_name and 'kernel' in tf_param_name: 17 | tf_param = np.transpose(tf_param, (3, 2, 0, 1)) 18 | if 'depthwise' in tf_param_name: 19 | tf_param = np.transpose(tf_param, (1, 0, 2, 3)) 20 | elif tf_param_name.endswith('kernel'): # for weight(kernel), we should do transpose 21 | tf_param = np.transpose(tf_param) 22 | assert pyt_param.size() == tf_param.shape, \ 23 | 'Dim Mismatch: %s vs %s ; %s' % (tuple(pyt_param.size()), tf_param.shape, tf_param_name) 24 | pyt_param.data = torch.from_numpy(tf_param) 25 | 26 | 27 | def load_efficientnet(model, checkpoint_file, model_name): 28 | """ 29 | Load PyTorch EfficientNet from TensorFlow checkpoint file 30 | """ 31 | 32 | # This will store the enire conversion table 33 | conversion_table = {} 34 | merge = lambda dict1, dict2: {**dict1, **dict2} 35 | 36 | # All the weights not in the conv blocks 37 | conversion_table_for_weights_outside_blocks = { 38 | model._conv_stem.weight: 'stem/conv2d/kernel', # [3, 3, 3, 32]), 39 | model._bn0.bias: 'stem/tpu_batch_normalization/beta', # [32]), 40 | model._bn0.weight: 'stem/tpu_batch_normalization/gamma', # [32]), 41 | model._bn0.running_mean: 'stem/tpu_batch_normalization/moving_mean', # [32]), 42 | model._bn0.running_var: 'stem/tpu_batch_normalization/moving_variance', # [32]), 43 | model._conv_head.weight: 'head/conv2d/kernel', # [1, 1, 320, 1280]), 44 | model._bn1.bias: 'head/tpu_batch_normalization/beta', # [1280]), 45 | model._bn1.weight: 'head/tpu_batch_normalization/gamma', # [1280]), 46 | model._bn1.running_mean: 'head/tpu_batch_normalization/moving_mean', # [32]), 47 | model._bn1.running_var: 'head/tpu_batch_normalization/moving_variance', # [32]), 48 | model._fc.bias: 'head/dense/bias', # [1000]), 49 | model._fc.weight: 'head/dense/kernel', # [1280, 1000]), 50 | } 51 | conversion_table = merge(conversion_table, conversion_table_for_weights_outside_blocks) 52 | 53 | # The first conv block is special because it does not have _expand_conv 54 | conversion_table_for_first_block = { 55 | model._blocks[0]._project_conv.weight: 'blocks_0/conv2d/kernel', # 1, 1, 32, 16]), 56 | model._blocks[0]._depthwise_conv.weight: 'blocks_0/depthwise_conv2d/depthwise_kernel', # [3, 3, 32, 1]), 57 | model._blocks[0]._se_reduce.bias: 'blocks_0/se/conv2d/bias', # , [8]), 58 | model._blocks[0]._se_reduce.weight: 'blocks_0/se/conv2d/kernel', # , [1, 1, 32, 8]), 59 | model._blocks[0]._se_expand.bias: 'blocks_0/se/conv2d_1/bias', # , [32]), 60 | model._blocks[0]._se_expand.weight: 'blocks_0/se/conv2d_1/kernel', # , [1, 1, 8, 32]), 61 | model._blocks[0]._bn1.bias: 'blocks_0/tpu_batch_normalization/beta', # [32]), 62 | model._blocks[0]._bn1.weight: 'blocks_0/tpu_batch_normalization/gamma', # [32]), 63 | model._blocks[0]._bn1.running_mean: 'blocks_0/tpu_batch_normalization/moving_mean', 64 | model._blocks[0]._bn1.running_var: 'blocks_0/tpu_batch_normalization/moving_variance', 65 | model._blocks[0]._bn2.bias: 'blocks_0/tpu_batch_normalization_1/beta', # [16]), 66 | model._blocks[0]._bn2.weight: 'blocks_0/tpu_batch_normalization_1/gamma', # [16]), 67 | model._blocks[0]._bn2.running_mean: 'blocks_0/tpu_batch_normalization_1/moving_mean', 68 
| model._blocks[0]._bn2.running_var: 'blocks_0/tpu_batch_normalization_1/moving_variance', 69 | } 70 | conversion_table = merge(conversion_table, conversion_table_for_first_block) 71 | 72 | # Conv blocks 73 | for i in range(len(model._blocks)): 74 | 75 | is_first_block = '_expand_conv.weight' not in [n for n, p in model._blocks[i].named_parameters()] 76 | 77 | if is_first_block: 78 | conversion_table_block = { 79 | model._blocks[i]._project_conv.weight: 'blocks_' + str(i) + '/conv2d/kernel', # 1, 1, 32, 16]), 80 | model._blocks[i]._depthwise_conv.weight: 'blocks_' + str(i) + '/depthwise_conv2d/depthwise_kernel', 81 | # [3, 3, 32, 1]), 82 | model._blocks[i]._se_reduce.bias: 'blocks_' + str(i) + '/se/conv2d/bias', # , [8]), 83 | model._blocks[i]._se_reduce.weight: 'blocks_' + str(i) + '/se/conv2d/kernel', # , [1, 1, 32, 8]), 84 | model._blocks[i]._se_expand.bias: 'blocks_' + str(i) + '/se/conv2d_1/bias', # , [32]), 85 | model._blocks[i]._se_expand.weight: 'blocks_' + str(i) + '/se/conv2d_1/kernel', # , [1, 1, 8, 32]), 86 | model._blocks[i]._bn1.bias: 'blocks_' + str(i) + '/tpu_batch_normalization/beta', # [32]), 87 | model._blocks[i]._bn1.weight: 'blocks_' + str(i) + '/tpu_batch_normalization/gamma', # [32]), 88 | model._blocks[i]._bn1.running_mean: 'blocks_' + str(i) + '/tpu_batch_normalization/moving_mean', 89 | model._blocks[i]._bn1.running_var: 'blocks_' + str(i) + '/tpu_batch_normalization/moving_variance', 90 | model._blocks[i]._bn2.bias: 'blocks_' + str(i) + '/tpu_batch_normalization_1/beta', # [16]), 91 | model._blocks[i]._bn2.weight: 'blocks_' + str(i) + '/tpu_batch_normalization_1/gamma', # [16]), 92 | model._blocks[i]._bn2.running_mean: 'blocks_' + str(i) + '/tpu_batch_normalization_1/moving_mean', 93 | model._blocks[i]._bn2.running_var: 'blocks_' + str(i) + '/tpu_batch_normalization_1/moving_variance', 94 | } 95 | 96 | else: 97 | conversion_table_block = { 98 | model._blocks[i]._expand_conv.weight: 'blocks_' + str(i) + '/conv2d/kernel', 99 | model._blocks[i]._project_conv.weight: 'blocks_' + str(i) + '/conv2d_1/kernel', 100 | model._blocks[i]._depthwise_conv.weight: 'blocks_' + str(i) + '/depthwise_conv2d/depthwise_kernel', 101 | model._blocks[i]._se_reduce.bias: 'blocks_' + str(i) + '/se/conv2d/bias', 102 | model._blocks[i]._se_reduce.weight: 'blocks_' + str(i) + '/se/conv2d/kernel', 103 | model._blocks[i]._se_expand.bias: 'blocks_' + str(i) + '/se/conv2d_1/bias', 104 | model._blocks[i]._se_expand.weight: 'blocks_' + str(i) + '/se/conv2d_1/kernel', 105 | model._blocks[i]._bn0.bias: 'blocks_' + str(i) + '/tpu_batch_normalization/beta', 106 | model._blocks[i]._bn0.weight: 'blocks_' + str(i) + '/tpu_batch_normalization/gamma', 107 | model._blocks[i]._bn0.running_mean: 'blocks_' + str(i) + '/tpu_batch_normalization/moving_mean', 108 | model._blocks[i]._bn0.running_var: 'blocks_' + str(i) + '/tpu_batch_normalization/moving_variance', 109 | model._blocks[i]._bn1.bias: 'blocks_' + str(i) + '/tpu_batch_normalization_1/beta', 110 | model._blocks[i]._bn1.weight: 'blocks_' + str(i) + '/tpu_batch_normalization_1/gamma', 111 | model._blocks[i]._bn1.running_mean: 'blocks_' + str(i) + '/tpu_batch_normalization_1/moving_mean', 112 | model._blocks[i]._bn1.running_var: 'blocks_' + str(i) + '/tpu_batch_normalization_1/moving_variance', 113 | model._blocks[i]._bn2.bias: 'blocks_' + str(i) + '/tpu_batch_normalization_2/beta', 114 | model._blocks[i]._bn2.weight: 'blocks_' + str(i) + '/tpu_batch_normalization_2/gamma', 115 | model._blocks[i]._bn2.running_mean: 'blocks_' + str(i) + 
'/tpu_batch_normalization_2/moving_mean', 116 | model._blocks[i]._bn2.running_var: 'blocks_' + str(i) + '/tpu_batch_normalization_2/moving_variance', 117 | } 118 | 119 | conversion_table = merge(conversion_table, conversion_table_block) 120 | 121 | # Load TensorFlow parameters into PyTorch model 122 | load_param(checkpoint_file, conversion_table, model_name) 123 | return conversion_table 124 | 125 | 126 | def load_and_save_temporary_tensorflow_model(model_name, model_ckpt, example_img= '../../example/img.jpg'): 127 | """ Loads and saves a TensorFlow model. """ 128 | image_files = [example_img] 129 | eval_ckpt_driver = eval_ckpt_main.EvalCkptDriver(model_name) 130 | with tf.Graph().as_default(), tf.Session() as sess: 131 | images, labels = eval_ckpt_driver.build_dataset(image_files, [0] * len(image_files), False) 132 | probs = eval_ckpt_driver.build_model(images, is_training=False) 133 | sess.run(tf.global_variables_initializer()) 134 | print(model_ckpt) 135 | eval_ckpt_driver.restore_model(sess, model_ckpt) 136 | tf.train.Saver().save(sess, 'tmp/model.ckpt') 137 | 138 | 139 | if __name__ == '__main__': 140 | 141 | import sys 142 | import argparse 143 | 144 | sys.path.append('original_tf') 145 | import eval_ckpt_main 146 | 147 | from efficientnet_pytorch import EfficientNet 148 | 149 | parser = argparse.ArgumentParser( 150 | description='Convert TF model to PyTorch model and save for easier future loading') 151 | parser.add_argument('--model_name', type=str, default='efficientnet-b0', 152 | help='efficientnet-b{N}, where N is an integer 0 <= N <= 8') 153 | parser.add_argument('--tf_checkpoint', type=str, default='pretrained_tensorflow/efficientnet-b0/', 154 | help='checkpoint file path') 155 | parser.add_argument('--output_file', type=str, default='pretrained_pytorch/efficientnet-b0.pth', 156 | help='output PyTorch model file name') 157 | args = parser.parse_args() 158 | 159 | # Build model 160 | model = EfficientNet.from_name(args.model_name) 161 | 162 | # Load and save temporary TensorFlow file due to TF nuances 163 | print(args.tf_checkpoint) 164 | load_and_save_temporary_tensorflow_model(args.model_name, args.tf_checkpoint) 165 | 166 | # Load weights 167 | load_efficientnet(model, 'tmp/model.ckpt', model_name=args.model_name) 168 | print('Loaded TF checkpoint weights') 169 | 170 | # Save PyTorch file 171 | torch.save(model.state_dict(), args.output_file) 172 | print('Saved model to', args.output_file) 173 | -------------------------------------------------------------------------------- /tf_to_pytorch/convert_tf_to_pt/original_tf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lukemelas/EfficientNet-PyTorch/7e8b0d312162f335785fb5dcfa1df29a75a1783a/tf_to_pytorch/convert_tf_to_pt/original_tf/__init__.py -------------------------------------------------------------------------------- /tf_to_pytorch/convert_tf_to_pt/original_tf/efficientnet_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Model Builder for EfficientNet.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import functools 22 | import os 23 | import re 24 | from absl import logging 25 | import numpy as np 26 | import six 27 | import tensorflow.compat.v1 as tf 28 | 29 | import efficientnet_model 30 | import utils 31 | MEAN_RGB = [0.485 * 255, 0.456 * 255, 0.406 * 255] 32 | STDDEV_RGB = [0.229 * 255, 0.224 * 255, 0.225 * 255] 33 | 34 | 35 | def efficientnet_params(model_name): 36 | """Get efficientnet params based on model name.""" 37 | params_dict = { 38 | # (width_coefficient, depth_coefficient, resolution, dropout_rate) 39 | 'efficientnet-b0': (1.0, 1.0, 224, 0.2), 40 | 'efficientnet-b1': (1.0, 1.1, 240, 0.2), 41 | 'efficientnet-b2': (1.1, 1.2, 260, 0.3), 42 | 'efficientnet-b3': (1.2, 1.4, 300, 0.3), 43 | 'efficientnet-b4': (1.4, 1.8, 380, 0.4), 44 | 'efficientnet-b5': (1.6, 2.2, 456, 0.4), 45 | 'efficientnet-b6': (1.8, 2.6, 528, 0.5), 46 | 'efficientnet-b7': (2.0, 3.1, 600, 0.5), 47 | 'efficientnet-b8': (2.2, 3.6, 672, 0.5), 48 | 'efficientnet-l2': (4.3, 5.3, 800, 0.5), 49 | } 50 | return params_dict[model_name] 51 | 52 | 53 | class BlockDecoder(object): 54 | """Block Decoder for readability.""" 55 | 56 | def _decode_block_string(self, block_string): 57 | """Gets a block through a string notation of arguments.""" 58 | if six.PY2: 59 | assert isinstance(block_string, (str, unicode)) 60 | else: 61 | assert isinstance(block_string, str) 62 | ops = block_string.split('_') 63 | options = {} 64 | for op in ops: 65 | splits = re.split(r'(\d.*)', op) 66 | if len(splits) >= 2: 67 | key, value = splits[:2] 68 | options[key] = value 69 | 70 | if 's' not in options or len(options['s']) != 2: 71 | raise ValueError('Strides options should be a pair of integers.') 72 | 73 | return efficientnet_model.BlockArgs( 74 | kernel_size=int(options['k']), 75 | num_repeat=int(options['r']), 76 | input_filters=int(options['i']), 77 | output_filters=int(options['o']), 78 | expand_ratio=int(options['e']), 79 | id_skip=('noskip' not in block_string), 80 | se_ratio=float(options['se']) if 'se' in options else None, 81 | strides=[int(options['s'][0]), 82 | int(options['s'][1])], 83 | conv_type=int(options['c']) if 'c' in options else 0, 84 | fused_conv=int(options['f']) if 'f' in options else 0, 85 | super_pixel=int(options['p']) if 'p' in options else 0, 86 | condconv=('cc' in block_string)) 87 | 88 | def _encode_block_string(self, block): 89 | """Encodes a block to a string.""" 90 | args = [ 91 | 'r%d' % block.num_repeat, 92 | 'k%d' % block.kernel_size, 93 | 's%d%d' % (block.strides[0], block.strides[1]), 94 | 'e%s' % block.expand_ratio, 95 | 'i%d' % block.input_filters, 96 | 'o%d' % block.output_filters, 97 | 'c%d' % block.conv_type, 98 | 'f%d' % block.fused_conv, 99 | 'p%d' % block.super_pixel, 100 | ] 101 | if block.se_ratio > 0 and block.se_ratio <= 1: 102 | args.append('se%s' % block.se_ratio) 103 | if 
block.id_skip is False: # pylint: disable=g-bool-id-comparison 104 | args.append('noskip') 105 | if block.condconv: 106 | args.append('cc') 107 | return '_'.join(args) 108 | 109 | def decode(self, string_list): 110 | """Decodes a list of string notations to specify blocks inside the network. 111 | 112 | Args: 113 | string_list: a list of strings, each string is a notation of block. 114 | 115 | Returns: 116 | A list of namedtuples to represent blocks arguments. 117 | """ 118 | assert isinstance(string_list, list) 119 | blocks_args = [] 120 | for block_string in string_list: 121 | blocks_args.append(self._decode_block_string(block_string)) 122 | return blocks_args 123 | 124 | def encode(self, blocks_args): 125 | """Encodes a list of Blocks to a list of strings. 126 | 127 | Args: 128 | blocks_args: A list of namedtuples to represent blocks arguments. 129 | Returns: 130 | a list of strings, each string is a notation of block. 131 | """ 132 | block_strings = [] 133 | for block in blocks_args: 134 | block_strings.append(self._encode_block_string(block)) 135 | return block_strings 136 | 137 | 138 | def swish(features, use_native=True, use_hard=False): 139 | """Computes the Swish activation function. 140 | 141 | We provide three alternnatives: 142 | - Native tf.nn.swish, use less memory during training than composable swish. 143 | - Quantization friendly hard swish. 144 | - A composable swish, equivalant to tf.nn.swish, but more general for 145 | finetuning and TF-Hub. 146 | 147 | Args: 148 | features: A `Tensor` representing preactivation values. 149 | use_native: Whether to use the native swish from tf.nn that uses a custom 150 | gradient to reduce memory usage, or to use customized swish that uses 151 | default TensorFlow gradient computation. 152 | use_hard: Whether to use quantization-friendly hard swish. 153 | 154 | Returns: 155 | The activation value. 156 | """ 157 | if use_native and use_hard: 158 | raise ValueError('Cannot specify both use_native and use_hard.') 159 | 160 | if use_native: 161 | return tf.nn.swish(features) 162 | 163 | if use_hard: 164 | return features * tf.nn.relu6(features + np.float32(3)) * (1. / 6.) 165 | 166 | features = tf.convert_to_tensor(features, name='features') 167 | return features * tf.nn.sigmoid(features) 168 | 169 | 170 | _DEFAULT_BLOCKS_ARGS = [ 171 | 'r1_k3_s11_e1_i32_o16_se0.25', 'r2_k3_s22_e6_i16_o24_se0.25', 172 | 'r2_k5_s22_e6_i24_o40_se0.25', 'r3_k3_s22_e6_i40_o80_se0.25', 173 | 'r3_k5_s11_e6_i80_o112_se0.25', 'r4_k5_s22_e6_i112_o192_se0.25', 174 | 'r1_k3_s11_e6_i192_o320_se0.25', 175 | ] 176 | 177 | 178 | def efficientnet(width_coefficient=None, 179 | depth_coefficient=None, 180 | dropout_rate=0.2, 181 | survival_prob=0.8): 182 | """Creates a efficientnet model.""" 183 | global_params = efficientnet_model.GlobalParams( 184 | blocks_args=_DEFAULT_BLOCKS_ARGS, 185 | batch_norm_momentum=0.99, 186 | batch_norm_epsilon=1e-3, 187 | dropout_rate=dropout_rate, 188 | survival_prob=survival_prob, 189 | data_format='channels_last', 190 | num_classes=1000, 191 | width_coefficient=width_coefficient, 192 | depth_coefficient=depth_coefficient, 193 | depth_divisor=8, 194 | min_depth=None, 195 | relu_fn=tf.nn.swish, 196 | # The default is TPU-specific batch norm. 197 | # The alternative is tf.layers.BatchNormalization. 198 | batch_norm=utils.TpuBatchNormalization, # TPU-specific requirement. 
199 | use_se=True, 200 | clip_projection_output=False) 201 | return global_params 202 | 203 | 204 | def get_model_params(model_name, override_params): 205 | """Get the block args and global params for a given model.""" 206 | if model_name.startswith('efficientnet'): 207 | width_coefficient, depth_coefficient, _, dropout_rate = ( 208 | efficientnet_params(model_name)) 209 | global_params = efficientnet( 210 | width_coefficient, depth_coefficient, dropout_rate) 211 | else: 212 | raise NotImplementedError('model name is not pre-defined: %s' % model_name) 213 | 214 | if override_params: 215 | # ValueError will be raised here if override_params has fields not included 216 | # in global_params. 217 | global_params = global_params._replace(**override_params) 218 | 219 | decoder = BlockDecoder() 220 | blocks_args = decoder.decode(global_params.blocks_args) 221 | 222 | logging.info('global_params= %s', global_params) 223 | return blocks_args, global_params 224 | 225 | 226 | def build_model(images, 227 | model_name, 228 | training, 229 | override_params=None, 230 | model_dir=None, 231 | fine_tuning=False, 232 | features_only=False, 233 | pooled_features_only=False): 234 | """A helper functiion to creates a model and returns predicted logits. 235 | 236 | Args: 237 | images: input images tensor. 238 | model_name: string, the predefined model name. 239 | training: boolean, whether the model is constructed for training. 240 | override_params: A dictionary of params for overriding. Fields must exist in 241 | efficientnet_model.GlobalParams. 242 | model_dir: string, optional model dir for saving configs. 243 | fine_tuning: boolean, whether the model is used for finetuning. 244 | features_only: build the base feature network only (excluding final 245 | 1x1 conv layer, global pooling, dropout and fc head). 246 | pooled_features_only: build the base network for features extraction (after 247 | 1x1 conv layer and global pooling, but before dropout and fc head). 248 | 249 | Returns: 250 | logits: the logits tensor of classes. 251 | endpoints: the endpoints for each layer. 252 | 253 | Raises: 254 | When model_name specified an undefined model, raises NotImplementedError. 255 | When override_params has invalid fields, raises ValueError. 256 | """ 257 | assert isinstance(images, tf.Tensor) 258 | assert not (features_only and pooled_features_only) 259 | 260 | # For backward compatibility. 
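# Editor's note: 'drop_connect_rate' is the legacy override key; it maps to the newer
# 'survival_prob' as survival_prob = 1 - drop_connect_rate, e.g. an override of
# {'drop_connect_rate': 0.2} is translated into survival_prob = 0.8 just below.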
261 | if override_params and override_params.get('drop_connect_rate', None): 262 | override_params['survival_prob'] = 1 - override_params['drop_connect_rate'] 263 | 264 | if not training or fine_tuning: 265 | if not override_params: 266 | override_params = {} 267 | override_params['batch_norm'] = utils.BatchNormalization 268 | if fine_tuning: 269 | override_params['relu_fn'] = functools.partial(swish, use_native=False) 270 | blocks_args, global_params = get_model_params(model_name, override_params) 271 | 272 | if model_dir: 273 | param_file = os.path.join(model_dir, 'model_params.txt') 274 | if not tf.gfile.Exists(param_file): 275 | if not tf.gfile.Exists(model_dir): 276 | tf.gfile.MakeDirs(model_dir) 277 | with tf.gfile.GFile(param_file, 'w') as f: 278 | logging.info('writing to %s', param_file) 279 | f.write('model_name= %s\n\n' % model_name) 280 | f.write('global_params= %s\n\n' % str(global_params)) 281 | f.write('blocks_args= %s\n\n' % str(blocks_args)) 282 | 283 | with tf.variable_scope(model_name): 284 | model = efficientnet_model.Model(blocks_args, global_params) 285 | outputs = model( 286 | images, 287 | training=training, 288 | features_only=features_only, 289 | pooled_features_only=pooled_features_only) 290 | if features_only: 291 | outputs = tf.identity(outputs, 'features') 292 | elif pooled_features_only: 293 | outputs = tf.identity(outputs, 'pooled_features') 294 | else: 295 | outputs = tf.identity(outputs, 'logits') 296 | return outputs, model.endpoints 297 | 298 | 299 | def build_model_base(images, model_name, training, override_params=None): 300 | """A helper functiion to create a base model and return global_pool. 301 | 302 | Args: 303 | images: input images tensor. 304 | model_name: string, the predefined model name. 305 | training: boolean, whether the model is constructed for training. 306 | override_params: A dictionary of params for overriding. Fields must exist in 307 | efficientnet_model.GlobalParams. 308 | 309 | Returns: 310 | features: global pool features. 311 | endpoints: the endpoints for each layer. 312 | 313 | Raises: 314 | When model_name specified an undefined model, raises NotImplementedError. 315 | When override_params has invalid fields, raises ValueError. 316 | """ 317 | assert isinstance(images, tf.Tensor) 318 | # For backward compatibility. 319 | if override_params and override_params.get('drop_connect_rate', None): 320 | override_params['survival_prob'] = 1 - override_params['drop_connect_rate'] 321 | 322 | blocks_args, global_params = get_model_params(model_name, override_params) 323 | 324 | with tf.variable_scope(model_name): 325 | model = efficientnet_model.Model(blocks_args, global_params) 326 | features = model(images, training=training, features_only=True) 327 | 328 | features = tf.identity(features, 'features') 329 | return features, model.endpoints 330 | -------------------------------------------------------------------------------- /tf_to_pytorch/convert_tf_to_pt/original_tf/efficientnet_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains definitions for EfficientNet model. 16 | 17 | [1] Mingxing Tan, Quoc V. Le 18 | EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks. 19 | ICML'19, https://arxiv.org/abs/1905.11946 20 | """ 21 | 22 | from __future__ import absolute_import 23 | from __future__ import division 24 | from __future__ import print_function 25 | 26 | import collections 27 | import functools 28 | import math 29 | 30 | from absl import logging 31 | import numpy as np 32 | import six 33 | from six.moves import xrange 34 | import tensorflow.compat.v1 as tf 35 | 36 | import utils 37 | # from condconv import condconv_layers 38 | 39 | GlobalParams = collections.namedtuple('GlobalParams', [ 40 | 'batch_norm_momentum', 'batch_norm_epsilon', 'dropout_rate', 'data_format', 41 | 'num_classes', 'width_coefficient', 'depth_coefficient', 'depth_divisor', 42 | 'min_depth', 'survival_prob', 'relu_fn', 'batch_norm', 'use_se', 43 | 'local_pooling', 'condconv_num_experts', 'clip_projection_output', 44 | 'blocks_args' 45 | ]) 46 | GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields) 47 | 48 | BlockArgs = collections.namedtuple('BlockArgs', [ 49 | 'kernel_size', 'num_repeat', 'input_filters', 'output_filters', 50 | 'expand_ratio', 'id_skip', 'strides', 'se_ratio', 'conv_type', 'fused_conv', 51 | 'super_pixel', 'condconv' 52 | ]) 53 | # defaults will be a public argument for namedtuple in Python 3.7 54 | # https://docs.python.org/3/library/collections.html#collections.namedtuple 55 | BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields) 56 | 57 | 58 | def conv_kernel_initializer(shape, dtype=None, partition_info=None): 59 | """Initialization for convolutional kernels. 60 | 61 | The main difference with tf.variance_scaling_initializer is that 62 | tf.variance_scaling_initializer uses a truncated normal with an uncorrected 63 | standard deviation, whereas here we use a normal distribution. Similarly, 64 | tf.initializers.variance_scaling uses a truncated normal with 65 | a corrected standard deviation. 66 | 67 | Args: 68 | shape: shape of variable 69 | dtype: dtype of variable 70 | partition_info: unused 71 | 72 | Returns: 73 | an initialization for the variable 74 | """ 75 | del partition_info 76 | kernel_height, kernel_width, _, out_filters = shape 77 | fan_out = int(kernel_height * kernel_width * out_filters) 78 | return tf.random_normal( 79 | shape, mean=0.0, stddev=np.sqrt(2.0 / fan_out), dtype=dtype) 80 | 81 | 82 | def dense_kernel_initializer(shape, dtype=None, partition_info=None): 83 | """Initialization for dense kernels. 84 | 85 | This initialization is equal to 86 | tf.variance_scaling_initializer(scale=1.0/3.0, mode='fan_out', 87 | distribution='uniform'). 88 | It is written out explicitly here for clarity. 
89 | 90 | Args: 91 | shape: shape of variable 92 | dtype: dtype of variable 93 | partition_info: unused 94 | 95 | Returns: 96 | an initialization for the variable 97 | """ 98 | del partition_info 99 | init_range = 1.0 / np.sqrt(shape[1]) 100 | return tf.random_uniform(shape, -init_range, init_range, dtype=dtype) 101 | 102 | 103 | def superpixel_kernel_initializer(shape, dtype='float32', partition_info=None): 104 | """Initializes superpixel kernels. 105 | 106 | This is inspired by space-to-depth transformation that is mathematically 107 | equivalent before and after the transformation. But we do the space-to-depth 108 | via a convolution. Moreover, we make the layer trainable instead of direct 109 | transform, we can initialization it this way so that the model can learn not 110 | to do anything but keep it mathematically equivalent, when improving 111 | performance. 112 | 113 | 114 | Args: 115 | shape: shape of variable 116 | dtype: dtype of variable 117 | partition_info: unused 118 | 119 | Returns: 120 | an initialization for the variable 121 | """ 122 | del partition_info 123 | # use input depth to make superpixel kernel. 124 | depth = shape[-2] 125 | filters = np.zeros([2, 2, depth, 4 * depth], dtype=dtype) 126 | i = np.arange(2) 127 | j = np.arange(2) 128 | k = np.arange(depth) 129 | mesh = np.array(np.meshgrid(i, j, k)).T.reshape(-1, 3).T 130 | filters[ 131 | mesh[0], 132 | mesh[1], 133 | mesh[2], 134 | 4 * mesh[2] + 2 * mesh[0] + mesh[1]] = 1 135 | return filters 136 | 137 | 138 | def round_filters(filters, global_params): 139 | """Round number of filters based on depth multiplier.""" 140 | orig_f = filters 141 | multiplier = global_params.width_coefficient 142 | divisor = global_params.depth_divisor 143 | min_depth = global_params.min_depth 144 | if not multiplier: 145 | return filters 146 | 147 | filters *= multiplier 148 | min_depth = min_depth or divisor 149 | new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) 150 | # Make sure that round down does not go down by more than 10%. 151 | if new_filters < 0.9 * filters: 152 | new_filters += divisor 153 | logging.info('round_filter input=%s output=%s', orig_f, new_filters) 154 | return int(new_filters) 155 | 156 | 157 | def round_repeats(repeats, global_params): 158 | """Round number of filters based on depth multiplier.""" 159 | multiplier = global_params.depth_coefficient 160 | if not multiplier: 161 | return repeats 162 | return int(math.ceil(multiplier * repeats)) 163 | 164 | 165 | class MBConvBlock(tf.keras.layers.Layer): 166 | """A class of MBConv: Mobile Inverted Residual Bottleneck. 167 | 168 | Attributes: 169 | endpoints: dict. A list of internal tensors. 170 | """ 171 | 172 | def __init__(self, block_args, global_params): 173 | """Initializes a MBConv block. 174 | 175 | Args: 176 | block_args: BlockArgs, arguments to create a Block. 177 | global_params: GlobalParams, a set of global parameters. 
178 | """ 179 | super(MBConvBlock, self).__init__() 180 | self._block_args = block_args 181 | self._batch_norm_momentum = global_params.batch_norm_momentum 182 | self._batch_norm_epsilon = global_params.batch_norm_epsilon 183 | self._batch_norm = global_params.batch_norm 184 | self._condconv_num_experts = global_params.condconv_num_experts 185 | self._data_format = global_params.data_format 186 | if self._data_format == 'channels_first': 187 | self._channel_axis = 1 188 | self._spatial_dims = [2, 3] 189 | else: 190 | self._channel_axis = -1 191 | self._spatial_dims = [1, 2] 192 | 193 | self._relu_fn = global_params.relu_fn or tf.nn.swish 194 | self._has_se = ( 195 | global_params.use_se and self._block_args.se_ratio is not None and 196 | 0 < self._block_args.se_ratio <= 1) 197 | 198 | self._clip_projection_output = global_params.clip_projection_output 199 | 200 | self.endpoints = None 201 | 202 | self.conv_cls = tf.layers.Conv2D 203 | self.depthwise_conv_cls = utils.DepthwiseConv2D 204 | if self._block_args.condconv: 205 | self.conv_cls = functools.partial( 206 | condconv_layers.CondConv2D, num_experts=self._condconv_num_experts) 207 | self.depthwise_conv_cls = functools.partial( 208 | condconv_layers.DepthwiseCondConv2D, 209 | num_experts=self._condconv_num_experts) 210 | 211 | # Builds the block accordings to arguments. 212 | self._build() 213 | 214 | def block_args(self): 215 | return self._block_args 216 | 217 | def _build(self): 218 | """Builds block according to the arguments.""" 219 | if self._block_args.super_pixel == 1: 220 | self._superpixel = tf.layers.Conv2D( 221 | self._block_args.input_filters, 222 | kernel_size=[2, 2], 223 | strides=[2, 2], 224 | kernel_initializer=conv_kernel_initializer, 225 | padding='same', 226 | data_format=self._data_format, 227 | use_bias=False) 228 | self._bnsp = self._batch_norm( 229 | axis=self._channel_axis, 230 | momentum=self._batch_norm_momentum, 231 | epsilon=self._batch_norm_epsilon) 232 | 233 | if self._block_args.condconv: 234 | # Add the example-dependent routing function 235 | self._avg_pooling = tf.keras.layers.GlobalAveragePooling2D( 236 | data_format=self._data_format) 237 | self._routing_fn = tf.layers.Dense( 238 | self._condconv_num_experts, activation=tf.nn.sigmoid) 239 | 240 | filters = self._block_args.input_filters * self._block_args.expand_ratio 241 | kernel_size = self._block_args.kernel_size 242 | 243 | # Fused expansion phase. Called if using fused convolutions. 244 | self._fused_conv = self.conv_cls( 245 | filters=filters, 246 | kernel_size=[kernel_size, kernel_size], 247 | strides=self._block_args.strides, 248 | kernel_initializer=conv_kernel_initializer, 249 | padding='same', 250 | data_format=self._data_format, 251 | use_bias=False) 252 | 253 | # Expansion phase. Called if not using fused convolutions and expansion 254 | # phase is necessary. 255 | self._expand_conv = self.conv_cls( 256 | filters=filters, 257 | kernel_size=[1, 1], 258 | strides=[1, 1], 259 | kernel_initializer=conv_kernel_initializer, 260 | padding='same', 261 | data_format=self._data_format, 262 | use_bias=False) 263 | self._bn0 = self._batch_norm( 264 | axis=self._channel_axis, 265 | momentum=self._batch_norm_momentum, 266 | epsilon=self._batch_norm_epsilon) 267 | 268 | # Depth-wise convolution phase. Called if not using fused convolutions. 
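# Editor's note: TensorFlow stores a depthwise kernel as [k, k, in_channels, channel_multiplier],
# whereas PyTorch's grouped Conv2d expects [in_channels, multiplier, k, k]; this layout
# difference is why the load_tf_weights scripts in this directory apply an extra dimension
# swap for any checkpoint variable whose name contains 'depthwise' during conversion.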
269 | self._depthwise_conv = self.depthwise_conv_cls( 270 | kernel_size=[kernel_size, kernel_size], 271 | strides=self._block_args.strides, 272 | depthwise_initializer=conv_kernel_initializer, 273 | padding='same', 274 | data_format=self._data_format, 275 | use_bias=False) 276 | 277 | self._bn1 = self._batch_norm( 278 | axis=self._channel_axis, 279 | momentum=self._batch_norm_momentum, 280 | epsilon=self._batch_norm_epsilon) 281 | 282 | if self._has_se: 283 | num_reduced_filters = max( 284 | 1, int(self._block_args.input_filters * self._block_args.se_ratio)) 285 | # Squeeze and Excitation layer. 286 | self._se_reduce = tf.layers.Conv2D( 287 | num_reduced_filters, 288 | kernel_size=[1, 1], 289 | strides=[1, 1], 290 | kernel_initializer=conv_kernel_initializer, 291 | padding='same', 292 | data_format=self._data_format, 293 | use_bias=True) 294 | self._se_expand = tf.layers.Conv2D( 295 | filters, 296 | kernel_size=[1, 1], 297 | strides=[1, 1], 298 | kernel_initializer=conv_kernel_initializer, 299 | padding='same', 300 | data_format=self._data_format, 301 | use_bias=True) 302 | 303 | # Output phase. 304 | filters = self._block_args.output_filters 305 | self._project_conv = self.conv_cls( 306 | filters=filters, 307 | kernel_size=[1, 1], 308 | strides=[1, 1], 309 | kernel_initializer=conv_kernel_initializer, 310 | padding='same', 311 | data_format=self._data_format, 312 | use_bias=False) 313 | self._bn2 = self._batch_norm( 314 | axis=self._channel_axis, 315 | momentum=self._batch_norm_momentum, 316 | epsilon=self._batch_norm_epsilon) 317 | 318 | def _call_se(self, input_tensor): 319 | """Call Squeeze and Excitation layer. 320 | 321 | Args: 322 | input_tensor: Tensor, a single input tensor for Squeeze/Excitation layer. 323 | 324 | Returns: 325 | A output tensor, which should have the same shape as input. 326 | """ 327 | se_tensor = tf.reduce_mean(input_tensor, self._spatial_dims, keepdims=True) 328 | se_tensor = self._se_expand(self._relu_fn(self._se_reduce(se_tensor))) 329 | logging.info('Built Squeeze and Excitation with tensor shape: %s', 330 | (se_tensor.shape)) 331 | return tf.sigmoid(se_tensor) * input_tensor 332 | 333 | def call(self, inputs, training=True, survival_prob=None): 334 | """Implementation of call(). 335 | 336 | Args: 337 | inputs: the inputs tensor. 338 | training: boolean, whether the model is constructed for training. 339 | survival_prob: float, between 0 to 1, drop connect rate. 340 | 341 | Returns: 342 | A output tensor. 
343 | """ 344 | logging.info('Block input: %s shape: %s', inputs.name, inputs.shape) 345 | logging.info('Block input depth: %s output depth: %s', 346 | self._block_args.input_filters, 347 | self._block_args.output_filters) 348 | 349 | x = inputs 350 | 351 | fused_conv_fn = self._fused_conv 352 | expand_conv_fn = self._expand_conv 353 | depthwise_conv_fn = self._depthwise_conv 354 | project_conv_fn = self._project_conv 355 | 356 | if self._block_args.condconv: 357 | pooled_inputs = self._avg_pooling(inputs) 358 | routing_weights = self._routing_fn(pooled_inputs) 359 | # Capture routing weights as additional input to CondConv layers 360 | fused_conv_fn = functools.partial( 361 | self._fused_conv, routing_weights=routing_weights) 362 | expand_conv_fn = functools.partial( 363 | self._expand_conv, routing_weights=routing_weights) 364 | depthwise_conv_fn = functools.partial( 365 | self._depthwise_conv, routing_weights=routing_weights) 366 | project_conv_fn = functools.partial( 367 | self._project_conv, routing_weights=routing_weights) 368 | 369 | # creates conv 2x2 kernel 370 | if self._block_args.super_pixel == 1: 371 | with tf.variable_scope('super_pixel'): 372 | x = self._relu_fn( 373 | self._bnsp(self._superpixel(x), training=training)) 374 | logging.info( 375 | 'Block start with SuperPixel: %s shape: %s', x.name, x.shape) 376 | 377 | if self._block_args.fused_conv: 378 | # If use fused mbconv, skip expansion and use regular conv. 379 | x = self._relu_fn(self._bn1(fused_conv_fn(x), training=training)) 380 | logging.info('Conv2D: %s shape: %s', x.name, x.shape) 381 | else: 382 | # Otherwise, first apply expansion and then apply depthwise conv. 383 | if self._block_args.expand_ratio != 1: 384 | x = self._relu_fn(self._bn0(expand_conv_fn(x), training=training)) 385 | logging.info('Expand: %s shape: %s', x.name, x.shape) 386 | 387 | x = self._relu_fn(self._bn1(depthwise_conv_fn(x), training=training)) 388 | logging.info('DWConv: %s shape: %s', x.name, x.shape) 389 | 390 | if self._has_se: 391 | with tf.variable_scope('se'): 392 | x = self._call_se(x) 393 | 394 | self.endpoints = {'expansion_output': x} 395 | 396 | x = self._bn2(project_conv_fn(x), training=training) 397 | # Add identity so that quantization-aware training can insert quantization 398 | # ops correctly. 399 | x = tf.identity(x) 400 | if self._clip_projection_output: 401 | x = tf.clip_by_value(x, -6, 6) 402 | if self._block_args.id_skip: 403 | if all( 404 | s == 1 for s in self._block_args.strides 405 | ) and self._block_args.input_filters == self._block_args.output_filters: 406 | # Apply only if skip connection presents. 
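# Editor's note: utils.drop_connect implements stochastic depth: during training the residual
# branch is zeroed for a random subset of samples with probability 1 - survival_prob and the
# kept samples are rescaled by 1 / survival_prob, so the expected output matches inference,
# where the branch is always added to the identity input.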
407 | if survival_prob: 408 | x = utils.drop_connect(x, training, survival_prob) 409 | x = tf.add(x, inputs) 410 | logging.info('Project: %s shape: %s', x.name, x.shape) 411 | return x 412 | 413 | 414 | class MBConvBlockWithoutDepthwise(MBConvBlock): 415 | """MBConv-like block without depthwise convolution and squeeze-and-excite.""" 416 | 417 | def _build(self): 418 | """Builds block according to the arguments.""" 419 | filters = self._block_args.input_filters * self._block_args.expand_ratio 420 | if self._block_args.expand_ratio != 1: 421 | # Expansion phase: 422 | self._expand_conv = tf.layers.Conv2D( 423 | filters, 424 | kernel_size=[3, 3], 425 | strides=[1, 1], 426 | kernel_initializer=conv_kernel_initializer, 427 | padding='same', 428 | use_bias=False) 429 | self._bn0 = self._batch_norm( 430 | axis=self._channel_axis, 431 | momentum=self._batch_norm_momentum, 432 | epsilon=self._batch_norm_epsilon) 433 | 434 | # Output phase: 435 | filters = self._block_args.output_filters 436 | self._project_conv = tf.layers.Conv2D( 437 | filters, 438 | kernel_size=[1, 1], 439 | strides=self._block_args.strides, 440 | kernel_initializer=conv_kernel_initializer, 441 | padding='same', 442 | use_bias=False) 443 | self._bn1 = self._batch_norm( 444 | axis=self._channel_axis, 445 | momentum=self._batch_norm_momentum, 446 | epsilon=self._batch_norm_epsilon) 447 | 448 | def call(self, inputs, training=True, survival_prob=None): 449 | """Implementation of call(). 450 | 451 | Args: 452 | inputs: the inputs tensor. 453 | training: boolean, whether the model is constructed for training. 454 | survival_prob: float, between 0 to 1, drop connect rate. 455 | 456 | Returns: 457 | A output tensor. 458 | """ 459 | logging.info('Block input: %s shape: %s', inputs.name, inputs.shape) 460 | if self._block_args.expand_ratio != 1: 461 | x = self._relu_fn(self._bn0(self._expand_conv(inputs), training=training)) 462 | else: 463 | x = inputs 464 | logging.info('Expand: %s shape: %s', x.name, x.shape) 465 | 466 | self.endpoints = {'expansion_output': x} 467 | 468 | x = self._bn1(self._project_conv(x), training=training) 469 | # Add identity so that quantization-aware training can insert quantization 470 | # ops correctly. 471 | x = tf.identity(x) 472 | if self._clip_projection_output: 473 | x = tf.clip_by_value(x, -6, 6) 474 | 475 | if self._block_args.id_skip: 476 | if all( 477 | s == 1 for s in self._block_args.strides 478 | ) and self._block_args.input_filters == self._block_args.output_filters: 479 | # Apply only if skip connection presents. 480 | if survival_prob: 481 | x = utils.drop_connect(x, training, survival_prob) 482 | x = tf.add(x, inputs) 483 | logging.info('Project: %s shape: %s', x.name, x.shape) 484 | return x 485 | 486 | 487 | class Model(tf.keras.Model): 488 | """A class implements tf.keras.Model for MNAS-like model. 489 | 490 | Reference: https://arxiv.org/abs/1807.11626 491 | """ 492 | 493 | def __init__(self, blocks_args=None, global_params=None): 494 | """Initializes an `Model` instance. 495 | 496 | Args: 497 | blocks_args: A list of BlockArgs to construct block modules. 498 | global_params: GlobalParams, a set of global parameters. 499 | 500 | Raises: 501 | ValueError: when blocks_args is not specified as a list. 
502 | """ 503 | super(Model, self).__init__() 504 | if not isinstance(blocks_args, list): 505 | raise ValueError('blocks_args should be a list.') 506 | self._global_params = global_params 507 | self._blocks_args = blocks_args 508 | self._relu_fn = global_params.relu_fn or tf.nn.swish 509 | self._batch_norm = global_params.batch_norm 510 | 511 | self.endpoints = None 512 | 513 | self._build() 514 | 515 | def _get_conv_block(self, conv_type): 516 | conv_block_map = {0: MBConvBlock, 1: MBConvBlockWithoutDepthwise} 517 | return conv_block_map[conv_type] 518 | 519 | def _build(self): 520 | """Builds a model.""" 521 | self._blocks = [] 522 | batch_norm_momentum = self._global_params.batch_norm_momentum 523 | batch_norm_epsilon = self._global_params.batch_norm_epsilon 524 | if self._global_params.data_format == 'channels_first': 525 | channel_axis = 1 526 | self._spatial_dims = [2, 3] 527 | else: 528 | channel_axis = -1 529 | self._spatial_dims = [1, 2] 530 | 531 | # Stem part. 532 | self._conv_stem = tf.layers.Conv2D( 533 | filters=round_filters(32, self._global_params), 534 | kernel_size=[3, 3], 535 | strides=[2, 2], 536 | kernel_initializer=conv_kernel_initializer, 537 | padding='same', 538 | data_format=self._global_params.data_format, 539 | use_bias=False) 540 | self._bn0 = self._batch_norm( 541 | axis=channel_axis, 542 | momentum=batch_norm_momentum, 543 | epsilon=batch_norm_epsilon) 544 | 545 | # Builds blocks. 546 | for block_args in self._blocks_args: 547 | assert block_args.num_repeat > 0 548 | assert block_args.super_pixel in [0, 1, 2] 549 | # Update block input and output filters based on depth multiplier. 550 | input_filters = round_filters(block_args.input_filters, 551 | self._global_params) 552 | output_filters = round_filters(block_args.output_filters, 553 | self._global_params) 554 | kernel_size = block_args.kernel_size 555 | block_args = block_args._replace( 556 | input_filters=input_filters, 557 | output_filters=output_filters, 558 | num_repeat=round_repeats(block_args.num_repeat, self._global_params)) 559 | 560 | # The first block needs to take care of stride and filter size increase. 561 | conv_block = self._get_conv_block(block_args.conv_type) 562 | if not block_args.super_pixel: # no super_pixel at all 563 | self._blocks.append(conv_block(block_args, self._global_params)) 564 | else: 565 | # if superpixel, adjust filters, kernels, and strides. 
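# Editor's note: a 2x2 space-to-depth transform halves each spatial dimension and multiplies
# the channel depth by 4, so depth_factor below works out to 4 / (stride_h * stride_w):
# 4 for a stride-1 block and 1 for a stride-2 block.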
566 | depth_factor = int(4 / block_args.strides[0] / block_args.strides[1]) 567 | block_args = block_args._replace( 568 | input_filters=block_args.input_filters * depth_factor, 569 | output_filters=block_args.output_filters * depth_factor, 570 | kernel_size=((block_args.kernel_size + 1) // 2 if depth_factor > 1 571 | else block_args.kernel_size)) 572 | # if the first block has stride-2 and super_pixel trandformation 573 | if (block_args.strides[0] == 2 and block_args.strides[1] == 2): 574 | block_args = block_args._replace(strides=[1, 1]) 575 | self._blocks.append(conv_block(block_args, self._global_params)) 576 | block_args = block_args._replace( # sp stops at stride-2 577 | super_pixel=0, 578 | input_filters=input_filters, 579 | output_filters=output_filters, 580 | kernel_size=kernel_size) 581 | elif block_args.super_pixel == 1: 582 | self._blocks.append(conv_block(block_args, self._global_params)) 583 | block_args = block_args._replace(super_pixel=2) 584 | else: 585 | self._blocks.append(conv_block(block_args, self._global_params)) 586 | if block_args.num_repeat > 1: # rest of blocks with the same block_arg 587 | # pylint: disable=protected-access 588 | block_args = block_args._replace( 589 | input_filters=block_args.output_filters, strides=[1, 1]) 590 | # pylint: enable=protected-access 591 | for _ in xrange(block_args.num_repeat - 1): 592 | self._blocks.append(conv_block(block_args, self._global_params)) 593 | 594 | # Head part. 595 | self._conv_head = tf.layers.Conv2D( 596 | filters=round_filters(1280, self._global_params), 597 | kernel_size=[1, 1], 598 | strides=[1, 1], 599 | kernel_initializer=conv_kernel_initializer, 600 | padding='same', 601 | use_bias=False) 602 | self._bn1 = self._batch_norm( 603 | axis=channel_axis, 604 | momentum=batch_norm_momentum, 605 | epsilon=batch_norm_epsilon) 606 | 607 | self._avg_pooling = tf.keras.layers.GlobalAveragePooling2D( 608 | data_format=self._global_params.data_format) 609 | if self._global_params.num_classes: 610 | self._fc = tf.layers.Dense( 611 | self._global_params.num_classes, 612 | kernel_initializer=dense_kernel_initializer) 613 | else: 614 | self._fc = None 615 | 616 | if self._global_params.dropout_rate > 0: 617 | self._dropout = tf.keras.layers.Dropout(self._global_params.dropout_rate) 618 | else: 619 | self._dropout = None 620 | 621 | def call(self, 622 | inputs, 623 | training=True, 624 | features_only=None, 625 | pooled_features_only=False): 626 | """Implementation of call(). 627 | 628 | Args: 629 | inputs: input tensors. 630 | training: boolean, whether the model is constructed for training. 631 | features_only: build the base feature network only. 632 | pooled_features_only: build the base network for features extraction 633 | (after 1x1 conv layer and global pooling, but before dropout and fc 634 | head). 635 | 636 | Returns: 637 | output tensors. 638 | """ 639 | outputs = None 640 | self.endpoints = {} 641 | reduction_idx = 0 642 | # Calls Stem layers 643 | with tf.variable_scope('stem'): 644 | outputs = self._relu_fn( 645 | self._bn0(self._conv_stem(inputs), training=training)) 646 | logging.info('Built stem layers with output shape: %s', outputs.shape) 647 | self.endpoints['stem'] = outputs 648 | 649 | # Calls blocks. 650 | for idx, block in enumerate(self._blocks): 651 | is_reduction = False # reduction flag for blocks after the stem layer 652 | # If the first block has super-pixel (space-to-depth) layer, then stem is 653 | # the first reduction point. 
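# Editor's note: in the general case, each 'reduction_%d' endpoint recorded below captures the
# feature map produced just before the next stride-2 block (or by the final block), giving one
# endpoint per output resolution.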
654 | if (block.block_args().super_pixel == 1 and idx == 0): 655 | reduction_idx += 1 656 | self.endpoints['reduction_%s' % reduction_idx] = outputs 657 | 658 | elif ((idx == len(self._blocks) - 1) or 659 | self._blocks[idx + 1].block_args().strides[0] > 1): 660 | is_reduction = True 661 | reduction_idx += 1 662 | 663 | with tf.variable_scope('blocks_%s' % idx): 664 | survival_prob = self._global_params.survival_prob 665 | if survival_prob: 666 | drop_rate = 1.0 - survival_prob 667 | survival_prob = 1.0 - drop_rate * float(idx) / len(self._blocks) 668 | logging.info('block_%s survival_prob: %s', idx, survival_prob) 669 | outputs = block.call( 670 | outputs, training=training, survival_prob=survival_prob) 671 | self.endpoints['block_%s' % idx] = outputs 672 | if is_reduction: 673 | self.endpoints['reduction_%s' % reduction_idx] = outputs 674 | if block.endpoints: 675 | for k, v in six.iteritems(block.endpoints): 676 | self.endpoints['block_%s/%s' % (idx, k)] = v 677 | if is_reduction: 678 | self.endpoints['reduction_%s/%s' % (reduction_idx, k)] = v 679 | self.endpoints['features'] = outputs 680 | 681 | if not features_only: 682 | # Calls final layers and returns logits. 683 | with tf.variable_scope('head'): 684 | outputs = self._relu_fn( 685 | self._bn1(self._conv_head(outputs), training=training)) 686 | self.endpoints['head_1x1'] = outputs 687 | 688 | if self._global_params.local_pooling: 689 | shape = outputs.get_shape().as_list() 690 | kernel_size = [ 691 | 1, shape[self._spatial_dims[0]], shape[self._spatial_dims[1]], 1] 692 | outputs = tf.nn.avg_pool( 693 | outputs, ksize=kernel_size, strides=[1, 1, 1, 1], padding='VALID') 694 | self.endpoints['pooled_features'] = outputs 695 | if not pooled_features_only: 696 | if self._dropout: 697 | outputs = self._dropout(outputs, training=training) 698 | self.endpoints['global_pool'] = outputs 699 | if self._fc: 700 | outputs = tf.squeeze(outputs, self._spatial_dims) 701 | outputs = self._fc(outputs) 702 | self.endpoints['head'] = outputs 703 | else: 704 | outputs = self._avg_pooling(outputs) 705 | self.endpoints['pooled_features'] = outputs 706 | if not pooled_features_only: 707 | if self._dropout: 708 | outputs = self._dropout(outputs, training=training) 709 | self.endpoints['global_pool'] = outputs 710 | if self._fc: 711 | outputs = self._fc(outputs) 712 | self.endpoints['head'] = outputs 713 | return outputs 714 | -------------------------------------------------------------------------------- /tf_to_pytorch/convert_tf_to_pt/original_tf/eval_ckpt_main.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Eval checkpoint driver. 16 | 17 | This is an example evaluation script for users to understand the EfficientNet 18 | model checkpoints on CPU. 
To serve EfficientNet, please consider to export a 19 | `SavedModel` from checkpoints and use tf-serving to serve. 20 | """ 21 | 22 | from __future__ import absolute_import 23 | from __future__ import division 24 | from __future__ import print_function 25 | 26 | import json 27 | import sys 28 | from absl import app 29 | from absl import flags 30 | import numpy as np 31 | import tensorflow as tf 32 | 33 | 34 | import efficientnet_builder 35 | import preprocessing 36 | 37 | 38 | tf.compat.v1.disable_v2_behavior() 39 | 40 | flags.DEFINE_string('model_name', 'efficientnet-b0', 'Model name to eval.') 41 | flags.DEFINE_string('runmode', 'examples', 'Running mode: examples or imagenet') 42 | flags.DEFINE_string('imagenet_eval_glob', None, 43 | 'Imagenet eval image glob, ' 44 | 'such as /imagenet/ILSVRC2012*.JPEG') 45 | flags.DEFINE_string('imagenet_eval_label', None, 46 | 'Imagenet eval label file path, ' 47 | 'such as /imagenet/ILSVRC2012_validation_ground_truth.txt') 48 | flags.DEFINE_string('ckpt_dir', '/tmp/ckpt/', 'Checkpoint folders') 49 | flags.DEFINE_string('example_img', '/tmp/panda.jpg', 50 | 'Filepath for a single example image.') 51 | flags.DEFINE_string('labels_map_file', '/tmp/labels_map.txt', 52 | 'Labels map from label id to its meaning.') 53 | flags.DEFINE_integer('num_images', 5000, 54 | 'Number of images to eval. Use -1 to eval all images.') 55 | FLAGS = flags.FLAGS 56 | 57 | MEAN_RGB = [0.485 * 255, 0.456 * 255, 0.406 * 255] 58 | STDDEV_RGB = [0.229 * 255, 0.224 * 255, 0.225 * 255] 59 | 60 | 61 | class EvalCkptDriver(object): 62 | """A driver for running eval inference. 63 | 64 | Attributes: 65 | model_name: str. Model name to eval. 66 | batch_size: int. Eval batch size. 67 | num_classes: int. Number of classes, default to 1000 for ImageNet. 68 | image_size: int. Input image size, determined by model name. 
69 | """ 70 | 71 | def __init__(self, model_name='efficientnet-b0', batch_size=1): 72 | """Initialize internal variables.""" 73 | self.model_name = model_name 74 | self.batch_size = batch_size 75 | self.num_classes = 1000 76 | # Model Scaling parameters 77 | _, _, self.image_size, _ = efficientnet_builder.efficientnet_params( 78 | model_name) 79 | 80 | def restore_model(self, sess, ckpt_dir): 81 | """Restore variables from checkpoint dir.""" 82 | checkpoint = tf.train.latest_checkpoint(ckpt_dir) 83 | ema = tf.train.ExponentialMovingAverage(decay=0.9999) 84 | ema_vars = tf.compat.v1.trainable_variables() + tf.compat.v1.get_collection('moving_vars') 85 | for v in tf.compat.v1.global_variables(): 86 | if 'moving_mean' in v.name or 'moving_variance' in v.name: 87 | ema_vars.append(v) 88 | ema_vars = list(set(ema_vars)) 89 | var_dict = ema.variables_to_restore(ema_vars) 90 | saver = tf.compat.v1.train.Saver(var_dict, max_to_keep=1) 91 | saver.restore(sess, checkpoint) 92 | 93 | def build_model(self, features, is_training): 94 | """Build model with input features.""" 95 | features -= tf.constant(MEAN_RGB, shape=[1, 1, 3], dtype=features.dtype) 96 | features /= tf.constant(STDDEV_RGB, shape=[1, 1, 3], dtype=features.dtype) 97 | logits, _ = efficientnet_builder.build_model( 98 | features, self.model_name, is_training) 99 | probs = tf.nn.softmax(logits) 100 | probs = tf.squeeze(probs) 101 | return probs 102 | 103 | def build_dataset(self, filenames, labels, is_training): 104 | """Build input dataset.""" 105 | filenames = tf.constant(filenames) 106 | labels = tf.constant(labels) 107 | 108 | dataset = tf.compat.v1.data.Dataset.from_tensor_slices((filenames, labels)) 109 | 110 | def _parse_function(filename, label): 111 | image_string = tf.io.read_file(filename) 112 | image_decoded = preprocessing.preprocess_image( 113 | image_string, is_training, self.image_size) 114 | image = tf.cast(image_decoded, tf.float32) 115 | return image, label 116 | 117 | dataset = dataset.map(_parse_function) 118 | dataset = dataset.batch(self.batch_size) 119 | 120 | iterator = dataset.make_one_shot_iterator() 121 | #iterator = iter(dataset) 122 | images, labels = iterator.get_next() 123 | return images, labels 124 | 125 | def run_inference(self, ckpt_dir, image_files, labels): 126 | """Build and run inference on the target images and labels.""" 127 | with tf.Graph().as_default(), tf.Session() as sess: 128 | images, labels = self.build_dataset(image_files, labels, False) 129 | probs = self.build_model(images, is_training=False) 130 | 131 | sess.run(tf.global_variables_initializer()) 132 | self.restore_model(sess, ckpt_dir) 133 | 134 | prediction_idx = [] 135 | prediction_prob = [] 136 | for _ in range(len(image_files) // self.batch_size): 137 | out_probs = sess.run(probs) 138 | idx = np.argsort(out_probs)[::-1] 139 | prediction_idx.append(idx[:5]) 140 | prediction_prob.append([out_probs[pid] for pid in idx[:5]]) 141 | 142 | # Return the top 5 predictions (idx and prob) for each image. 143 | return prediction_idx, prediction_prob 144 | 145 | 146 | def eval_example_images(model_name, ckpt_dir, image_files, labels_map_file): 147 | """Eval a list of example images. 148 | 149 | Args: 150 | model_name: str. The name of model to eval. 151 | ckpt_dir: str. Checkpoint directory path. 152 | image_files: List[str]. A list of image file paths. 153 | labels_map_file: str. The labels map file path. 
154 | 155 | Returns: 156 | A tuple (pred_idx, and pred_prob), where pred_idx is the top 5 prediction 157 | index and pred_prob is the top 5 prediction probability. 158 | """ 159 | eval_ckpt_driver = EvalCkptDriver(model_name) 160 | classes = json.loads(tf.gfile.Open(labels_map_file).read()) 161 | pred_idx, pred_prob = eval_ckpt_driver.run_inference( 162 | ckpt_dir, image_files, [0] * len(image_files)) 163 | for i in range(len(image_files)): 164 | print('predicted class for image {}: '.format(image_files[i])) 165 | for j, idx in enumerate(pred_idx[i]): 166 | print(' -> top_{} ({:4.2f}%): {} '.format( 167 | j, pred_prob[i][j] * 100, classes[str(idx)])) 168 | return pred_idx, pred_prob 169 | 170 | 171 | def eval_imagenet(model_name, 172 | ckpt_dir, 173 | imagenet_eval_glob, 174 | imagenet_eval_label, 175 | num_images): 176 | """Eval ImageNet images and report top1/top5 accuracy. 177 | 178 | Args: 179 | model_name: str. The name of model to eval. 180 | ckpt_dir: str. Checkpoint directory path. 181 | imagenet_eval_glob: str. File path glob for all eval images. 182 | imagenet_eval_label: str. File path for eval label. 183 | num_images: int. Number of images to eval: -1 means eval the whole dataset. 184 | 185 | Returns: 186 | A tuple (top1, top5) for top1 and top5 accuracy. 187 | """ 188 | eval_ckpt_driver = EvalCkptDriver(model_name) 189 | imagenet_val_labels = [int(i) for i in tf.gfile.GFile(imagenet_eval_label)] 190 | imagenet_filenames = sorted(tf.gfile.Glob(imagenet_eval_glob)) 191 | if num_images < 0: 192 | num_images = len(imagenet_filenames) 193 | image_files = imagenet_filenames[:num_images] 194 | labels = imagenet_val_labels[:num_images] 195 | 196 | pred_idx, _ = eval_ckpt_driver.run_inference(ckpt_dir, image_files, labels) 197 | top1_cnt, top5_cnt = 0.0, 0.0 198 | for i, label in enumerate(labels): 199 | top1_cnt += label in pred_idx[i][:1] 200 | top5_cnt += label in pred_idx[i][:5] 201 | if i % 100 == 0: 202 | print('Step {}: top1_acc = {:4.2f}% top5_acc = {:4.2f}%'.format( 203 | i, 100 * top1_cnt / (i + 1), 100 * top5_cnt / (i + 1))) 204 | sys.stdout.flush() 205 | top1, top5 = 100 * top1_cnt / num_images, 100 * top5_cnt / num_images 206 | print('Final: top1_acc = {:4.2f}% top5_acc = {:4.2f}%'.format(top1, top5)) 207 | return top1, top5 208 | 209 | 210 | def main(unused_argv): 211 | tf.logging.set_verbosity(tf.logging.ERROR) 212 | if FLAGS.runmode == 'examples': 213 | # Run inference for an example image. 214 | eval_example_images(FLAGS.model_name, FLAGS.ckpt_dir, [FLAGS.example_img], 215 | FLAGS.labels_map_file) 216 | elif FLAGS.runmode == 'imagenet': 217 | # Run inference for imagenet. 218 | eval_imagenet(FLAGS.model_name, FLAGS.ckpt_dir, FLAGS.imagenet_eval_glob, 219 | FLAGS.imagenet_eval_label, FLAGS.num_images) 220 | else: 221 | print('must specify runmode: examples or imagenet') 222 | 223 | 224 | if __name__ == '__main__': 225 | app.run(main) 226 | -------------------------------------------------------------------------------- /tf_to_pytorch/convert_tf_to_pt/original_tf/eval_ckpt_main_tf1.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Eval checkpoint driver. 16 | 17 | This is an example evaluation script for users to understand the EfficientNet 18 | model checkpoints on CPU. To serve EfficientNet, please consider to export a 19 | `SavedModel` from checkpoints and use tf-serving to serve. 20 | """ 21 | 22 | from __future__ import absolute_import 23 | from __future__ import division 24 | from __future__ import print_function 25 | 26 | import json 27 | import sys 28 | from absl import app 29 | from absl import flags 30 | import numpy as np 31 | import tensorflow as tf 32 | 33 | 34 | import efficientnet_builder 35 | import preprocessing 36 | 37 | 38 | flags.DEFINE_string('model_name', 'efficientnet-b0', 'Model name to eval.') 39 | flags.DEFINE_string('runmode', 'examples', 'Running mode: examples or imagenet') 40 | flags.DEFINE_string('imagenet_eval_glob', None, 41 | 'Imagenet eval image glob, ' 42 | 'such as /imagenet/ILSVRC2012*.JPEG') 43 | flags.DEFINE_string('imagenet_eval_label', None, 44 | 'Imagenet eval label file path, ' 45 | 'such as /imagenet/ILSVRC2012_validation_ground_truth.txt') 46 | flags.DEFINE_string('ckpt_dir', '/tmp/ckpt/', 'Checkpoint folders') 47 | flags.DEFINE_string('example_img', '/tmp/panda.jpg', 48 | 'Filepath for a single example image.') 49 | flags.DEFINE_string('labels_map_file', '/tmp/labels_map.txt', 50 | 'Labels map from label id to its meaning.') 51 | flags.DEFINE_integer('num_images', 5000, 52 | 'Number of images to eval. Use -1 to eval all images.') 53 | FLAGS = flags.FLAGS 54 | 55 | MEAN_RGB = [0.485 * 255, 0.456 * 255, 0.406 * 255] 56 | STDDEV_RGB = [0.229 * 255, 0.224 * 255, 0.225 * 255] 57 | 58 | 59 | class EvalCkptDriver(object): 60 | """A driver for running eval inference. 61 | 62 | Attributes: 63 | model_name: str. Model name to eval. 64 | batch_size: int. Eval batch size. 65 | num_classes: int. Number of classes, default to 1000 for ImageNet. 66 | image_size: int. Input image size, determined by model name. 
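  Example (checkpoint directory and image path are placeholders):
    driver = EvalCkptDriver('efficientnet-b0')
    pred_idx, pred_prob = driver.run_inference(
        '/tmp/ckpt/', ['/tmp/panda.jpg'], [0])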
67 | """ 68 | 69 | def __init__(self, model_name='efficientnet-b0', batch_size=1): 70 | """Initialize internal variables.""" 71 | self.model_name = model_name 72 | self.batch_size = batch_size 73 | self.num_classes = 1000 74 | # Model Scaling parameters 75 | _, _, self.image_size, _ = efficientnet_builder.efficientnet_params( 76 | model_name) 77 | 78 | def restore_model(self, sess, ckpt_dir): 79 | """Restore variables from checkpoint dir.""" 80 | checkpoint = tf.train.latest_checkpoint(ckpt_dir) 81 | ema = tf.train.ExponentialMovingAverage(decay=0.9999) 82 | ema_vars = tf.trainable_variables() + tf.get_collection('moving_vars') 83 | for v in tf.global_variables(): 84 | if 'moving_mean' in v.name or 'moving_variance' in v.name: 85 | ema_vars.append(v) 86 | ema_vars = list(set(ema_vars)) 87 | var_dict = ema.variables_to_restore(ema_vars) 88 | saver = tf.train.Saver(var_dict, max_to_keep=1) 89 | saver.restore(sess, checkpoint) 90 | 91 | def build_model(self, features, is_training): 92 | """Build model with input features.""" 93 | features -= tf.constant(MEAN_RGB, shape=[1, 1, 3], dtype=features.dtype) 94 | features /= tf.constant(STDDEV_RGB, shape=[1, 1, 3], dtype=features.dtype) 95 | logits, _ = efficientnet_builder.build_model( 96 | features, self.model_name, is_training) 97 | probs = tf.nn.softmax(logits) 98 | probs = tf.squeeze(probs) 99 | return probs 100 | 101 | def build_dataset(self, filenames, labels, is_training): 102 | """Build input dataset.""" 103 | filenames = tf.constant(filenames) 104 | labels = tf.constant(labels) 105 | dataset = tf.data.Dataset.from_tensor_slices((filenames, labels)) 106 | 107 | def _parse_function(filename, label): 108 | image_string = tf.read_file(filename) 109 | image_decoded = preprocessing.preprocess_image( 110 | image_string, is_training, self.image_size) 111 | image = tf.cast(image_decoded, tf.float32) 112 | return image, label 113 | 114 | dataset = dataset.map(_parse_function) 115 | dataset = dataset.batch(self.batch_size) 116 | 117 | iterator = dataset.make_one_shot_iterator() 118 | images, labels = iterator.get_next() 119 | return images, labels 120 | 121 | def run_inference(self, ckpt_dir, image_files, labels): 122 | """Build and run inference on the target images and labels.""" 123 | with tf.Graph().as_default(), tf.Session() as sess: 124 | images, labels = self.build_dataset(image_files, labels, False) 125 | probs = self.build_model(images, is_training=False) 126 | 127 | sess.run(tf.global_variables_initializer()) 128 | self.restore_model(sess, ckpt_dir) 129 | 130 | prediction_idx = [] 131 | prediction_prob = [] 132 | for _ in range(len(image_files) // self.batch_size): 133 | out_probs = sess.run(probs) 134 | idx = np.argsort(out_probs)[::-1] 135 | prediction_idx.append(idx[:5]) 136 | prediction_prob.append([out_probs[pid] for pid in idx[:5]]) 137 | 138 | # Return the top 5 predictions (idx and prob) for each image. 139 | return prediction_idx, prediction_prob 140 | 141 | 142 | def eval_example_images(model_name, ckpt_dir, image_files, labels_map_file): 143 | """Eval a list of example images. 144 | 145 | Args: 146 | model_name: str. The name of model to eval. 147 | ckpt_dir: str. Checkpoint directory path. 148 | image_files: List[str]. A list of image file paths. 149 | labels_map_file: str. The labels map file path. 150 | 151 | Returns: 152 | A tuple (pred_idx, and pred_prob), where pred_idx is the top 5 prediction 153 | index and pred_prob is the top 5 prediction probability. 
154 | """ 155 | eval_ckpt_driver = EvalCkptDriver(model_name) 156 | classes = json.loads(tf.gfile.Open(labels_map_file).read()) 157 | pred_idx, pred_prob = eval_ckpt_driver.run_inference( 158 | ckpt_dir, image_files, [0] * len(image_files)) 159 | for i in range(len(image_files)): 160 | print('predicted class for image {}: '.format(image_files[i])) 161 | for j, idx in enumerate(pred_idx[i]): 162 | print(' -> top_{} ({:4.2f}%): {} '.format( 163 | j, pred_prob[i][j] * 100, classes[str(idx)])) 164 | return pred_idx, pred_prob 165 | 166 | 167 | def eval_imagenet(model_name, 168 | ckpt_dir, 169 | imagenet_eval_glob, 170 | imagenet_eval_label, 171 | num_images): 172 | """Eval ImageNet images and report top1/top5 accuracy. 173 | 174 | Args: 175 | model_name: str. The name of model to eval. 176 | ckpt_dir: str. Checkpoint directory path. 177 | imagenet_eval_glob: str. File path glob for all eval images. 178 | imagenet_eval_label: str. File path for eval label. 179 | num_images: int. Number of images to eval: -1 means eval the whole dataset. 180 | 181 | Returns: 182 | A tuple (top1, top5) for top1 and top5 accuracy. 183 | """ 184 | eval_ckpt_driver = EvalCkptDriver(model_name) 185 | imagenet_val_labels = [int(i) for i in tf.gfile.GFile(imagenet_eval_label)] 186 | imagenet_filenames = sorted(tf.gfile.Glob(imagenet_eval_glob)) 187 | if num_images < 0: 188 | num_images = len(imagenet_filenames) 189 | image_files = imagenet_filenames[:num_images] 190 | labels = imagenet_val_labels[:num_images] 191 | 192 | pred_idx, _ = eval_ckpt_driver.run_inference(ckpt_dir, image_files, labels) 193 | top1_cnt, top5_cnt = 0.0, 0.0 194 | for i, label in enumerate(labels): 195 | top1_cnt += label in pred_idx[i][:1] 196 | top5_cnt += label in pred_idx[i][:5] 197 | if i % 100 == 0: 198 | print('Step {}: top1_acc = {:4.2f}% top5_acc = {:4.2f}%'.format( 199 | i, 100 * top1_cnt / (i + 1), 100 * top5_cnt / (i + 1))) 200 | sys.stdout.flush() 201 | top1, top5 = 100 * top1_cnt / num_images, 100 * top5_cnt / num_images 202 | print('Final: top1_acc = {:4.2f}% top5_acc = {:4.2f}%'.format(top1, top5)) 203 | return top1, top5 204 | 205 | 206 | def main(unused_argv): 207 | tf.logging.set_verbosity(tf.logging.ERROR) 208 | if FLAGS.runmode == 'examples': 209 | # Run inference for an example image. 210 | eval_example_images(FLAGS.model_name, FLAGS.ckpt_dir, [FLAGS.example_img], 211 | FLAGS.labels_map_file) 212 | elif FLAGS.runmode == 'imagenet': 213 | # Run inference for imagenet. 214 | eval_imagenet(FLAGS.model_name, FLAGS.ckpt_dir, FLAGS.imagenet_eval_glob, 215 | FLAGS.imagenet_eval_label, FLAGS.num_images) 216 | else: 217 | print('must specify runmode: examples or imagenet') 218 | 219 | 220 | if __name__ == '__main__': 221 | app.run(main) 222 | -------------------------------------------------------------------------------- /tf_to_pytorch/convert_tf_to_pt/original_tf/preprocessing.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """ImageNet preprocessing.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | from absl import logging 21 | 22 | import tensorflow.compat.v1 as tf 23 | 24 | 25 | IMAGE_SIZE = 224 26 | CROP_PADDING = 32 27 | 28 | 29 | def distorted_bounding_box_crop(image_bytes, 30 | bbox, 31 | min_object_covered=0.1, 32 | aspect_ratio_range=(0.75, 1.33), 33 | area_range=(0.05, 1.0), 34 | max_attempts=100, 35 | scope=None): 36 | """Generates cropped_image using one of the bboxes randomly distorted. 37 | 38 | See `tf.image.sample_distorted_bounding_box` for more documentation. 39 | 40 | Args: 41 | image_bytes: `Tensor` of binary image data. 42 | bbox: `Tensor` of bounding boxes arranged `[1, num_boxes, coords]` 43 | where each coordinate is [0, 1) and the coordinates are arranged 44 | as `[ymin, xmin, ymax, xmax]`. If num_boxes is 0 then use the whole 45 | image. 46 | min_object_covered: An optional `float`. Defaults to `0.1`. The cropped 47 | area of the image must contain at least this fraction of any bounding 48 | box supplied. 49 | aspect_ratio_range: An optional list of `float`s. The cropped area of the 50 | image must have an aspect ratio = width / height within this range. 51 | area_range: An optional list of `float`s. The cropped area of the image 52 | must contain a fraction of the supplied image within in this range. 53 | max_attempts: An optional `int`. Number of attempts at generating a cropped 54 | region of the image of the specified constraints. After `max_attempts` 55 | failures, return the entire image. 56 | scope: Optional `str` for name scope. 57 | Returns: 58 | cropped image `Tensor` 59 | """ 60 | with tf.name_scope(scope, 'distorted_bounding_box_crop', [image_bytes, bbox]): 61 | shape = tf.image.extract_jpeg_shape(image_bytes) 62 | sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( 63 | shape, 64 | bounding_boxes=bbox, 65 | min_object_covered=min_object_covered, 66 | aspect_ratio_range=aspect_ratio_range, 67 | area_range=area_range, 68 | max_attempts=max_attempts, 69 | use_image_if_no_bounding_boxes=True) 70 | bbox_begin, bbox_size, _ = sample_distorted_bounding_box 71 | 72 | # Crop the image to the specified bounding box. 73 | offset_y, offset_x, _ = tf.unstack(bbox_begin) 74 | target_height, target_width, _ = tf.unstack(bbox_size) 75 | crop_window = tf.stack([offset_y, offset_x, target_height, target_width]) 76 | image = tf.image.decode_and_crop_jpeg(image_bytes, crop_window, channels=3) 77 | 78 | return image 79 | 80 | 81 | def _at_least_x_are_equal(a, b, x): 82 | """At least `x` of `a` and `b` `Tensors` are equal.""" 83 | match = tf.equal(a, b) 84 | match = tf.cast(match, tf.int32) 85 | return tf.greater_equal(tf.reduce_sum(match), x) 86 | 87 | 88 | def _decode_and_random_crop(image_bytes, image_size): 89 | """Make a random crop of image_size.""" 90 | bbox = tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]) 91 | image = distorted_bounding_box_crop( 92 | image_bytes, 93 | bbox, 94 | min_object_covered=0.1, 95 | aspect_ratio_range=(3. / 4, 4. 
/ 3.),
96 |       area_range=(0.08, 1.0),
97 |       max_attempts=10,
98 |       scope=None)
99 |   original_shape = tf.image.extract_jpeg_shape(image_bytes)
100 |   bad = _at_least_x_are_equal(original_shape, tf.shape(image), 3)
101 | 
102 |   image = tf.cond(
103 |       bad,
104 |       lambda: _decode_and_center_crop(image_bytes, image_size),
105 |       lambda: tf.image.resize_bicubic([image],  # pylint: disable=g-long-lambda
106 |                                       [image_size, image_size])[0])
107 | 
108 |   return image
109 | 
110 | 
111 | def _decode_and_center_crop(image_bytes, image_size):
112 |   """Crops to center of image with padding, then scales to image_size."""
113 |   shape = tf.image.extract_jpeg_shape(image_bytes)
114 |   image_height = shape[0]
115 |   image_width = shape[1]
116 | 
117 |   padded_center_crop_size = tf.cast(
118 |       ((image_size / (image_size + CROP_PADDING)) *
119 |        tf.cast(tf.minimum(image_height, image_width), tf.float32)),
120 |       tf.int32)
121 | 
122 |   offset_height = ((image_height - padded_center_crop_size) + 1) // 2
123 |   offset_width = ((image_width - padded_center_crop_size) + 1) // 2
124 |   crop_window = tf.stack([offset_height, offset_width,
125 |                           padded_center_crop_size, padded_center_crop_size])
126 |   image = tf.image.decode_and_crop_jpeg(image_bytes, crop_window, channels=3)
127 |   image = tf.image.resize_bicubic([image], [image_size, image_size])[0]
128 |   return image
129 | 
130 | 
131 | def _flip(image):
132 |   """Random horizontal image flip."""
133 |   image = tf.image.random_flip_left_right(image)
134 |   return image
135 | 
136 | 
137 | def preprocess_for_train(image_bytes, use_bfloat16, image_size=IMAGE_SIZE,
138 |                          augment_name=None,
139 |                          randaug_num_layers=None, randaug_magnitude=None):
140 |   """Preprocesses the given image for training.
141 | 
142 |   Args:
143 |     image_bytes: `Tensor` representing an image binary of arbitrary size.
144 |     use_bfloat16: `bool` for whether to use bfloat16.
145 |     image_size: image size.
146 |     augment_name: `string` that is the name of the augmentation method
147 |       to apply to the image. `autoaugment` if AutoAugment is to be used or
148 |       `randaugment` if RandAugment is to be used. If the value is `None` no
149 |       augmentation method will be applied. See autoaugment.py for more
150 |       details.
151 |     randaug_num_layers: 'int', if RandAug is used, what should the number of
152 |       layers be. See autoaugment.py for detailed description.
153 |     randaug_magnitude: 'int', if RandAug is used, what should the magnitude
154 |       be. See autoaugment.py for detailed description.
155 | 
156 |   Returns:
157 |     A preprocessed image `Tensor`.
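  Example (a sketch; `image_bytes` is an encoded JPEG string tensor):
    image = preprocess_for_train(image_bytes, use_bfloat16=False,
                                 image_size=224)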
158 | """ 159 | image = _decode_and_random_crop(image_bytes, image_size) 160 | image = _flip(image) 161 | image = tf.reshape(image, [image_size, image_size, 3]) 162 | 163 | image = tf.image.convert_image_dtype( 164 | image, dtype=tf.bfloat16 if use_bfloat16 else tf.float32) 165 | 166 | if augment_name: 167 | try: 168 | import autoaugment # pylint: disable=g-import-not-at-top 169 | except ImportError as e: 170 | logging.exception('Autoaugment is not supported in TF 2.x.') 171 | raise e 172 | 173 | logging.info('Apply AutoAugment policy %s', augment_name) 174 | input_image_type = image.dtype 175 | image = tf.clip_by_value(image, 0.0, 255.0) 176 | image = tf.cast(image, dtype=tf.uint8) 177 | 178 | if augment_name == 'autoaugment': 179 | logging.info('Apply AutoAugment policy %s', augment_name) 180 | image = autoaugment.distort_image_with_autoaugment(image, 'v0') 181 | elif augment_name == 'randaugment': 182 | image = autoaugment.distort_image_with_randaugment( 183 | image, randaug_num_layers, randaug_magnitude) 184 | else: 185 | raise ValueError('Invalid value for augment_name: %s' % (augment_name)) 186 | 187 | image = tf.cast(image, dtype=input_image_type) 188 | return image 189 | 190 | 191 | def preprocess_for_eval(image_bytes, use_bfloat16, image_size=IMAGE_SIZE): 192 | """Preprocesses the given image for evaluation. 193 | 194 | Args: 195 | image_bytes: `Tensor` representing an image binary of arbitrary size. 196 | use_bfloat16: `bool` for whether to use bfloat16. 197 | image_size: image size. 198 | 199 | Returns: 200 | A preprocessed image `Tensor`. 201 | """ 202 | image = _decode_and_center_crop(image_bytes, image_size) 203 | image = tf.reshape(image, [image_size, image_size, 3]) 204 | image = tf.image.convert_image_dtype( 205 | image, dtype=tf.bfloat16 if use_bfloat16 else tf.float32) 206 | return image 207 | 208 | 209 | def preprocess_image(image_bytes, 210 | is_training=False, 211 | use_bfloat16=False, 212 | image_size=IMAGE_SIZE, 213 | augment_name=None, 214 | randaug_num_layers=None, 215 | randaug_magnitude=None): 216 | """Preprocesses the given image. 217 | 218 | Args: 219 | image_bytes: `Tensor` representing an image binary of arbitrary size. 220 | is_training: `bool` for whether the preprocessing is for training. 221 | use_bfloat16: `bool` for whether to use bfloat16. 222 | image_size: image size. 223 | augment_name: `string` that is the name of the augmentation method 224 | to apply to the image. `autoaugment` if AutoAugment is to be used or 225 | `randaugment` if RandAugment is to be used. If the value is `None` no 226 | augmentation method will be applied applied. See autoaugment.py for more 227 | details. 228 | randaug_num_layers: 'int', if RandAug is used, what should the number of 229 | layers be. See autoaugment.py for detailed description. 230 | randaug_magnitude: 'int', if RandAug is used, what should the magnitude 231 | be. See autoaugment.py for detailed description. 232 | 233 | Returns: 234 | A preprocessed image `Tensor` with value range of [0, 255]. 235 | """ 236 | if is_training: 237 | return preprocess_for_train( 238 | image_bytes, use_bfloat16, image_size, augment_name, 239 | randaug_num_layers, randaug_magnitude) 240 | else: 241 | return preprocess_for_eval(image_bytes, use_bfloat16, image_size) 242 | -------------------------------------------------------------------------------- /tf_to_pytorch/convert_tf_to_pt/original_tf/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. 
All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Model utilities.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import json 22 | import os 23 | import sys 24 | 25 | from absl import logging 26 | import numpy as np 27 | import tensorflow.compat.v1 as tf 28 | 29 | from tensorflow.python.tpu import tpu_function # pylint:disable=g-direct-tensorflow-import 30 | 31 | 32 | def build_learning_rate(initial_lr, 33 | global_step, 34 | steps_per_epoch=None, 35 | lr_decay_type='exponential', 36 | decay_factor=0.97, 37 | decay_epochs=2.4, 38 | total_steps=None, 39 | warmup_epochs=5): 40 | """Build learning rate.""" 41 | if lr_decay_type == 'exponential': 42 | assert steps_per_epoch is not None 43 | decay_steps = steps_per_epoch * decay_epochs 44 | lr = tf.train.exponential_decay( 45 | initial_lr, global_step, decay_steps, decay_factor, staircase=True) 46 | elif lr_decay_type == 'cosine': 47 | assert total_steps is not None 48 | lr = 0.5 * initial_lr * ( 49 | 1 + tf.cos(np.pi * tf.cast(global_step, tf.float32) / total_steps)) 50 | elif lr_decay_type == 'constant': 51 | lr = initial_lr 52 | else: 53 | assert False, 'Unknown lr_decay_type : %s' % lr_decay_type 54 | 55 | if warmup_epochs: 56 | logging.info('Learning rate warmup_epochs: %d', warmup_epochs) 57 | warmup_steps = int(warmup_epochs * steps_per_epoch) 58 | warmup_lr = ( 59 | initial_lr * tf.cast(global_step, tf.float32) / tf.cast( 60 | warmup_steps, tf.float32)) 61 | lr = tf.cond(global_step < warmup_steps, lambda: warmup_lr, lambda: lr) 62 | 63 | return lr 64 | 65 | 66 | def build_optimizer(learning_rate, 67 | optimizer_name='rmsprop', 68 | decay=0.9, 69 | epsilon=0.001, 70 | momentum=0.9): 71 | """Build optimizer.""" 72 | if optimizer_name == 'sgd': 73 | logging.info('Using SGD optimizer') 74 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate) 75 | elif optimizer_name == 'momentum': 76 | logging.info('Using Momentum optimizer') 77 | optimizer = tf.train.MomentumOptimizer( 78 | learning_rate=learning_rate, momentum=momentum) 79 | elif optimizer_name == 'rmsprop': 80 | logging.info('Using RMSProp optimizer') 81 | optimizer = tf.train.RMSPropOptimizer(learning_rate, decay, momentum, 82 | epsilon) 83 | else: 84 | logging.fatal('Unknown optimizer: %s', optimizer_name) 85 | 86 | return optimizer 87 | 88 | 89 | class TpuBatchNormalization(tf.layers.BatchNormalization): 90 | # class TpuBatchNormalization(tf.layers.BatchNormalization): 91 | """Cross replica batch normalization.""" 92 | 93 | def __init__(self, fused=False, **kwargs): 94 | if fused in (True, None): 95 | raise ValueError('TpuBatchNormalization does not support fused=True.') 96 | super(TpuBatchNormalization, self).__init__(fused=fused, **kwargs) 97 | 98 | def _cross_replica_average(self, t, num_shards_per_group): 99 | 
"""Calculates the average value of input tensor across TPU replicas.""" 100 | num_shards = tpu_function.get_tpu_context().number_of_shards 101 | group_assignment = None 102 | if num_shards_per_group > 1: 103 | if num_shards % num_shards_per_group != 0: 104 | raise ValueError('num_shards: %d mod shards_per_group: %d, should be 0' 105 | % (num_shards, num_shards_per_group)) 106 | num_groups = num_shards // num_shards_per_group 107 | group_assignment = [[ 108 | x for x in range(num_shards) if x // num_shards_per_group == y 109 | ] for y in range(num_groups)] 110 | return tf.tpu.cross_replica_sum(t, group_assignment) / tf.cast( 111 | num_shards_per_group, t.dtype) 112 | 113 | def _moments(self, inputs, reduction_axes, keep_dims): 114 | """Compute the mean and variance: it overrides the original _moments.""" 115 | shard_mean, shard_variance = super(TpuBatchNormalization, self)._moments( 116 | inputs, reduction_axes, keep_dims=keep_dims) 117 | 118 | num_shards = tpu_function.get_tpu_context().number_of_shards or 1 119 | if num_shards <= 8: # Skip cross_replica for 2x2 or smaller slices. 120 | num_shards_per_group = 1 121 | else: 122 | num_shards_per_group = max(8, num_shards // 8) 123 | logging.info('TpuBatchNormalization with num_shards_per_group %s', 124 | num_shards_per_group) 125 | if num_shards_per_group > 1: 126 | # Compute variance using: Var[X]= E[X^2] - E[X]^2. 127 | shard_square_of_mean = tf.math.square(shard_mean) 128 | shard_mean_of_square = shard_variance + shard_square_of_mean 129 | group_mean = self._cross_replica_average( 130 | shard_mean, num_shards_per_group) 131 | group_mean_of_square = self._cross_replica_average( 132 | shard_mean_of_square, num_shards_per_group) 133 | group_variance = group_mean_of_square - tf.math.square(group_mean) 134 | return (group_mean, group_variance) 135 | else: 136 | return (shard_mean, shard_variance) 137 | 138 | 139 | class BatchNormalization(tf.layers.BatchNormalization): 140 | """Fixed default name of BatchNormalization to match TpuBatchNormalization.""" 141 | 142 | def __init__(self, name='tpu_batch_normalization', **kwargs): 143 | super(BatchNormalization, self).__init__(name=name, **kwargs) 144 | 145 | 146 | def drop_connect(inputs, is_training, survival_prob): 147 | """Drop the entire conv with given survival probability.""" 148 | # "Deep Networks with Stochastic Depth", https://arxiv.org/pdf/1603.09382.pdf 149 | if not is_training: 150 | return inputs 151 | 152 | # Compute tensor. 153 | batch_size = tf.shape(inputs)[0] 154 | random_tensor = survival_prob 155 | random_tensor += tf.random_uniform([batch_size, 1, 1, 1], dtype=inputs.dtype) 156 | binary_tensor = tf.floor(random_tensor) 157 | # Unlike conventional way that multiply survival_prob at test time, here we 158 | # divide survival_prob at training time, such that no addition compute is 159 | # needed at test time. 
160 | output = tf.div(inputs, survival_prob) * binary_tensor 161 | return output 162 | 163 | 164 | def archive_ckpt(ckpt_eval, ckpt_objective, ckpt_path): 165 | """Archive a checkpoint if the metric is better.""" 166 | ckpt_dir, ckpt_name = os.path.split(ckpt_path) 167 | 168 | saved_objective_path = os.path.join(ckpt_dir, 'best_objective.txt') 169 | saved_objective = float('-inf') 170 | if tf.gfile.Exists(saved_objective_path): 171 | with tf.gfile.GFile(saved_objective_path, 'r') as f: 172 | saved_objective = float(f.read()) 173 | if saved_objective > ckpt_objective: 174 | logging.info('Ckpt %s is worse than %s', ckpt_objective, saved_objective) 175 | return False 176 | 177 | filenames = tf.gfile.Glob(ckpt_path + '.*') 178 | if filenames is None: 179 | logging.info('No files to copy for checkpoint %s', ckpt_path) 180 | return False 181 | 182 | # Clear the old folder. 183 | dst_dir = os.path.join(ckpt_dir, 'archive') 184 | if tf.gfile.Exists(dst_dir): 185 | tf.gfile.DeleteRecursively(dst_dir) 186 | tf.gfile.MakeDirs(dst_dir) 187 | 188 | # Write checkpoints. 189 | for f in filenames: 190 | dest = os.path.join(dst_dir, os.path.basename(f)) 191 | tf.gfile.Copy(f, dest, overwrite=True) 192 | ckpt_state = tf.train.generate_checkpoint_state_proto( 193 | dst_dir, 194 | model_checkpoint_path=ckpt_name, 195 | all_model_checkpoint_paths=[ckpt_name]) 196 | with tf.gfile.GFile(os.path.join(dst_dir, 'checkpoint'), 'w') as f: 197 | f.write(str(ckpt_state)) 198 | with tf.gfile.GFile(os.path.join(dst_dir, 'best_eval.txt'), 'w') as f: 199 | f.write('%s' % ckpt_eval) 200 | 201 | # Update the best objective. 202 | with tf.gfile.GFile(saved_objective_path, 'w') as f: 203 | f.write('%f' % ckpt_objective) 204 | 205 | logging.info('Copying checkpoint %s to %s', ckpt_path, dst_dir) 206 | return True 207 | 208 | 209 | def get_ema_vars(): 210 | """Get all exponential moving average (ema) variables.""" 211 | ema_vars = tf.trainable_variables() + tf.get_collection('moving_vars') 212 | for v in tf.global_variables(): 213 | # We maintain mva for batch norm moving mean and variance as well. 214 | if 'moving_mean' in v.name or 'moving_variance' in v.name: 215 | ema_vars.append(v) 216 | return list(set(ema_vars)) 217 | 218 | 219 | class DepthwiseConv2D(tf.keras.layers.DepthwiseConv2D, tf.layers.Layer): 220 | """Wrap keras DepthwiseConv2D to tf.layers.""" 221 | 222 | pass 223 | 224 | 225 | class EvalCkptDriver(object): 226 | """A driver for running eval inference. 227 | 228 | Attributes: 229 | model_name: str. Model name to eval. 230 | batch_size: int. Eval batch size. 231 | image_size: int. Input image size, determined by model name. 232 | num_classes: int. Number of classes, default to 1000 for ImageNet. 233 | include_background_label: whether to include extra background label. 
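  A concrete driver is expected to subclass this class: build_model() should
  return class probabilities for the input batch and get_preprocess_fn()
  should return a preprocessing function; both stubs below raise ValueError
  until overridden.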
234 |   """
235 | 
236 |   def __init__(self,
237 |                model_name,
238 |                batch_size=1,
239 |                image_size=224,
240 |                num_classes=1000,
241 |                include_background_label=False):
242 |     """Initialize internal variables."""
243 |     self.model_name = model_name
244 |     self.batch_size = batch_size
245 |     self.num_classes = num_classes
246 |     self.include_background_label = include_background_label
247 |     self.image_size = image_size
248 | 
249 |   def restore_model(self, sess, ckpt_dir, enable_ema=True, export_ckpt=None):
250 |     """Restore variables from checkpoint dir."""
251 |     sess.run(tf.global_variables_initializer())
252 |     checkpoint = tf.train.latest_checkpoint(ckpt_dir)
253 |     if enable_ema:
254 |       ema = tf.train.ExponentialMovingAverage(decay=0.0)
255 |       ema_vars = get_ema_vars()
256 |       var_dict = ema.variables_to_restore(ema_vars)
257 |       ema_assign_op = ema.apply(ema_vars)
258 |     else:
259 |       var_dict = get_ema_vars()
260 |       ema_assign_op = None
261 | 
262 |     tf.train.get_or_create_global_step()
263 |     sess.run(tf.global_variables_initializer())
264 |     saver = tf.train.Saver(var_dict, max_to_keep=1)
265 |     saver.restore(sess, checkpoint)
266 | 
267 |     if export_ckpt:
268 |       if ema_assign_op is not None:
269 |         sess.run(ema_assign_op)
270 |       saver = tf.train.Saver(max_to_keep=1, save_relative_paths=True)
271 |       saver.save(sess, export_ckpt)
272 | 
273 |   def build_model(self, features, is_training):
274 |     """Build model with input features."""
275 |     del features, is_training
276 |     raise ValueError('Must be implemented by subclasses.')
277 | 
278 |   def get_preprocess_fn(self):
279 |     raise ValueError('Must be implemented by subclasses.')
280 | 
281 |   def build_dataset(self, filenames, labels, is_training):
282 |     """Build input dataset."""
283 |     batch_drop_remainder = False
284 |     if 'condconv' in self.model_name and not is_training:
285 |       # CondConv layers can only be called with known batch dimension. Thus, we
286 |       # must drop all remaining examples that do not make up one full batch.
287 |       # To ensure all examples are evaluated, use a batch size that evenly
288 |       # divides the number of files.
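      # For example, the 50000-image ImageNet validation set is evenly divided
      # by a batch size of 8 or 10; with the default batch size of 1 nothing
      # is ever dropped.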
289 | batch_drop_remainder = True 290 | num_files = len(filenames) 291 | if num_files % self.batch_size != 0: 292 | tf.logging.warn('Remaining examples in last batch are not being ' 293 | 'evaluated.') 294 | filenames = tf.constant(filenames) 295 | labels = tf.constant(labels) 296 | dataset = tf.data.Dataset.from_tensor_slices((filenames, labels)) 297 | 298 | def _parse_function(filename, label): 299 | image_string = tf.read_file(filename) 300 | preprocess_fn = self.get_preprocess_fn() 301 | image_decoded = preprocess_fn( 302 | image_string, is_training, image_size=self.image_size) 303 | image = tf.cast(image_decoded, tf.float32) 304 | return image, label 305 | 306 | dataset = dataset.map(_parse_function) 307 | dataset = dataset.batch(self.batch_size, 308 | drop_remainder=batch_drop_remainder) 309 | 310 | iterator = dataset.make_one_shot_iterator() 311 | images, labels = iterator.get_next() 312 | return images, labels 313 | 314 | def run_inference(self, 315 | ckpt_dir, 316 | image_files, 317 | labels, 318 | enable_ema=True, 319 | export_ckpt=None): 320 | """Build and run inference on the target images and labels.""" 321 | label_offset = 1 if self.include_background_label else 0 322 | with tf.Graph().as_default(), tf.Session() as sess: 323 | images, labels = self.build_dataset(image_files, labels, False) 324 | probs = self.build_model(images, is_training=False) 325 | if isinstance(probs, tuple): 326 | probs = probs[0] 327 | 328 | self.restore_model(sess, ckpt_dir, enable_ema, export_ckpt) 329 | 330 | prediction_idx = [] 331 | prediction_prob = [] 332 | for _ in range(len(image_files) // self.batch_size): 333 | out_probs = sess.run(probs) 334 | idx = np.argsort(out_probs)[::-1] 335 | prediction_idx.append(idx[:5] - label_offset) 336 | prediction_prob.append([out_probs[pid] for pid in idx[:5]]) 337 | 338 | # Return the top 5 predictions (idx and prob) for each image. 339 | return prediction_idx, prediction_prob 340 | 341 | def eval_example_images(self, 342 | ckpt_dir, 343 | image_files, 344 | labels_map_file, 345 | enable_ema=True, 346 | export_ckpt=None): 347 | """Eval a list of example images. 348 | 349 | Args: 350 | ckpt_dir: str. Checkpoint directory path. 351 | image_files: List[str]. A list of image file paths. 352 | labels_map_file: str. The labels map file path. 353 | enable_ema: enable expotential moving average. 354 | export_ckpt: export ckpt folder. 355 | 356 | Returns: 357 | A tuple (pred_idx, and pred_prob), where pred_idx is the top 5 prediction 358 | index and pred_prob is the top 5 prediction probability. 359 | """ 360 | classes = json.loads(tf.gfile.Open(labels_map_file).read()) 361 | pred_idx, pred_prob = self.run_inference( 362 | ckpt_dir, image_files, [0] * len(image_files), enable_ema, export_ckpt) 363 | for i in range(len(image_files)): 364 | print('predicted class for image {}: '.format(image_files[i])) 365 | for j, idx in enumerate(pred_idx[i]): 366 | print(' -> top_{} ({:4.2f}%): {} '.format(j, pred_prob[i][j] * 100, 367 | classes[str(idx)])) 368 | return pred_idx, pred_prob 369 | 370 | def eval_imagenet(self, ckpt_dir, imagenet_eval_glob, 371 | imagenet_eval_label, num_images, enable_ema, export_ckpt): 372 | """Eval ImageNet images and report top1/top5 accuracy. 373 | 374 | Args: 375 | ckpt_dir: str. Checkpoint directory path. 376 | imagenet_eval_glob: str. File path glob for all eval images. 377 | imagenet_eval_label: str. File path for eval label. 378 | num_images: int. Number of images to eval: -1 means eval the whole 379 | dataset. 
380 | enable_ema: enable expotential moving average. 381 | export_ckpt: export checkpoint folder. 382 | 383 | Returns: 384 | A tuple (top1, top5) for top1 and top5 accuracy. 385 | """ 386 | imagenet_val_labels = [int(i) for i in tf.gfile.GFile(imagenet_eval_label)] 387 | imagenet_filenames = sorted(tf.gfile.Glob(imagenet_eval_glob)) 388 | if num_images < 0: 389 | num_images = len(imagenet_filenames) 390 | image_files = imagenet_filenames[:num_images] 391 | labels = imagenet_val_labels[:num_images] 392 | 393 | pred_idx, _ = self.run_inference( 394 | ckpt_dir, image_files, labels, enable_ema, export_ckpt) 395 | top1_cnt, top5_cnt = 0.0, 0.0 396 | for i, label in enumerate(labels): 397 | top1_cnt += label in pred_idx[i][:1] 398 | top5_cnt += label in pred_idx[i][:5] 399 | if i % 100 == 0: 400 | print('Step {}: top1_acc = {:4.2f}% top5_acc = {:4.2f}%'.format( 401 | i, 100 * top1_cnt / (i + 1), 100 * top5_cnt / (i + 1))) 402 | sys.stdout.flush() 403 | top1, top5 = 100 * top1_cnt / num_images, 100 * top5_cnt / num_images 404 | print('Final: top1_acc = {:4.2f}% top5_acc = {:4.2f}%'.format(top1, top5)) 405 | return top1, top5 406 | -------------------------------------------------------------------------------- /tf_to_pytorch/convert_tf_to_pt/rename.sh: -------------------------------------------------------------------------------- 1 | for i in 0 1 2 3 4 5 6 7 8 2 | do 3 | X=$(sha256sum efficientnet-b${i}.pth | head -c 8) 4 | mv efficientnet-b${i}.pth efficientnet-b${i}-${X}.pth 5 | done 6 | -------------------------------------------------------------------------------- /tf_to_pytorch/convert_tf_to_pt/run.sh: -------------------------------------------------------------------------------- 1 | python ../convert_tf_to_pt/load_tf_weights.py --model_name efficientnet-b0 --tf_checkpoint ../pretrained_tensorflow/efficientnet-b0/ --output_file ../pretrained_pytorch/efficientnet-b0.pth 2 | 3 | # python ../convert_tf_to_pt/load_tf_weights.py --model_name efficientnet-b1 --tf_checkpoint ../pretrained_tensorflow/efficientnet-b1/ --output_file ../pretrained_pytorch/efficientnet-b1.pth 4 | 5 | # python ../convert_tf_to_pt/load_tf_weights.py --model_name efficientnet-b2 --tf_checkpoint ../pretrained_tensorflow/efficientnet-b2/ --output_file ../pretrained_pytorch/efficientnet-b2.pth 6 | 7 | # python ../convert_tf_to_pt/load_tf_weights.py --model_name efficientnet-b3 --tf_checkpoint ../pretrained_tensorflow/efficientnet-b3/ --output_file ../pretrained_pytorch/efficientnet-b3.pth 8 | 9 | # python ../convert_tf_to_pt/load_tf_weights.py --model_name efficientnet-b4 --tf_checkpoint ../pretrained_tensorflow/efficientnet-b4/ --output_file ../pretrained_pytorch/efficientnet-b4.pth 10 | 11 | # python ../convert_tf_to_pt/load_tf_weights.py --model_name efficientnet-b5 --tf_checkpoint ../pretrained_tensorflow/efficientnet-b5/ --output_file ../pretrained_pytorch/efficientnet-b5.pth 12 | 13 | # python ../convert_tf_to_pt/load_tf_weights.py --model_name efficientnet-b6 --tf_checkpoint ../pretrained_tensorflow/efficientnet-b6/ --output_file ../pretrained_pytorch/efficientnet-b6.pth 14 | 15 | # python ../convert_tf_to_pt/load_tf_weights.py --model_name efficientnet-b7 --tf_checkpoint ../pretrained_tensorflow/efficientnet-b7/ --output_file ../pretrained_pytorch/efficientnet-b7.pth 16 | 17 | # python ../convert_tf_to_pt/load_tf_weights.py --model_name efficientnet-b8 --tf_checkpoint ../pretrained_tensorflow/efficientnet-b8/ --output_file ../pretrained_pytorch/efficientnet-b8.pth 18 | 
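# The commented-out commands above convert the remaining models; they assume
# the matching TensorFlow checkpoints have already been fetched and that the
# output directory exists, e.g. (a sketch, paths relative to tf_to_pytorch/):
#
#   (cd pretrained_tensorflow && bash download.sh efficientnet-b1)
#   mkdir -p pretrained_pytorch
#
# After conversion, rename.sh can be run next to the generated .pth files to
# append the first 8 hex characters of each file's sha256 to its name.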
--------------------------------------------------------------------------------
/tf_to_pytorch/pretrained_tensorflow/download.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | 
4 | # This script accepts a single command-line argument specifying which model to download.
5 | # The name must match one of the released EfficientNet AdvProp checkpoints (e.g. efficientnet-b0).
6 | 
7 | # For example, to download efficientnet-b0, run:
8 | # ./download.sh efficientnet-b0
9 | # And to download efficientnet-b3, run:
10 | # ./download.sh efficientnet-b3
11 | 
12 | MODEL=$1
13 | wget https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/advprop/${MODEL}.tar.gz
14 | tar xvf ${MODEL}.tar.gz
15 | rm ${MODEL}.tar.gz
16 | 
--------------------------------------------------------------------------------