├── .gitignore ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── SECURITY.md ├── examples ├── CIFAR10_Classification_transfer_Learning.ipynb └── CIFAR10_classification.ipynb └── microsoftvision ├── __init__.py ├── models ├── __init__.py ├── resnext.py └── utils.py └── version.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 
4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Microsoft Vision 2 | 3 | 4 | ## Installation 5 | ``pip install microsoftvision`` 6 | 7 | 8 | ## Usage 9 | Input images should be in BGR format of shape (3 x H x W), where H and W are expected to be at least 224. 10 | The images have to be loaded into a range of [0, 1] and then normalized using mean = [0.485, 0.456, 0.406] and std = [0.229, 0.224, 0.225]. 11 | 12 | Example script: 13 | ``` 14 | import microsoftvision 15 | import torch 16 | 17 | # This will load the pretrained model 18 | model = microsoftvision.models.resnet50(pretrained=True) 19 | 20 | # Load the model into CPU memory; the interface is the same as torchvision's 21 | model = microsoftvision.models.resnet50(pretrained=True, map_location=torch.device('cpu')) 22 | ``` 23 | 24 | Example of creating image embeddings: 25 | ``` 26 | import microsoftvision 27 | from torchvision import transforms 28 | import torch 29 | import cv2 30 | 31 | def get_image(): 32 | img = cv2.imread('example.jpg', cv2.IMREAD_COLOR) 33 | preprocess = transforms.Compose([ 34 | transforms.ToPILImage(), 35 | transforms.Resize(224), 36 | transforms.CenterCrop(224), 37 | transforms.ToTensor(), 38 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 39 | ]) 40 | return preprocess(img).unsqueeze(0) # unsqueeze adds the batch dimension for a single image 41 | 42 | model = microsoftvision.models.resnet50(pretrained=True) 43 | features = model(get_image()) 44 | print(features.shape) 45 | ``` 46 | This should output 47 | ``` 48 | ... 49 | torch.Size([1, 2048]) 50 | ``` 51 | 52 | 53 | 54 | ## Contributing 55 | 56 | This project welcomes contributions and suggestions.
Most contributions require you to agree to a 57 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 58 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 59 | 60 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 61 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 62 | provided by the bot. You will only need to do this once across all repos using our CLA. 63 | 64 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 65 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 66 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 67 | 68 | ## Trademarks 69 | 70 | This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft 71 | trademarks or logos is subject to and must follow 72 | [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). 73 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. 74 | Any use of third-party trademarks or logos is subject to those third parties' policies. 75 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 40 | 41 | -------------------------------------------------------------------------------- /examples/CIFAR10_Classification_transfer_Learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "CIFAR10_Classification_transfer_Learning.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | }, 17 | "accelerator": "GPU", 18 | "widgets": { 19 | "application/vnd.jupyter.widget-state+json": { 20 | "bd820309094f48b38f473a235ef5a93d": { 21 | "model_module": "@jupyter-widgets/controls", 22 | "model_name": "HBoxModel", 23 | "state": { 24 | "_view_name": "HBoxView", 25 | "_dom_classes": [], 26 | "_model_name": "HBoxModel", 27 | "_view_module": "@jupyter-widgets/controls", 28 | "_model_module_version": "1.5.0", 29 | "_view_count": null, 30 | "_view_module_version": "1.5.0", 31 | "box_style": "", 32 | "layout": "IPY_MODEL_3a7877ed8e174bf38de1d4ca2ee29e4c", 33 | "_model_module": "@jupyter-widgets/controls", 34 | "children": [ 35 | "IPY_MODEL_1f0f20c8720c4dcd9917057ff1c7dc3f", 36 | "IPY_MODEL_47e1b2f348aa4a2c9d039706260d5781" 37 | ] 38 | } 39 | }, 40 | "3a7877ed8e174bf38de1d4ca2ee29e4c": { 41 | "model_module": "@jupyter-widgets/base", 42 | "model_name": "LayoutModel", 43 | "state": { 44 | "_view_name": "LayoutView", 45 | "grid_template_rows": null, 46 | "right": null, 47 | "justify_content": null, 48 | "_view_module": "@jupyter-widgets/base", 49 | "overflow": null, 50 | "_model_module_version": "1.2.0", 51 | "_view_count": null, 52 | "flex_flow": null, 53 | "width": null, 54 | "min_width": null, 55 | "border": null, 56 | "align_items": null, 57 | "bottom": null, 58 | "_model_module": "@jupyter-widgets/base", 59 | "top": null, 60 | "grid_column": null, 61 | "overflow_y": null, 62 | "overflow_x": null, 63 | "grid_auto_flow": null, 64 | "grid_area": null, 65 | "grid_template_columns": null, 66 | "flex": null, 67 | "_model_name": "LayoutModel", 68 | "justify_items": null, 69 | "grid_row": null, 70 | "max_height": null, 71 | "align_content": null, 72 | "visibility": null, 73 | "align_self": null, 74 | "height": null, 75 | "min_height": null, 76 | "padding": null, 77 | "grid_auto_rows": null, 78 | "grid_gap": null, 79 | "max_width": null, 80 | "order": null, 81 | "_view_module_version": "1.2.0", 82 | "grid_template_areas": null, 83 | "object_position": null, 84 | "object_fit": null, 85 | 
"grid_auto_columns": null, 86 | "margin": null, 87 | "display": null, 88 | "left": null 89 | } 90 | }, 91 | "1f0f20c8720c4dcd9917057ff1c7dc3f": { 92 | "model_module": "@jupyter-widgets/controls", 93 | "model_name": "FloatProgressModel", 94 | "state": { 95 | "_view_name": "ProgressView", 96 | "style": "IPY_MODEL_b2ad45b530124e2f81a4fcb8f60d6893", 97 | "_dom_classes": [], 98 | "description": "", 99 | "_model_name": "FloatProgressModel", 100 | "bar_style": "success", 101 | "max": 170498071, 102 | "_view_module": "@jupyter-widgets/controls", 103 | "_model_module_version": "1.5.0", 104 | "value": 170498071, 105 | "_view_count": null, 106 | "_view_module_version": "1.5.0", 107 | "orientation": "horizontal", 108 | "min": 0, 109 | "description_tooltip": null, 110 | "_model_module": "@jupyter-widgets/controls", 111 | "layout": "IPY_MODEL_11a0f5cf47914b839188653402d94651" 112 | } 113 | }, 114 | "47e1b2f348aa4a2c9d039706260d5781": { 115 | "model_module": "@jupyter-widgets/controls", 116 | "model_name": "HTMLModel", 117 | "state": { 118 | "_view_name": "HTMLView", 119 | "style": "IPY_MODEL_36dc7a4e4687495e8430f27749e099a3", 120 | "_dom_classes": [], 121 | "description": "", 122 | "_model_name": "HTMLModel", 123 | "placeholder": "​", 124 | "_view_module": "@jupyter-widgets/controls", 125 | "_model_module_version": "1.5.0", 126 | "value": " 170499072/? [00:05<00:00, 29465459.73it/s]", 127 | "_view_count": null, 128 | "_view_module_version": "1.5.0", 129 | "description_tooltip": null, 130 | "_model_module": "@jupyter-widgets/controls", 131 | "layout": "IPY_MODEL_8dcb70e7032941cf9e7f2bcefe775486" 132 | } 133 | }, 134 | "b2ad45b530124e2f81a4fcb8f60d6893": { 135 | "model_module": "@jupyter-widgets/controls", 136 | "model_name": "ProgressStyleModel", 137 | "state": { 138 | "_view_name": "StyleView", 139 | "_model_name": "ProgressStyleModel", 140 | "description_width": "initial", 141 | "_view_module": "@jupyter-widgets/base", 142 | "_model_module_version": "1.5.0", 143 | "_view_count": null, 144 | "_view_module_version": "1.2.0", 145 | "bar_color": null, 146 | "_model_module": "@jupyter-widgets/controls" 147 | } 148 | }, 149 | "11a0f5cf47914b839188653402d94651": { 150 | "model_module": "@jupyter-widgets/base", 151 | "model_name": "LayoutModel", 152 | "state": { 153 | "_view_name": "LayoutView", 154 | "grid_template_rows": null, 155 | "right": null, 156 | "justify_content": null, 157 | "_view_module": "@jupyter-widgets/base", 158 | "overflow": null, 159 | "_model_module_version": "1.2.0", 160 | "_view_count": null, 161 | "flex_flow": null, 162 | "width": null, 163 | "min_width": null, 164 | "border": null, 165 | "align_items": null, 166 | "bottom": null, 167 | "_model_module": "@jupyter-widgets/base", 168 | "top": null, 169 | "grid_column": null, 170 | "overflow_y": null, 171 | "overflow_x": null, 172 | "grid_auto_flow": null, 173 | "grid_area": null, 174 | "grid_template_columns": null, 175 | "flex": null, 176 | "_model_name": "LayoutModel", 177 | "justify_items": null, 178 | "grid_row": null, 179 | "max_height": null, 180 | "align_content": null, 181 | "visibility": null, 182 | "align_self": null, 183 | "height": null, 184 | "min_height": null, 185 | "padding": null, 186 | "grid_auto_rows": null, 187 | "grid_gap": null, 188 | "max_width": null, 189 | "order": null, 190 | "_view_module_version": "1.2.0", 191 | "grid_template_areas": null, 192 | "object_position": null, 193 | "object_fit": null, 194 | "grid_auto_columns": null, 195 | "margin": null, 196 | "display": null, 197 | "left": null 198 | } 199 | }, 
200 | "36dc7a4e4687495e8430f27749e099a3": { 201 | "model_module": "@jupyter-widgets/controls", 202 | "model_name": "DescriptionStyleModel", 203 | "state": { 204 | "_view_name": "StyleView", 205 | "_model_name": "DescriptionStyleModel", 206 | "description_width": "", 207 | "_view_module": "@jupyter-widgets/base", 208 | "_model_module_version": "1.5.0", 209 | "_view_count": null, 210 | "_view_module_version": "1.2.0", 211 | "_model_module": "@jupyter-widgets/controls" 212 | } 213 | }, 214 | "8dcb70e7032941cf9e7f2bcefe775486": { 215 | "model_module": "@jupyter-widgets/base", 216 | "model_name": "LayoutModel", 217 | "state": { 218 | "_view_name": "LayoutView", 219 | "grid_template_rows": null, 220 | "right": null, 221 | "justify_content": null, 222 | "_view_module": "@jupyter-widgets/base", 223 | "overflow": null, 224 | "_model_module_version": "1.2.0", 225 | "_view_count": null, 226 | "flex_flow": null, 227 | "width": null, 228 | "min_width": null, 229 | "border": null, 230 | "align_items": null, 231 | "bottom": null, 232 | "_model_module": "@jupyter-widgets/base", 233 | "top": null, 234 | "grid_column": null, 235 | "overflow_y": null, 236 | "overflow_x": null, 237 | "grid_auto_flow": null, 238 | "grid_area": null, 239 | "grid_template_columns": null, 240 | "flex": null, 241 | "_model_name": "LayoutModel", 242 | "justify_items": null, 243 | "grid_row": null, 244 | "max_height": null, 245 | "align_content": null, 246 | "visibility": null, 247 | "align_self": null, 248 | "height": null, 249 | "min_height": null, 250 | "padding": null, 251 | "grid_auto_rows": null, 252 | "grid_gap": null, 253 | "max_width": null, 254 | "order": null, 255 | "_view_module_version": "1.2.0", 256 | "grid_template_areas": null, 257 | "object_position": null, 258 | "object_fit": null, 259 | "grid_auto_columns": null, 260 | "margin": null, 261 | "display": null, 262 | "left": null 263 | } 264 | } 265 | } 266 | } 267 | }, 268 | "cells": [ 269 | { 270 | "cell_type": "markdown", 271 | "metadata": { 272 | "id": "RWSeWEwL0yo5" 273 | }, 274 | "source": [ 275 | "# **Microsoft Vision classification example**\n", 276 | "\n", 277 | "This example shows a simple way to use Microsoft vision with PyTorch for transfer learning and feature extraction from the Microsoft Vision model.\n", 278 | "\n", 279 | "This shows an example to plug-in a fully connected Neural network on top of the vision model which provides features from the data.." 280 | ] 281 | }, 282 | { 283 | "cell_type": "markdown", 284 | "metadata": { 285 | "id": "Xr1xFVth2niR" 286 | }, 287 | "source": [ 288 | "Reading the necessary imports." 
289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "metadata": { 294 | "id": "7IKQz4Rjc2RU" 295 | }, 296 | "source": [ 297 | "import time\n", 298 | "import torch\n", 299 | "import numpy as np\n", 300 | "import torch.nn as nn\n", 301 | "from torch import Tensor\n", 302 | "import torch.optim as optim\n", 303 | "import torch.nn.functional as F\n", 304 | "from torch.utils.data import DataLoader,TensorDataset\n", 305 | "from torchvision.datasets import CIFAR10\n", 306 | "import torchvision.transforms as transforms\n", 307 | "from progressbar import progressbar" 308 | ], 309 | "execution_count": 1, 310 | "outputs": [] 311 | }, 312 | { 313 | "cell_type": "markdown", 314 | "metadata": { 315 | "id": "JbdB_EU_2uSK" 316 | }, 317 | "source": [ 318 | "## Install the Microsoft Vision" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "metadata": { 324 | "colab": { 325 | "base_uri": "https://localhost:8080/" 326 | }, 327 | "id": "IIDSsFp7olYB", 328 | "outputId": "126ec78e-56af-4cde-a0ef-eb451ee54115" 329 | }, 330 | "source": [ 331 | "!pip install microsoftvision\n", 332 | "\n", 333 | "import microsoftvision" 334 | ], 335 | "execution_count": 2, 336 | "outputs": [ 337 | { 338 | "output_type": "stream", 339 | "text": [ 340 | "Collecting microsoftvision\n", 341 | " Downloading https://files.pythonhosted.org/packages/71/db/65a4aebd1eac4c5920ac5fcf7c964f9834675b129ef82871435ea902b393/microsoftvision-1.0.5-py3-none-any.whl\n", 342 | "Requirement already satisfied: torch>=1.2.0 in /usr/local/lib/python3.7/dist-packages (from microsoftvision) (1.8.0+cu101)\n", 343 | "Collecting azure-identity\n", 344 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/2a/35/64e29615e7709c10c4f1d4310a8c13a6770142e9fcb9358fb8fa4d9b1578/azure_identity-1.5.0-py2.py3-none-any.whl (103kB)\n", 345 | "\u001b[K |████████████████████████████████| 112kB 22.0MB/s \n", 346 | "\u001b[?25hRequirement already satisfied: tqdm in /usr/local/lib/python3.7/dist-packages (from microsoftvision) (4.41.1)\n", 347 | "Collecting azure-storage-blob\n", 348 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/09/14/4ca417a9c92b0fb93516575dd7be9b058bf13d531dcc21239b5f8f216a69/azure_storage_blob-12.8.0-py2.py3-none-any.whl (341kB)\n", 349 | "\u001b[K |████████████████████████████████| 348kB 14.4MB/s \n", 350 | "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from torch>=1.2.0->microsoftvision) (1.19.5)\n", 351 | "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from torch>=1.2.0->microsoftvision) (3.7.4.3)\n", 352 | "Collecting msal<2.0.0,>=1.6.0\n", 353 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/e6/69/83ffc3004a19140a3c5d7151d7f79c280ac1b40a425fe5308b879eefcf25/msal-1.10.0-py2.py3-none-any.whl (60kB)\n", 354 | "\u001b[K |████████████████████████████████| 61kB 6.9MB/s \n", 355 | "\u001b[?25hRequirement already satisfied: six>=1.6 in /usr/local/lib/python3.7/dist-packages (from azure-identity->microsoftvision) (1.15.0)\n", 356 | "Collecting azure-core<2.0.0,>=1.0.0\n", 357 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/3d/6d/95ca41cace692206529bddae67ee0a079e97f250951f28a213eea16d5050/azure_core-1.12.0-py2.py3-none-any.whl (130kB)\n", 358 | "\u001b[K |████████████████████████████████| 133kB 60.2MB/s \n", 359 | "\u001b[?25hCollecting cryptography>=2.1.4\n", 360 | "\u001b[?25l Downloading 
https://files.pythonhosted.org/packages/f8/1f/acde6ff69864c5e78b56488e3afd93c1ccc8c2651186e2a5f93d93f64859/cryptography-3.4.6-cp36-abi3-manylinux2014_x86_64.whl (3.2MB)\n", 361 | "\u001b[K |████████████████████████████████| 3.2MB 52.4MB/s \n", 362 | "\u001b[?25hCollecting msal-extensions~=0.3.0\n", 363 | " Downloading https://files.pythonhosted.org/packages/49/cb/c833ffa0f97c3098b146ac19bb2266c2d84b2119ffff83fdf001bb59d3ae/msal_extensions-0.3.0-py2.py3-none-any.whl\n", 364 | "Collecting msrest>=0.6.18\n", 365 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/e8/cc/6c96bfb3d3cf4c3bdedfa6b46503223f4c2a4fa388377697e0f8082a4fed/msrest-0.6.21-py2.py3-none-any.whl (85kB)\n", 366 | "\u001b[K |████████████████████████████████| 92kB 11.3MB/s \n", 367 | "\u001b[?25hRequirement already satisfied: requests<3,>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from msal<2.0.0,>=1.6.0->azure-identity->microsoftvision) (2.23.0)\n", 368 | "Collecting PyJWT[crypto]<3,>=1.0.0\n", 369 | " Downloading https://files.pythonhosted.org/packages/b4/9b/8850f99027ed029af6828199cc87179eaccbbf1f9e6e373e7f0177d32dad/PyJWT-2.0.1-py3-none-any.whl\n", 370 | "Requirement already satisfied: cffi>=1.12 in /usr/local/lib/python3.7/dist-packages (from cryptography>=2.1.4->azure-identity->microsoftvision) (1.14.5)\n", 371 | "Collecting portalocker~=1.0; platform_system != \"Windows\"\n", 372 | " Downloading https://files.pythonhosted.org/packages/3b/e7/ceef002a300a98a208232fab593183249b6964b306ee7dabb29908419cca/portalocker-1.7.1-py2.py3-none-any.whl\n", 373 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from msrest>=0.6.18->azure-storage-blob->microsoftvision) (2020.12.5)\n", 374 | "Requirement already satisfied: requests-oauthlib>=0.5.0 in /usr/local/lib/python3.7/dist-packages (from msrest>=0.6.18->azure-storage-blob->microsoftvision) (1.3.0)\n", 375 | "Collecting isodate>=0.6.0\n", 376 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/9b/9f/b36f7774ff5ea8e428fdcfc4bb332c39ee5b9362ddd3d40d9516a55221b2/isodate-0.6.0-py2.py3-none-any.whl (45kB)\n", 377 | "\u001b[K |████████████████████████████████| 51kB 7.6MB/s \n", 378 | "\u001b[?25hRequirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3,>=2.0.0->msal<2.0.0,>=1.6.0->azure-identity->microsoftvision) (1.24.3)\n", 379 | "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3,>=2.0.0->msal<2.0.0,>=1.6.0->azure-identity->microsoftvision) (3.0.4)\n", 380 | "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3,>=2.0.0->msal<2.0.0,>=1.6.0->azure-identity->microsoftvision) (2.10)\n", 381 | "Requirement already satisfied: pycparser in /usr/local/lib/python3.7/dist-packages (from cffi>=1.12->cryptography>=2.1.4->azure-identity->microsoftvision) (2.20)\n", 382 | "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from requests-oauthlib>=0.5.0->msrest>=0.6.18->azure-storage-blob->microsoftvision) (3.1.0)\n", 383 | "Installing collected packages: cryptography, PyJWT, msal, azure-core, portalocker, msal-extensions, azure-identity, isodate, msrest, azure-storage-blob, microsoftvision\n", 384 | "Successfully installed PyJWT-2.0.1 azure-core-1.12.0 azure-identity-1.5.0 azure-storage-blob-12.8.0 cryptography-3.4.6 isodate-0.6.0 microsoftvision-1.0.5 msal-1.10.0 msal-extensions-0.3.0 msrest-0.6.21 
portalocker-1.7.1\n" 385 | ], 386 | "name": "stdout" 387 | } 388 | ] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "metadata": { 393 | "id": "QQS10yGI3R4z" 394 | }, 395 | "source": [ 396 | "## Preprocess the Input Images\n", 397 | "\n", 398 | "Microsoft Vision model is using images in BGR format, hence the swapping of image channels at the end of preprocessing" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "metadata": { 404 | "id": "vxUUWJGuolfS" 405 | }, 406 | "source": [ 407 | "class Preprocess:\n", 408 | " def __init__(self):\n", 409 | " self.preprocess = transforms.Compose([\n", 410 | " transforms.Resize(224),\n", 411 | " transforms.CenterCrop(224),\n", 412 | " transforms.ToTensor(),\n", 413 | " transforms.Normalize(mean=[0.406, 0.456, 0.485], std=[0.225, 0.224, 0.229])])\n", 414 | "\n", 415 | " def __call__(self, x):\n", 416 | " return self.preprocess(x)[[2,1,0],:,:]" 417 | ], 418 | "execution_count": 3, 419 | "outputs": [] 420 | }, 421 | { 422 | "cell_type": "markdown", 423 | "metadata": { 424 | "id": "eqxNc9dc3bNU" 425 | }, 426 | "source": [ 427 | "Import the CIFAR-10 dataset with the division to train and test sets. This can be replaced with any dataset without any changes to the rest of the code.\n", 428 | "\n", 429 | "Even this can be replaced with PyTorch with torchvision.datasets.ImageFolder\n", 430 | "\n", 431 | "A generic data loader where the images are arranged in this way:\n", 432 | "\n", 433 | ">root/dog/xxx.png
\n", 434 | "> root/dog/xxy.png
\n", 435 | ">root/dog/[...]/xxz.png
\n", 436 | ">root/cat/123.png
\n", 437 | ">root/cat/nsdf3.png
\n", 438 | ">root/cat/[...]/asd932_.png
" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "metadata": { 444 | "colab": { 445 | "base_uri": "https://localhost:8080/", 446 | "height": 117, 447 | "referenced_widgets": [ 448 | "bd820309094f48b38f473a235ef5a93d", 449 | "3a7877ed8e174bf38de1d4ca2ee29e4c", 450 | "1f0f20c8720c4dcd9917057ff1c7dc3f", 451 | "47e1b2f348aa4a2c9d039706260d5781", 452 | "b2ad45b530124e2f81a4fcb8f60d6893", 453 | "11a0f5cf47914b839188653402d94651", 454 | "36dc7a4e4687495e8430f27749e099a3", 455 | "8dcb70e7032941cf9e7f2bcefe775486" 456 | ] 457 | }, 458 | "id": "KPHTGjJYoljB", 459 | "outputId": "67095cbf-55ad-4534-c5c4-5f52bfbbec77" 460 | }, 461 | "source": [ 462 | "train_dataset = CIFAR10('path', download=True, train=True, transform=Preprocess())\n", 463 | "test_dataset = CIFAR10('path', download=True, train=False, transform=Preprocess())" 464 | ], 465 | "execution_count": 4, 466 | "outputs": [ 467 | { 468 | "output_type": "stream", 469 | "text": [ 470 | "Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to path/cifar-10-python.tar.gz\n" 471 | ], 472 | "name": "stdout" 473 | }, 474 | { 475 | "output_type": "display_data", 476 | "data": { 477 | "application/vnd.jupyter.widget-view+json": { 478 | "model_id": "bd820309094f48b38f473a235ef5a93d", 479 | "version_minor": 0, 480 | "version_major": 2 481 | }, 482 | "text/plain": [ 483 | "HBox(children=(FloatProgress(value=0.0, max=170498071.0), HTML(value='')))" 484 | ] 485 | }, 486 | "metadata": { 487 | "tags": [] 488 | } 489 | }, 490 | { 491 | "output_type": "stream", 492 | "text": [ 493 | "\n", 494 | "Extracting path/cifar-10-python.tar.gz to path\n", 495 | "Files already downloaded and verified\n" 496 | ], 497 | "name": "stdout" 498 | } 499 | ] 500 | }, 501 | { 502 | "cell_type": "markdown", 503 | "metadata": { 504 | "id": "Bc2Nzklg-L9f" 505 | }, 506 | "source": [ 507 | "## Loading Microsoft Vision pretrained model" 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "metadata": { 513 | "colab": { 514 | "base_uri": "https://localhost:8080/" 515 | }, 516 | "id": "tAHrdn_ho3ta", 517 | "outputId": "adf7778c-6635-4fb3-b04d-f921212bb975" 518 | }, 519 | "source": [ 520 | "model = microsoftvision.models.resnet50(pretrained=True)" 521 | ], 522 | "execution_count": 5, 523 | "outputs": [ 524 | { 525 | "output_type": "stream", 526 | "text": [ 527 | "Loading Microsoft Vision pretrained model\n", 528 | "Downloading model.\n" 529 | ], 530 | "name": "stdout" 531 | }, 532 | { 533 | "output_type": "stream", 534 | "text": [ 535 | "\r 0%| | 0/23 [00:00 1: 40 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock") 41 | # Both self.conv1 and self.downsample layers downsample the input when stride != 1 42 | self.conv1 = conv3x3(inplanes, planes, stride) 43 | self.bn1 = norm_layer(planes) 44 | self.relu = nn.ReLU(inplace=True) 45 | self.conv2 = conv3x3(planes, planes) 46 | self.bn2 = norm_layer(planes) 47 | self.downsample = downsample 48 | self.stride = stride 49 | 50 | def forward(self, x): 51 | identity = x 52 | 53 | out = self.conv1(x) 54 | out = self.bn1(out) 55 | out = self.relu(out) 56 | 57 | out = self.conv2(out) 58 | out = self.bn2(out) 59 | 60 | if self.downsample is not None: 61 | identity = self.downsample(x) 62 | 63 | out += identity 64 | out = self.relu(out) 65 | 66 | return out 67 | 68 | 69 | class Bottleneck(nn.Module): 70 | # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) 71 | # while original implementation places the stride at the first 1x1 convolution(self.conv1) 72 | # according 
to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. 73 | # This variant is also known as ResNet V1.5 and improves accuracy according to 74 | # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. 75 | 76 | expansion = 4 77 | 78 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 79 | base_width=64, dilation=1, norm_layer=None): 80 | super(Bottleneck, self).__init__() 81 | if norm_layer is None: 82 | norm_layer = nn.BatchNorm2d 83 | width = int(planes * (base_width / 64.)) * groups 84 | # Both self.conv2 and self.downsample layers downsample the input when stride != 1 85 | self.conv1 = conv1x1(inplanes, width) 86 | self.bn1 = norm_layer(width) 87 | self.conv2 = conv3x3(width, width, stride, groups, dilation) 88 | self.bn2 = norm_layer(width) 89 | self.conv3 = conv1x1(width, planes * self.expansion) 90 | self.bn3 = norm_layer(planes * self.expansion) 91 | self.relu = nn.ReLU(inplace=True) 92 | self.downsample = downsample 93 | self.stride = stride 94 | 95 | def forward(self, x): 96 | identity = x 97 | 98 | out = self.conv1(x) 99 | out = self.bn1(out) 100 | out = self.relu(out) 101 | 102 | out = self.conv2(out) 103 | out = self.bn2(out) 104 | out = self.relu(out) 105 | 106 | out = self.conv3(out) 107 | out = self.bn3(out) 108 | 109 | if self.downsample is not None: 110 | identity = self.downsample(x) 111 | 112 | out += identity 113 | out = self.relu(out) 114 | 115 | return out 116 | 117 | 118 | class ResNet(nn.Module): 119 | 120 | def __init__(self, block, layers, zero_init_residual=False, 121 | groups=1, width_per_group=64, replace_stride_with_dilation=None, 122 | norm_layer=None): 123 | super(ResNet, self).__init__() 124 | if norm_layer is None: 125 | norm_layer = nn.BatchNorm2d 126 | self._norm_layer = norm_layer 127 | 128 | self.inplanes = 64 129 | self.dilation = 1 130 | if replace_stride_with_dilation is None: 131 | # each element in the tuple indicates if we should replace 132 | # the 2x2 stride with a dilated convolution instead 133 | replace_stride_with_dilation = [False, False, False] 134 | if len(replace_stride_with_dilation) != 3: 135 | raise ValueError("replace_stride_with_dilation should be None " 136 | "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) 137 | self.groups = groups 138 | self.base_width = width_per_group 139 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, 140 | bias=False) 141 | self.bn1 = norm_layer(self.inplanes) 142 | self.relu = nn.ReLU(inplace=True) 143 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 144 | self.layer1 = self._make_layer(block, 64, layers[0]) 145 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, 146 | dilate=replace_stride_with_dilation[0]) 147 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, 148 | dilate=replace_stride_with_dilation[1]) 149 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, 150 | dilate=replace_stride_with_dilation[2]) 151 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 152 | 153 | for m in self.modules(): 154 | if isinstance(m, nn.Conv2d): 155 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 156 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 157 | nn.init.constant_(m.weight, 1) 158 | nn.init.constant_(m.bias, 0) 159 | 160 | # Zero-initialize the last BN in each residual branch, 161 | # so that the residual branch starts with zeros, and each residual block behaves like an identity. 
162 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 163 | if zero_init_residual: 164 | for m in self.modules(): 165 | if isinstance(m, Bottleneck): 166 | nn.init.constant_(m.bn3.weight, 0) 167 | elif isinstance(m, BasicBlock): 168 | nn.init.constant_(m.bn2.weight, 0) 169 | 170 | def _make_layer(self, block, planes, blocks, stride=1, dilate=False): 171 | norm_layer = self._norm_layer 172 | downsample = None 173 | previous_dilation = self.dilation 174 | if dilate: 175 | self.dilation *= stride 176 | stride = 1 177 | if stride != 1 or self.inplanes != planes * block.expansion: 178 | downsample = nn.Sequential( 179 | conv1x1(self.inplanes, planes * block.expansion, stride), 180 | norm_layer(planes * block.expansion), 181 | ) 182 | 183 | layers = [] 184 | layers.append(block(self.inplanes, planes, stride, downsample, self.groups, 185 | self.base_width, previous_dilation, norm_layer)) 186 | self.inplanes = planes * block.expansion 187 | for _ in range(1, blocks): 188 | layers.append(block(self.inplanes, planes, groups=self.groups, 189 | base_width=self.base_width, dilation=self.dilation, 190 | norm_layer=norm_layer)) 191 | 192 | return nn.Sequential(*layers) 193 | 194 | def _forward_impl(self, x): 195 | # See note [TorchScript super()] 196 | x = self.conv1(x) 197 | x = self.bn1(x) 198 | x = self.relu(x) 199 | x = self.maxpool(x) 200 | 201 | x = self.layer1(x) 202 | x = self.layer2(x) 203 | x = self.layer3(x) 204 | x = self.layer4(x) 205 | 206 | x = self.avgpool(x) 207 | x = torch.flatten(x, 1) 208 | 209 | return x 210 | 211 | def forward(self, x): 212 | return self._forward_impl(x) 213 | 214 | 215 | def _resnet(arch, block, layers, pretrained, map_location, **kwargs): 216 | model = ResNet(block, layers, **kwargs) 217 | if pretrained: 218 | print("Loading Microsoft Vision pretrained model") 219 | state_dict = load_state_dict_from_url(model_urls[arch], map_location) 220 | load_state_dict(model, state_dict) 221 | else: 222 | print("Using default model initialization") 223 | return model 224 | 225 | def resnet50(pretrained=True, map_location=None): 226 | return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, map_location) -------------------------------------------------------------------------------- /microsoftvision/models/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | import tempfile 4 | from urllib.request import urlopen, Request 5 | import shutil 6 | from azure.identity import DeviceCodeCredential 7 | from azure.storage.blob import BlobClient 8 | from tqdm import tqdm 9 | 10 | def download_model_from_blob(url, dst, progress=True): 11 | blob_client = BlobClient.from_blob_url(url) 12 | model_size = int(blob_client.get_blob_properties()['size']) 13 | try: 14 | print(f"Model size: {model_size//(1024*1024)} MB") 15 | with open(dst, "wb") as my_blob: 16 | segment_size = 4 * 1024 * 1024 # 1MB chunk 17 | offset = 0 18 | for i in tqdm(range((model_size // segment_size) + 1), unit='MB'): 19 | if offset >= model_size: 20 | break 21 | download_stream = blob_client.download_blob(offset=offset, length=segment_size) 22 | my_blob.write(download_stream.readall()) 23 | offset += segment_size 24 | 25 | except: 26 | os.remove(dst) 27 | print("Downloading error") 28 | raise 29 | 30 | print(f"Model saved to {dst}") 31 | 32 | 33 | def load_state_dict_from_url(model_path, map_location=None): 34 | filename = os.path.basename(model_path) 35 | 36 | # This checks if model exists in current folder 
37 | if not os.path.exists(filename): 38 | print("Downloading model.") 39 | download_model_from_blob(model_path, filename) 40 | else: 41 | print("Model already downloaded.") 42 | 43 | return torch.load(filename, map_location=map_location)['state_dict'] 44 | 45 | def load_state_dict(model, pretrained_weights): 46 | weights = model.state_dict() 47 | 48 | # Remove keys that do not exist in the model 49 | for key in pretrained_weights.keys() - weights.keys(): 50 | print("Delete unused model state key: %s" % key) 51 | del pretrained_weights[key] 52 | 53 | # Remove keys whose shapes do not match the model's 54 | for key, pretrained_weight in list(pretrained_weights.items()): 55 | weight = weights[key] 56 | if pretrained_weight.shape != weight.shape: 57 | print("Delete model state key with unmatched shape: %s" % key) 58 | del pretrained_weights[key] 59 | 60 | # Copy from the model whatever pretrained_weights is missing 61 | for key in weights.keys() - pretrained_weights.keys(): 62 | print("Missing model state key: %s" % key) 63 | pretrained_weights[key] = weights[key] 64 | 65 | # Load the merged weights into the model 66 | model.load_state_dict(pretrained_weights) 67 | -------------------------------------------------------------------------------- /microsoftvision/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.0.5' 2 | --------------------------------------------------------------------------------
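The transfer-learning notebook notes that the CIFAR-10 dataset "can be replaced with any dataset without any changes to the rest of the code", for example `torchvision.datasets.ImageFolder` with the `root/<class>/<image>.png` layout it describes, but that variant is not shown. Below is a minimal sketch of the swap, reusing the notebook's `Preprocess` transform; the directory paths and batch size are illustrative assumptions, not part of the original code.

```
# Hypothetical ImageFolder variant of the notebook's dataset setup.
# Assumes images are arranged as root/<class_name>/<image>.png and that the
# Preprocess class defined in the notebook is in scope.
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

train_dataset = ImageFolder('data/train', transform=Preprocess())  # illustrative path
test_dataset = ImageFolder('data/test', transform=Preprocess())    # illustrative path

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
```

As the notebook states, the remaining steps (feature extraction with `microsoftvision.models.resnet50` and the fully connected classifier on top) would work on these loaders without further changes.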