├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── PULL_REQUEST_TEMPLATE.md ├── README.md ├── REPRODUCE_RESULTS.md ├── augmentations.ipynb ├── big-images-ids.csv ├── big-images-ids_v2.csv ├── common_blocks ├── __init__.py ├── architectures │ ├── __init__.py │ ├── base.py │ ├── classification.py │ ├── encoders.py │ ├── large_kernel_matters.py │ ├── pspnet.py │ └── unet.py ├── augmentation.py ├── callbacks.py ├── loaders.py ├── lovasz_losses.py ├── metrics.py ├── models.py ├── pipelines.py ├── postprocessing.py └── utils │ ├── __init__.py │ ├── io.py │ ├── masks.py │ └── misc.py ├── main.py ├── neptune.yaml ├── prediction_exploration.ipynb ├── prepare_metadata.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | .pytest_cache 6 | tests/.cache 7 | 8 | # C extensions 9 | *.so 10 | 11 | # neptune, pycharm 12 | .cache 13 | .cache/ 14 | .idea/ 15 | .idea_modules/ 16 | *_local.yaml 17 | out/ 18 | output 19 | output/ 20 | *.log 21 | target/ 22 | devbook.ipynb 23 | devbook_local.ipynb 24 | 25 | # Distribution / packaging 26 | .Python 27 | env/ 28 | build/ 29 | develop-eggs/ 30 | dist/ 31 | downloads/ 32 | eggs/ 33 | .eggs/ 34 | lib/ 35 | lib64/ 36 | parts/ 37 | sdist/ 38 | var/ 39 | wheels/ 40 | *.egg-info/ 41 | .installed.cfg 42 | *.egg 43 | 44 | # PyInstaller 45 | # Usually these files are written by a python script from a template 46 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 47 | *.manifest 48 | *.spec 49 | 50 | # Installer logs 51 | pip-log.txt 52 | pip-delete-this-directory.txt 53 | 54 | # Unit test / coverage reports 55 | htmlcov/ 56 | .tox/ 57 | .coverage 58 | .coverage.* 59 | nosetests.xml 60 | coverage.xml 61 | *.cover 62 | .hypothesis/ 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | local_settings.py 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # Jupyter Notebook 82 | Untitled*.ipynb 83 | .ipynb_checkpoints 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # celery beat schedule file 89 | celerybeat-schedule 90 | 91 | # SageMath parsed files 92 | *.sage.py 93 | 94 | # dotenv 95 | .env 96 | 97 | # virtualenv 98 | .venv 99 | venv/ 100 | ENV/ 101 | 102 | # Spyder project settings 103 | .spyderproject 104 | .spyproject 105 | 106 | # Rope project settings 107 | .ropeproject 108 | 109 | # mkdocs documentation 110 | /site 111 | 112 | # mypy 113 | .mypy_cache/ 114 | 115 | # Working directories 116 | examples/cache/ 117 | neptune_local.yaml 118 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at ml-team@neptune.ml. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contributing to [Mapping Challenge](https://www.crowdai.org/challenges/mapping-challenge) Open Solution 2 | 3 | ### Get involved 4 | You are welcome to contribute to this Open Solution. To get started: 5 | 1. Check [our kanban board](https://github.com/neptune-ml/open-solution-mapping-challenge/projects/1) to see what we are working on right now. 6 | 1. 
Express your interest in a particular [issue](https://github.com/neptune-ml/open-solution-mapping-challenge/issues) by submitting a comment, or
7 | * submit your own [issue](https://github.com/neptune-ml/open-solution-mapping-challenge/issues).
8 | 1. We will get back to you in order to start working together.
9 |
10 | ### Code contributions
11 | The major - and most appreciated - contribution is a [pull request](https://github.com/neptune-ml/open-solution-mapping-challenge/pulls) with a feature or a bug fix.
12 |
13 | ### Remarks
14 | In case of custom ideas, please contact the core contributors directly at ml-team@neptune.ml.
15 | #
16 |
17 | Thanks!
18 |
19 | Kuba & Kamil,
20 |
21 | *core contributors to the Open Solution*
22 | -------------------------------------------------------------------------------- /LICENSE: --------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 neptune.ml
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /PULL_REQUEST_TEMPLATE.md: --------------------------------------------------------------------------------
1 | ## Pull Request template to the [Mapping Challenge](https://www.crowdai.org/challenges/mapping-challenge) Open Solution
2 |
3 | The major - and most appreciated - contribution is a pull request with a feature or a bug fix. Each pull request initiates a discussion about your code contribution.
4 |
5 | Each pull request should come with a minimal description of its contents.
6 | #
7 |
8 | Thanks!
9 |
10 | Kuba & Kamil,
11 |
12 | _core contributors to the Open Solutions_
13 | -------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # Airbus Ship Detection Challenge
2 | [![license](https://img.shields.io/github/license/mashape/apistatus.svg?maxAge=2592000)](https://github.com/minerva-ml/open-solution-ship-detection/blob/master/LICENSE)
3 |
4 | This is an open solution to the [Airbus Ship Detection Challenge](https://www.kaggle.com/c/airbus-ship-detection).
5 |
6 | ## Our goals
7 | We are building an entirely open solution to this competition. Specifically:
8 | 1. **Learning from the process** - updates about new ideas, code and experiments are the best way to learn data science.
Our activity is especially useful for people who want to enter the competition but lack the appropriate experience.
9 | 1. Encourage more Kagglers to start working on this competition.
10 | 1. Deliver an open source solution with no strings attached. Code is available on our [GitHub repository :computer:](https://github.com/minerva-ml/open-solution-ship-detection). This solution should establish a solid benchmark, as well as provide a good base for your custom ideas and experiments. We care about clean code :smiley:
11 | 1. We are opening our experiments as well: everybody can have a **live preview** of our experiments, parameters, code, etc. Check [Airbus Ship Detection Challenge :chart_with_upwards_trend:](https://app.neptune.ml/neptune-ml/Ships?namedFilterId=mainListFilter) or the screenshot below.
12 |
13 | |Train and validation monitor :bar_chart:|
14 | |:---:|
15 | |[![training monitor](https://gist.githubusercontent.com/jakubczakon/cac72983726a970690ba7c33708e100b/raw/02a2ab13edfe41cbad7e04c4a75b105393c14e02/ships_neptune.png)](https://app.neptune.ml/neptune-ml/Ships)|
16 |
17 | ## Disclaimer
18 | In this open source solution you will find references to [neptune.ml](https://neptune.ml). It is a free platform for community users, which we use daily to keep track of our experiments. Please note that using neptune.ml is not necessary to proceed with this solution. You may run it as a plain Python script :snake:.
19 |
20 | # How to start?
21 | ## Learn about our solutions
22 | 1. Check the [Kaggle forum](https://www.kaggle.com/c/airbus-ship-detection/discussion/62988) and participate in the discussions.
23 | 1. See the solutions below (CV - validation score, LB - Kaggle leaderboard score):
24 |
25 | | link to code | CV | LB |
26 | |:---:|:---:|:---:|
27 | |[solution 1](https://app.neptune.ml/neptune-ml/Ships?namedFilterId=1bc4da1e-6e47-4a26-a50e-3e55cbc052a7)|0.541|0.573|
28 | |[solution 2](https://app.neptune.ml/neptune-ml/Ships?namedFilterId=8ad61fcb-f0ac-4aaf-aa9c-9db47e0aa222)|0.661|0.679|
29 | |[solution 3](https://app.neptune.ml/neptune-ml/Ships?namedFilterId=be842434-7c8b-4ab9-afa5-f9c00816d3c3)|0.694|0.696|
30 | |[solution 4](https://app.neptune.ml/neptune-ml/Ships?namedFilterId=e43c10b9-6a3d-4f0b-80e7-8d74eb86ff62)|0.722|0.703|
31 | |[solution 5](https://app.neptune.ml/neptune-ml/Ships/experiments/ac8cdaf2-c372-46e9-9045-b17c43af41a6)|0.719|0.725|
32 |
33 | ## Start experimenting with ready-to-use code
34 | You can jump-start your participation in the competition by using our starter pack. The installation instructions below will guide you through the setup.
35 |
36 | ### Installation *(fast track)*
37 | 1. Clone the repository and install the requirements (*use Python 3.5*): `pip3 install -r requirements.txt`
38 | 1. Register at [neptune.ml](https://neptune.ml) _(if you wish to use it)_
39 | 1. 
Run the experiment based on U-Net:
40 |
41 |
42 | #### Cloud
43 | ```bash
44 | neptune account login
45 | ```
46 |
47 | Create a project, say Ships (with the project key SHIP).
48 |
49 | Go to `neptune.yaml` and change:
50 |
51 | ```yaml
52 | project: USERNAME/PROJECT_NAME
53 | ```
54 | to your username and project name.
55 |
56 | Prepare the metadata and the overlayed target masks.
57 | It only needs to be **done once**:
58 |
59 | ```bash
60 | neptune send --worker xs \
61 | --environment base-cpu-py3 \
62 | --config neptune.yaml \
63 | prepare_metadata.py
64 |
65 | ```
66 |
67 | They will be saved under the paths configured in `neptune.yaml`:
68 |
69 | ```yaml
70 | metadata_filepath: /output/metadata.csv
71 | masks_overlayed_dir: /output/masks_overlayed
72 | ```
73 |
74 | From now on we will load this metadata as an input. Change `neptune.yaml` to:
75 |
76 | ```yaml
77 | metadata_filepath: /input/metadata.csv
78 | masks_overlayed_dir: /input/masks_overlayed
79 | ```
80 |
81 | and add the experiment that generated the metadata (say SHIP-1) as an input to every command, e.g. `--input /SHIP-1/output/metadata.csv`.
82 |
83 | Let's train the model by running `main.py`:
84 |
85 | ```bash
86 | neptune send --worker m-2p100 \
87 | --environment pytorch-0.3.1-gpu-py3 \
88 | --config neptune.yaml \
89 | --input /SHIP-1/output/metadata.csv \
90 | --input /SHIP-1/output/masks_overlayed \
91 | main.py
92 |
93 | ```
94 |
95 | The model will be saved in:
96 |
97 | ```yaml
98 | experiment_dir: /output/experiment
99 | ```
100 |
101 | and the `submission.csv` will be saved in `/output/experiment/submission.csv`.
102 |
103 | You can easily use models trained during one experiment in other experiments.
104 | For example, when running evaluation we need to use the model folder from a previous experiment. We do that by:
105 |
106 | changing `main.py`
107 |
108 | ```python
109 | CLONE_EXPERIMENT_DIR_FROM = '/SHIP-2/output/experiment'
110 | ```
111 |
112 | and running the following command:
113 |
114 |
115 | ```bash
116 | neptune send --worker m-2p100 \
117 | --environment pytorch-0.3.1-gpu-py3 \
118 | --config neptune.yaml \
119 | --input /SHIP-1/output/metadata.csv \
120 | --input /SHIP-1/output/masks_overlayed \
121 | --input /SHIP-2 \
122 | main.py
123 | ```
124 |
125 | #### Local
126 | Log in to neptune if you want to use it:
127 | ```bash
128 | neptune account login
129 | ```
130 |
131 | Prepare the metadata by running:
132 |
133 | ```bash
134 | neptune run --config neptune.yaml prepare_metadata.py
135 | ```
136 |
137 | Run training and inference by running `main.py`:
138 |
139 | ```bash
140 | neptune run --config neptune.yaml main.py
141 | ```
142 |
143 | You can always run it with pure Python :snake:
144 |
145 | ```bash
146 | python main.py
147 | ```
148 |
149 | ## Get involved
150 | You are welcome to contribute your code and ideas to this open solution. To get started:
151 | 1. Check the [competition project](https://github.com/neptune-ml/open-solution-ship-detection/projects/1) on GitHub to see what we are working on right now.
152 | 1. Express your interest in a particular task by writing a comment on it, or create a new task with your own idea.
153 | 1. We will get back to you quickly so that we can start working together.
154 | 1. Check [CONTRIBUTING](CONTRIBUTING.md) for more information.
155 |
156 | ## User support
157 | There are several ways to seek help:
158 | 1. The Kaggle [discussion](https://www.kaggle.com/c/airbus-ship-detection/discussion/62988) forum is our primary way of communication.
159 | 1. Submit an [issue](https://github.com/neptune-ml/open-solution-ship-detection/issues) directly in this repo.
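
## Bonus: using the architecture blocks directly

If you just want to play with the network architectures outside of the full training pipeline, the building blocks in `common_blocks/architectures` can be composed on their own. Below is a minimal, illustrative sketch (not the official pipeline, which is wired up in `main.py`): it assumes the repository root is on your `PYTHONPATH` and uses only the constructors defined in `common_blocks/architectures/encoders.py` and `common_blocks/architectures/unet.py`.

```python
import torch
from torch.autograd import Variable

from common_blocks.architectures.encoders import ResNetEncoders
from common_blocks.architectures.unet import UNet

# ResNet34 backbone (no pretrained weights downloaded) + U-Net decoder with hypercolumns,
# using the constructor signatures defined in common_blocks/architectures.
encoder = ResNetEncoders(encoder_depth=34, pretrained=False, pool0=True)
model = UNet(encoder, num_classes=2, use_hypercolumn=True, pool0=True).eval()

# Dummy forward pass. Variable is used for compatibility with the PyTorch 0.3.1
# environment targeted by this repo; on newer PyTorch versions it is a no-op.
x = Variable(torch.randn(1, 3, 256, 256))
logits = model(x)
print(logits.size())  # torch.Size([1, 2, 256, 256])
```

The same pattern works for the other decoders (`PSPNet`, `LargeKernelMatters`) and encoders (`SeResNetEncoders`, `SeResNetXtEncoders`, `DenseNetEncoders`) shown in this repository.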
160 | -------------------------------------------------------------------------------- /REPRODUCE_RESULTS.md: -------------------------------------------------------------------------------- 1 | # Working on it :smiley: 2 | -------------------------------------------------------------------------------- /augmentations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%matplotlib inline\n", 10 | "\n", 11 | "import numpy as np\n", 12 | "import glob\n", 13 | "from PIL import Image\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "\n", 16 | "from common_blocks.augmentation import iaa\n", 17 | "from common_blocks.utils.misc import plot_list" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "IMG_DIR = 'YOUR/DIR'\n", 27 | "IMG_DIR = '/mnt/ml-team/minerva/open-solutions/ships/data/train'\n", 28 | "\n", 29 | "IMG_IDX = 113\n", 30 | "img_filepath = sorted(glob.glob('{}/*'.format(IMG_DIR)))[IMG_IDX]\n", 31 | "img = np.array(Image.open(img_filepath)).astype(np.uint8)\n", 32 | "plt.imshow(img)" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "# Affine augmentations" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "affine_seq = iaa.Sequential([\n", 49 | "# General\n", 50 | " iaa.Fliplr(0.5),\n", 51 | " iaa.Flipud(0.5), \n", 52 | " iaa.Sometimes(0.5, iaa.CropAndPad(percent=(0.0,1.0), pad_mode='wrap'))\n", 53 | "], random_order=True)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "AUG_NR = 6\n", 63 | "aug_imgs = []\n", 64 | "for _ in range(AUG_NR):\n", 65 | " aug_img = affine_seq.augment_image(img)\n", 66 | " aug_imgs.append(aug_img)\n", 67 | "plot_list(images=aug_imgs)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "# Intensity Augmentations" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "intensity_seq = iaa.Sequential([\n", 84 | " iaa.Sometimes(0.3, iaa.ContrastNormalization((0.5, 1.5))),\n", 85 | " iaa.OneOf([\n", 86 | " iaa.Noop(),\n", 87 | " iaa.OneOf([\n", 88 | " iaa.Add((-10, 10)),\n", 89 | " iaa.AddElementwise((-10, 10)),\n", 90 | " iaa.Multiply((0.95, 1.05)),\n", 91 | " iaa.MultiplyElementwise((0.95, 1.05)),\n", 92 | " ]),\n", 93 | " iaa.GaussianBlur(sigma=(0.0, 3.0)),\n", 94 | " ])\n", 95 | "], random_order=False)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "AUG_NR = 6\n", 105 | "aug_imgs = []\n", 106 | "for _ in range(AUG_NR):\n", 107 | " aug_img = intensity_seq.augment_image(img)\n", 108 | " aug_imgs.append(aug_img)\n", 109 | "plot_list(images=aug_imgs)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [] 118 | } 119 | ], 120 | "metadata": { 121 | "kernelspec": { 122 | "display_name": "ships", 123 | "language": "python", 124 | "name": "ships" 125 | }, 126 | "language_info": { 127 | "codemirror_mode": { 128 | "name": "ipython", 129 | "version": 3 130 | }, 131 | 
"file_extension": ".py", 132 | "mimetype": "text/x-python", 133 | "name": "python", 134 | "nbconvert_exporter": "python", 135 | "pygments_lexer": "ipython3", 136 | "version": "3.5.2" 137 | } 138 | }, 139 | "nbformat": 4, 140 | "nbformat_minor": 2 141 | } 142 | -------------------------------------------------------------------------------- /common_blocks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/minerva-ml/open-solution-ship-detection/9e26c98e25a1d38fcfa426640697e37dd39f4776/common_blocks/__init__.py -------------------------------------------------------------------------------- /common_blocks/architectures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/minerva-ml/open-solution-ship-detection/9e26c98e25a1d38fcfa426640697e37dd39f4776/common_blocks/architectures/__init__.py -------------------------------------------------------------------------------- /common_blocks/architectures/base.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch import nn 3 | from torch.nn import functional as F 4 | import torch 5 | 6 | 7 | class Conv2dBnRelu(nn.Module): 8 | PADDING_METHODS = {'replication': nn.ReplicationPad2d, 9 | 'reflection': nn.ReflectionPad2d, 10 | 'zero': nn.ZeroPad2d, 11 | } 12 | 13 | def __init__(self, in_channels, out_channels, kernel_size=(3, 3), 14 | use_relu=True, use_batch_norm=True, use_padding=True, padding_method='replication'): 15 | super().__init__() 16 | self.use_relu = use_relu 17 | self.use_batch_norm = use_batch_norm 18 | self.use_padding = use_padding 19 | self.kernel_w = kernel_size[0] 20 | self.kernel_h = kernel_size[1] 21 | self.padding_w = kernel_size[0] - 1 22 | self.padding_h = kernel_size[1] - 1 23 | 24 | self.batch_norm = nn.BatchNorm2d(out_channels) 25 | self.relu = nn.ReLU(inplace=True) 26 | self.padding = Conv2dBnRelu.PADDING_METHODS[padding_method](padding=(0, self.padding_h, self.padding_w, 0)) 27 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, padding=0) 28 | 29 | def forward(self, x): 30 | if self.use_padding: 31 | x = self.padding(x) 32 | x = self.conv(x) 33 | if self.use_batch_norm: 34 | x = self.batch_norm(x) 35 | if self.use_relu: 36 | x = self.relu(x) 37 | return x 38 | 39 | 40 | class DeconvConv2dBnRelu(nn.Module): 41 | def __init__(self, in_channels, out_channels, use_relu=True, use_batch_norm=True, 42 | use_channel_se=False, use_spatial_se=False, reduction=16): 43 | super().__init__() 44 | self.use_relu = use_relu 45 | self.use_batch_norm = use_batch_norm 46 | self.use_channel_se = use_channel_se 47 | self.use_spatial_se = use_spatial_se 48 | 49 | self.batch_norm = nn.BatchNorm2d(out_channels) 50 | self.relu = nn.ReLU(inplace=True) 51 | self.deconv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=3, 52 | stride=2, padding=1, output_padding=1) 53 | 54 | if use_channel_se: 55 | self.channel_se = ChannelSELayer(out_channels, reduction=reduction) 56 | if use_spatial_se: 57 | self.spatial_se = SpatialSELayer(out_channels) 58 | 59 | def forward(self, x): 60 | x = self.deconv(x) 61 | if self.use_batch_norm: 62 | x = self.batch_norm(x) 63 | if self.use_relu: 64 | x = self.relu(x) 65 | 66 | if self.use_channel_se and self.use_spatial_se: 67 | channel_se = self.channel_se(x) 68 | spatial_se = self.spatial_se(x) 69 | x = channel_se + spatial_se 70 | elif self.use_channel_se: 71 | x = self.channel_se(x) 
72 | elif self.use_spatial_se: 73 | x = self.spatial_se(x) 74 | return x 75 | 76 | 77 | class NoOperation(nn.Module): 78 | def forward(self, x): 79 | return x 80 | 81 | 82 | class DecoderBlock(nn.Module): 83 | def __init__(self, in_channels, middle_channels, out_channels): 84 | super(DecoderBlock, self).__init__() 85 | self.conv1 = Conv2dBnRelu(in_channels, middle_channels) 86 | self.conv2 = Conv2dBnRelu(middle_channels, out_channels) 87 | self.upsample = nn.Upsample(scale_factor=2, mode='bilinear') 88 | self.channel_se = ChannelSELayer(out_channels, reduction=16) 89 | self.spatial_se = SpatialSELayer(out_channels) 90 | 91 | def forward(self, x, e=None): 92 | x = self.upsample(x) 93 | if e is not None: 94 | x = torch.cat([x, e], 1) 95 | x = self.conv1(x) 96 | x = self.conv2(x) 97 | 98 | channel_se = self.channel_se(x) 99 | spatial_se = self.spatial_se(x) 100 | 101 | x = channel_se + spatial_se 102 | return x 103 | 104 | 105 | class ChannelSELayer(nn.Module): 106 | def __init__(self, channel, reduction=16): 107 | super().__init__() 108 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 109 | self.fc = nn.Sequential( 110 | nn.Linear(channel, channel // reduction), 111 | nn.ReLU(inplace=True), 112 | nn.Linear(channel // reduction, channel), 113 | nn.Sigmoid() 114 | ) 115 | 116 | def forward(self, x): 117 | b, c, _, _ = x.size() 118 | y = self.avg_pool(x).view(b, c) 119 | y = self.fc(y).view(b, c, 1, 1) 120 | return x * y 121 | 122 | 123 | class SpatialSELayer(nn.Module): 124 | def __init__(self, channels): 125 | super().__init__() 126 | self.fc = nn.Conv2d(channels, 1, kernel_size=1) 127 | self.sigmoid = nn.Sigmoid() 128 | 129 | def forward(self, x): 130 | module_input = x 131 | x = self.fc(x) 132 | x = self.sigmoid(x) 133 | return module_input * x 134 | 135 | 136 | class DepthChannelExcitation(nn.Module): 137 | def __init__(self, channels): 138 | super().__init__() 139 | 140 | self.fc = nn.Sequential(nn.Linear(1, channels), 141 | nn.Sigmoid() 142 | ) 143 | 144 | def forward(self, x, d=None): 145 | b, c, _, _ = x.size() 146 | y = self.fc(d).view(b, c, 1, 1) 147 | return x * y 148 | 149 | 150 | class DepthSpatialExcitation(nn.Module): 151 | def __init__(self, grid_size=16): 152 | super().__init__() 153 | self.grid_size = grid_size 154 | self.grid_size_sqrt = int(np.sqrt(grid_size)) 155 | 156 | self.fc = nn.Sequential(nn.Linear(1, grid_size), 157 | nn.Sigmoid() 158 | ) 159 | 160 | def forward(self, x, d=None): 161 | b, _, h, w = x.size() 162 | y = self.fc(d).view(b, 1, self.grid_size_sqrt, self.grid_size_sqrt) 163 | scale_factor = h // self.grid_size_sqrt 164 | y = F.upsample(y, scale_factor=scale_factor, mode='bilinear') 165 | return x * y 166 | 167 | 168 | class GlobalConvolutionalNetwork(nn.Module): 169 | def __init__(self, in_channels, out_channels, kernel_size, use_relu=False): 170 | super().__init__() 171 | 172 | self.conv1 = nn.Sequential(Conv2dBnRelu(in_channels=in_channels, 173 | out_channels=out_channels, 174 | kernel_size=(kernel_size, 1), 175 | use_relu=use_relu, use_padding=True), 176 | Conv2dBnRelu(in_channels=out_channels, 177 | out_channels=out_channels, 178 | kernel_size=(1, kernel_size), 179 | use_relu=use_relu, use_padding=True), 180 | ) 181 | self.conv2 = nn.Sequential(Conv2dBnRelu(in_channels=in_channels, 182 | out_channels=out_channels, 183 | kernel_size=(1, kernel_size), 184 | use_relu=use_relu, use_padding=True), 185 | Conv2dBnRelu(in_channels=out_channels, 186 | out_channels=out_channels, 187 | kernel_size=(kernel_size, 1), 188 | use_relu=use_relu, use_padding=True), 189 | ) 190 
| 191 | def forward(self, x): 192 | conv1 = self.conv1(x) 193 | conv2 = self.conv2(x) 194 | return conv1 + conv2 195 | 196 | 197 | class BoundaryRefinement(nn.Module): 198 | def __init__(self, in_channels, out_channels, kernel_size): 199 | super().__init__() 200 | 201 | self.conv = nn.Sequential(Conv2dBnRelu(in_channels=in_channels, 202 | out_channels=out_channels, 203 | kernel_size=(kernel_size, kernel_size), 204 | use_relu=True, use_padding=True), 205 | Conv2dBnRelu(in_channels=in_channels, 206 | out_channels=out_channels, 207 | kernel_size=(kernel_size, kernel_size), 208 | use_relu=False, use_padding=True), 209 | ) 210 | 211 | def forward(self, x): 212 | conv = self.conv(x) 213 | return x + conv 214 | -------------------------------------------------------------------------------- /common_blocks/architectures/classification.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import pretrainedmodels 3 | 4 | class Densenet(nn.Module): 5 | def __init__(self, pretrained): 6 | super().__init__() 7 | self.features = pretrainedmodels.__dict__['densenet201'](num_classes=1000, pretrained=pretrained) 8 | self.classifier = nn.Linear(in_features=1000, out_features=2) 9 | 10 | def forward(self, input): 11 | x = self.features(input) 12 | x = self.classifier(x) 13 | return x 14 | -------------------------------------------------------------------------------- /common_blocks/architectures/encoders.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | 3 | import torch 4 | from torch.autograd import Variable 5 | from torch import nn 6 | import torchvision 7 | import pretrainedmodels 8 | 9 | 10 | class ResNetEncoders(nn.Module): 11 | def __init__(self, encoder_depth, pretrained=False, pool0=False): 12 | super().__init__() 13 | 14 | if encoder_depth == 18: 15 | self.encoder = torchvision.models.resnet18(pretrained=pretrained) 16 | elif encoder_depth == 34: 17 | self.encoder = torchvision.models.resnet34(pretrained=pretrained) 18 | elif encoder_depth == 50: 19 | self.encoder = torchvision.models.resnet50(pretrained=pretrained) 20 | elif encoder_depth == 101: 21 | self.encoder = torchvision.models.resnet101(pretrained=pretrained) 22 | elif encoder_depth == 152: 23 | self.encoder = torchvision.models.resnet152(pretrained=pretrained) 24 | else: 25 | raise NotImplementedError('only 18, 34, 50, 101, 152 version of Resnet are implemented') 26 | 27 | if pool0: 28 | self.conv1 = nn.Sequential(self.encoder.conv1, 29 | self.encoder.bn1, 30 | self.encoder.relu, 31 | self.encoder.maxpool) 32 | else: 33 | self.conv1 = nn.Sequential(self.encoder.conv1, 34 | self.encoder.bn1, 35 | self.encoder.relu) 36 | 37 | self.encoder2 = self.encoder.layer1 38 | self.encoder3 = self.encoder.layer2 39 | self.encoder4 = self.encoder.layer3 40 | self.encoder5 = self.encoder.layer4 41 | 42 | def forward(self, x): 43 | conv1 = self.conv1(x) 44 | encoder2 = self.encoder2(conv1) 45 | encoder3 = self.encoder3(encoder2) 46 | encoder4 = self.encoder4(encoder3) 47 | encoder5 = self.encoder5(encoder4) 48 | 49 | return encoder2, encoder3, encoder4, encoder5 50 | 51 | 52 | class SeResNetEncoders(nn.Module): 53 | def __init__(self, encoder_depth, pretrained='imagenet', pool0=False): 54 | super().__init__() 55 | 56 | if encoder_depth == 50: 57 | self.encoder = pretrainedmodels.__dict__['se_resnet50'](num_classes=1000, pretrained=pretrained) 58 | elif encoder_depth == 101: 59 | self.encoder = 
pretrainedmodels.__dict__['se_resnet101'](num_classes=1000, pretrained=pretrained) 60 | elif encoder_depth == 152: 61 | self.encoder = pretrainedmodels.__dict__['se_resnet152'](num_classes=1000, pretrained=pretrained) 62 | else: 63 | raise NotImplementedError('only 50, 101, 152 version of Resnet are implemented') 64 | 65 | if pool0: 66 | self.conv1 = nn.Sequential(self.encoder.layer0.conv1, 67 | self.encoder.layer0.bn1, 68 | self.encoder.layer0.relu1, 69 | self.encoder.layer0.pool0) 70 | else: 71 | self.conv1 = nn.Sequential(self.encoder.layer0.conv1, 72 | self.encoder.layer0.bn1, 73 | self.encoder.layer0.relu1) 74 | 75 | self.encoder2 = self.encoder.layer1 76 | self.encoder3 = self.encoder.layer2 77 | self.encoder4 = self.encoder.layer3 78 | self.encoder5 = self.encoder.layer4 79 | 80 | def forward(self, x): 81 | conv1 = self.conv1(x) 82 | encoder2 = self.encoder2(conv1) 83 | encoder3 = self.encoder3(encoder2) 84 | encoder4 = self.encoder4(encoder3) 85 | encoder5 = self.encoder5(encoder4) 86 | 87 | return encoder2, encoder3, encoder4, encoder5 88 | 89 | 90 | class SeResNetXtEncoders(nn.Module): 91 | def __init__(self, encoder_depth, pretrained='imagenet', pool0=False): 92 | super().__init__() 93 | 94 | if encoder_depth == 50: 95 | self.encoder = pretrainedmodels.__dict__['se_resnext50_32x4d'](num_classes=1000, pretrained=pretrained) 96 | elif encoder_depth == 101: 97 | self.encoder = pretrainedmodels.__dict__['se_resnext101_32x4d'](num_classes=1000, pretrained=pretrained) 98 | else: 99 | raise NotImplementedError('only 50, 101 version of Resnet are implemented') 100 | 101 | if pool0: 102 | self.conv1 = nn.Sequential(self.encoder.layer0.conv1, 103 | self.encoder.layer0.bn1, 104 | self.encoder.layer0.relu1, 105 | self.encoder.layer0.pool) 106 | else: 107 | self.conv1 = nn.Sequential(self.encoder.layer0.conv1, 108 | self.encoder.layer0.bn1, 109 | self.encoder.layer0.relu1) 110 | 111 | self.encoder2 = self.encoder.layer1 112 | self.encoder3 = self.encoder.layer2 113 | self.encoder4 = self.encoder.layer3 114 | self.encoder5 = self.encoder.layer4 115 | 116 | def forward(self, x): 117 | conv1 = self.conv1(x) 118 | encoder2 = self.encoder2(conv1) 119 | encoder3 = self.encoder3(encoder2) 120 | encoder4 = self.encoder4(encoder3) 121 | encoder5 = self.encoder5(encoder4) 122 | 123 | return encoder2, encoder3, encoder4, encoder5 124 | 125 | 126 | class DenseNetEncoders(nn.Module): 127 | def __init__(self, encoder_depth, pretrained='imagenet', pool0=False): 128 | super().__init__() 129 | 130 | if encoder_depth == 121: 131 | self.encoder = pretrainedmodels.__dict__['densenet121'](num_classes=1000, pretrained=pretrained) 132 | elif encoder_depth == 161: 133 | self.encoder = pretrainedmodels.__dict__['densenet161'](num_classes=1000, pretrained=pretrained) 134 | elif encoder_depth == 169: 135 | self.encoder = pretrainedmodels.__dict__['densenet169'](num_classes=1000, pretrained=pretrained) 136 | elif encoder_depth == 201: 137 | self.encoder = pretrainedmodels.__dict__['densenet201'](num_classes=1000, pretrained=pretrained) 138 | else: 139 | raise NotImplementedError('only 121, 161, 169, 201 version of Densenet are implemented') 140 | 141 | if pool0: 142 | self.conv1 = nn.Sequential(self.encoder.features.conv0, 143 | self.encoder.features.norm0, 144 | self.encoder.features.relu0, 145 | self.encoder.features.pool0) 146 | else: 147 | self.conv1 = nn.Sequential(self.encoder.features.conv0, 148 | self.encoder.features.norm0, 149 | self.encoder.features.relu0) 150 | 151 | self.encoder2 = 
self.encoder.features.denseblock1 152 | self.transition1 = self.encoder.features.transition1 153 | self.encoder3 = self.encoder.features.denseblock2 154 | self.transition2 = self.encoder.features.transition2 155 | self.encoder4 = self.encoder.features.denseblock3 156 | self.transition3 = self.encoder.features.transition3 157 | self.encoder5 = self.encoder.features.denseblock4 158 | 159 | def forward(self, x): 160 | conv1 = self.conv1(x) 161 | encoder2 = self.encoder2(conv1) 162 | transition1 = self.transition1(encoder2) 163 | encoder3 = self.encoder3(transition1) 164 | transition2 = self.transition2(encoder3) 165 | encoder4 = self.encoder4(transition2) 166 | transition3 = self.transition3(encoder4) 167 | encoder5 = self.encoder5(transition3) 168 | 169 | return encoder2, encoder3, encoder4, encoder5 170 | 171 | 172 | def get_encoder_channel_nr(encoder): 173 | encoder_clone = deepcopy(encoder) 174 | x = Variable(torch.ones((1, 3, 256, 256))) 175 | if torch.cuda.is_available(): 176 | encoder_clone = encoder_clone.cuda() 177 | x = x.cuda() 178 | encoder2, encoder3, encoder4, encoder5 = encoder_clone(x) 179 | encoder_channel_nr = [encoder2.shape[1], encoder3.shape[1], encoder4.shape[1], encoder5.shape[1]] 180 | return encoder_channel_nr 181 | -------------------------------------------------------------------------------- /common_blocks/architectures/large_kernel_matters.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from .base import GlobalConvolutionalNetwork, BoundaryRefinement, DeconvConv2dBnRelu 5 | from .encoders import get_encoder_channel_nr 6 | 7 | 8 | class LargeKernelMatters(nn.Module): 9 | """PyTorch LKM model using ResNet(18, 34, 50, 101 or 152) encoder. 
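    Encoder features at four scales are processed by Global Convolutional Network (GCN) blocks,
    which approximate a large k x k kernel with stacked (k, 1) and (1, k) convolutions, and by
    Boundary Refinement (BR) blocks, which add a small residual convolutional branch to sharpen
    mask boundaries.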
10 | 11 | https://arxiv.org/pdf/1703.02719.pdf 12 | """ 13 | 14 | def __init__(self, encoder, num_classes, kernel_size=9, internal_channels=21, use_relu=False, pool0=False, 15 | use_channel_se=False, use_spatial_se=False, reduction_se=4, dropout_2d=0.0): 16 | super().__init__() 17 | 18 | self.dropout_2d = dropout_2d 19 | self.pool0 = pool0 20 | 21 | self.encoder = encoder 22 | encoder_channel_nr = get_encoder_channel_nr(self.encoder) 23 | 24 | self.gcn2 = GlobalConvolutionalNetwork(in_channels=encoder_channel_nr[0], 25 | out_channels=internal_channels, 26 | kernel_size=kernel_size, 27 | use_relu=use_relu) 28 | self.gcn3 = GlobalConvolutionalNetwork(in_channels=encoder_channel_nr[1], 29 | out_channels=internal_channels, 30 | kernel_size=kernel_size, 31 | use_relu=use_relu) 32 | self.gcn4 = GlobalConvolutionalNetwork(in_channels=encoder_channel_nr[2], 33 | out_channels=internal_channels, 34 | kernel_size=kernel_size, 35 | use_relu=use_relu) 36 | self.gcn5 = GlobalConvolutionalNetwork(in_channels=encoder_channel_nr[3], 37 | out_channels=internal_channels, 38 | kernel_size=kernel_size, 39 | use_relu=use_relu) 40 | self.enc_br2 = BoundaryRefinement(in_channels=internal_channels, 41 | out_channels=internal_channels, 42 | kernel_size=3) 43 | 44 | self.enc_br3 = BoundaryRefinement(in_channels=internal_channels, 45 | out_channels=internal_channels, 46 | kernel_size=3) 47 | 48 | self.enc_br4 = BoundaryRefinement(in_channels=internal_channels, 49 | out_channels=internal_channels, 50 | kernel_size=3) 51 | 52 | self.enc_br5 = BoundaryRefinement(in_channels=internal_channels, 53 | out_channels=internal_channels, 54 | kernel_size=3) 55 | 56 | self.dec_br1 = BoundaryRefinement(in_channels=internal_channels, 57 | out_channels=internal_channels, 58 | kernel_size=3) 59 | 60 | self.dec_br2 = BoundaryRefinement(in_channels=internal_channels, 61 | out_channels=internal_channels, 62 | kernel_size=3) 63 | 64 | self.dec_br3 = BoundaryRefinement(in_channels=internal_channels, 65 | out_channels=internal_channels, 66 | kernel_size=3) 67 | 68 | self.dec_br4 = BoundaryRefinement(in_channels=internal_channels, 69 | out_channels=internal_channels, 70 | kernel_size=3) 71 | 72 | self.deconv5 = DeconvConv2dBnRelu(in_channels=internal_channels, out_channels=internal_channels, 73 | use_channel_se=use_channel_se, use_spatial_se=use_spatial_se, 74 | reduction=reduction_se) 75 | self.deconv4 = DeconvConv2dBnRelu(in_channels=internal_channels, out_channels=internal_channels, 76 | use_channel_se=use_channel_se, use_spatial_se=use_spatial_se, 77 | reduction=reduction_se) 78 | self.deconv3 = DeconvConv2dBnRelu(in_channels=internal_channels, out_channels=internal_channels, 79 | use_channel_se=use_channel_se, use_spatial_se=use_spatial_se, 80 | reduction=reduction_se) 81 | self.deconv2 = DeconvConv2dBnRelu(in_channels=internal_channels, out_channels=internal_channels, 82 | use_channel_se=use_channel_se, use_spatial_se=use_spatial_se, 83 | reduction=reduction_se) 84 | self.deconv1 = DeconvConv2dBnRelu(in_channels=internal_channels, out_channels=internal_channels, 85 | use_channel_se=use_channel_se, use_spatial_se=use_spatial_se, 86 | reduction=reduction_se) 87 | self.dec_br0_1 = BoundaryRefinement(in_channels=internal_channels, 88 | out_channels=internal_channels, 89 | kernel_size=3) 90 | self.dec_br0_2 = BoundaryRefinement(in_channels=internal_channels, 91 | out_channels=internal_channels, 92 | kernel_size=3) 93 | 94 | self.final = nn.Conv2d(internal_channels, num_classes, kernel_size=1, padding=0) 95 | 96 | def forward(self, x): 97 | 
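        # Extract backbone features at four scales, apply a GCN followed by a boundary-refinement
        # block to each scale, then decode top-down: add the upsampled decoder map to the skip
        # GCN map, refine it with BR, and deconvolve to the next resolution.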
encoder2, encoder3, encoder4, encoder5 = self.encoder(x) 98 | encoder5 = F.dropout2d(encoder5, p=self.dropout_2d) 99 | 100 | gcn2 = self.enc_br2(self.gcn2(encoder2)) 101 | gcn3 = self.enc_br3(self.gcn3(encoder3)) 102 | gcn4 = self.enc_br4(self.gcn4(encoder4)) 103 | gcn5 = self.enc_br5(self.gcn5(encoder5)) 104 | 105 | decoder5 = self.deconv5(gcn5) 106 | decoder4 = self.deconv4(self.dec_br4(decoder5 + gcn4)) 107 | decoder3 = self.deconv3(self.dec_br3(decoder4 + gcn3)) 108 | decoder2 = self.dec_br1(self.deconv2(self.dec_br2(decoder3 + gcn2))) 109 | 110 | if self.pool0: 111 | decoder2 = self.dec_br0_2(self.deconv1(self.dec_br0_1(decoder2))) 112 | 113 | return self.final(decoder2) 114 | -------------------------------------------------------------------------------- /common_blocks/architectures/pspnet.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | import torch 4 | 5 | from .base import Conv2dBnRelu 6 | from .encoders import get_encoder_channel_nr 7 | 8 | 9 | class PSPModule(nn.Module): 10 | def __init__(self, features, out_features=1024, sizes=(1, 2, 3, 6)): 11 | super().__init__() 12 | self.stages = [] 13 | self.stages = nn.ModuleList([self._make_stage(features, size) for size in sizes]) 14 | self.bottleneck = nn.Conv2d(features * (len(sizes) + 1), out_features, kernel_size=1) 15 | self.relu = nn.ReLU() 16 | 17 | def _make_stage(self, features, size): 18 | prior = nn.AdaptiveAvgPool2d(output_size=(size, size)) 19 | conv = nn.Conv2d(features, features, kernel_size=1, bias=False) 20 | return nn.Sequential(prior, conv) 21 | 22 | def forward(self, feats): 23 | h, w = feats.size(2), feats.size(3) 24 | priors = [F.upsample(input=stage(feats), size=(h, w), mode='bilinear') for stage in self.stages] + [feats] 25 | bottle = self.bottleneck(torch.cat(priors, 1)) 26 | return self.relu(bottle) 27 | 28 | 29 | class PSPUpsample(nn.Module): 30 | def __init__(self, in_channels, out_channels): 31 | super().__init__() 32 | self.conv = nn.Sequential( 33 | nn.Conv2d(in_channels, out_channels, 3, padding=1), 34 | nn.BatchNorm2d(out_channels), 35 | nn.PReLU() 36 | ) 37 | 38 | def forward(self, x): 39 | p = F.upsample(input=x, scale_factor=2, mode='bilinear') 40 | return self.conv(p) 41 | 42 | 43 | class PSPNet(nn.Module): 44 | def __init__(self, 45 | encoder, 46 | num_classes=2, 47 | sizes=(1, 2, 3, 6), 48 | dropout_2d=0.2, 49 | use_hypercolumn=False, 50 | pool0=False): 51 | super().__init__() 52 | self.num_classes = num_classes 53 | self.dropout_2d = dropout_2d 54 | self.use_hypercolumn = use_hypercolumn 55 | self.pool0 = pool0 56 | 57 | self.encoder = encoder 58 | encoder_channel_nr = get_encoder_channel_nr(self.encoder) 59 | bottom_channel_nr = encoder_channel_nr[3] 60 | 61 | self.psp = PSPModule(bottom_channel_nr, bottom_channel_nr, sizes) 62 | 63 | self.up4 = PSPUpsample(bottom_channel_nr, bottom_channel_nr // 2) 64 | self.up3 = PSPUpsample(bottom_channel_nr // 2, bottom_channel_nr // 4) 65 | self.up2 = PSPUpsample(bottom_channel_nr // 4, bottom_channel_nr // 8) 66 | self.up1 = PSPUpsample(bottom_channel_nr // 8, bottom_channel_nr // 16) 67 | 68 | if self.use_hypercolumn: 69 | self.up0 = PSPUpsample(15 * bottom_channel_nr // 16, 15 * bottom_channel_nr // 16) 70 | self.final = nn.Sequential(Conv2dBnRelu(15 * bottom_channel_nr // 16, bottom_channel_nr // 16), 71 | nn.Conv2d(bottom_channel_nr // 16, num_classes, kernel_size=1, padding=0)) 72 | else: 73 | self.up0 = PSPUpsample(bottom_channel_nr // 16, 
bottom_channel_nr // 16) 74 | self.final = nn.Sequential(Conv2dBnRelu(bottom_channel_nr // 16, bottom_channel_nr // 16), 75 | nn.Conv2d(bottom_channel_nr // 16, num_classes, kernel_size=1, padding=0)) 76 | 77 | def forward(self, x): 78 | encoder2, encoder3, encoder4, encoder5 = self.encoder(x) 79 | encoder5 = F.dropout2d(encoder5, p=self.dropout_2d) 80 | 81 | psp = self.psp(encoder5) 82 | up4 = self.up4(psp) 83 | up3 = self.up3(up4) 84 | up2 = self.up2(up3) 85 | up1 = self.up1(up2) 86 | if self.use_hypercolumn: 87 | hypercolumn = torch.cat([up1, 88 | F.upsample(up2, scale_factor=2, mode='bilinear'), 89 | F.upsample(up3, scale_factor=4, mode='bilinear'), 90 | F.upsample(up4, scale_factor=8, mode='bilinear'), 91 | ], 1) 92 | drop = F.dropout2d(hypercolumn, p=self.dropout_2d) 93 | else: 94 | drop = F.dropout2d(up1, p=self.dropout_2d) 95 | 96 | if self.pool0: 97 | drop = self.up0(drop) 98 | return self.final(drop) 99 | -------------------------------------------------------------------------------- /common_blocks/architectures/unet.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | import torch 4 | 5 | from .base import Conv2dBnRelu, DecoderBlock 6 | from .encoders import get_encoder_channel_nr 7 | 8 | """ 9 | This script has been taken (and modified) from : 10 | https://github.com/ternaus/TernausNet 11 | 12 | @ARTICLE{arXiv:1801.05746, 13 | author = {V. Iglovikov and A. Shvets}, 14 | title = {TernausNet: U-Net with VGG11 Encoder Pre-Trained on ImageNet for Image Segmentation}, 15 | journal = {ArXiv e-prints}, 16 | eprint = {1801.05746}, 17 | year = 2018 18 | } 19 | """ 20 | 21 | 22 | class UNet(nn.Module): 23 | def __init__(self, encoder, num_classes, dropout_2d=0.0, use_hypercolumn=False, pool0=False): 24 | super().__init__() 25 | self.num_classes = num_classes 26 | self.dropout_2d = dropout_2d 27 | self.use_hypercolumn = use_hypercolumn 28 | self.pool0 = pool0 29 | 30 | self.encoder = encoder 31 | encoder_channel_nr = get_encoder_channel_nr(self.encoder) 32 | 33 | self.center = nn.Sequential(Conv2dBnRelu(encoder_channel_nr[3], encoder_channel_nr[3]), 34 | Conv2dBnRelu(encoder_channel_nr[3], encoder_channel_nr[2]), 35 | nn.AvgPool2d(kernel_size=2, stride=2) 36 | ) 37 | 38 | self.dec5 = DecoderBlock(encoder_channel_nr[3] + encoder_channel_nr[2], 39 | encoder_channel_nr[3], 40 | encoder_channel_nr[3] // 8) 41 | 42 | self.dec4 = DecoderBlock(encoder_channel_nr[2] + encoder_channel_nr[3] // 8, 43 | encoder_channel_nr[3] // 2, 44 | encoder_channel_nr[3] // 8) 45 | self.dec3 = DecoderBlock(encoder_channel_nr[1] + encoder_channel_nr[3] // 8, 46 | encoder_channel_nr[3] // 4, 47 | encoder_channel_nr[3] // 8) 48 | self.dec2 = DecoderBlock(encoder_channel_nr[0] + encoder_channel_nr[3] // 8, 49 | encoder_channel_nr[3] // 8, 50 | encoder_channel_nr[3] // 8) 51 | self.dec1 = DecoderBlock(encoder_channel_nr[3] // 8, 52 | encoder_channel_nr[3] // 16, 53 | encoder_channel_nr[3] // 8) 54 | 55 | self.dec0 = DecoderBlock(encoder_channel_nr[3] // 8, 56 | encoder_channel_nr[3] // 16, 57 | encoder_channel_nr[3] // 8) 58 | 59 | if self.use_hypercolumn: 60 | self.dec0 = DecoderBlock(5 * encoder_channel_nr[3] // 8, 61 | encoder_channel_nr[3] // 8, 62 | 5 * encoder_channel_nr[3] // 8) 63 | self.final = nn.Sequential(Conv2dBnRelu(5 * encoder_channel_nr[3] // 8, encoder_channel_nr[3] // 8), 64 | nn.Conv2d(encoder_channel_nr[3] // 8, num_classes, kernel_size=1, padding=0)) 65 | else: 66 | self.dec0 = 
DecoderBlock(encoder_channel_nr[3] // 8, 67 | encoder_channel_nr[3] // 8, 68 | encoder_channel_nr[3] // 8) 69 | self.final = nn.Sequential(Conv2dBnRelu(encoder_channel_nr[3] // 8, encoder_channel_nr[3] // 8), 70 | nn.Conv2d(encoder_channel_nr[3] // 8, num_classes, kernel_size=1, padding=0)) 71 | 72 | def forward(self, x): 73 | encoder2, encoder3, encoder4, encoder5 = self.encoder(x) 74 | encoder5 = F.dropout2d(encoder5, p=self.dropout_2d) 75 | 76 | center = self.center(encoder5) 77 | 78 | dec5 = self.dec5(center, encoder5) 79 | dec4 = self.dec4(dec5, encoder4) 80 | dec3 = self.dec3(dec4, encoder3) 81 | dec2 = self.dec2(dec3, encoder2) 82 | dec1 = self.dec1(dec2) 83 | 84 | if self.use_hypercolumn: 85 | dec1 = torch.cat([dec1, 86 | F.upsample(dec2, scale_factor=2, mode='bilinear'), 87 | F.upsample(dec3, scale_factor=4, mode='bilinear'), 88 | F.upsample(dec4, scale_factor=8, mode='bilinear'), 89 | F.upsample(dec5, scale_factor=16, mode='bilinear'), 90 | ], 1) 91 | 92 | if self.pool0: 93 | dec1 = self.dec0(dec1) 94 | 95 | return self.final(dec1) 96 | -------------------------------------------------------------------------------- /common_blocks/augmentation.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from imgaug import augmenters as iaa 4 | from toolkit.utils import reseed 5 | 6 | from common_blocks.utils.misc import get_crop_pad_sequence 7 | 8 | affine_seq = iaa.Sequential([ 9 | iaa.Fliplr(0.5), 10 | iaa.Flipud(0.5), 11 | iaa.Sometimes(0.5, iaa.CropAndPad(percent=(0.0, 1.0), pad_mode='wrap')) 12 | ], random_order=True) 13 | 14 | intensity_seq = iaa.Sequential([ 15 | iaa.Noop(), 16 | iaa.Sometimes(0.3, iaa.ContrastNormalization((0.5, 1.5))), 17 | iaa.OneOf([ 18 | iaa.Noop(), 19 | iaa.OneOf([ 20 | iaa.Add((-10, 10)), 21 | iaa.AddElementwise((-10, 10)), 22 | iaa.Multiply((0.95, 1.05)), 23 | iaa.MultiplyElementwise((0.95, 1.05)), 24 | ]), 25 | iaa.GaussianBlur(sigma=(0.0, 3.0)), 26 | ]) 27 | ], random_order=False) 28 | 29 | tta_intensity_seq = iaa.Sequential([ 30 | iaa.Sometimes(0.3, iaa.ContrastNormalization((0.5, 1.5))) 31 | ], random_order=False) 32 | 33 | 34 | def resize_seq(resize_target_size): 35 | seq = iaa.Sequential([ 36 | affine_seq, 37 | iaa.Scale({'height': resize_target_size, 'width': resize_target_size}), 38 | ], random_order=False) 39 | return seq 40 | 41 | 42 | def resize_pad_seq(resize_target_size, pad_method, pad_size): 43 | seq = iaa.Sequential([ 44 | affine_seq, 45 | iaa.Scale({'height': resize_target_size, 'width': resize_target_size}), 46 | PadFixed(pad=(pad_size, pad_size), pad_method=pad_method), 47 | ], random_order=False) 48 | return seq 49 | 50 | 51 | def resize_to_fit_net(resize_target_size): 52 | seq = iaa.Sequential(iaa.Scale({'height': resize_target_size, 'width': resize_target_size})) 53 | return seq 54 | 55 | 56 | def pad_to_fit_net(divisor, pad_mode, rest_of_augs=iaa.Noop()): 57 | seq = iaa.Sequential(InferencePad(divisor, pad_mode), rest_of_augs) 58 | return seq 59 | 60 | 61 | class PadFixed(iaa.Augmenter): 62 | PAD_FUNCTION = {'reflect': cv2.BORDER_REFLECT_101, 63 | 'edge': cv2.BORDER_REPLICATE, 64 | } 65 | 66 | def __init__(self, pad=None, pad_method=None, name=None, deterministic=False, random_state=None): 67 | super().__init__(name, deterministic, random_state) 68 | self.pad = pad 69 | self.pad_method = pad_method 70 | 71 | def _augment_images(self, images, random_state, parents, hooks): 72 | result = [] 73 | for i, image in enumerate(images): 74 | image_pad = self._pad(image) 
75 | result.append(image_pad) 76 | return result 77 | 78 | def _augment_keypoints(self, keypoints_on_images, random_state, parents, hooks): 79 | result = [] 80 | return result 81 | 82 | def _pad(self, img): 83 | img_ = img.copy() 84 | 85 | if self._is_expanded_grey_format(img): 86 | img_ = np.squeeze(img_, axis=-1) 87 | 88 | h_pad, w_pad = self.pad 89 | img_ = cv2.copyMakeBorder(img_.copy(), h_pad, h_pad, w_pad, w_pad, PadFixed.PAD_FUNCTION[self.pad_method]) 90 | 91 | if self._is_expanded_grey_format(img): 92 | img_ = np.expand_dims(img_, axis=-1) 93 | 94 | return img_ 95 | 96 | def get_parameters(self): 97 | return [] 98 | 99 | def _is_expanded_grey_format(self, img): 100 | if len(img.shape) == 3 and img.shape[2] == 1: 101 | return True 102 | else: 103 | return False 104 | 105 | 106 | def test_time_augmentation_transform(image, tta_parameters): 107 | if tta_parameters['ud_flip']: 108 | image = np.flipud(image) 109 | if tta_parameters['lr_flip']: 110 | image = np.fliplr(image) 111 | image = rotate(image, tta_parameters['rotation']) 112 | 113 | if tta_parameters['color_shift']: 114 | tta_intensity = reseed(tta_intensity_seq, deterministic=False) 115 | image = tta_intensity.augment_image(image) 116 | 117 | return image 118 | 119 | 120 | def test_time_augmentation_inverse_transform(image, tta_parameters): 121 | image = per_channel_rotation(image.copy(), -1 * tta_parameters['rotation']) 122 | 123 | if tta_parameters['lr_flip']: 124 | image = per_channel_fliplr(image.copy()) 125 | if tta_parameters['ud_flip']: 126 | image = per_channel_flipud(image.copy()) 127 | return image 128 | 129 | 130 | def per_channel_flipud(x): 131 | x_ = x.copy() 132 | for i, channel in enumerate(x): 133 | x_[i, :, :] = np.flipud(channel) 134 | return x_ 135 | 136 | 137 | def per_channel_fliplr(x): 138 | x_ = x.copy() 139 | for i, channel in enumerate(x): 140 | x_[i, :, :] = np.fliplr(channel) 141 | return x_ 142 | 143 | 144 | def per_channel_rotation(x, angle): 145 | return rotate(x, angle, axes=(1, 2)) 146 | 147 | 148 | def rotate(image, angle, axes=(0, 1)): 149 | if angle % 90 != 0: 150 | raise Exception('Angle must be a multiple of 90.') 151 | k = angle // 90 152 | return np.rot90(image, k, axes=axes) 153 | 154 | 155 | class RandomCropFixedSize(iaa.Augmenter): 156 | def __init__(self, px=None, name=None, deterministic=False, random_state=None): 157 | super(RandomCropFixedSize, self).__init__(name=name, deterministic=deterministic, random_state=random_state) 158 | self.px = px 159 | if isinstance(self.px, tuple): 160 | self.px_h, self.px_w = self.px 161 | elif isinstance(self.px, int): 162 | self.px_h = self.px 163 | self.px_w = self.px 164 | else: 165 | raise NotImplementedError 166 | 167 | def _augment_images(self, images, random_state, parents, hooks): 168 | 169 | result = [] 170 | seeds = random_state.randint(0, 10 ** 6, (len(images),)) 171 | for i, image in enumerate(images): 172 | seed = seeds[i] 173 | image_cr = self._random_crop(seed, image) 174 | result.append(image_cr) 175 | return result 176 | 177 | def _augment_keypoints(self, keypoints_on_images, random_state, parents, hooks): 178 | result = [] 179 | return result 180 | 181 | def _random_crop(self, seed, image): 182 | height, width = image.shape[:2] 183 | 184 | np.random.seed(seed) 185 | if height > self.px_h: 186 | crop_top = np.random.randint(height - self.px_h) 187 | elif height == self.px_h: 188 | crop_top = 0 189 | else: 190 | raise ValueError("To big crop height") 191 | crop_bottom = crop_top + self.px_h 192 | 193 | np.random.seed(seed + 1) 194 | 
if width > self.px_w: 195 | crop_left = np.random.randint(width - self.px_w) 196 | elif width == self.px_w: 197 | crop_left = 0 198 | else: 199 | raise ValueError("To big crop width") 200 | crop_right = crop_left + self.px_w 201 | 202 | if len(image.shape) == 2: 203 | image_cropped = image[crop_top:crop_bottom, crop_left:crop_right] 204 | else: 205 | image_cropped = image[crop_top:crop_bottom, crop_left:crop_right, :] 206 | return image_cropped 207 | 208 | def get_parameters(self): 209 | return [] 210 | 211 | 212 | class InferencePad(iaa.Augmenter): 213 | def __init__(self, divisor=2, pad_mode='symmetric', name=None, deterministic=False, random_state=None): 214 | super(InferencePad, self).__init__(name=name, deterministic=deterministic, random_state=random_state) 215 | self.divisor = divisor 216 | self.pad_mode = pad_mode 217 | 218 | def _augment_keypoints(self, keypoints_on_images, random_state, parents, hooks): 219 | return keypoints_on_images 220 | 221 | def _augment_images(self, images, random_state, parents, hooks): 222 | 223 | result = [] 224 | for i, image in enumerate(images): 225 | image_padded = self._pad_image(image) 226 | result.append(image_padded) 227 | return result 228 | 229 | def _pad_image(self, image): 230 | height = image.shape[0] 231 | width = image.shape[1] 232 | 233 | pad_sequence = self._get_pad_sequence(height, width) 234 | augmenter = iaa.Pad(px=pad_sequence, keep_size=False, pad_mode=self.pad_mode) 235 | return augmenter.augment_image(image) 236 | 237 | def _get_pad_sequence(self, height, width): 238 | pad_vertical = self._get_pad(height) 239 | pad_horizontal = self._get_pad(width) 240 | return get_crop_pad_sequence(pad_vertical, pad_horizontal) 241 | 242 | def _get_pad(self, dim): 243 | if dim % self.divisor == 0: 244 | return 0 245 | else: 246 | return self.divisor - dim % self.divisor 247 | 248 | def get_parameters(self): 249 | return [self.divisor, self.pad_mode] 250 | -------------------------------------------------------------------------------- /common_blocks/callbacks.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime, timedelta 3 | from functools import partial 4 | from tempfile import TemporaryDirectory 5 | 6 | import neptune 7 | import numpy as np 8 | import torch 9 | from PIL import Image 10 | from steppy.adapter import Adapter, E 11 | from steppy.base import Step 12 | from toolkit.pytorch_transformers.utils import Averager, persist_torch_model 13 | from toolkit.pytorch_transformers.validation import score_model 14 | from torch.autograd import Variable 15 | from torch.autograd import Variable as V 16 | from torch.optim.lr_scheduler import ExponentialLR, ReduceLROnPlateau 17 | 18 | from common_blocks.utils.io import read_masks 19 | from common_blocks.utils.misc import OneCycle, get_list_of_image_predictions, get_logger, make_apply_transformer, \ 20 | sigmoid, softmax 21 | from .metrics import intersection_over_union_thresholds 22 | from .postprocessing import binarize, label, resize_image 23 | 24 | logger = get_logger() 25 | 26 | Y_COLUMN = 'file_path_mask' 27 | ORIGINAL_SIZE = (768, 768) 28 | THRESHOLD = 0.5 29 | NUM_THREADS = 300 30 | 31 | 32 | class Callback: 33 | def __init__(self): 34 | self.epoch_id = None 35 | self.batch_id = None 36 | 37 | self.model = None 38 | self.optimizer = None 39 | self.loss_function = None 40 | self.output_names = None 41 | self.validation_datagen = None 42 | self.lr_scheduler = None 43 | 44 | def set_params(self, transformer, 
validation_datagen, *args, **kwargs): 45 | self.model = transformer.model 46 | self.optimizer = transformer.optimizer 47 | self.loss_function = transformer.loss_function 48 | self.output_names = transformer.output_names 49 | self.validation_datagen = validation_datagen 50 | self.transformer = transformer 51 | 52 | def on_train_begin(self, *args, **kwargs): 53 | self.epoch_id = 0 54 | self.batch_id = 0 55 | 56 | def on_train_end(self, *args, **kwargs): 57 | pass 58 | 59 | def on_epoch_begin(self, *args, **kwargs): 60 | pass 61 | 62 | def on_epoch_end(self, *args, **kwargs): 63 | self.epoch_id += 1 64 | 65 | def training_break(self, *args, **kwargs): 66 | return False 67 | 68 | def on_batch_begin(self, *args, **kwargs): 69 | pass 70 | 71 | def on_batch_end(self, *args, **kwargs): 72 | self.batch_id += 1 73 | 74 | def get_validation_loss(self): 75 | if self.epoch_id not in self.transformer.validation_loss.keys(): 76 | self.transformer.validation_loss[self.epoch_id] = score_model(self.model, self.loss_function, 77 | self.validation_datagen) 78 | 79 | return self.transformer.validation_loss[self.epoch_id] 80 | 81 | 82 | class CallbackList: 83 | def __init__(self, callbacks=None): 84 | if callbacks is None: 85 | self.callbacks = [] 86 | elif isinstance(callbacks, Callback): 87 | self.callbacks = [callbacks] 88 | else: 89 | self.callbacks = callbacks 90 | 91 | def __len__(self): 92 | return len(self.callbacks) 93 | 94 | def set_params(self, *args, **kwargs): 95 | for callback in self.callbacks: 96 | callback.set_params(*args, **kwargs) 97 | 98 | def on_train_begin(self, *args, **kwargs): 99 | for callback in self.callbacks: 100 | callback.on_train_begin(*args, **kwargs) 101 | 102 | def on_train_end(self, *args, **kwargs): 103 | for callback in self.callbacks: 104 | callback.on_train_end(*args, **kwargs) 105 | 106 | def on_epoch_begin(self, *args, **kwargs): 107 | for callback in self.callbacks: 108 | callback.on_epoch_begin(*args, **kwargs) 109 | 110 | def on_epoch_end(self, *args, **kwargs): 111 | for callback in self.callbacks: 112 | callback.on_epoch_end(*args, **kwargs) 113 | 114 | def training_break(self, *args, **kwargs): 115 | callback_out = [callback.training_break(*args, **kwargs) for callback in self.callbacks] 116 | return any(callback_out) 117 | 118 | def on_batch_begin(self, *args, **kwargs): 119 | for callback in self.callbacks: 120 | callback.on_batch_begin(*args, **kwargs) 121 | 122 | def on_batch_end(self, *args, **kwargs): 123 | for callback in self.callbacks: 124 | callback.on_batch_end(*args, **kwargs) 125 | 126 | 127 | class TrainingMonitor(Callback): 128 | def __init__(self, epoch_every=None, batch_every=None): 129 | super().__init__() 130 | self.epoch_loss_averagers = {} 131 | if epoch_every == 0: 132 | self.epoch_every = False 133 | else: 134 | self.epoch_every = epoch_every 135 | if batch_every == 0: 136 | self.batch_every = False 137 | else: 138 | self.batch_every = batch_every 139 | 140 | def on_train_begin(self, *args, **kwargs): 141 | self.epoch_loss_averagers = {} 142 | self.epoch_id = 0 143 | self.batch_id = 0 144 | 145 | def on_epoch_end(self, *args, **kwargs): 146 | for name, averager in self.epoch_loss_averagers.items(): 147 | epoch_avg_loss = averager.value 148 | averager.reset() 149 | if self.epoch_every and ((self.epoch_id % self.epoch_every) == 0): 150 | logger.info('epoch {0} {1}: {2:.5f}'.format(self.epoch_id, name, epoch_avg_loss)) 151 | self.epoch_id += 1 152 | 153 | def on_batch_end(self, metrics, *args, **kwargs): 154 | for name, loss in 
metrics.items(): 155 | loss = loss.data.cpu().numpy()[0] 156 | 157 | if name in self.epoch_loss_averagers.keys(): 158 | self.epoch_loss_averagers[name].send(loss) 159 | else: 160 | self.epoch_loss_averagers[name] = Averager() 161 | self.epoch_loss_averagers[name].send(loss) 162 | 163 | if self.batch_every and ((self.batch_id % self.batch_every) == 0): 164 | logger.info('epoch {0} batch {1} {2}: {3:.5f}'.format(self.epoch_id, self.batch_id, name, loss)) 165 | self.batch_id += 1 166 | 167 | 168 | class ExponentialLRScheduler(Callback): 169 | def __init__(self, gamma, epoch_every=1, batch_every=None): 170 | super().__init__() 171 | self.gamma = gamma 172 | if epoch_every == 0: 173 | self.epoch_every = False 174 | else: 175 | self.epoch_every = epoch_every 176 | if batch_every == 0: 177 | self.batch_every = False 178 | else: 179 | self.batch_every = batch_every 180 | 181 | def set_params(self, transformer, validation_datagen, *args, **kwargs): 182 | self.validation_datagen = validation_datagen 183 | self.model = transformer.model 184 | self.optimizer = transformer.optimizer 185 | self.loss_function = transformer.loss_function 186 | self.lr_scheduler = ExponentialLR(self.optimizer, self.gamma, last_epoch=-1) 187 | 188 | def on_train_begin(self, *args, **kwargs): 189 | self.epoch_id = 0 190 | self.batch_id = 0 191 | logger.info('initial lr: {0}'.format(self.optimizer.state_dict()['param_groups'][0]['initial_lr'])) 192 | 193 | def on_epoch_end(self, *args, **kwargs): 194 | if self.epoch_every and (((self.epoch_id + 1) % self.epoch_every) == 0): 195 | self.lr_scheduler.step() 196 | logger.info('epoch {0} current lr: {1}'.format(self.epoch_id + 1, 197 | self.optimizer.state_dict()['param_groups'][0]['lr'])) 198 | self.epoch_id += 1 199 | 200 | def on_batch_end(self, *args, **kwargs): 201 | if self.batch_every and ((self.batch_id % self.batch_every) == 0): 202 | self.lr_scheduler.step() 203 | logger.info('epoch {0} batch {1} current lr: {2}'.format( 204 | self.epoch_id + 1, self.batch_id + 1, self.optimizer.state_dict()['param_groups'][0]['lr'])) 205 | self.batch_id += 1 206 | 207 | 208 | class ReduceLROnPlateauScheduler(Callback): 209 | def __init__(self, metric_name, minimize, reduce_factor, reduce_patience, min_lr): 210 | super().__init__() 211 | self.ctx = neptune.Context() 212 | self.ctx.channel_reset('Learning Rate') 213 | self.metric_name = metric_name 214 | self.minimize = minimize 215 | self.reduce_factor = reduce_factor 216 | self.reduce_patience = reduce_patience 217 | self.min_lr = min_lr 218 | 219 | def set_params(self, transformer, validation_datagen, *args, **kwargs): 220 | super().set_params(transformer, validation_datagen) 221 | self.validation_datagen = validation_datagen 222 | self.model = transformer.model 223 | self.optimizer = transformer.optimizer 224 | self.loss_function = transformer.loss_function 225 | self.lr_scheduler = ReduceLROnPlateau(optimizer=self.optimizer, 226 | mode='min' if self.minimize else 'max', 227 | factor=self.reduce_factor, 228 | patience=self.reduce_patience, 229 | min_lr=self.min_lr) 230 | 231 | def on_train_begin(self, *args, **kwargs): 232 | self.epoch_id = 0 233 | self.batch_id = 0 234 | 235 | def on_epoch_end(self, *args, **kwargs): 236 | self.model.eval() 237 | val_loss = self.get_validation_loss() 238 | metric = val_loss[self.metric_name] 239 | metric = metric.data.cpu().numpy()[0] 240 | self.model.train() 241 | 242 | self.lr_scheduler.step(metrics=metric, epoch=self.epoch_id) 243 | logger.info('epoch {0} current lr: {1}'.format(self.epoch_id + 1, 
244 | self.optimizer.state_dict()['param_groups'][0]['lr'])) 245 | self.ctx.channel_send('Learning Rate', x=self.epoch_id, 246 | y=self.optimizer.state_dict()['param_groups'][0]['lr']) 247 | 248 | self.epoch_id += 1 249 | 250 | 251 | class InitialLearningRateFinder(Callback): 252 | def __init__(self, min_lr=1e-8, multipy_factor=1.05, add_factor=0.0): 253 | super().__init__() 254 | self.ctx = neptune.Context() 255 | self.ctx.channel_reset('Learning Rate Finder') 256 | self.min_lr = min_lr 257 | self.multipy_factor = multipy_factor 258 | self.add_factor = add_factor 259 | 260 | def set_params(self, transformer, validation_datagen, *args, **kwargs): 261 | super().set_params(transformer, validation_datagen) 262 | self.validation_datagen = validation_datagen 263 | self.model = transformer.model 264 | self.optimizer = transformer.optimizer 265 | self.loss_function = transformer.loss_function 266 | 267 | def on_train_begin(self, *args, **kwargs): 268 | self.epoch_id = 0 269 | self.batch_id = 0 270 | 271 | for param_group in self.optimizer.param_groups: 272 | param_group['lr'] = self.min_lr 273 | 274 | def on_batch_end(self, metrics, *args, **kwargs): 275 | for name, loss in metrics.items(): 276 | loss = loss.data.cpu().numpy()[0] 277 | current_lr = self.optimizer.state_dict()['param_groups'][0]['lr'] 278 | logger.info('Learning Rate {} Loss {})'.format(current_lr, loss)) 279 | self.ctx.channel_send('Learning Rate Finder', x=self.batch_id, y=current_lr) 280 | self.ctx.channel_send('Loss', x=self.batch_id, y=loss) 281 | 282 | for param_group in self.optimizer.param_groups: 283 | param_group['lr'] = current_lr * self.multipy_factor + self.add_factor 284 | self.batch_id += 1 285 | 286 | 287 | class ExperimentTiming(Callback): 288 | def __init__(self, epoch_every=None, batch_every=None): 289 | super().__init__() 290 | if epoch_every == 0: 291 | self.epoch_every = False 292 | else: 293 | self.epoch_every = epoch_every 294 | if batch_every == 0: 295 | self.batch_every = False 296 | else: 297 | self.batch_every = batch_every 298 | self.batch_start = None 299 | self.epoch_start = None 300 | self.current_sum = None 301 | self.current_mean = None 302 | 303 | def on_train_begin(self, *args, **kwargs): 304 | self.epoch_id = 0 305 | self.batch_id = 0 306 | logger.info('starting training...') 307 | 308 | def on_train_end(self, *args, **kwargs): 309 | logger.info('training finished') 310 | 311 | def on_epoch_begin(self, *args, **kwargs): 312 | if self.epoch_id > 0: 313 | epoch_time = datetime.now() - self.epoch_start 314 | if self.epoch_every: 315 | if (self.epoch_id % self.epoch_every) == 0: 316 | logger.info('epoch {0} time {1}'.format(self.epoch_id - 1, str(epoch_time)[:-7])) 317 | self.epoch_start = datetime.now() 318 | self.current_sum = timedelta() 319 | self.current_mean = timedelta() 320 | logger.info('epoch {0} ...'.format(self.epoch_id)) 321 | 322 | def on_batch_begin(self, *args, **kwargs): 323 | if self.batch_id > 0: 324 | current_delta = datetime.now() - self.batch_start 325 | self.current_sum += current_delta 326 | self.current_mean = self.current_sum / self.batch_id 327 | if self.batch_every: 328 | if self.batch_id > 0 and (((self.batch_id - 1) % self.batch_every) == 0): 329 | logger.info('epoch {0} average batch time: {1}'.format(self.epoch_id, str(self.current_mean)[:-5])) 330 | if self.batch_every: 331 | if self.batch_id == 0 or self.batch_id % self.batch_every == 0: 332 | logger.info('epoch {0} batch {1} ...'.format(self.epoch_id, self.batch_id)) 333 | self.batch_start = datetime.now() 334 | 
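`CallbackList` is a thin fan-out wrapper: every lifecycle hook simply loops over the wrapped callbacks, and `training_break()` stops training as soon as any callback requests it. A minimal sketch, assuming the `Callback`, `CallbackList` and `ExperimentTiming` classes above are in scope; `EpochCounter` is a hypothetical callback written only for illustration:

```python
# Hypothetical callback that just counts completed epochs.
class EpochCounter(Callback):
    def __init__(self):
        super().__init__()
        self.seen_epochs = 0

    def on_epoch_end(self, *args, **kwargs):
        self.seen_epochs += 1
        super().on_epoch_end(*args, **kwargs)   # keeps self.epoch_id in sync

counter = EpochCounter()
callbacks = CallbackList(callbacks=[counter, ExperimentTiming(epoch_every=1)])

callbacks.on_train_begin()
for _ in range(3):
    callbacks.on_epoch_begin()
    callbacks.on_epoch_end()
callbacks.on_train_end()

assert counter.seen_epochs == 3
assert not callbacks.training_break()   # no callback asked to stop early
```

Model-dependent hooks such as `set_params` and `get_validation_loss` are left out of this sketch because they need a fitted transformer and a validation data generator.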
335 | 336 | class NeptuneMonitor(Callback): 337 | def __init__(self, image_nr, image_resize, image_every, model_name): 338 | super().__init__() 339 | self.model_name = model_name 340 | self.ctx = neptune.Context() 341 | self.epoch_loss_averager = Averager() 342 | self.image_resize = image_resize 343 | self.image_every = image_every 344 | self.image_nr = image_nr 345 | 346 | def on_train_begin(self, *args, **kwargs): 347 | self.epoch_loss_averagers = {} 348 | self.epoch_id = 0 349 | self.batch_id = 0 350 | 351 | def on_batch_end(self, metrics, *args, **kwargs): 352 | for name, loss in metrics.items(): 353 | loss = loss.data.cpu().numpy()[0] 354 | 355 | if name in self.epoch_loss_averagers.keys(): 356 | self.epoch_loss_averagers[name].send(loss) 357 | else: 358 | self.epoch_loss_averagers[name] = Averager() 359 | self.epoch_loss_averagers[name].send(loss) 360 | 361 | self.ctx.channel_send(name, loss) 362 | self.batch_id += 1 363 | 364 | def on_epoch_end(self, *args, **kwargs): 365 | self._send_numeric_channels() 366 | if self.image_every is not None and self.epoch_id % self.image_every == 0: 367 | self._send_image_channels() 368 | self.epoch_id += 1 369 | 370 | def _send_numeric_channels(self, *args, **kwargs): 371 | for name, averager in self.epoch_loss_averagers.items(): 372 | epoch_avg_loss = averager.value 373 | averager.reset() 374 | self.ctx.channel_send('{} epoch {} loss'.format(self.model_name, name), x=self.epoch_id, y=epoch_avg_loss) 375 | 376 | self.model.eval() 377 | val_loss = self.get_validation_loss() 378 | self.model.train() 379 | for name, loss in val_loss.items(): 380 | loss = loss.data.cpu().numpy()[0] 381 | self.ctx.channel_send('{} epoch_val {} loss'.format(self.model_name, name), x=self.epoch_id, y=loss) 382 | 383 | def _send_image_channels(self): 384 | self.model.eval() 385 | image_triplets = self._get_image_triplets() 386 | if self.image_nr is not None: 387 | image_triplets = image_triplets[:self.image_nr] 388 | self.model.train() 389 | 390 | for i, (raw, pred, truth) in enumerate(image_triplets): 391 | h, w, _ = raw.shape 392 | image_glued = np.zeros((h, 3 * w + 20, 3)) 393 | image_glued[:, :w, :] = raw 394 | image_glued[:, (w + 10):(2 * w + 10), :] = pred 395 | image_glued[:, (2 * w + 20):, :] = truth 396 | 397 | pill_image = Image.fromarray((image_glued * 255.).astype(np.uint8)) 398 | h_, w_, _ = image_glued.shape 399 | pill_image = pill_image.resize((int(self.image_resize * w_), int(self.image_resize * h_)), 400 | Image.ANTIALIAS) 401 | 402 | self.ctx.channel_send('{} predictions'.format(self.model_name), neptune.Image( 403 | name='epoch{}_batch{}_idx{}'.format(self.epoch_id, self.batch_id, i), 404 | description="image, prediction, ground truth", 405 | data=pill_image)) 406 | 407 | def _get_image_triplets(self): 408 | image_triplets = [] 409 | batch_gen, steps = self.validation_datagen 410 | for batch_id, data in enumerate(batch_gen): 411 | predictions, targets_tensors = self._get_predictions_targets(data) 412 | 413 | raw_images = data[0].numpy() 414 | ground_truth_masks = targets_tensors[0].cpu().numpy() 415 | 416 | for image, prediction, target in zip(raw_images, predictions, ground_truth_masks): 417 | raw = denormalize(image).transpose(1, 2, 0) 418 | pred = np.tile(prediction[1, :, :], (3, 1, 1)).transpose(1, 2, 0) 419 | truth = np.tile(target[1, :, :], (3, 1, 1)).transpose(1, 2, 0) 420 | image_triplets.append((raw, pred, truth)) 421 | break 422 | return image_triplets 423 | 424 | def _get_predictions_targets(self, data): 425 | X = data[0] 426 | targets_tensors 
= data[1:] 427 | 428 | if torch.cuda.is_available(): 429 | X = Variable(X, volatile=True).cuda() 430 | else: 431 | X = Variable(X, volatile=True) 432 | 433 | predictions = sigmoid(self.model(X).data.cpu().numpy()) 434 | return predictions, targets_tensors 435 | 436 | 437 | class SNS_ValidationMonitor(Callback): 438 | def __init__(self): 439 | super().__init__() 440 | self.ctx = neptune.Context() 441 | self.best_loss = None 442 | 443 | def set_params(self, transformer, validation_datagen, *args, **kwargs): 444 | self.transformer = transformer 445 | self.validation_datagen = validation_datagen 446 | self.model = transformer.model 447 | self.loss_function = transformer.loss_function 448 | 449 | def on_batch_end(self, metrics, *args, **kwargs): 450 | for name, loss in metrics.items(): 451 | loss = loss.data.cpu().numpy()[0] 452 | self.ctx.channel_send(name, loss) 453 | 454 | def on_epoch_end(self, *args, **kwargs): 455 | self.model.eval() 456 | self.epoch_id += 1 457 | self.validation_loss = self.calculate_epoch_end_metrics() 458 | epoch_end_loss = self.validation_loss['sum'] 459 | epoch_end_acc = self.validation_loss['acc'] 460 | 461 | self.transformer.validation_loss[self.epoch_id] = {'acc': V(torch.Tensor([epoch_end_acc])), 462 | 'sum': V(torch.Tensor([float(epoch_end_loss)]))} 463 | logger.info('epoch {0} ship no ship epoch end validation loss: {1}'.format(self.epoch_id, epoch_end_loss)) 464 | logger.info('epoch {0} ship no ship epoch end accuracy: {1}'.format(self.epoch_id, epoch_end_acc)) 465 | self.ctx.channel_send("ship_no_ship_epoch_end_acc", epoch_end_acc) 466 | self.ctx.channel_send("ship_no_ship_epoch_end_loss", epoch_end_loss) 467 | 468 | def calculate_epoch_end_metrics(self): 469 | self.model.eval() 470 | batch_gen, steps = self.validation_datagen 471 | sum_loss = 0 472 | Ys = [] 473 | Ypreds = [] 474 | for batch in batch_gen: 475 | X, y = batch 476 | X, y = V(X, volatile=True), V(y, volatile=True) 477 | if torch.cuda.is_available(): 478 | X, y = X.cuda(), y.cuda() 479 | 480 | y_pred = self.model(X) 481 | loss_val = self.loss_function[0][1](y_pred, y.long()) 482 | y_pred = y_pred.max(1)[1] 483 | y_pred = y_pred.data.cpu().numpy().astype(int).ravel() 484 | y = y.data.cpu().numpy() 485 | Ys.append(y) 486 | Ypreds.append(y_pred) 487 | sum_loss += loss_val.data.cpu().numpy() 488 | 489 | Ys = np.concatenate(Ys) 490 | Ypreds = (np.concatenate(Ypreds) > 0.5).astype(int) 491 | matches = sum(Ys == Ypreds) 492 | acc = matches / float(len(Ys)) 493 | 494 | self.model.train() 495 | 496 | return {'sum': sum_loss / steps, "acc": acc} 497 | 498 | def get_validation_loss(self): 499 | if not self.transformer.validation_loss: 500 | self.transformer.validation_loss = {} 501 | return self.transformer.validation_loss[self.epoch_id] 502 | 503 | 504 | class ValidationMonitor(Callback): 505 | def __init__(self, data_dir, loader_mode, epoch_every=None, batch_every=None): 506 | super().__init__() 507 | if epoch_every == 0: 508 | self.epoch_every = False 509 | else: 510 | self.epoch_every = epoch_every 511 | if batch_every == 0: 512 | self.batch_every = False 513 | else: 514 | self.batch_every = batch_every 515 | 516 | self.data_dir = data_dir 517 | self.validation_pipeline = postprocessing_pipeline_simplified 518 | self.loader_mode = loader_mode 519 | self.meta_valid = None 520 | self.y_true = None 521 | self.activation_func = None 522 | 523 | def set_params(self, transformer, validation_datagen, meta_valid=None, *args, **kwargs): 524 | self.model = transformer.model 525 | self.optimizer = 
transformer.optimizer 526 | self.loss_function = transformer.loss_function 527 | self.output_names = transformer.output_names 528 | self.validation_datagen = validation_datagen 529 | self.meta_valid = meta_valid 530 | self.y_true = read_masks(self.meta_valid[Y_COLUMN].values) 531 | self.activation_func = transformer.activation_func 532 | self.transformer = transformer 533 | 534 | def get_validation_loss(self): 535 | return self._get_validation_loss() 536 | 537 | def on_epoch_end(self, *args, **kwargs): 538 | if self.epoch_every and ((self.epoch_id % self.epoch_every) == 0): 539 | self.model.eval() 540 | val_loss = self.get_validation_loss() 541 | self.model.train() 542 | for name, loss in val_loss.items(): 543 | loss = loss.data.cpu().numpy()[0] 544 | logger.info('epoch {0} validation {1}: {2:.5f}'.format(self.epoch_id, name, loss)) 545 | self.epoch_id += 1 546 | 547 | def _get_validation_loss(self): 548 | output, epoch_loss = self._transform() 549 | 550 | logger.info('Calculating F2 Score') 551 | y_pred = self._generate_prediction(output) 552 | f2_score = intersection_over_union_thresholds(self.y_true, y_pred) 553 | logger.info('F2 score on validation is {}'.format(f2_score)) 554 | 555 | if not self.transformer.validation_loss: 556 | self.transformer.validation_loss = {} 557 | self.transformer.validation_loss.setdefault(self.epoch_id, {'sum': epoch_loss, 558 | 'f2': Variable(torch.Tensor([f2_score])), 559 | }) 560 | return self.transformer.validation_loss[self.epoch_id] 561 | 562 | def _transform(self): 563 | self.model.eval() 564 | batch_gen, steps = self.validation_datagen 565 | partial_batch_losses = [] 566 | outputs = {} 567 | for batch_id, data in enumerate(batch_gen): 568 | targets_var, outputs_batch = self._get_targets_and_output(data) 569 | 570 | if len(self.output_names) == 1: 571 | for (name, loss_function_one, weight), target in zip(self.loss_function, targets_var): 572 | loss_sum = loss_function_one(outputs_batch, target) * weight 573 | outputs.setdefault(self.output_names[0], []).append(outputs_batch.data.cpu().numpy()) 574 | else: 575 | batch_losses = [] 576 | for (name, loss_function_one, weight), output, target in zip(self.loss_function, outputs_batch, 577 | targets_var): 578 | loss = loss_function_one(output, target) * weight 579 | batch_losses.append(loss) 580 | partial_batch_losses.setdefault(name, []).append(loss) 581 | output_ = output.data.cpu().numpy() 582 | outputs.setdefault(name, []).append(output_) 583 | loss_sum = sum(batch_losses) 584 | partial_batch_losses.append(loss_sum) 585 | if batch_id == steps: 586 | break 587 | self.model.train() 588 | average_losses = sum(partial_batch_losses) / steps 589 | outputs = {'{}_prediction'.format(name): get_list_of_image_predictions(outputs_) for name, outputs_ in 590 | outputs.items()} 591 | for name, prediction in outputs.items(): 592 | if self.activation_func == 'softmax': 593 | outputs[name] = [softmax(single_prediction, axis=0) for single_prediction in prediction] 594 | elif self.activation_func == 'sigmoid': 595 | outputs[name] = [sigmoid(np.squeeze(mask)) for mask in prediction] 596 | else: 597 | raise Exception('Only softmax and sigmoid activations are allowed') 598 | 599 | return outputs, average_losses 600 | 601 | def _get_targets_and_output(self, data): 602 | X = data[0] 603 | targets_tensors = data[1:] 604 | 605 | if torch.cuda.is_available(): 606 | X = Variable(X, volatile=True).cuda() 607 | targets_var = [] 608 | for target_tensor in targets_tensors: 609 | targets_var.append(Variable(target_tensor, 
volatile=True).cuda()) 610 | else: 611 | X = Variable(X, volatile=True) 612 | targets_var = [] 613 | for target_tensor in targets_tensors: 614 | targets_var.append(Variable(target_tensor, volatile=True)) 615 | outputs_batch = self.model(X) 616 | 617 | return targets_var, outputs_batch 618 | 619 | def _generate_prediction(self, outputs): 620 | data = {'callback_input': {'meta': self.meta_valid, 621 | 'meta_valid': None, 622 | }, 623 | 'network_output': {**outputs} 624 | } 625 | with TemporaryDirectory() as cache_dirpath: 626 | pipeline = self.validation_pipeline(cache_dirpath, self.loader_mode) 627 | output = pipeline.transform(data) 628 | y_pred = output['labeled_images'] 629 | return y_pred 630 | 631 | 632 | class ModelCheckpoint(Callback): 633 | def __init__(self, filepath, metric_name='sum', epoch_every=1, minimize=True): 634 | self.filepath = filepath 635 | self.minimize = minimize 636 | self.best_score = None 637 | 638 | if epoch_every == 0: 639 | self.epoch_every = False 640 | else: 641 | self.epoch_every = epoch_every 642 | 643 | self.metric_name = metric_name 644 | 645 | def on_train_begin(self, *args, **kwargs): 646 | self.epoch_id = 0 647 | self.batch_id = 0 648 | os.makedirs(os.path.dirname(self.filepath), exist_ok=True) 649 | 650 | def on_epoch_end(self, *args, **kwargs): 651 | if self.epoch_every and ((self.epoch_id % self.epoch_every) == 0): 652 | self.model.eval() 653 | val_loss = self.get_validation_loss() 654 | loss_sum = val_loss[self.metric_name] 655 | loss_sum = loss_sum.data.cpu().numpy()[0] 656 | 657 | self.model.train() 658 | 659 | if self.best_score is None: 660 | self.best_score = loss_sum 661 | 662 | if (self.minimize and loss_sum < self.best_score) or (not self.minimize and loss_sum > self.best_score) or ( 663 | self.epoch_id == 0): 664 | self.best_score = loss_sum 665 | persist_torch_model(self.model, self.filepath) 666 | logger.info('epoch {0} model saved to {1}'.format(self.epoch_id, self.filepath)) 667 | 668 | self.epoch_id += 1 669 | 670 | 671 | class OneCycleCallback(Callback): 672 | def __init__(self, number_of_batches_per_full_cycle, max_lr, enabled=1, momentum_range=(0.95, 0.8), 673 | prcnt_annihilate=10, 674 | div=10): 675 | super().__init__() 676 | 677 | self.enabled = enabled 678 | self.number_of_batches_per_full_cycle = number_of_batches_per_full_cycle 679 | self.max_lr = max_lr 680 | self.momentum_range = momentum_range 681 | self.prcnt_annihilate = prcnt_annihilate 682 | self.div = div 683 | self.ctx = neptune.Context() 684 | 685 | def set_params(self, transformer, validation_datagen, *args, **kwargs): 686 | super().set_params(transformer, validation_datagen) 687 | self.optimizer = transformer.optimizer 688 | self.onecycle = OneCycle(self.number_of_batches_per_full_cycle, 689 | max_lr=self.max_lr, 690 | optimizer=self.optimizer, 691 | prcnt=self.prcnt_annihilate, 692 | div=self.div 693 | ) 694 | 695 | def on_batch_end(self, *args, **kwargs): 696 | if self.enabled: 697 | lr, mom = self.onecycle.batch_step() 698 | self.ctx.channel_send("lr", lr) 699 | self.ctx.channel_send("momentum", mom) 700 | 701 | 702 | class EarlyStopping(Callback): 703 | def __init__(self, metric_name='sum', patience=1000, minimize=True): 704 | self.patience = patience 705 | self.minimize = minimize 706 | self.best_score = None 707 | self.epoch_since_best = 0 708 | self._training_break = False 709 | self.metric_name = metric_name 710 | 711 | def training_break(self, *args, **kwargs): 712 | return self._training_break 713 | 714 | def on_epoch_end(self, *args, **kwargs): 715 | 
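Both `ModelCheckpoint` above and `EarlyStopping` below gate their behaviour on the same comparison of the tracked metric against the best score seen so far. A minimal restatement of that test as a pure function (`is_improvement` is a hypothetical helper used only for illustration, not part of this module):

```python
def is_improvement(score, best_score, minimize=True):
    # Mirrors the condition used by ModelCheckpoint / EarlyStopping.
    return (minimize and score < best_score) or (not minimize and score > best_score)

assert is_improvement(0.31, 0.35, minimize=True)         # validation loss went down -> persist model
assert not is_improvement(0.40, 0.35, minimize=True)     # loss went up -> keep previous best
assert is_improvement(0.82, 0.79, minimize=False)        # f2 went up -> improvement when maximizing
```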
self.model.eval() 716 | val_loss = self.get_validation_loss() 717 | loss_sum = val_loss[self.metric_name] 718 | loss_sum = loss_sum.data.cpu().numpy()[0] 719 | 720 | self.model.train() 721 | 722 | if not self.best_score: 723 | self.best_score = loss_sum 724 | 725 | if (self.minimize and loss_sum < self.best_score) or (not self.minimize and loss_sum > self.best_score): 726 | self.best_score = loss_sum 727 | self.epoch_since_best = 0 728 | else: 729 | self.epoch_since_best += 1 730 | 731 | if self.epoch_since_best > self.patience: 732 | self._training_break = True 733 | self.epoch_id += 1 734 | 735 | 736 | def denormalize(x): 737 | mean = np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1) 738 | std = np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1) 739 | return x * std + mean 740 | 741 | 742 | def postprocessing_pipeline_simplified(cache_dirpath, loader_mode): 743 | if loader_mode == 'resize': 744 | size_adjustment_function = partial(resize_image, target_size=ORIGINAL_SIZE) 745 | else: 746 | raise NotImplementedError 747 | 748 | mask_resize = Step(name='mask_resize', 749 | transformer=make_apply_transformer(size_adjustment_function, 750 | output_name='resized_images', 751 | apply_on=['images'], 752 | n_threads=NUM_THREADS), 753 | input_data=['network_output'], 754 | adapter=Adapter({'images': E('network_output', 'mask_prediction'), 755 | })) 756 | 757 | binarizer = Step(name='binarizer', 758 | transformer=make_apply_transformer( 759 | partial(binarize, threshold=THRESHOLD), 760 | output_name='binarized_images', 761 | apply_on=['images'], 762 | n_threads=NUM_THREADS), 763 | input_steps=[mask_resize], 764 | adapter=Adapter({'images': E(mask_resize.name, 'resized_images'), 765 | })) 766 | 767 | labeler = Step(name='labeler', 768 | transformer=make_apply_transformer( 769 | label, 770 | output_name='labeled_images', 771 | apply_on=['images'], 772 | n_threads=NUM_THREADS), 773 | input_steps=[binarizer], 774 | adapter=Adapter({'images': E(binarizer.name, 'binarized_images'), 775 | })) 776 | 777 | labeler.set_mode_inference() 778 | labeler.set_parameters_upstream({'experiment_directory': cache_dirpath, 779 | 'is_fittable': False 780 | }) 781 | return labeler 782 | -------------------------------------------------------------------------------- /common_blocks/loaders.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torchvision.transforms as transforms 4 | from PIL import Image 5 | from attrdict import AttrDict 6 | from sklearn.externals import joblib 7 | from torch.utils.data import Dataset, DataLoader 8 | from torch.utils.data.sampler import Sampler 9 | from imgaug import augmenters as iaa 10 | from functools import partial 11 | from itertools import product 12 | import multiprocessing as mp 13 | from scipy.stats import gmean 14 | import json 15 | from steppy.base import BaseTransformer 16 | from toolkit.utils import from_pil, to_pil, ImgAug, reseed 17 | 18 | from .utils.masks import coco_binary_from_rle as binary_from_rle 19 | 20 | 21 | class MetaReader(BaseTransformer): 22 | def __init__(self, train_mode, x_columns, y_columns): 23 | self.train_mode = train_mode 24 | super().__init__() 25 | if len(x_columns) == 1: 26 | self.x_columns = x_columns[0] 27 | else: 28 | self.x_columns = x_columns 29 | 30 | if len(y_columns) == 1: 31 | self.y_columns = y_columns[0] 32 | else: 33 | self.y_columns = y_columns 34 | self.columns_to_get = None 35 | self.target_columns = None 36 | 37 | def transform(self, meta): 38 | X = 
meta[self.x_columns].values 39 | if self.train_mode: 40 | y = meta[self.y_columns].values 41 | else: 42 | y = None 43 | 44 | return {'X': X, 45 | 'y': y} 46 | 47 | 48 | class BalancedSubsetSampler(Sampler): 49 | def __init__(self, data_source, data_size, sample_size, empty_fraction, shuffle): 50 | super().__init__(data_source) 51 | 52 | self.data_source_with_ships = np.where(data_source == 1)[0] 53 | self.data_source_empty = np.where(data_source == 0)[0] 54 | self.data_size = data_size 55 | self.sample_size = sample_size 56 | self.shuffle = shuffle 57 | self.empty_fraction = empty_fraction 58 | self._check_sizes() 59 | 60 | def __iter__(self): 61 | return iter(self._get_indices(self._get_sample())) 62 | 63 | def __len__(self): 64 | return self.sample_size 65 | 66 | def _get_indices(self, sample): 67 | if self.shuffle: 68 | np.random.shuffle(sample) 69 | return sample 70 | 71 | def _get_sample(self): 72 | empty_count = int(self.empty_fraction * self.sample_size) 73 | full_count = self.sample_size - empty_count 74 | sample_empty = np.random.choice(self.data_source_empty, empty_count) 75 | sample_with_ships = np.random.choice(self.data_source_with_ships, full_count) 76 | return np.concatenate([sample_empty, sample_with_ships]) 77 | 78 | def _check_sizes(self): 79 | if self.sample_size > self.data_size: 80 | self.sample_size = 0.5 * self.data_size 81 | raise Warning('Sample size is bigger than data size. Using sample size = 1/2 data size') 82 | 83 | 84 | class ImageSegmentationDataset(Dataset): 85 | def __init__(self, X, y, train_mode, 86 | image_transform, image_augment_with_target, 87 | mask_transform, image_augment, 88 | image_source='memory'): 89 | super().__init__() 90 | self.X = X 91 | if y is not None: 92 | self.y = y 93 | else: 94 | self.y = None 95 | 96 | self.train_mode = train_mode 97 | self.image_transform = image_transform 98 | self.mask_transform = mask_transform 99 | self.image_augment = image_augment if image_augment is not None else ImgAug(iaa.Noop()) 100 | self.image_augment_with_target = image_augment_with_target if image_augment_with_target is not None else ImgAug( 101 | iaa.Noop()) 102 | 103 | self.image_source = image_source 104 | 105 | def __len__(self): 106 | if self.image_source == 'memory': 107 | return len(self.X[0]) 108 | elif self.image_source == 'disk': 109 | return self.X.shape[0] 110 | 111 | def __getitem__(self, index): 112 | if self.image_source == 'memory': 113 | load_func = self.load_from_memory 114 | elif self.image_source == 'disk': 115 | load_func = self.load_from_disk 116 | else: 117 | raise NotImplementedError("Possible loading options: 'memory' and 'disk'!") 118 | 119 | Xi = load_func(self.X, index, filetype='png', grayscale=False) 120 | 121 | if self.y is not None: 122 | Mi = self.load_target(self.y, index, load_func) 123 | Xi, *Mi = from_pil(Xi, *Mi) 124 | Xi, *Mi = self.image_augment_with_target(Xi, *Mi) 125 | Xi = self.image_augment(Xi) 126 | Xi, *Mi = to_pil(Xi, *Mi) 127 | 128 | if self.mask_transform is not None: 129 | Mi = [self.mask_transform(m) for m in Mi] 130 | 131 | if self.image_transform is not None: 132 | Xi = self.image_transform(Xi) 133 | 134 | Mi = torch.cat(Mi, dim=0) 135 | return Xi, Mi 136 | else: 137 | Xi = from_pil(Xi) 138 | Xi = self.image_augment(Xi) 139 | Xi = to_pil(Xi) 140 | 141 | if self.image_transform is not None: 142 | Xi = self.image_transform(Xi) 143 | return Xi 144 | 145 | def load_from_memory(self, data_source, index, **kwargs): 146 | return data_source[0][index] 147 | 148 | def load_from_disk(self, data_source, 
index, *, filetype, grayscale=False): 149 | if filetype == 'png': 150 | img_filepath = data_source[index] 151 | return self.load_image(img_filepath, grayscale=grayscale) 152 | elif filetype == 'json': 153 | json_filepath = data_source[index] 154 | return self.read_json(json_filepath) 155 | elif filetype == 'joblib': 156 | img_filepath = data_source[index] 157 | return self.load_joblib(img_filepath) 158 | else: 159 | raise Exception('files must be png or json or joblib') 160 | 161 | def load_image(self, img_filepath, grayscale): 162 | image = Image.open(img_filepath, 'r') 163 | if not grayscale: 164 | image = image.convert('RGB') 165 | else: 166 | image = image.convert('L').point(lambda x: 0 if x < 128 else 1) 167 | return image 168 | 169 | def load_joblib(self, img_filepath): 170 | target = joblib.load(img_filepath) 171 | if isinstance(target, tuple): 172 | target = np.zeros(target, np.uint8) 173 | return target 174 | 175 | def read_json(self, path): 176 | with open(path, 'r') as file: 177 | data = json.load(file) 178 | masks = [to_pil(binary_from_rle(rle)) for rle in data] 179 | return masks 180 | 181 | def load_target(self, data_source, index, load_func): 182 | raise NotImplementedError 183 | 184 | 185 | class ImageSegmentationJsonDataset(ImageSegmentationDataset): 186 | def load_target(self, data_source, index, load_func): 187 | Mi = load_func(data_source, index, filetype='json') 188 | return Mi 189 | 190 | 191 | class ImageSegmentationPngDataset(ImageSegmentationDataset): 192 | def load_target(self, data_source, index, load_func): 193 | Mi = load_func(data_source, index, filetype='png', grayscale=True) 194 | Mi = from_pil(Mi) 195 | target = [to_pil(Mi == class_nr) for class_nr in [0, 1]] 196 | return target 197 | 198 | 199 | class ImageSegmentationJoblibDataset(ImageSegmentationDataset): 200 | def load_target(self, data_source, index, load_func): 201 | Mi = load_func(data_source, index, filetype='joblib') 202 | target = [(Mi == class_nr).astype(np.uint8) for class_nr in [0, 1]] 203 | return target 204 | 205 | 206 | class ImageSegmentationTTADataset(ImageSegmentationDataset): 207 | def __init__(self, tta_params, tta_transform, *args, **kwargs): 208 | super().__init__(*args, **kwargs) 209 | self.tta_params = tta_params 210 | self.tta_transform = tta_transform 211 | 212 | def __getitem__(self, index): 213 | if self.image_source == 'memory': 214 | load_func = self.load_from_memory 215 | elif self.image_source == 'disk': 216 | load_func = self.load_from_disk 217 | else: 218 | raise NotImplementedError("Possible loading options: 'memory' and 'disk'!") 219 | 220 | Xi = load_func(self.X, index, filetype='png', grayscale=False) 221 | Xi = from_pil(Xi) 222 | 223 | if self.image_augment is not None: 224 | Xi = self.image_augment(Xi) 225 | 226 | if self.tta_params is not None: 227 | tta_transform_specs = self.tta_params[index] 228 | Xi = self.tta_transform(Xi, tta_transform_specs) 229 | Xi = to_pil(Xi) 230 | 231 | if self.image_transform is not None: 232 | Xi = self.image_transform(Xi) 233 | 234 | return Xi 235 | 236 | 237 | class ImageSegmentationLoader(BaseTransformer): 238 | def __init__(self, train_mode, loader_params, dataset_params, augmentation_params): 239 | super().__init__() 240 | self.train_mode = train_mode 241 | self.loader_params = AttrDict(loader_params) 242 | self.dataset_params = AttrDict(dataset_params) 243 | self.augmentation_params = AttrDict(augmentation_params) 244 | 245 | self.mask_transform = None 246 | self.image_transform = None 247 | 248 | self.image_augment_train = None 
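`BalancedSubsetSampler` is what lets each training epoch see a controlled mix of empty and non-empty images instead of the raw, mostly-empty dataset: it draws `sample_size` indices per epoch, with an `empty_fraction` share of them taken from images without ships. A minimal sketch with hypothetical toy labels, assuming numpy and the sampler class defined above are in scope:

```python
import numpy as np

# Toy labels (illustration only): 1 = ship present, 0 = empty sea.
is_not_empty = np.array([1, 0, 0, 1, 1, 0, 0, 0, 1, 0])

sampler = BalancedSubsetSampler(data_source=is_not_empty,
                                data_size=len(is_not_empty),
                                sample_size=6,
                                empty_fraction=0.5,
                                shuffle=True)

indices = list(iter(sampler))
assert len(indices) == len(sampler) == 6
assert sum(is_not_empty[i] == 0 for i in indices) == 3   # exactly half point at empty images
```

Because `__len__` returns `sample_size` rather than the dataset length, the `DataLoader` built in `get_datagen` iterates over this fixed-size, rebalanced subset on every epoch.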
249 | self.image_augment_inference = None 250 | self.image_augment_with_target_train = None 251 | self.image_augment_with_target_inference = None 252 | 253 | self.dataset = None 254 | 255 | def transform(self, X, y, X_valid=None, y_valid=None, **kwargs): 256 | if self.train_mode and y is not None: 257 | flow, steps = self.get_datagen(X, y, True, self.loader_params.training) 258 | else: 259 | flow, steps = self.get_datagen(X, None, False, self.loader_params.inference) 260 | 261 | if X_valid is not None and y_valid is not None: 262 | valid_flow, valid_steps = self.get_datagen(X_valid, y_valid, False, self.loader_params.inference) 263 | else: 264 | valid_flow = None 265 | valid_steps = None 266 | 267 | return {'datagen': (flow, steps), 268 | 'validation_datagen': (valid_flow, valid_steps)} 269 | 270 | def get_datagen(self, X, y, train_mode, loader_params): 271 | if train_mode: 272 | dataset = self.dataset(X, y[:, 0], 273 | train_mode=True, 274 | image_augment=self.image_augment_train, 275 | image_augment_with_target=self.image_augment_with_target_train, 276 | mask_transform=self.mask_transform, 277 | image_transform=self.image_transform, 278 | image_source=self.dataset_params.image_source) 279 | sampler = BalancedSubsetSampler(data_source=y[:, 1], 280 | data_size=len(y), 281 | sample_size=self.dataset_params.sample_size, 282 | empty_fraction=self.dataset_params.empty_fraction, 283 | shuffle=True) 284 | datagen = DataLoader(dataset, **loader_params, sampler=sampler) 285 | else: 286 | if y is not None: 287 | y = y[:, 0] 288 | dataset = self.dataset(X, y, 289 | train_mode=False, 290 | image_augment=self.image_augment_inference, 291 | image_augment_with_target=self.image_augment_with_target_inference, 292 | mask_transform=self.mask_transform, 293 | image_transform=self.image_transform, 294 | image_source=self.dataset_params.image_source) 295 | datagen = DataLoader(dataset, **loader_params) 296 | 297 | steps = len(datagen) 298 | return datagen, steps 299 | 300 | 301 | class ImageSegmentationLoaderTTA(ImageSegmentationLoader): 302 | def __init__(self, loader_params, dataset_params, augmentation_params): 303 | self.loader_params = AttrDict(loader_params) 304 | self.dataset_params = AttrDict(dataset_params) 305 | self.augmentation_params = AttrDict(augmentation_params) 306 | 307 | self.mask_transform = None 308 | self.image_transform = None 309 | 310 | self.image_augment_train = None 311 | self.image_augment_inference = None 312 | self.image_augment_with_target_train = None 313 | self.image_augment_with_target_inference = None 314 | 315 | self.dataset = None 316 | 317 | def transform(self, X, tta_params, **kwargs): 318 | flow, steps = self.get_datagen(X, tta_params, self.loader_params.inference) 319 | valid_flow = None 320 | valid_steps = None 321 | return {'datagen': (flow, steps), 322 | 'validation_datagen': (valid_flow, valid_steps)} 323 | 324 | def get_datagen(self, X, tta_params, loader_params): 325 | dataset = self.dataset(tta_params=tta_params, 326 | tta_transform=self.augmentation_params.tta_transform, 327 | X=X, 328 | y=None, 329 | train_mode=False, 330 | image_augment=self.image_augment_inference, 331 | image_augment_with_target=self.image_augment_with_target_inference, 332 | mask_transform=self.mask_transform, 333 | image_transform=self.image_transform, 334 | image_source=self.dataset_params.image_source) 335 | 336 | datagen = DataLoader(dataset, **loader_params) 337 | steps = len(datagen) 338 | return datagen, steps 339 | 340 | 341 | class 
ImageSegmentationLoaderResize(ImageSegmentationLoader): 342 | def __init__(self, train_mode, loader_params, dataset_params, augmentation_params): 343 | super().__init__(train_mode, loader_params, dataset_params, augmentation_params) 344 | 345 | self.image_transform = transforms.Compose([transforms.ToTensor(), 346 | transforms.Normalize(mean=self.dataset_params.MEAN, 347 | std=self.dataset_params.STD), 348 | ]) 349 | self.mask_transform = transforms.Compose([transforms.Lambda(preprocess_target), 350 | ]) 351 | 352 | self.image_augment_train = ImgAug(self.augmentation_params['image_augment_train']) 353 | self.image_augment_with_target_train = ImgAug(self.augmentation_params['image_augment_with_target_train']) 354 | self.image_augment_inference = ImgAug(self.augmentation_params['image_augment_inference']) 355 | self.image_augment_with_target_inference = ImgAug( 356 | self.augmentation_params['image_augment_with_target_inference']) 357 | 358 | if self.dataset_params.target_format == 'png': 359 | self.dataset = ImageSegmentationPngDataset 360 | elif self.dataset_params.target_format == 'json': 361 | self.dataset = ImageSegmentationJsonDataset 362 | elif self.dataset_params.target_format == 'joblib': 363 | self.dataset = ImageSegmentationJoblibDataset 364 | else: 365 | raise Exception('files must be png or json') 366 | 367 | 368 | class ImageSegmentationLoaderResizeTTA(ImageSegmentationLoaderTTA): 369 | def __init__(self, loader_params, dataset_params, augmentation_params): 370 | super().__init__(loader_params, dataset_params, augmentation_params) 371 | 372 | self.image_transform = transforms.Compose([transforms.ToTensor(), 373 | transforms.Normalize(mean=self.dataset_params.MEAN, 374 | std=self.dataset_params.STD), 375 | ]) 376 | 377 | self.image_augment_inference = ImgAug(self.augmentation_params['image_augment_inference']) 378 | self.image_augment_with_target_inference = ImgAug( 379 | self.augmentation_params['image_augment_with_target_inference']) 380 | 381 | self.dataset = ImageSegmentationTTADataset 382 | 383 | 384 | class MetaTestTimeAugmentationGenerator(BaseTransformer): 385 | def __init__(self, **kwargs): 386 | self.tta_transformations = AttrDict(kwargs) 387 | 388 | def transform(self, X, **kwargs): 389 | X_tta_rows, tta_params, img_ids = [], [], [] 390 | for i in range(len(X)): 391 | rows, params, ids = self._get_tta_data(i, X[i]) 392 | tta_params.extend(params) 393 | img_ids.extend(ids) 394 | X_tta_rows.extend(rows) 395 | X_tta = np.array(X_tta_rows) 396 | return {'X_tta': X_tta, 'tta_params': tta_params, 'img_ids': img_ids} 397 | 398 | def _get_tta_data(self, i, row): 399 | original_specs = {'ud_flip': False, 'lr_flip': False, 'rotation': 0, 'color_shift': False} 400 | tta_specs = [original_specs] 401 | 402 | ud_options = [True, False] if self.tta_transformations.flip_ud else [False] 403 | lr_options = [True, False] if self.tta_transformations.flip_lr else [False] 404 | rot_options = [0, 90, 180, 270] if self.tta_transformations.rotation else [0] 405 | if self.tta_transformations.color_shift_runs: 406 | color_shift_options = list(range(1, self.tta_transformations.color_shift_runs + 1, 1)) 407 | else: 408 | color_shift_options = [False] 409 | 410 | for ud, lr, rot, color in product(ud_options, lr_options, rot_options, color_shift_options): 411 | if ud is False and lr is False and rot == 0 and color is False: 412 | continue 413 | else: 414 | tta_specs.append({'ud_flip': ud, 'lr_flip': lr, 'rotation': rot, 'color_shift': color}) 415 | 416 | img_ids = [i] * len(tta_specs) 417 | X_rows = 
[row] * len(tta_specs) 418 | return X_rows, tta_specs, img_ids 419 | 420 | 421 | class TestTimeAugmentationGenerator(BaseTransformer): 422 | def __init__(self, **kwargs): 423 | self.tta_transformations = AttrDict(kwargs) 424 | 425 | def transform(self, X, **kwargs): 426 | X_tta, tta_params, img_ids = [], [], [] 427 | X = X[0] 428 | for i in range(len(X)): 429 | images, params, ids = self._get_tta_data(i, X[i]) 430 | tta_params.extend(params) 431 | img_ids.extend(ids) 432 | X_tta.extend(images) 433 | return {'X_tta': [X_tta], 'tta_params': tta_params, 'img_ids': img_ids} 434 | 435 | def _get_tta_data(self, i, row): 436 | original_specs = {'ud_flip': False, 'lr_flip': False, 'rotation': 0, 'color_shift': False} 437 | tta_specs = [original_specs] 438 | 439 | ud_options = [True, False] if self.tta_transformations.flip_ud else [False] 440 | lr_options = [True, False] if self.tta_transformations.flip_lr else [False] 441 | rot_options = [0, 90, 180, 270] if self.tta_transformations.rotation else [0] 442 | if self.tta_transformations.color_shift_runs: 443 | color_shift_options = list(range(1, self.tta_transformations.color_shift_runs + 1, 1)) 444 | else: 445 | color_shift_options = [False] 446 | 447 | for ud, lr, rot, color in product(ud_options, lr_options, rot_options, color_shift_options): 448 | if ud is False and lr is False and rot == 0 and color is False: 449 | continue 450 | else: 451 | tta_specs.append({'ud_flip': ud, 'lr_flip': lr, 'rotation': rot, 'color_shift': color}) 452 | 453 | img_ids = [i] * len(tta_specs) 454 | X_rows = [row] * len(tta_specs) 455 | return X_rows, tta_specs, img_ids 456 | 457 | 458 | class TestTimeAugmentationAggregator(BaseTransformer): 459 | def __init__(self, tta_inverse_transform, method, nthreads): 460 | self.tta_inverse_transform = tta_inverse_transform 461 | self.method = method 462 | self.nthreads = nthreads 463 | 464 | @property 465 | def agg_method(self): 466 | methods = {'mean': np.mean, 467 | 'max': np.max, 468 | 'min': np.min, 469 | 'gmean': gmean 470 | } 471 | return partial(methods[self.method], axis=-1) 472 | 473 | def transform(self, images, tta_params, img_ids, **kwargs): 474 | _aggregate_augmentations = partial(aggregate_augmentations, 475 | images=images, 476 | tta_params=tta_params, 477 | tta_inverse_transform=self.tta_inverse_transform, 478 | img_ids=img_ids, 479 | agg_method=self.agg_method) 480 | unique_img_ids = set(img_ids) 481 | threads = min(self.nthreads, len(unique_img_ids)) 482 | with mp.pool.ThreadPool(threads) as executor: 483 | averages_images = executor.map(_aggregate_augmentations, unique_img_ids) 484 | return {'aggregated_prediction': averages_images} 485 | 486 | 487 | class OneClassImageClassificationDataset(Dataset): 488 | def __init__(self, 489 | X, 490 | y, 491 | image_transform=None, 492 | fixed_resize=300, 493 | path_column='file_path_image', 494 | target_column='is_not_empty', 495 | train_mode=True, 496 | image_augment=None): 497 | super().__init__() 498 | 499 | self.X = X 500 | if y is not None: 501 | self.y = y 502 | else: 503 | self.y = None 504 | self.image_transform = image_transform 505 | self.image_augment = image_augment 506 | self.train_mode = train_mode 507 | self.fixed_resize = fixed_resize 508 | self.path_column = path_column 509 | self.target_column = target_column 510 | 511 | def __len__(self): 512 | return len(self.X) 513 | 514 | def __getitem__(self, index): 515 | try: 516 | Xi = self.load_from_disk(index) 517 | except Exception as e: 518 | print(e) 519 | print("Failed loading image {}".format(index)) 520 
| index = 0 521 | Xi = self.load_from_disk(index) 522 | 523 | if self.fixed_resize: 524 | Xi = transforms.Resize((self.fixed_resize, self.fixed_resize))(Xi) 525 | if self.train_mode or self.y is not None: 526 | yi = self.load_target(index) 527 | if self.image_augment is not None: 528 | Xi = self.augment(self.image_augment, Xi) 529 | return Xi, yi 530 | else: 531 | return Xi 532 | 533 | def augment(self, augmenter, image): 534 | augmenter = augmenter.to_deterministic() 535 | img_aug = augmenter.augment_image(np.array(image)) 536 | img_aug = Image.fromarray(img_aug) 537 | return img_aug 538 | 539 | def load_from_disk(self, index): 540 | image_path = self.X[index] 541 | return self.load_image(image_path) 542 | 543 | def load_target(self, index): 544 | label = self.y[index] 545 | 546 | return label 547 | 548 | def load_image(self, img_filepath, grayscale=False): 549 | image = Image.open(img_filepath, 'r') 550 | if not grayscale: 551 | image = image.convert('RGB') 552 | else: 553 | image = image.convert('L') 554 | return image 555 | 556 | def align_images(self, images): 557 | max_h, max_w = 0, 0 558 | min_h, min_w = 1e10, 1e10 559 | for image in images: 560 | w, h = image.size 561 | max_h, max_w = max(h, max_h), max(w, max_w) 562 | min_h, min_w = min(h, min_h), min(w, min_w) 563 | resize = transforms.Resize((max_h, max_w)) 564 | allinged_images = [] 565 | for image in images: 566 | allinged_images.append(resize(image)) 567 | 568 | return allinged_images 569 | 570 | def collate_fn(self, batch): 571 | """Encode targets. 572 | Args: 573 | batch: (list) of images, bbox_targets, clf_targets. 574 | Returns: 575 | images, stacked bbox_targets, stacked clf_targets. 576 | """ 577 | 578 | if self.train_mode or self.y is not None: 579 | imgs = [x[0] for x in batch] 580 | labels = [int(x[1]) for x in batch] 581 | imgs = [self.image_transform(img) for img in imgs] 582 | return torch.stack(imgs), torch.LongTensor(labels) 583 | 584 | else: 585 | imgs = [self.image_transform(img) for img in batch] 586 | return torch.stack(imgs) 587 | 588 | 589 | class OneClassImageClassificatioLoader(BaseTransformer): 590 | def __init__(self, train_mode, loader_params, dataset_params, augmentation_params): 591 | super().__init__() 592 | self.train_mode = train_mode 593 | self.loader_params = AttrDict(loader_params) 594 | self.dataset_params = AttrDict(dataset_params) 595 | self.augmentation_params = AttrDict(augmentation_params) 596 | self.dataset = OneClassImageClassificationDataset 597 | 598 | self.image_transform = transforms.Compose([ 599 | transforms.ToTensor(), 600 | transforms.Normalize(mean=self.dataset_params.MEAN, std=self.dataset_params.STD), 601 | ]) 602 | 603 | def transform(self, X, y=None, X_valid=None, y_valid=None, **kwargs): 604 | if self.train_mode and y is not None: 605 | flow, steps = self.get_datagen(X, y, True, self.loader_params.training) 606 | else: 607 | flow, steps = self.get_datagen(X, None, False, self.loader_params.inference) 608 | 609 | if X_valid is not None and y_valid is not None: 610 | valid_flow, valid_steps = self.get_datagen(X_valid, y_valid, False, self.loader_params.inference) 611 | else: 612 | valid_flow = None 613 | valid_steps = None 614 | 615 | return {'datagen': (flow, steps), 616 | 'validation_datagen': (valid_flow, valid_steps)} 617 | 618 | def get_datagen(self, X, y, train_mode, loader_params): 619 | if train_mode: 620 | 621 | sampler = BalancedSubsetSampler(data_source=y, 622 | data_size=len(y), 623 | sample_size=self.dataset_params.sample_size, 624 | 
empty_fraction=self.dataset_params.sns_empty_fraction, 625 | shuffle=True) 626 | dataset = self.dataset(X, y, 627 | train_mode=True, 628 | fixed_resize=self.dataset_params.sns_h, 629 | image_transform=self.image_transform) 630 | 631 | datagen = DataLoader(dataset, collate_fn=dataset.collate_fn, **loader_params, sampler=sampler) 632 | else: 633 | dataset = self.dataset(X, y, 634 | train_mode=False, 635 | fixed_resize=self.dataset_params.sns_h, 636 | image_transform=self.image_transform) 637 | datagen = DataLoader(dataset, collate_fn=dataset.collate_fn, **loader_params) 638 | 639 | steps = len(datagen) 640 | return datagen, steps 641 | 642 | 643 | def aggregate_augmentations(img_id, images, tta_params, tta_inverse_transform, img_ids, agg_method): 644 | tta_predictions_for_id = [] 645 | for image, tta_param, ids in zip(images, tta_params, img_ids): 646 | if ids == img_id: 647 | tta_prediction = tta_inverse_transform(image, tta_param) 648 | tta_predictions_for_id.append(tta_prediction) 649 | else: 650 | continue 651 | tta_averaged = agg_method(np.stack(tta_predictions_for_id, axis=-1)) 652 | return tta_averaged 653 | 654 | 655 | def per_channel_flipud(x): 656 | x_ = x.copy() 657 | for i, channel in enumerate(x): 658 | x_[i, :, :] = np.flipud(channel) 659 | return x_ 660 | 661 | 662 | def per_channel_fliplr(x): 663 | x_ = x.copy() 664 | for i, channel in enumerate(x): 665 | x_[i, :, :] = np.fliplr(channel) 666 | return x_ 667 | 668 | 669 | def per_channel_rotation(x, angle): 670 | return rotate(x, angle, axes=(1, 2)) 671 | 672 | 673 | def rotate(image, angle, axes=(0, 1)): 674 | if angle % 90 != 0: 675 | raise Exception('Angle must be a multiple of 90.') 676 | k = angle // 90 677 | return np.rot90(image, k, axes=axes) 678 | 679 | 680 | def preprocess_target(x): 681 | x_ = x.convert('L') # convert image to monochrome 682 | x_ = np.array(x_) 683 | x_ = x_.astype(np.float32) 684 | x_ = np.expand_dims(x_, axis=0) 685 | x_ = torch.from_numpy(x_) 686 | return x_ 687 | -------------------------------------------------------------------------------- /common_blocks/lovasz_losses.py: -------------------------------------------------------------------------------- 1 | """ 2 | Lovasz-Softmax and Jaccard hinge loss in PyTorch 3 | Maxim Berman 2018 ESAT-PSI KU Leuven (MIT License) 4 | """ 5 | 6 | from __future__ import print_function, division 7 | 8 | import torch 9 | from torch.autograd import Variable 10 | import torch.nn.functional as F 11 | import numpy as np 12 | 13 | try: 14 | from itertools import ifilterfalse 15 | except ImportError: # py3k 16 | from itertools import filterfalse 17 | 18 | 19 | def lovasz_grad(gt_sorted): 20 | """ 21 | Computes gradient of the Lovasz extension w.r.t sorted errors 22 | See Alg. 1 in paper 23 | """ 24 | p = len(gt_sorted) 25 | gts = gt_sorted.sum() 26 | intersection = gts.float() - gt_sorted.float().cumsum(0) 27 | union = gts.float() + (1 - gt_sorted).float().cumsum(0) 28 | jaccard = 1. 
- intersection / union 29 | if p > 1: # cover 1-pixel case 30 | jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] 31 | return jaccard 32 | 33 | 34 | def iou_binary(preds, labels, EMPTY=1., ignore=None, per_image=True): 35 | """ 36 | IoU for foreground class 37 | binary: 1 foreground, 0 background 38 | """ 39 | if not per_image: 40 | preds, labels = (preds,), (labels,) 41 | ious = [] 42 | for pred, label in zip(preds, labels): 43 | intersection = ((label == 1) & (pred == 1)).sum() 44 | union = ((label == 1) | ((pred == 1) & (label != ignore))).sum() 45 | if not union: 46 | iou = EMPTY 47 | else: 48 | iou = float(intersection) / union 49 | ious.append(iou) 50 | iou = mean(ious) # mean accross images if per_image 51 | return 100 * iou 52 | 53 | 54 | def iou(preds, labels, C, EMPTY=1., ignore=None, per_image=False): 55 | """ 56 | Array of IoU for each (non ignored) class 57 | """ 58 | if not per_image: 59 | preds, labels = (preds,), (labels,) 60 | ious = [] 61 | for pred, label in zip(preds, labels): 62 | iou = [] 63 | for i in range(C): 64 | if i != ignore: # The ignored label is sometimes among predicted classes (ENet - CityScapes) 65 | intersection = ((label == i) & (pred == i)).sum() 66 | union = ((label == i) | ((pred == i) & (label != ignore))).sum() 67 | if not union: 68 | iou.append(EMPTY) 69 | else: 70 | iou.append(float(intersection) / union) 71 | ious.append(iou) 72 | ious = map(mean, zip(*ious)) # mean accross images if per_image 73 | return 100 * np.array(ious) 74 | 75 | 76 | # --------------------------- BINARY LOSSES --------------------------- 77 | 78 | 79 | def lovasz_hinge(logits, labels, per_image=True, ignore=None): 80 | """ 81 | Binary Lovasz hinge loss 82 | logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty) 83 | labels: [B, H, W] Tensor, binary ground truth masks (0 or 1) 84 | per_image: compute the loss per image instead of per batch 85 | ignore: void class id 86 | """ 87 | if per_image: 88 | loss = mean(lovasz_hinge_flat(*flatten_binary_scores(log.unsqueeze(0), lab.unsqueeze(0), ignore)) 89 | for log, lab in zip(logits, labels)) 90 | else: 91 | loss = lovasz_hinge_flat(*flatten_binary_scores(logits, labels, ignore)) 92 | return loss 93 | 94 | 95 | def lovasz_hinge_flat(logits, labels): 96 | """ 97 | Binary Lovasz hinge loss 98 | logits: [P] Variable, logits at each prediction (between -\infty and +\infty) 99 | labels: [P] Tensor, binary ground truth labels (0 or 1) 100 | ignore: label to ignore 101 | """ 102 | if len(labels) == 0: 103 | # only void pixels, the gradients should be 0 104 | return logits.sum() * 0. 105 | signs = 2. * labels.float() - 1. 106 | errors = (1. 
- logits * signs) 107 | 108 | errors_sorted, perm = torch.sort(errors, dim=0, descending=True) 109 | perm = perm.data 110 | gt_sorted = labels[perm] 111 | grad = lovasz_grad(gt_sorted) 112 | loss = torch.dot(F.elu(errors_sorted), grad) 113 | return loss 114 | 115 | 116 | def flatten_binary_scores(scores, labels, ignore=None): 117 | """ 118 | Flattens predictions in the batch (binary case) 119 | Remove labels equal to 'ignore' 120 | """ 121 | scores = scores.view(-1) 122 | labels = labels.view(-1) 123 | if ignore is None: 124 | return scores, labels 125 | valid = (labels != ignore) 126 | vscores = scores[valid] 127 | vlabels = labels[valid] 128 | return vscores, vlabels 129 | 130 | 131 | class StableBCELoss(torch.nn.modules.Module): 132 | def __init__(self): 133 | super(StableBCELoss, self).__init__() 134 | 135 | def forward(self, input, target): 136 | neg_abs = - input.abs() 137 | loss = input.clamp(min=0) - input * target + (1 + neg_abs.exp()).log() 138 | return loss.mean() 139 | 140 | 141 | def binary_xloss(logits, labels, ignore=None): 142 | """ 143 | Binary Cross entropy loss 144 | logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty) 145 | labels: [B, H, W] Tensor, binary ground truth masks (0 or 1) 146 | ignore: void class id 147 | """ 148 | logits, labels = flatten_binary_scores(logits, labels, ignore) 149 | loss = StableBCELoss()(logits, Variable(labels.float())) 150 | return loss 151 | 152 | 153 | # --------------------------- MULTICLASS LOSSES --------------------------- 154 | 155 | 156 | def lovasz_softmax(probas, labels, only_present=False, per_image=False, ignore=None): 157 | """ 158 | Multi-class Lovasz-Softmax loss 159 | probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1) 160 | labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1) 161 | only_present: average only on classes present in ground truth 162 | per_image: compute the loss per image instead of per batch 163 | ignore: void class labels 164 | """ 165 | if per_image: 166 | loss = mean( 167 | lovasz_softmax_flat(*flatten_probas(prob.unsqueeze(0), lab.unsqueeze(0), ignore), only_present=only_present) 168 | for prob, lab in zip(probas, labels)) 169 | else: 170 | loss = lovasz_softmax_flat(*flatten_probas(probas, labels, ignore), only_present=only_present) 171 | return loss 172 | 173 | 174 | def lovasz_softmax_flat(probas, labels, only_present=False): 175 | """ 176 | Multi-class Lovasz-Softmax loss 177 | probas: [P, C] Variable, class probabilities at each prediction (between 0 and 1) 178 | labels: [P] Tensor, ground truth labels (between 0 and C - 1) 179 | only_present: average only on classes present in ground truth 180 | """ 181 | C = probas.size(1) 182 | losses = [] 183 | for c in range(C): 184 | fg = (labels == c).float() # foreground for class c 185 | if only_present and fg.sum() == 0: 186 | continue 187 | 188 | errors = (fg - probas[:, c]).abs() 189 | errors_sorted, perm = torch.sort(errors, 0, descending=True) 190 | perm = perm.data 191 | fg_sorted = fg[perm] 192 | losses.append(torch.dot(errors_sorted, lovasz_grad(fg_sorted))) 193 | return mean(losses) 194 | 195 | 196 | def flatten_probas(probas, labels, ignore=None): 197 | """ 198 | Flattens predictions in the batch 199 | """ 200 | B, C, H, W = probas.size() 201 | probas = probas.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C 202 | labels = labels.view(-1) 203 | if ignore is None: 204 | return probas, labels 205 | valid = (labels != ignore) 206 | vprobas = 
probas[valid.nonzero().squeeze()] 207 | vlabels = labels[valid] 208 | return vprobas, vlabels 209 | 210 | 211 | def xloss(logits, labels, ignore=None): 212 | """ 213 | Cross entropy loss 214 | """ 215 | return F.cross_entropy(logits, Variable(labels), ignore_index=255) 216 | 217 | 218 | # --------------------------- HELPER FUNCTIONS --------------------------- 219 | 220 | def mean(l, ignore_nan=False, empty=0): 221 | """ 222 | nanmean compatible with generators. 223 | """ 224 | l = iter(l) 225 | if ignore_nan: 226 | l = ifilterfalse(np.isnan, l) 227 | try: 228 | n = 1 229 | acc = next(l) 230 | except StopIteration: 231 | if empty == 'raise': 232 | raise ValueError('Empty mean') 233 | return empty 234 | for n, v in enumerate(l, 2): 235 | acc += v 236 | if n == 1: 237 | return acc 238 | return acc / n 239 | -------------------------------------------------------------------------------- /common_blocks/metrics.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import numpy as np 4 | from tqdm import tqdm 5 | from pycocotools import mask as cocomask 6 | 7 | from common_blocks.utils.masks import get_segmentations, get_overlayed_mask 8 | 9 | ORIGINAL_SIZE = (768, 768) 10 | 11 | 12 | def iou(gt, pred): 13 | gt[gt > 0] = 1. 14 | pred[pred > 0] = 1. 15 | intersection = gt * pred 16 | union = gt + pred 17 | union[union > 0] = 1. 18 | intersection = np.sum(intersection) 19 | union = np.sum(union) 20 | if union == 0: 21 | union = 1e-09 22 | return intersection / union 23 | 24 | 25 | def compute_ious(gt, predictions): 26 | gt_ = get_segmentations(gt) 27 | predictions_ = get_segmentations(predictions) 28 | 29 | if len(gt_) == 0 and len(predictions_) == 0: 30 | return np.ones((1, 1)) 31 | elif len(gt_) != 0 and len(predictions_) == 0: 32 | return np.zeros((1, 1)) 33 | else: 34 | iscrowd = [0 for _ in predictions_] 35 | ious = cocomask.iou(gt_, predictions_, iscrowd) 36 | if not np.array(ious).size: 37 | ious = np.zeros((1, 1)) 38 | return ious 39 | 40 | 41 | def compute_precision_at(ious, threshold): 42 | mx1 = np.max(ious, axis=1) 43 | mx2 = np.max(ious, axis=0) 44 | tp = np.sum(mx2 >= threshold) 45 | fp = np.sum(mx2 < threshold) 46 | fn = np.sum(mx1 < threshold) 47 | return float(tp) / (tp + fp + fn) 48 | 49 | 50 | def compute_eval_metric_per_image(gt, predictions, metric_to_average='precision'): 51 | thresholds = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95] 52 | ious = compute_ious(gt, predictions) 53 | if metric_to_average == 'precision': 54 | metric_function = compute_precision_at 55 | elif metric_to_average[0] == 'f': 56 | beta = float(metric_to_average[1:]) 57 | metric_function = partial(compute_f_beta_at, beta=beta) 58 | else: 59 | raise NotImplementedError 60 | metric_per_image = [metric_function(ious, th) for th in thresholds] 61 | return sum(metric_per_image) / len(metric_per_image) 62 | 63 | 64 | def intersection_over_union(y_true, y_pred): 65 | ious = [] 66 | for y_t, y_p in tqdm(list(zip(y_true, y_pred))): 67 | iou = compute_ious(y_t, y_p) 68 | iou_mean = 1.0 * np.sum(iou) / len(iou) 69 | ious.append(iou_mean) 70 | return np.mean(ious) 71 | 72 | 73 | def intersection_over_union_thresholds(y_true, y_pred): 74 | iouts = [] 75 | for y_t, y_p in tqdm(list(zip(y_true, y_pred))): 76 | iouts.append(compute_eval_metric_per_image(y_t, y_p)) 77 | return np.mean(iouts) 78 | 79 | 80 | def f_beta_metric(gt, prediction, beta=2, apply_mean=True): 81 | f_betas, image_ids = [], [] 82 | check_ids(gt, prediction) 83 | for 
image_id in gt['ImageId'].unique(): 84 | y_t = get_overlayed_mask(gt.query('ImageId==@image_id'), ORIGINAL_SIZE, labeled=True) 85 | y_p = get_overlayed_mask(prediction.query('ImageId==@image_id'), ORIGINAL_SIZE, labeled=True) 86 | f_beta = compute_eval_metric_per_image(y_t, y_p, "f{}".format(beta)) 87 | f_betas.append(f_beta) 88 | image_ids.append(image_id) 89 | if apply_mean: 90 | return np.mean(f_betas) 91 | else: 92 | return f_betas, image_ids 93 | 94 | 95 | def check_ids(gt, prediction): 96 | gt_ids = set(gt['ImageId'].unique()) 97 | prediction_ids = set(prediction['ImageId'].unique()) 98 | if gt_ids - prediction_ids != set(): 99 | raise ValueError('Predictions for some images are missing') 100 | elif prediction_ids - gt_ids != set(): 101 | raise ValueError('Prediction calculated for too many images') 102 | 103 | 104 | def compute_f_beta_at(ious, threshold, beta): 105 | mx1 = np.max(ious, axis=0) 106 | mx2 = np.max(ious, axis=1) 107 | tp = np.sum(mx2 >= threshold) 108 | fp = np.sum(mx2 < threshold) 109 | fn = np.sum(mx1 < threshold) 110 | return (1 + beta ** 2) * tp / ((1 + beta ** 2) * tp + (beta ** 2) * fn + fp) 111 | -------------------------------------------------------------------------------- /common_blocks/models.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn import functional as F 5 | import torch.optim as optim 6 | from toolkit.pytorch_transformers.models import Model 7 | from torch.autograd import Variable 8 | 9 | from common_blocks.architectures.classification import Densenet 10 | from common_blocks.utils.misc import get_list_of_image_predictions, sigmoid, softmax 11 | from .architectures import encoders, unet, large_kernel_matters, pspnet 12 | from . 
import callbacks as cbk 13 | from .lovasz_losses import lovasz_hinge 14 | 15 | ENCODERS = {'ResNet': {'model': encoders.ResNetEncoders, 16 | 'model_config': {'encoder_depth': 34, 'pretrained': True, 'pool0': True 17 | } 18 | }, 19 | 'SeResNet': {'model': encoders.SeResNetEncoders, 20 | 'model_config': {'encoder_depth': 50, 'pretrained': 'imagenet', 'pool0': True 21 | } 22 | }, 23 | 'SeResNetXt': {'model': encoders.SeResNetXtEncoders, 24 | 'model_config': {'encoder_depth': 101, 'pretrained': 'imagenet', 'pool0': True 25 | } 26 | }, 27 | 'DenseNet': {'model': encoders.DenseNetEncoders, 28 | 'model_config': {'encoder_depth': 201, 'pretrained': 'imagenet', 'pool0': True 29 | } 30 | }, 31 | } 32 | 33 | ARCHITECTURES = {'UNet': {'model': unet.UNet, 34 | 'model_config': {'use_hypercolumn': False, 'dropout_2d': 0.0, 'pool0': True 35 | }}, 36 | 'LargeKernelMatters': {'model': large_kernel_matters.LargeKernelMatters, 37 | 'model_config': {'kernel_size': 9, 'internal_channels': 21, 38 | 'dropout_2d': 0.0, 'use_relu': False, 'pool0': True, 39 | 'use_channel_se': True, 'use_spatial_se': True, 40 | 'reduction_se': 4 41 | }, 42 | }, 43 | 'PSPNet': {'model': pspnet.PSPNet, 44 | 'model_config': {'use_hypercolumn': False, 'pool0': True 45 | }, 46 | }, 47 | } 48 | 49 | SNS_ARCHITECTURES = { 50 | "Densenet": {'model': Densenet, 51 | 'model_config': {'pretrained': 'imagenet'} 52 | }} 53 | 54 | 55 | class SegmentationModel(Model): 56 | def __init__(self, architecture_config, training_config, callbacks_config): 57 | super().__init__(architecture_config, training_config, callbacks_config) 58 | self.activation_func = self.architecture_config['model_params']['activation'] 59 | self.set_model() 60 | self.set_loss() 61 | self.weight_regularization = weight_regularization 62 | self.optimizer = optim.SGD(self.weight_regularization(self.model, **architecture_config['regularizer_params']), 63 | **architecture_config['optimizer_params']) 64 | self.callbacks = callbacks_network(self.callbacks_config) 65 | 66 | def fit(self, datagen, validation_datagen=None, meta_valid=None): 67 | self._initialize_model_weights() 68 | 69 | if not isinstance(self.model, nn.DataParallel): 70 | self.model = nn.DataParallel(self.model) 71 | 72 | if torch.cuda.is_available(): 73 | self.model = self.model.cuda() 74 | 75 | self.callbacks.set_params(self, validation_datagen=validation_datagen, meta_valid=meta_valid) 76 | self.callbacks.on_train_begin() 77 | 78 | batch_gen, steps = datagen 79 | for epoch_id in range(self.training_config['epochs']): 80 | self.callbacks.on_epoch_begin() 81 | for batch_id, data in enumerate(batch_gen): 82 | self.callbacks.on_batch_begin() 83 | self.freeze_weights() 84 | metrics = self._fit_loop(data) 85 | self.callbacks.on_batch_end(metrics=metrics) 86 | if batch_id == steps: 87 | break 88 | self.callbacks.on_epoch_end() 89 | if self.callbacks.training_break(): 90 | break 91 | self.callbacks.on_train_end() 92 | return self 93 | 94 | def _fit_loop(self, data): 95 | X = data[0] 96 | targets_tensors = data[1:] 97 | 98 | if torch.cuda.is_available(): 99 | X = Variable(X).cuda() 100 | targets_var = [] 101 | for target_tensor in targets_tensors: 102 | targets_var.append(Variable(target_tensor).cuda()) 103 | else: 104 | X = Variable(X) 105 | targets_var = [] 106 | for target_tensor in targets_tensors: 107 | targets_var.append(Variable(target_tensor)) 108 | 109 | self.optimizer.zero_grad() 110 | outputs_batch = self.model(X) 111 | partial_batch_losses = {} 112 | 113 | if len(self.output_names) == 1: 114 | for (name, 
loss_function, weight), target in zip(self.loss_function, targets_var): 115 | batch_loss = loss_function(outputs_batch, target) * weight 116 | else: 117 | for (name, loss_function, weight), output, target in zip(self.loss_function, outputs_batch, targets_var): 118 | partial_batch_losses[name] = loss_function(output, target) * weight 119 | batch_loss = sum(partial_batch_losses.values()) 120 | partial_batch_losses['sum'] = batch_loss 121 | 122 | batch_loss.backward() 123 | self.optimizer.step() 124 | 125 | return partial_batch_losses 126 | 127 | def transform(self, datagen, validation_datagen=None, *args, **kwargs): 128 | outputs = self._transform(datagen, validation_datagen) 129 | for name, prediction in outputs.items(): 130 | if self.activation_func == 'softmax': 131 | outputs[name] = [softmax(single_prediction, axis=0) for single_prediction in prediction] 132 | elif self.activation_func == 'sigmoid': 133 | outputs[name] = [sigmoid(np.squeeze(mask)) for mask in prediction] 134 | else: 135 | raise Exception('Only softmax and sigmoid activations are allowed') 136 | return outputs 137 | 138 | def _transform(self, datagen, validation_datagen=None, **kwargs): 139 | self.model.eval() 140 | 141 | batch_gen, steps = datagen 142 | outputs = {} 143 | for batch_id, data in enumerate(batch_gen): 144 | if isinstance(data, (list, tuple)): 145 | X = data[0] 146 | else: 147 | X = data 148 | 149 | if torch.cuda.is_available(): 150 | X = Variable(X, volatile=True).cuda() 151 | else: 152 | X = Variable(X, volatile=True) 153 | outputs_batch = self.model(X) 154 | 155 | if len(self.output_names) == 1: 156 | outputs.setdefault(self.output_names[0], []).append(outputs_batch.data.cpu().numpy()) 157 | else: 158 | for name, output in zip(self.output_names, outputs_batch): 159 | output_ = output.data.cpu().numpy() 160 | outputs.setdefault(name, []).append(output_) 161 | if batch_id == steps: 162 | break 163 | self.model.train() 164 | outputs = {'{}_prediction'.format(name): get_list_of_image_predictions(outputs_) for name, outputs_ in 165 | outputs.items()} 166 | return outputs 167 | 168 | def set_model(self): 169 | architecture_name = self.architecture_config['model_params']['architecture'] 170 | encoder_name = self.architecture_config['model_params']['encoder'] 171 | encoder = ENCODERS[encoder_name] 172 | architecture = ARCHITECTURES[architecture_name] 173 | 174 | self.model = architecture['model'](encoder=encoder['model'](**encoder['model_config']), 175 | num_classes=self.architecture_config['model_params']['out_channels'], 176 | **architecture['model_config']) 177 | self._initialize_model_weights = lambda: None 178 | 179 | def set_loss(self): 180 | if self.activation_func == 'softmax': 181 | raise NotImplementedError('No softmax loss defined') 182 | elif self.activation_func == 'sigmoid': 183 | 184 | loss_function = focal_lovasz 185 | # loss_function = weighted_sum_loss 186 | # loss_function = nn.BCEWithLogitsLoss() 187 | # loss_function = DiceWithLogitsLoss() 188 | # loss_function = lovasz_loss 189 | # loss_function = FocalWithLogitsLoss() 190 | else: 191 | raise Exception('Only softmax and sigmoid activations are allowed') 192 | self.loss_function = [('mask', loss_function, 1.0)] 193 | 194 | def freeze_weights(self): 195 | # # freeze encoder 196 | # if isinstance(self.model, nn.DataParallel): 197 | # encoder_params = self.model.module.encoder.parameters() 198 | # else: 199 | # encoder_params = self.model.encoder.parameters() 200 | # 201 | # for parameter in encoder_params: 202 | # parameter.requires_grad = 
False 203 | # 204 | # # freeze batchnorm 205 | # for m in self.model.modules(): 206 | # if isinstance(m, nn.BatchNorm2d): 207 | # m.eval() 208 | # m.weight.requires_grad = False 209 | # m.bias.requires_grad = False 210 | pass 211 | 212 | def load(self, filepath): 213 | self.model.eval() 214 | 215 | if not isinstance(self.model, nn.DataParallel): 216 | self.model = nn.DataParallel(self.model) 217 | 218 | if torch.cuda.is_available(): 219 | self.model.cpu() 220 | self.model.load_state_dict(torch.load(filepath)) 221 | self.model = self.model.cuda() 222 | else: 223 | self.model.load_state_dict(torch.load(filepath, map_location=lambda storage, loc: storage)) 224 | return self 225 | 226 | 227 | class BinaryModel(SegmentationModel): 228 | def __init__(self, architecture_config, training_config, callbacks_config, **kwargs): 229 | super().__init__(architecture_config, training_config, callbacks_config) 230 | self.weight_regularization = weight_regularization 231 | self.set_model() 232 | self.optimizer = optim.Adam(self.weight_regularization(self.model, **architecture_config['regularizer_params']), 233 | **architecture_config['optimizer_params']) 234 | 235 | self.epochs = 10 236 | self.callbacks_config = callbacks_config 237 | self.callbacks = callbacks_ship_no_ship(self.callbacks_config) 238 | self.activation_func = 'sigmoid' 239 | self.validation_loss = {} 240 | 241 | def set_model(self): 242 | architecture = self.architecture_config['model_params']['architecture'] 243 | config = SNS_ARCHITECTURES[architecture] 244 | self.model = config['model'](**config['model_config']) 245 | self._initialize_model_weights = lambda: None 246 | 247 | def set_loss(self): 248 | self.loss_function = [('ship_no_ship', nn.CrossEntropyLoss(), 1.0)] 249 | 250 | def freeze_weights(self): 251 | pass 252 | 253 | 254 | class FocalWithLogitsLoss(nn.Module): 255 | def __init__(self, alpha=1.0, gamma=1.0, reduction='elementwise_mean'): 256 | super().__init__() 257 | self.alpha = alpha 258 | self.gamma = gamma 259 | self.reduction = reduction 260 | 261 | def forward(self, output, target): 262 | if not (target.size() == output.size()): 263 | raise ValueError( 264 | "Target size ({}) must be the same as input size ({})".format(target.size(), output.size())) 265 | 266 | max_val = (-output).clamp(min=0) 267 | logpt = output - output * target + max_val + ((-max_val).exp() + (-output - max_val).exp()).log() 268 | pt = torch.exp(-logpt) 269 | at = self.alpha * target + (1 - target) 270 | loss = at * ((1 - pt).pow(self.gamma)) * logpt 271 | 272 | if self.reduction == 'none': 273 | return loss 274 | elif self.reduction == 'elementwise_mean': 275 | return loss.mean() 276 | else: 277 | return loss.sum() 278 | 279 | 280 | class DiceWithLogitsLoss(nn.Module): 281 | def __init__(self, smooth=0, eps=1e-7): 282 | super().__init__() 283 | self.smooth = smooth 284 | self.eps = eps 285 | 286 | def forward(self, output, target): 287 | output = F.sigmoid(output) 288 | return 1 - (2 * torch.sum(output * target) + self.smooth) / ( 289 | torch.sum(output) + torch.sum(target) + self.smooth + self.eps) 290 | 291 | 292 | def weighted_sum_loss(output, target): 293 | bce = nn.BCEWithLogitsLoss()(output, target) 294 | dice = DiceWithLogitsLoss()(output, target) 295 | return bce + 0.25 * dice 296 | 297 | 298 | def focal_lovasz(output, target): 299 | focal = FocalWithLogitsLoss(alpha=1.0, gamma=2.0)(output, target) 300 | lovasz = lovasz_loss(output, target) 301 | return focal + lovasz 302 | 303 | 304 | def lovasz_loss(output, target): 305 | target = 
target.long() 306 | return lovasz_hinge(output, target) 307 | 308 | 309 | def weight_regularization(model, regularize, weight_decay_conv2d): 310 | if regularize: 311 | parameter_list = [ 312 | {'params': filter(lambda p: p.requires_grad, model.parameters()), 313 | 'weight_decay': weight_decay_conv2d}, 314 | ] 315 | else: 316 | parameter_list = [filter(lambda p: p.requires_grad, model.parameters())] 317 | return parameter_list 318 | 319 | 320 | def callbacks_network(callbacks_config): 321 | experiment_timing = cbk.ExperimentTiming(**callbacks_config['experiment_timing']) 322 | model_checkpoints = cbk.ModelCheckpoint(**callbacks_config['model_checkpoint']) 323 | lr_scheduler = cbk.ReduceLROnPlateauScheduler(**callbacks_config['reduce_lr_on_plateau_scheduler']) 324 | training_monitor = cbk.TrainingMonitor(**callbacks_config['training_monitor']) 325 | validation_monitor = cbk.ValidationMonitor(**callbacks_config['validation_monitor']) 326 | neptune_monitor = cbk.NeptuneMonitor(**callbacks_config['neptune_monitor']) 327 | early_stopping = cbk.EarlyStopping(**callbacks_config['early_stopping']) 328 | init_lr_finder = cbk.InitialLearningRateFinder() 329 | one_cycle_callback = cbk.OneCycleCallback(**callbacks_config['one_cycle_scheduler']) 330 | 331 | return cbk.CallbackList( 332 | callbacks=[experiment_timing, training_monitor, validation_monitor, 333 | model_checkpoints, neptune_monitor, 334 | one_cycle_callback, 335 | # early_stopping, 336 | # lr_scheduler, 337 | # init_lr_finder, 338 | ]) 339 | 340 | 341 | def callbacks_ship_no_ship(callbacks_config): 342 | training_monitor = cbk.TrainingMonitor(**callbacks_config['training_monitor']) 343 | validation_monitor = cbk.SNS_ValidationMonitor() 344 | model_checkpoints = cbk.ModelCheckpoint(**callbacks_config['model_checkpoint']) 345 | one_cycle_callback = cbk.OneCycleCallback(**callbacks_config['one_cycle_scheduler']) 346 | 347 | return cbk.CallbackList([training_monitor, validation_monitor, model_checkpoints, 348 | # one_cycle_callback 349 | ]) 350 | -------------------------------------------------------------------------------- /common_blocks/pipelines.py: -------------------------------------------------------------------------------- 1 | from steppy.adapter import Adapter, E 2 | from steppy.base import Step 3 | 4 | from common_blocks.loaders import OneClassImageClassificatioLoader 5 | from . 
import loaders 6 | 7 | 8 | def preprocessing_train(config, model_name='network'): 9 | if config.general.loader_mode == 'resize': 10 | loader_config = config.loaders.resize 11 | LOADER = loaders.ImageSegmentationLoaderResize 12 | else: 13 | raise NotImplementedError 14 | 15 | reader_train = Step(name='xy_train', 16 | transformer=loaders.MetaReader(train_mode=True, **config.meta_reader[model_name]), 17 | input_data=['input'], 18 | adapter=Adapter({'meta': E('input', 'meta')})) 19 | 20 | reader_inference = Step(name='xy_inference', 21 | transformer=loaders.MetaReader(train_mode=True, **config.meta_reader[model_name]), 22 | input_data=['callback_input'], 23 | adapter=Adapter({'meta': E('callback_input', 'meta_valid')})) 24 | 25 | loader = Step(name='loader', 26 | transformer=LOADER(train_mode=True, **loader_config), 27 | input_steps=[reader_train, reader_inference], 28 | adapter=Adapter({'X': E(reader_train.name, 'X'), 29 | 'y': E(reader_train.name, 'y'), 30 | 'X_valid': E(reader_inference.name, 'X'), 31 | 'y_valid': E(reader_inference.name, 'y'), 32 | })) 33 | return loader 34 | 35 | 36 | def preprocessing_inference(config, model_name='network'): 37 | if config.general.loader_mode == 'resize': 38 | loader_config = config.loaders.resize 39 | LOADER = loaders.ImageSegmentationLoaderResize 40 | else: 41 | raise NotImplementedError 42 | 43 | reader_inference = Step(name='xy_inference', 44 | transformer=loaders.MetaReader(train_mode=False, **config.meta_reader[model_name]), 45 | input_data=['input'], 46 | adapter=Adapter({'meta': E('input', 'meta')})) 47 | 48 | loader = Step(name='loader', 49 | transformer=LOADER(train_mode=False, **loader_config), 50 | input_steps=[reader_inference], 51 | adapter=Adapter({'X': E(reader_inference.name, 'X'), 52 | 'y': E(reader_inference.name, 'y'), 53 | })) 54 | return loader 55 | 56 | 57 | def preprocessing_binary_train(config, model_name, suffix='_binary_model'): 58 | reader_train = Step(name='xy_train{}'.format(suffix), 59 | transformer=loaders.MetaReader(train_mode=True, **config.meta_reader[model_name]), 60 | input_data=['input'], 61 | adapter=Adapter({'meta': E('input', 'meta')})) 62 | 63 | reader_inference = Step(name='xy_inference{}'.format(suffix), 64 | transformer=loaders.MetaReader(train_mode=True, **config.meta_reader[model_name]), 65 | input_data=['callback_input'], 66 | adapter=Adapter({'meta': E('callback_input', 'meta_valid')})) 67 | 68 | transformer = OneClassImageClassificatioLoader( 69 | train_mode=True, 70 | loader_params=config.loaders.resize.loader_params, 71 | dataset_params=config.loaders.resize.dataset_params, 72 | augmentation_params=config.loaders.resize.augmentation_params 73 | ) 74 | 75 | binary_loader = Step(name='loader{}'.format(suffix), 76 | transformer=transformer, 77 | input_steps=[reader_train, reader_inference], 78 | adapter=Adapter({'X': E(reader_train.name, 'X'), 79 | 'y': E(reader_train.name, 'y'), 80 | 'X_valid': E(reader_inference.name, 'X'), 81 | 'y_valid': E(reader_inference.name, 'y'), 82 | })) 83 | 84 | return binary_loader 85 | 86 | 87 | def preprocessing_binary_inference(config, model_name, suffix='_binary_model'): 88 | reader_inference = Step(name='xy_inference{}'.format(suffix), 89 | transformer=loaders.MetaReader(train_mode=True, **config.meta_reader[model_name]), 90 | input_data=['input'], 91 | adapter=Adapter({'meta': E('input', 'meta')})) 92 | 93 | transformer = OneClassImageClassificatioLoader( 94 | train_mode=True, 95 | loader_params=config.loaders.resize.loader_params, 96 | 
dataset_params=config.loaders.resize.dataset_params, 97 | augmentation_params=config.loaders.resize.augmentation_params 98 | ) 99 | 100 | binary_loader = Step(name='loader{}'.format(suffix), 101 | transformer=transformer, 102 | input_steps=[reader_inference], 103 | adapter=Adapter({'X': E(reader_inference.name, 'X'), 104 | })) 105 | 106 | return binary_loader 107 | 108 | 109 | def preprocessing_inference_tta(config, model_name='network'): 110 | if config.general.loader_mode == 'resize': 111 | loader_config = config.loaders.resize_tta 112 | LOADER = loaders.ImageSegmentationLoaderResizeTTA 113 | else: 114 | raise NotImplementedError 115 | 116 | reader_inference = Step(name='reader_inference', 117 | transformer=loaders.MetaReader(train_mode=False, **config.meta_reader[model_name]), 118 | input_data=['input'], 119 | adapter=Adapter({'meta': E('input', 'meta')})) 120 | 121 | tta_generator = Step(name='tta_generator', 122 | transformer=loaders.MetaTestTimeAugmentationGenerator(**config.tta_generator), 123 | input_steps=[reader_inference], 124 | adapter=Adapter({'X': E('reader_inference', 'X')}) 125 | ) 126 | 127 | loader = Step(name='loader', 128 | transformer=LOADER(**loader_config), 129 | input_steps=[tta_generator], 130 | adapter=Adapter({'X': E(tta_generator.name, 'X_tta'), 131 | 'tta_params': E(tta_generator.name, 'tta_params'), 132 | }) 133 | ) 134 | return loader, tta_generator 135 | 136 | 137 | def aggregator(name, model, tta_generator, config): 138 | tta_aggregator = Step(name=name, 139 | transformer=loaders.TestTimeAugmentationAggregator(**config), 140 | input_steps=[model, tta_generator], 141 | adapter=Adapter({'images': E(model.name, 'mask_prediction'), 142 | 'tta_params': E(tta_generator.name, 'tta_params'), 143 | 'img_ids': E(tta_generator.name, 'img_ids'), 144 | }) 145 | ) 146 | return tta_aggregator 147 | -------------------------------------------------------------------------------- /common_blocks/postprocessing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import neptune 4 | from scipy import ndimage as ndi 5 | from skimage.transform import resize 6 | 7 | from common_blocks.utils import misc 8 | 9 | CTX = neptune.Context() 10 | 11 | if CTX.params.__class__.__name__ == 'OfflineContextParams': 12 | PARAMS = misc.read_yaml().parameters 13 | else: 14 | PARAMS = CTX.params 15 | 16 | DROP_SIZE = PARAMS.postpro__drop_size 17 | MID_MIN_SIZE, MID_MAX_SIZE = PARAMS.postpro__mid_min_size, PARAMS.postpro__mid_max_size 18 | 19 | 20 | def resize_image(image, target_size): 21 | """Resize image to target size 22 | 23 | Args: 24 | image (numpy.ndarray): Image of shape (C x H x W). 25 | target_size (tuple): Target size (H, W). 26 | 27 | Returns: 28 | numpy.ndarray: Resized image of shape (C x H x W). 29 | 30 | """ 31 | n_channels = image.shape[0] 32 | resized_image = resize(image, (n_channels, target_size[0], target_size[1]), mode='constant') 33 | return resized_image 34 | 35 | 36 | def crop_image(image, target_size): 37 | """Crop image to target size. Image cropped symmetrically. 38 | 39 | Args: 40 | image (numpy.ndarray): Image of shape (C x H x W). 41 | target_size (tuple): Target size (H, W). 42 | 43 | Returns: 44 | numpy.ndarray: Cropped image of shape (C x H x W). 
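Example (illustrative): cropping a prediction of shape (2, 772, 772) back to target_size (768, 768) removes 2 pixels from every border, since get_crop_pad_sequence(4, 4) returns (top, right, bottom, left) = (2, 2, 2, 2).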
45 | 46 | """ 47 | top_crop, right_crop, bottom_crop, left_crop = misc.get_crop_pad_sequence(image.shape[1] - target_size[0], 48 | image.shape[2] - target_size[1]) 49 | cropped_image = image[:, top_crop:image.shape[1] - bottom_crop, left_crop:image.shape[2] - right_crop] 50 | return cropped_image 51 | 52 | 53 | def binarize(image, threshold): 54 | image_binarized = (image[1, :, :] > threshold).astype(np.uint8) 55 | return image_binarized 56 | 57 | 58 | def get_class(prediction, threshold): 59 | pix = prediction.argmax() 60 | score = prediction[pix] 61 | classid = pix 62 | if pix == 1 and score < threshold: 63 | classid = 0 64 | 65 | return classid 66 | 67 | 68 | def label(mask): 69 | labeled, nr_true = ndi.label(mask) 70 | return labeled 71 | 72 | 73 | def mask_postprocessing(labeled_mask): 74 | if labeled_mask.max() == 0: 75 | return labeled_mask 76 | else: 77 | img_box = np.zeros_like(labeled_mask) 78 | for label_id in range(1, labeled_mask.max() + 1, 1): 79 | label = np.where(labeled_mask == label_id, 1, 0).astype(np.uint8) 80 | size = label.sum() 81 | if size <= DROP_SIZE: 82 | continue 83 | elif MID_MIN_SIZE < size < MID_MAX_SIZE: 84 | bbox_label = mask_to_bbox(label) 85 | img_box = np.where(bbox_label, label_id, img_box).astype(np.uint8) 86 | else: 87 | img_box = np.where(label, label_id, img_box).astype(np.uint8) 88 | img_box = misc.relabel(img_box) 89 | return img_box 90 | 91 | 92 | def mask_to_bbox(mask): 93 | img_box = np.zeros_like(mask) 94 | _, cnt, _ = cv2.findContours(mask, 1, 2) 95 | rect = cv2.minAreaRect(cnt[0]) 96 | box = cv2.boxPoints(rect) 97 | box = np.int0(box) 98 | cv2.drawContours(img_box, [box], 0, 1, -1) 99 | return img_box 100 | -------------------------------------------------------------------------------- /common_blocks/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/minerva-ml/open-solution-ship-detection/9e26c98e25a1d38fcfa426640697e37dd39f4776/common_blocks/utils/__init__.py -------------------------------------------------------------------------------- /common_blocks/utils/io.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from PIL import Image 4 | from tqdm import tqdm 5 | from sklearn.externals import joblib 6 | 7 | from .masks import get_overlayed_mask 8 | 9 | 10 | def read_masks_from_csv(image_ids, solution_file_path, image_sizes): 11 | solution = pd.read_csv(solution_file_path) 12 | masks = [] 13 | for image_id, image_size in zip(image_ids, image_sizes): 14 | image_id_pd = image_id + ".jpg" 15 | mask = get_overlayed_mask(solution.query('ImageId == @image_id_pd'), image_size, labeled=True) 16 | masks.append(mask) 17 | return masks 18 | 19 | 20 | def read_masks(masks_filepaths): 21 | masks = [] 22 | for mask_filepath in tqdm(masks_filepaths): 23 | mask = joblib.load(mask_filepath) 24 | if isinstance(mask, tuple): 25 | mask = np.zeros(mask).astype(np.uint8) 26 | masks.append(mask) 27 | return masks 28 | 29 | 30 | def read_gt_subset(annotation_file_path, image_ids): 31 | solution = pd.read_csv(annotation_file_path) 32 | return solution.query('ImageId in @image_ids') 33 | 34 | 35 | def read_images(filepaths): 36 | images = [] 37 | for filepath in filepaths: 38 | image = np.array(Image.open(filepath)) 39 | images.append(image) 40 | return images 41 | -------------------------------------------------------------------------------- /common_blocks/utils/masks.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | from pycocotools import mask as cocomask 3 | 4 | 5 | def decompose(labeled): 6 | nr_true = labeled.max() 7 | masks = [] 8 | for i in range(1, nr_true + 1): 9 | msk = labeled.copy() 10 | msk[msk != i] = 0. 11 | msk[msk == i] = 255. 12 | masks.append(msk) 13 | 14 | if not masks: 15 | return [labeled] 16 | else: 17 | return masks 18 | 19 | 20 | def get_overlayed_mask(image_annotation, size, labeled=False): 21 | mask = np.zeros(size, dtype=np.uint8) 22 | if image_annotation['EncodedPixels'].any(): 23 | for i, row in image_annotation.reset_index(drop=True).iterrows(): 24 | if labeled: 25 | label = i + 1 26 | else: 27 | label = 1 28 | mask += label * run_length_decoding(row['EncodedPixels'], size) 29 | return mask 30 | 31 | 32 | def rle_from_predictions(predictions): 33 | return [rle_from_mask(mask) for mask in predictions] 34 | 35 | 36 | def rle_from_mask(x): 37 | # https://www.kaggle.com/c/data-science-bowl-2018/discussion/48561# 38 | bs = np.where(x.T.flatten())[0] 39 | 40 | rle = [] 41 | prev = -2 42 | for b in bs: 43 | if (b > prev + 1): rle.extend((b + 1, 0)) 44 | rle[-1] += 1 45 | prev = b 46 | 47 | if len(rle) != 0 and rle[-1] + rle[-2] == x.size: 48 | rle[-2] = rle[-2] - 1 49 | 50 | return rle 51 | 52 | 53 | def coco_rle_from_binary(prediction): 54 | prediction = np.asfortranarray(prediction) 55 | return cocomask.encode(prediction) 56 | 57 | 58 | def coco_binary_from_rle(rle): 59 | return cocomask.decode(rle) 60 | 61 | 62 | def get_segmentations(labeled): 63 | nr_true = labeled.max() 64 | segmentations = [] 65 | for i in range(1, nr_true + 1): 66 | msk = labeled == i 67 | segmentation = coco_rle_from_binary(msk.astype('uint8')) 68 | segmentation['counts'] = segmentation['counts'].decode("UTF-8") 69 | segmentations.append(segmentation) 70 | return segmentations 71 | 72 | 73 | def run_length_decoding(mask_rle, shape=(768, 768)): 74 | """ 75 | Based on https://www.kaggle.com/msl23518/visualize-the-stage1-test-solution and modified 76 | Args: 77 | mask_rle: run-length as string formatted (start length) 78 | shape: (height, width) of array to return 79 | 80 | Returns: 81 | numpy array, 1 - mask, 0 - background 82 | 83 | """ 84 | s = mask_rle.split() 85 | starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])] 86 | starts -= 1 87 | ends = starts + lengths 88 | img = np.zeros(shape[1] * shape[0], dtype=np.uint8) 89 | for lo, hi in zip(starts, ends): 90 | img[lo:hi] = 1 91 | return img.reshape((shape[1], shape[0])).T 92 | -------------------------------------------------------------------------------- /common_blocks/utils/misc.py: -------------------------------------------------------------------------------- 1 | import multiprocessing as mp 2 | 3 | import logging 4 | import pathlib 5 | import random 6 | import sys 7 | from itertools import chain 8 | from collections import Iterable 9 | import math 10 | 11 | import numpy as np 12 | import pandas as pd 13 | from PIL import Image 14 | import torch 15 | import matplotlib.pyplot as plt 16 | from attrdict import AttrDict 17 | from tqdm import tqdm 18 | from sklearn.model_selection import train_test_split, GroupShuffleSplit 19 | from steppy.base import Step, BaseTransformer 20 | from steppy.utils import get_logger as get_steppy_logger 21 | import yaml 22 | 23 | from .masks import rle_from_mask, run_length_decoding 24 | 25 | import os 26 | 27 | NEPTUNE_CONFIG_PATH = os.environ.get('NEPTUNE_CONFIG_PATH', 28 | 
str(pathlib.Path(__file__).resolve().parents[1].parents[0] / 'neptune.yaml')) 29 | 30 | logger = get_steppy_logger() 31 | 32 | 33 | def read_params(ctx): 34 | if ctx.params.__class__.__name__ == 'OfflineContextParams': 35 | params = read_yaml().parameters 36 | else: 37 | params = ctx.params 38 | return params 39 | 40 | 41 | def read_yaml(fallback_file=NEPTUNE_CONFIG_PATH): 42 | print("USING FALLBACK NEPTUNE CONFIG {}".format(fallback_file)) 43 | with open(fallback_file) as f: 44 | config = yaml.load(f) 45 | return AttrDict(config) 46 | 47 | 48 | def init_logger(): 49 | logger = logging.getLogger('ships-detection') 50 | logger.setLevel(logging.INFO) 51 | message_format = logging.Formatter(fmt='%(asctime)s %(name)s >>> %(message)s', 52 | datefmt='%Y-%m-%d %H-%M-%S') 53 | 54 | # console handler for validation info 55 | ch_va = logging.StreamHandler(sys.stdout) 56 | ch_va.setLevel(logging.INFO) 57 | 58 | ch_va.setFormatter(fmt=message_format) 59 | 60 | # add the handlers to the logger 61 | logger.addHandler(ch_va) 62 | 63 | return logger 64 | 65 | 66 | def get_logger(): 67 | return logging.getLogger('ships-detection') 68 | 69 | 70 | def create_submission(image_ids, predictions): 71 | output = [] 72 | for image_id, mask in zip(image_ids, predictions): 73 | for label_nr in range(1, mask.max() + 1): 74 | mask_label = mask == label_nr 75 | rle_encoded = ' '.join(str(rle) for rle in rle_from_mask(mask_label)) 76 | output.append([image_id, rle_encoded]) 77 | if mask.max() == 0: 78 | output.append([image_id, np.nan]) 79 | 80 | submission = pd.DataFrame(output, columns=['ImageId', 'EncodedPixels']) 81 | return submission 82 | 83 | 84 | def get_ship_no_ship_ids(image_ids, prediction): 85 | ids_ship = [idx for idx, pred in zip(image_ids, prediction) if pred] 86 | ids_no_ship = [idx for idx, pred in zip(image_ids, prediction) if not pred] 87 | return ids_ship, ids_no_ship 88 | 89 | 90 | def combine_two_stage_predictions(ids_no_ship, prediction_ship, ordered_ids): 91 | prediction_no_ship = pd.DataFrame({'ImageId': ids_no_ship}) 92 | prediction_no_ship['EncodedPixels'] = np.nan 93 | 94 | prediction_ship.reset_index(drop=True, inplace=True) 95 | prediction_no_ship.reset_index(drop=True, inplace=True) 96 | 97 | prediction = pd.concat([prediction_ship, prediction_no_ship], axis=0) 98 | 99 | return prediction[prediction['ImageId'].isin(ordered_ids)] 100 | 101 | 102 | def prepare_results(ground_truth, prediction, meta, f2, image_ids): 103 | f2_with_id = pd.DataFrame({'f2': f2, 'ImageId': image_ids}) 104 | meta['ImageId'] = meta['id'] + '.jpg' 105 | scores_merged = pd.merge(ground_truth, prediction, on='ImageId', suffixes=['_gt', '_pred']) 106 | results = pd.merge(meta, scores_merged, on='ImageId') 107 | results = pd.merge(results, f2_with_id, on='ImageId') 108 | return results 109 | 110 | 111 | def generate_data_frame_chunks(meta, chunk_size): 112 | n_rows = meta.shape[0] 113 | chunk_nr = math.ceil(n_rows / chunk_size) 114 | for i in tqdm(range(chunk_nr)): 115 | meta_chunk = meta.iloc[i * chunk_size:(i + 1) * chunk_size] 116 | yield meta_chunk 117 | 118 | 119 | def generate_metadata(train_images_dir, masks_overlayed_dir, test_images_dir, annotation_file_name): 120 | metadata = {} 121 | annotations = pd.read_csv(annotation_file_name) 122 | for filename in tqdm(os.listdir(train_images_dir)): 123 | image_filepath = os.path.join(train_images_dir, filename) 124 | mask_filepath = os.path.join(masks_overlayed_dir, filename.split('.')[0]) 125 | image_id = filename.split('.')[0] 126 | number_of_ships = 
get_number_of_ships(annotations.query('ImageId == @filename')) 127 | 128 | metadata.setdefault('file_path_image', []).append(image_filepath) 129 | metadata.setdefault('file_path_mask', []).append(mask_filepath) 130 | metadata.setdefault('is_train', []).append(1) 131 | metadata.setdefault('id', []).append(image_id) 132 | metadata.setdefault('number_of_ships', []).append(number_of_ships) 133 | metadata.setdefault('is_not_empty', []).append(int(number_of_ships != 0)) 134 | 135 | for filename in tqdm(os.listdir(test_images_dir)): 136 | image_filepath = os.path.join(test_images_dir, filename) 137 | image_id = filename.split('.')[0] 138 | 139 | metadata.setdefault('file_path_image', []).append(image_filepath) 140 | metadata.setdefault('file_path_mask', []).append(None) 141 | metadata.setdefault('is_train', []).append(0) 142 | metadata.setdefault('id', []).append(image_id) 143 | metadata.setdefault('number_of_ships', []).append(None) 144 | metadata.setdefault('is_not_empty', []).append(None) 145 | 146 | return pd.DataFrame(metadata) 147 | 148 | 149 | def get_number_of_ships(image_annotations): 150 | if image_annotations['EncodedPixels'].any(): 151 | return len(image_annotations) 152 | else: 153 | return 0 154 | 155 | 156 | def train_test_split_with_empty_fraction(df, empty_fraction, test_size, shuffle=True, random_state=1234): 157 | valid_empty_size = int(test_size * empty_fraction) 158 | valid_non_empty_size = int(test_size * (1.0 - empty_fraction)) 159 | df_empty = df[df['is_not_empty'] == 0] 160 | df_non_empty = df[df['is_not_empty'] == 1] 161 | 162 | train_empty, valid_empty = train_test_split(df_empty, 163 | test_size=valid_empty_size, 164 | shuffle=shuffle, 165 | random_state=random_state) 166 | train_non_empty, valid_non_empty = train_test_split(df_non_empty, 167 | test_size=valid_non_empty_size, 168 | shuffle=shuffle, 169 | random_state=random_state) 170 | 171 | train = pd.concat([train_empty, train_non_empty], axis=0) 172 | valid = pd.concat([valid_empty, valid_non_empty], axis=0) 173 | 174 | return train, valid 175 | 176 | 177 | def train_test_split_with_empty_fraction_with_groups(df, 178 | groups, 179 | empty_fraction, 180 | test_size, 181 | shuffle=True, random_state=1234): 182 | cv = GroupShuffleSplit(n_splits=2, test_size=test_size, random_state=random_state) 183 | 184 | for train_inds, test_inds in cv.split(df.values, groups=groups.values): 185 | train, test = df.iloc[train_inds], df.iloc[test_inds] 186 | break 187 | 188 | empty_train, empty_test = train[train['is_not_empty'] == 0], test[test['is_not_empty'] == 0] 189 | non_empty_train, non_empty_test = train[train['is_not_empty'] == 1], test[test['is_not_empty'] == 1] 190 | 191 | test_empty_size = int(test_size * empty_fraction) 192 | test_non_empty_size = int(test_size * (1.0 - empty_fraction)) 193 | 194 | empty_test = empty_test.sample(test_empty_size, random_state=random_state) 195 | non_empty_test = non_empty_test.sample(test_non_empty_size, random_state=random_state) 196 | 197 | train = pd.concat([empty_train, non_empty_train], axis=0).sample(frac=1, random_state=random_state) 198 | test = pd.concat([empty_test, non_empty_test], axis=0) 199 | 200 | if shuffle: 201 | train = train.sample(frac=1, random_state=random_state) 202 | test = test.sample(frac=1, random_state=random_state) 203 | 204 | return train, test 205 | 206 | 207 | def sigmoid(x): 208 | return 1. 
/ (1 + np.exp(-x)) 209 | 210 | 211 | def softmax(X, theta=1.0, axis=None): 212 | """ 213 | https://nolanbconaway.github.io/blog/2017/softmax-numpy 214 | Compute the softmax of each element along an axis of X. 215 | 216 | Parameters 217 | ---------- 218 | X: ND-Array. Probably should be floats. 219 | theta (optional): float parameter, used as a multiplier 220 | prior to exponentiation. Default = 1.0 221 | axis (optional): axis to compute values along. Default is the 222 | first non-singleton axis. 223 | 224 | Returns an array the same size as X. The result will sum to 1 225 | along the specified axis. 226 | """ 227 | 228 | # make X at least 2d 229 | y = np.atleast_2d(X) 230 | 231 | # find axis 232 | if axis is None: 233 | axis = next(j[0] for j in enumerate(y.shape) if j[1] > 1) 234 | 235 | # multiply y against the theta parameter, 236 | y = y * float(theta) 237 | 238 | # subtract the max for numerical stability 239 | y = y - np.expand_dims(np.max(y, axis=axis), axis) 240 | 241 | # exponentiate y 242 | y = np.exp(y) 243 | 244 | # take the sum along the specified axis 245 | ax_sum = np.expand_dims(np.sum(y, axis=axis), axis) 246 | 247 | # finally: divide elementwise 248 | p = y / ax_sum 249 | 250 | # flatten if X was 1D 251 | if len(X.shape) == 1: p = p.flatten() 252 | 253 | return p 254 | 255 | 256 | def get_crop_pad_sequence(vertical, horizontal): 257 | top = int(vertical / 2) 258 | bottom = vertical - top 259 | right = int(horizontal / 2) 260 | left = horizontal - right 261 | return (top, right, bottom, left) 262 | 263 | 264 | def get_list_of_image_predictions(batch_predictions): 265 | image_predictions = [] 266 | for batch_pred in batch_predictions: 267 | image_predictions.extend(list(batch_pred)) 268 | return image_predictions 269 | 270 | 271 | def relabel(img): 272 | relabeled_img = np.zeros_like(img) 273 | for i, k in enumerate(np.unique(img)): 274 | if k == 0: 275 | continue 276 | else: 277 | relabeled_img = np.where(img == k, i, relabeled_img) 278 | return relabeled_img 279 | 280 | 281 | def set_seed(seed): 282 | random.seed(seed) 283 | np.random.seed(seed) 284 | torch.manual_seed(seed) 285 | if torch.cuda.is_available(): 286 | torch.cuda.manual_seed_all(seed) 287 | 288 | 289 | def clean_memory(): 290 | if torch.cuda.is_available(): 291 | torch.cuda.empty_cache() 292 | 293 | 294 | def plot_list(images=None, labels=None): 295 | images = [] if not images else images 296 | labels = [] if not labels else labels 297 | 298 | n_img = len(images) 299 | n_lab = len(labels) 300 | n = n_lab + n_img 301 | fig, axs = plt.subplots(1, n, figsize=(16, 12)) 302 | for i, image in enumerate(images): 303 | axs[i].imshow(image) 304 | axs[i].set_xticks([]) 305 | axs[i].set_yticks([]) 306 | for j, label in enumerate(labels): 307 | axs[n_img + j].imshow(label, cmap='nipy_spectral') 308 | axs[n_img + j].set_xticks([]) 309 | axs[n_img + j].set_yticks([]) 310 | plt.show() 311 | 312 | 313 | def plot_results_for_id(results, idx): 314 | results_per_image = results[results['ImageId'] == idx] 315 | file_path_image = results_per_image.file_path_image.values[0] 316 | image = np.array(Image.open(file_path_image)).astype(np.uint8) 317 | ground_truth_image = np.zeros((image.shape[:2])) 318 | prediction_image = np.zeros((image.shape[:2])) 319 | 320 | ground_truth_rle = results_per_image.EncodedPixels_gt.unique() 321 | prediction_rle = results_per_image.EncodedPixels_pred.unique() 322 | 323 | for i, gt in enumerate(ground_truth_rle): 324 | if isinstance(gt, float): 325 | continue 326 | obj_mask = run_length_decoding(gt) 
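# Note: run_length_decoding yields a binary (768, 768) mask for a single RLE string;
# np.where(obj_mask, i + 1, ...) below accumulates those masks into one labeled image,
# so every ground-truth ship ends up with its own integer label for plotting.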
327 | ground_truth_image = np.where(obj_mask, i + 1, ground_truth_image) 328 | 329 | for i, pred in enumerate(prediction_rle): 330 | if isinstance(pred, float): 331 | continue 332 | obj_mask = run_length_decoding(pred) 333 | prediction_image = np.where(obj_mask, i + 1, prediction_image) 334 | 335 | plot_list(images=[image], labels=[ground_truth_image, prediction_image]) 336 | 337 | 338 | class FineTuneStep(Step): 339 | def __init__(self, 340 | transformer, 341 | name=None, 342 | experiment_directory=None, 343 | output_directory=None, 344 | input_data=None, 345 | input_steps=None, 346 | adapter=None, 347 | 348 | is_fittable=True, 349 | force_fitting=True, 350 | fine_tuning=False, 351 | 352 | persist_output=False, 353 | cache_output=False, 354 | load_persisted_output=False): 355 | super().__init__(name=name, 356 | transformer=transformer, 357 | experiment_directory=experiment_directory, 358 | output_directory=output_directory, 359 | input_data=input_data, 360 | input_steps=input_steps, 361 | adapter=adapter, 362 | is_fittable=is_fittable, 363 | force_fitting=force_fitting, 364 | cache_output=cache_output, 365 | persist_output=persist_output, 366 | load_persisted_output=load_persisted_output) 367 | self.fine_tuning = fine_tuning 368 | 369 | def _fit_transform_operation(self, step_inputs): 370 | if self.is_fittable: 371 | if self.transformer_is_persisted: 372 | if self.force_fitting and self.fine_tuning: 373 | raise ValueError('only one of force_fitting or fine_tuning can be True') 374 | elif self.force_fitting: 375 | logger.info('Step {}, fitting and transforming...'.format(self.name)) 376 | step_output_data = self.transformer.fit_transform(**step_inputs) 377 | logger.info('Step {}, persisting transformer to the {}' 378 | .format(self.name, self.experiment_directory_transformers_step)) 379 | self.transformer.persist(self.experiment_directory_transformers_step) 380 | elif self.fine_tuning: 381 | logger.info('Step {}, loading transformer from the {}' 382 | .format(self.name, self.experiment_directory_transformers_step)) 383 | self.transformer.load(self.experiment_directory_transformers_step) 384 | logger.info('Step {}, transforming...'.format(self.name)) 385 | step_output_data = self.transformer.fit_transform(**step_inputs) 386 | self.transformer.persist(self.experiment_directory_transformers_step) 387 | else: 388 | logger.info('Step {}, loading transformer from the {}' 389 | .format(self.name, self.experiment_directory_transformers_step)) 390 | self.transformer.load(self.experiment_directory_transformers_step) 391 | logger.info('Step {}, transforming...'.format(self.name)) 392 | step_output_data = self.transformer.transform(**step_inputs) 393 | else: 394 | logger.info('Step {}, fitting and transforming...'.format(self.name)) 395 | step_output_data = self.transformer.fit_transform(**step_inputs) 396 | logger.info('Step {}, persisting transformer to the {}' 397 | .format(self.name, self.experiment_directory_transformers_step)) 398 | self.transformer.persist(self.experiment_directory_transformers_step) 399 | else: 400 | logger.info('Step {}, transforming...'.format(self.name)) 401 | step_output_data = self.transformer.transform(**step_inputs) 402 | 403 | if self.cache_output: 404 | logger.info('Step {}, caching output to the {}' 405 | .format(self.name, self.experiment_directory_output_step)) 406 | self.output = step_output_data 407 | if self.persist_output: 408 | logger.info('Step {}, persisting output to the {}' 409 | .format(self.name, self.experiment_directory_output_step)) 410 | 
self._persist_output(step_output_data, self.experiment_directory_output_step) 411 | return step_output_data 412 | 413 | 414 | def make_apply_transformer(func, output_name='output', apply_on=None, n_threads=1): 415 | class StaticApplyTransformer(BaseTransformer): 416 | def transform(self, *args, **kwargs): 417 | self.check_input(*args, **kwargs) 418 | if not apply_on: 419 | iterator = list(zip(*args, *kwargs.values())) 420 | else: 421 | iterator = list(zip(*args, *[kwargs[key] for key in apply_on])) 422 | 423 | n_jobs = np.minimum(n_threads, len(iterator)) 424 | with mp.pool.ThreadPool(n_jobs) as executor: 425 | output = list(tqdm(executor.imap(lambda p: func(*p), iterator), total=len(iterator))) 426 | return {output_name: output} 427 | 428 | @staticmethod 429 | def check_input(*args, **kwargs): 430 | if len(args) and len(kwargs) == 0: 431 | raise Exception('Input must not be empty') 432 | 433 | arg_length = None 434 | for arg in chain(args, kwargs.values()): 435 | if not isinstance(arg, Iterable): 436 | raise Exception('All inputs must be iterable') 437 | arg_length_loc = None 438 | try: 439 | arg_length_loc = len(arg) 440 | except: 441 | pass 442 | if arg_length_loc is not None: 443 | if arg_length is None: 444 | arg_length = arg_length_loc 445 | elif arg_length_loc != arg_length: 446 | raise Exception('All inputs must be the same length') 447 | 448 | @staticmethod 449 | def get_arg_length(*args, **kwargs): 450 | arg_length = None 451 | for arg in chain(args, kwargs.values()): 452 | if arg_length is None: 453 | try: 454 | arg_length = len(arg) 455 | except: 456 | pass 457 | if arg_length is not None: 458 | return arg_length 459 | 460 | return StaticApplyTransformer() 461 | 462 | 463 | class OneCycle(object): 464 | """ 465 | In paper (https://arxiv.org/pdf/1803.09820.pdf), author suggests to do one cycle during 466 | whole run with 2 steps of equal length. During first step, increase the learning rate 467 | from lower learning rate to higher learning rate. And in second step, decrease it from 468 | higher to lower learning rate. This is Cyclic learning rate policy. Author suggests one 469 | addition to this. - During last few hundred/thousand iterations of cycle reduce the 470 | learning rate to 1/100th or 1/1000th of the lower learning rate. 471 | Also, Author suggests that reducing momentum when learning rate is increasing. So, we make 472 | one cycle of momentum also with learning rate - Decrease momentum when learning rate is 473 | increasing and increase momentum when learning rate is decreasing. 474 | Args: 475 | 476 | nb Total number of iterations including all epochs 477 | max_lr The optimum learning rate. This learning rate will be used as highest 478 | learning rate. The learning rate will fluctuate between max_lr to 479 | max_lr/div and then (max_lr/div)/div. 480 | momentum_vals The maximum and minimum momentum values between which momentum will 481 | fluctuate during cycle. 482 | Default values are (0.95, 0.85) 483 | prcnt The percentage of cycle length for which we annihilate learning rate 484 | way below the lower learnig rate. 485 | The default value is 10 486 | div The division factor used to get lower boundary of learning rate. This 487 | will be used with max_lr value to decide lower learning rate boundary. 488 | This value is also used to decide how much we annihilate the learning 489 | rate below lower learning rate. 490 | The default value is 10. 
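Illustrative example (derived from calc_lr/calc_mom below): with nb=1000, max_lr=0.1, div=10 and prcnt=10, step_len = int(1000 * 0.9 / 2) = 450. Iterations 1-450 ramp the learning rate from max_lr/div = 0.01 up to 0.1 while momentum falls from 0.95 to 0.85; iterations 451-900 bring the learning rate back down to 0.01 while momentum returns to 0.95; the remaining ~100 iterations annihilate the learning rate further (to roughly 2e-4 just before the counter resets) with momentum held at 0.95.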
491 | """ 492 | 493 | def __init__(self, nb, max_lr, optimizer=None, momentum_vals=(0.95, 0.85), prcnt=10, div=10): 494 | self.nb = nb 495 | self.div = div 496 | self.step_len = int(self.nb * (1 - prcnt / 100) / 2) 497 | self.high_lr = max_lr 498 | self.low_mom = momentum_vals[1] 499 | self.high_mom = momentum_vals[0] 500 | self.prcnt = prcnt 501 | self.iteration = 0 502 | self.lrs = [] 503 | self.moms = [] 504 | self.optimizer = optimizer 505 | 506 | def batch_step(self): 507 | if self.optimizer is None: 508 | raise ValueError(""" 509 | Can you have to provide an optimizer otherwise 510 | you can only use calc_lr anc calc_mom methods""") 511 | 512 | lr, mom = self.calc() 513 | for param_group in self.optimizer.param_groups: 514 | param_group['lr'] = lr 515 | param_group['mom'] = mom 516 | 517 | return lr, mom 518 | 519 | def calc(self): 520 | self.iteration += 1 521 | lr = self.calc_lr() 522 | mom = self.calc_mom() 523 | return (lr, mom) 524 | 525 | def calc_lr(self): 526 | if self.iteration == self.nb: 527 | self.iteration = 0 528 | self.lrs.append(self.high_lr / self.div) 529 | return self.high_lr / self.div 530 | if self.iteration > 2 * self.step_len: 531 | ratio = (self.iteration - 2 * self.step_len) / (self.nb - 2 * self.step_len) 532 | lr = self.high_lr * (1 - 0.99 * ratio) / self.div 533 | elif self.iteration > self.step_len: 534 | ratio = 1 - (self.iteration - self.step_len) / self.step_len 535 | lr = self.high_lr * (1 + ratio * (self.div - 1)) / self.div 536 | else: 537 | ratio = self.iteration / self.step_len 538 | lr = self.high_lr * (1 + ratio * (self.div - 1)) / self.div 539 | self.lrs.append(lr) 540 | return lr 541 | 542 | def calc_mom(self): 543 | if self.iteration == self.nb: 544 | self.iteration = 0 545 | self.moms.append(self.high_mom) 546 | return self.high_mom 547 | if self.iteration > 2 * self.step_len: 548 | mom = self.high_mom 549 | elif self.iteration > self.step_len: 550 | ratio = (self.iteration - self.step_len) / self.step_len 551 | mom = self.low_mom + ratio * (self.high_mom - self.low_mom) 552 | else: 553 | ratio = self.iteration / self.step_len 554 | mom = self.high_mom - ratio * (self.high_mom - self.low_mom) 555 | self.moms.append(mom) 556 | return mom 557 | -------------------------------------------------------------------------------- /neptune.yaml: -------------------------------------------------------------------------------- 1 | #project: USERNAME/PROJECT 2 | 3 | name: airbus_ships_challenge 4 | tags: [solution-4] 5 | 6 | metric: 7 | channel: 'f2' 8 | goal: maximize 9 | 10 | #Comment out if not in Cloud Environment 11 | #pip-requirements-file: requirements.txt 12 | 13 | exclude: 14 | - .git 15 | - .idea 16 | - .ipynb_checkpoints 17 | - output 18 | - imgs 19 | - neptune.log 20 | - offline_job.log 21 | - notebooks 22 | - big-images-ids.csv 23 | - big-images-ids_v2.csv 24 | 25 | parameters: 26 | 27 | # Data Paths 28 | train_images_dir: /public/challenges/kaggle-ship-detection/train 29 | test_images_dir: /public/challenges/kaggle-ship-detection/test 30 | metadata_filepath: /output/metadata.csv 31 | annotation_file: /public/challenges/kaggle-ship-detection/train_ship_segmentations.csv 32 | masks_overlayed_dir: /output/masks_overlayed 33 | 34 | # Execution 35 | num_workers: 4 36 | num_threads: 100 37 | image_source: disk 38 | pin_memory: 1 39 | loader_mode: resize 40 | resize_target_size: 256 41 | pad_method: symmetric 42 | target_format: 'joblib' 43 | dev_mode_size: 1000 44 | 45 | # General parameters 46 | image_h: 256 47 | image_w: 256 48 | 
image_channels: 3 49 | training_sampler_size: 7500 50 | training_sampler_empty_fraction: 0.0 51 | evaluation_size: 10000 52 | evaluation_empty_fraction: 0.52 53 | in_train_evaluation_size: 500 54 | fine_tuning: 1 55 | 56 | # Network parameters 57 | network_output_channels: 2 58 | network_activation: 'sigmoid' 59 | architecture: LargeKernelMatters 60 | encoder: DenseNet 61 | 62 | # Training schedule 63 | epochs_nr: 1000 64 | batch_size_train: 16 65 | batch_size_inference: 16 66 | lr: 0.0003 67 | momentum: 0.9 68 | gamma: 0.95 69 | patience: 20 70 | validation_metric_name: 'f2' 71 | minimize_validation_metric: 0 72 | reduce_factor: 0.5 73 | reduce_patience: 5 74 | min_lr: 0 75 | use_one_cycle: 1 76 | one_cycle_number_of_batches_per_full_cycle: 20000 77 | one_cycle_max_lr: 0.05 78 | 79 | # Regularization 80 | use_batch_norm: 1 81 | l2_reg_conv: 0.0001 82 | 83 | # Postprocessing 84 | threshold_masks: 0.44 85 | tta_aggregation_method: mean 86 | postpro__drop_size: 100 87 | postpro__mid_min_size: 0 88 | postpro__mid_max_size: 0 89 | 90 | # -------------------- SNS (SHIP NO SHIP) ---------------------- 91 | 92 | # Network parameters 93 | sns_architecture: Densenet 94 | 95 | # Training schedule 96 | sns_training_sampler_empty_fracion: 0.52 97 | sns_epochs_nr: 100 98 | sns_batch_size_train: 64 99 | sns_batch_size_inference: 256 100 | sns_lr: 0.0001 101 | sns_momentum: 0.9 102 | sns_gamma: 0.95 103 | sns_patience: 5 104 | sns_validation_metric_name: 'sum' 105 | sns_minimize_validation_metric_name: 'sum' 106 | sns_minimize_validation_metric: 1 107 | sns_reduce_factor: 0.5 108 | sns_reduce_patience: 10 109 | sns_min_lr: 0 110 | sns_use_one_cycle: 1 111 | sns_one_cycle_number_of_batches_per_full_cycle: 10000 112 | sns_one_cycle_max_lr: 0.0001 113 | sns_image_h: 224 114 | sns_image_w: 224 115 | 116 | 117 | # Regularization 118 | sns_use_batch_norm: 1 119 | sns_l2_reg_conv: 0.0001 120 | 121 | # Postprocessing 122 | sns_threshold: 0.6 -------------------------------------------------------------------------------- /prediction_exploration.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%matplotlib inline\n", 10 | "import pandas as pd\n", 11 | "from PIL import Image\n", 12 | "import numpy as np\n", 13 | "import ipywidgets as ipy\n", 14 | "\n", 15 | "from common_blocks.utils.misc import plot_results_for_id\n", 16 | "\n", 17 | "RESULTS_PATH = 'YOUR/PATH/TO/validation_results.csv'\n", 18 | "RESULTS_PATH = '/mnt/ml-team/minerva/open-solutions/ships/kuba/experiments/ship_505_cv_678_lb_681/validation_results.csv'" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "results = pd.read_csv(RESULTS_PATH)\n", 28 | "results.head()" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "# Results drill down" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "results_ = results[['ImageId','number_of_ships','f2']].drop_duplicates()\n", 45 | "size = len(results_)\n", 46 | "\n", 47 | "empty = results_[results_['number_of_ships']==0]['f2']\n", 48 | "f2_empty, size_empty = empty.mean(), len(empty)\n", 49 | "gain_empty = (1-f2_empty)*size_empty/size\n", 50 | "\n", 51 | "non_empty = results_[results_['number_of_ships']!=0]['f2']\n", 
52 | "f2_non_empty, size_non_empty = non_empty.mean(), len(non_empty)\n", 53 | "gain_non_empty = (1-f2_non_empty)*size_non_empty/size\n", 54 | "\n", 55 | "ship_1 = results_[results_['number_of_ships']==1]['f2']\n", 56 | "f2_1_ship, size_1_ship = ship_1.mean(), len(ship_1)\n", 57 | "gain_1_ship = (1-f2_1_ship)*size_1_ship/size\n", 58 | "\n", 59 | "ship_2_to_5 = results_[results_['number_of_ships'].between(2,5)]['f2']\n", 60 | "f2_2_to_5_ships, size_2_to_5_ships = ship_2_to_5.mean(), len(ship_2_to_5)\n", 61 | "gain_2_to_5_ship = (1-f2_2_to_5_ships)*size_2_to_5_ships/size\n", 62 | "\n", 63 | "ship_6_to_10 = results_[results_['number_of_ships'].between(6,10)]['f2']\n", 64 | "f2_6_to_10_ships, size_6_to_10_ships = ship_6_to_10.mean(), len(ship_6_to_10)\n", 65 | "gain_6_to_10_ship = (1-f2_6_to_10_ships)*size_6_to_10_ships/size\n", 66 | "\n", 67 | "ship_10_plus = results_[results_['number_of_ships']>10]['f2']\n", 68 | "f2_more_than_10_ships, size_more_than_10_ships = ship_10_plus.mean(), len(ship_10_plus)\n", 69 | "gain_10_ships = (1-f2_more_than_10_ships)*size_more_than_10_ships/size\n", 70 | "\n", 71 | "print('Empty | f2: {0:.3f} | gain: {1:.3f}'.format(f2_empty, gain_empty))\n", 72 | "print('Non Empty f2: {0:.3f} | gain: {1:.3f}'.format(f2_non_empty, gain_non_empty))\n", 73 | "print('1 ship f2: {0:.3f} | gain: {1:.3f}'.format(f2_1_ship, gain_1_ship))\n", 74 | "print('2-5 ships f2: {0:.3f} | gain: {1:.3f}'.format(f2_2_to_5_ships, gain_2_to_5_ship))\n", 75 | "print('5-10 ships f2: {0:.3f} | gain: {1:.3f}'.format(f2_6_to_10_ships, gain_6_to_10_ship))\n", 76 | "print('10+ ships f2: {0:.3f} | gain: {1:.3f}'.format(f2_more_than_10_ships, gain_10_ships))" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "# Predictions Exploration" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "### Non Empty" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "selected_predictions = results[(results['number_of_ships']!=0) &\n", 100 | " (results['f2'].between(0.0, 1.0))\n", 101 | " ][['ImageId','number_of_ships','f2']].\\\n", 102 | " drop_duplicates().sort_values('f2').reset_index(drop=True)\n", 103 | "selected_predictions.head()" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": { 110 | "scrolled": false 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "@ipy.interact(idx=ipy.IntSlider(min=0.0, max=len(selected_predictions)-1, step=1.0, value=0.0))\n", 115 | "def plot(idx):\n", 116 | " idx_pred = selected_predictions.iloc[idx]\n", 117 | " print('f2 {}'.format(idx_pred['f2']))\n", 118 | " plot_results_for_id(results, idx=idx_pred['ImageId'])" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "# def plot(idx):\n", 128 | "# idx_pred = selected_predictions.iloc[idx]\n", 129 | "# print('f2 {}'.format(idx_pred['f2']))\n", 130 | "# plot_results_for_id(results, idx=idx_pred['ImageId'])\n", 131 | "# plot(0)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [] 140 | } 141 | ], 142 | "metadata": { 143 | "kernelspec": { 144 | "display_name": "cpu py3", 145 | "language": "python", 146 | "name": "cpu_py3" 147 | }, 148 | "language_info": { 149 | "codemirror_mode": { 150 | "name": "ipython", 151 | "version": 
152 |    },
153 |   "file_extension": ".py",
154 |   "mimetype": "text/x-python",
155 |   "name": "python",
156 |   "nbconvert_exporter": "python",
157 |   "pygments_lexer": "ipython3",
158 |   "version": "3.5.2"
159 |  }
160 | },
161 | "nbformat": 4,
162 | "nbformat_minor": 2
163 | }
164 |
--------------------------------------------------------------------------------
/prepare_metadata.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | import neptune
4 | from sklearn.externals import joblib  # joblib vendored by scikit-learn < 0.23
5 | import pandas as pd
6 | from tqdm import tqdm
7 | 
8 | from common_blocks.utils import misc, masks
9 | 
10 | CTX = neptune.Context()
11 | LOGGER = misc.init_logger()
12 | 
13 | DEV_MODE = True
14 | 
15 | #   ______   ______   .__   __.  _______  __    _______      _______.
16 | #  /      | /  __  \  |  \ |  | |   ____||  |  /  _____|    /       |
17 | # |  ,----'|  |  |  | |   \|  | |  |__   |  | |  |  __     |   (----`
18 | # |  |     |  |  |  | |  . `  | |   __|  |  | |  | |_ |     \   \
19 | # |  `----.|  `--'  | |  |\   | |  |     |  | |  |__| | .----)   |
20 | #  \______| \______/ |__| \__| |__|      |__|  \______| |_______/
21 | #
22 | 
23 | ORIGINAL_SIZE = (768, 768)
24 | EXCLUDED_FILENAMES = ['6384c3e78.jpg', ]
25 | 
26 | if CTX.params.__class__.__name__ == 'OfflineContextParams':
27 |     PARAMS = misc.read_yaml().parameters
28 | else:
29 |     PARAMS = CTX.params
30 | 
31 | 
32 | #  __________   ___  _______   ______  __    __  .___________. __    ______   .__   __.
33 | # |   ____\  \ /  / |   ____| /      ||  |  |  | |           ||  |  /  __  \  |  \ |  |
34 | # |  |__   \  V  /  |  |__   |  ,----'|  |  |  | `---|  |----`|  | |  |  |  | |   \|  |
35 | # |   __|   >   <   |   __|  |  |     |  |  |  |     |  |     |  | |  |  |  | |  . `  |
36 | # |  |____ /  .  \  |  |____ |  `----.|  `--'  |     |  |     |  | |  `--'  | |  |\   |
37 | # |_______/__/ \__\ |_______| \______| \______/      |__|     |__|  \______/  |__| \__|
38 | #
39 | 
40 | def prepare_masks():
41 |     LOGGER.info('overlaying masks')
42 |     overlay_masks(annotation_file_name=PARAMS.annotation_file, target_dir=PARAMS.masks_overlayed_dir)
43 | 
44 | 
45 | def prepare_metadata():
46 |     LOGGER.info('creating metadata')
47 |     meta = generate_metadata(train_images_dir=PARAMS.train_images_dir,
48 |                              masks_overlayed_dir=PARAMS.masks_overlayed_dir,
49 |                              test_images_dir=PARAMS.test_images_dir,
50 |                              annotation_file_name=PARAMS.annotation_file
51 |                              )
52 |     meta.to_csv(PARAMS.metadata_filepath, index=False)
53 | 
54 | 
55 | #  __    __  .___________. __   __          _______.
56 | # |  |  |  | |           ||  | |  |         /       |
57 | # |  |  |  | `---|  |----`|  | |  |        |   (----`
58 | # |  |  |  |     |  |     |  | |  |         \   \
59 | # |  `--'  |     |  |     |  | |  `----.----)   |
60 | #  \______/      |__|     |__| |_______|_______/
61 | #
62 | 
63 | def overlay_masks(annotation_file_name, target_dir):
64 |     os.makedirs(target_dir, exist_ok=True)
65 |     annotations = pd.read_csv(annotation_file_name)
66 | 
67 |     for file_name, image_annotation in tqdm(annotations.groupby("ImageId")):
68 |         if file_name not in EXCLUDED_FILENAMES:
69 |             target_file_name = os.path.join(target_dir, file_name.split('.')[0])
70 |             mask = masks.get_overlayed_mask(image_annotation, ORIGINAL_SIZE)
71 |             if mask.sum() == 0:  # no ships on this image
72 |                 mask = ORIGINAL_SIZE  # store just the (height, width) tuple as a placeholder for an empty mask
73 |             joblib.dump(mask, target_file_name)
74 | 
75 | 
76 | def generate_metadata(train_images_dir, masks_overlayed_dir, test_images_dir, annotation_file_name):
77 |     metadata = {}
78 |     annotations = pd.read_csv(annotation_file_name)
79 |     for filename in tqdm(os.listdir(train_images_dir)):
80 |         image_filepath = os.path.join(train_images_dir, filename)
81 |         mask_filepath = os.path.join(masks_overlayed_dir, filename.split('.')[0])
82 |         image_id = filename.split('.')[0]
83 |         number_of_ships = get_number_of_ships(annotations.query('ImageId == @filename'))
84 |         is_not_empty = int(number_of_ships != 0)
85 | 
86 |         metadata.setdefault('file_path_image', []).append(image_filepath)
87 |         metadata.setdefault('file_path_mask', []).append(mask_filepath)
88 |         metadata.setdefault('is_train', []).append(1)
89 |         metadata.setdefault('id', []).append(image_id)
90 |         metadata.setdefault('number_of_ships', []).append(number_of_ships)
91 |         metadata.setdefault('is_not_empty', []).append(is_not_empty)
92 | 
93 |     for filename in tqdm(os.listdir(test_images_dir)):  # test images: masks and ship counts are unknown
94 |         image_filepath = os.path.join(test_images_dir, filename)
95 |         image_id = filename.split('.')[0]
96 | 
97 |         metadata.setdefault('file_path_image', []).append(image_filepath)
98 |         metadata.setdefault('file_path_mask', []).append(None)
99 |         metadata.setdefault('is_train', []).append(0)
100 |         metadata.setdefault('id', []).append(image_id)
101 |         metadata.setdefault('number_of_ships', []).append(None)
102 |         metadata.setdefault('is_not_empty', []).append(None)
103 | 
104 |     return pd.DataFrame(metadata)
105 | 
106 | 
107 | def get_number_of_ships(image_annotations):
108 |     if image_annotations['EncodedPixels'].any():  # ship-free images appear as a single row with NaN EncodedPixels
109 |         return len(image_annotations)
110 |     else:
111 |         return 0
112 | 
113 | 
114 | # .___  ___.      ___       __  .__   __.
115 | # |   \/   |     /   \     |  | |  \ |  |
116 | # |  \  /  |    /  ^  \    |  | |   \|  |
117 | # |  |\/|  |   /  /_\  \   |  | |  . `  |
118 | # |  |  |  |  /  _____  \  |  | |  |\   |
119 | # |__|  |__| /__/     \__\ |__| |__| \__|
120 | #
121 | 
122 | if __name__ == "__main__":
123 |     prepare_masks()
124 |     prepare_metadata()
125 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tqdm==4.23.0
2 | pandas==0.22.0
3 | imageio==2.2.0
4 | pydot_ng==1.0.0
5 | opencv_python==4.2.0.32
6 | torch==0.3.1
7 | attrdict==2.0.0
8 | scipy==1.0.0
9 | scikit_image==0.13.1
10 | numpy==1.22.0
11 | imgaug==0.2.5
12 | neptune-cli
13 | ipython==7.16.3
14 | Pillow>=6.2.2
15 | pyyaml>=4.2b1
16 | Cython==0.28.2
17 | pycocotools==2.0.0
18 | steppy==0.1.14
19 | steppy-toolkit==0.1.12
20 | pretrainedmodels==0.7.0
21 | torchvision==0.2.0
22 | 
23 | 
--------------------------------------------------------------------------------
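
For reference, here is a minimal sketch (not part of the repository) of how the masks dumped by overlay_masks could be read back. It assumes, as the placeholder logic in overlay_masks suggests, that a mask with no ships is stored as the bare (height, width) tuple and that a consumer rebuilds an all-zero array from it; the helper name load_overlayed_mask is illustrative only.

import numpy as np
from sklearn.externals import joblib  # same vendored joblib used by prepare_metadata.py


def load_overlayed_mask(mask_filepath):
    # overlay_masks dumps either a 2D mask array or, for ship-free images,
    # just the (height, width) tuple; normalize both cases to an array here.
    mask = joblib.load(mask_filepath)
    if isinstance(mask, tuple):
        mask = np.zeros(mask, dtype=np.uint8)
    return mask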