├── tensorpipe ├── augment │ ├── __init__.py │ ├── augment.py │ └── augmentations.py ├── register │ ├── __init__.py │ └── register.py ├── pipe │ ├── __init__.py │ └── funnel.py ├── __init__.py ├── containers.py ├── funnels │ ├── __init__.py │ ├── create_records.py │ ├── base_funnel.py │ └── funnels.py └── utils │ ├── utils.py │ └── registry.py ├── logo.png ├── testdata ├── test │ ├── cat │ │ ├── cat.jpg │ │ ├── cat1.jpg │ │ ├── cat3.jpg │ │ └── cat4.jpg │ └── dog │ │ ├── dog.jpg │ │ ├── dog1.jpg │ │ ├── dog2.jpg │ │ └── dog3.jpg ├── train │ ├── cat │ │ ├── cat.jpg │ │ ├── cat1.jpg │ │ ├── cat3.jpg │ │ └── cat4.jpg │ └── dog │ │ ├── dog.jpg │ │ ├── dog1.jpg │ │ ├── dog2.jpg │ │ └── dog3.jpg └── validation │ ├── cat │ ├── cat.jpg │ ├── cat1.jpg │ ├── cat3.jpg │ └── cat4.jpg │ └── dog │ ├── dog.jpg │ ├── dog1.jpg │ ├── dog2.jpg │ └── dog3.jpg ├── requirements.txt ├── run_tests.sh ├── pylint.sh ├── CONTRIBUTING.md ├── .github ├── ISSUE_TEMPLATE │ └── feature_request.md └── workflows │ ├── codecov.yml │ └── python-app.yml ├── CHANGELOG.md ├── examples ├── example_classification.py └── example_objectdetection.py ├── tests ├── test_classification.py ├── test_config.py └── test_augment.py ├── .gitignore ├── README.md ├── LICENSE └── .pylintrc /tensorpipe/augment/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tensorpipe/register/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tensorpipe/pipe/__init__.py: -------------------------------------------------------------------------------- 1 | from .funnel import Funnel as Funnel 2 | -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kartik4949/TensorPipe/HEAD/logo.png -------------------------------------------------------------------------------- /testdata/test/cat/cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kartik4949/TensorPipe/HEAD/testdata/test/cat/cat.jpg -------------------------------------------------------------------------------- /testdata/test/dog/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kartik4949/TensorPipe/HEAD/testdata/test/dog/dog.jpg -------------------------------------------------------------------------------- /tensorpipe/__init__.py: -------------------------------------------------------------------------------- 1 | import tensorpipe.augment 2 | import tensorpipe.pipe 3 | import tensorpipe.funnels 4 | -------------------------------------------------------------------------------- /testdata/test/cat/cat1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kartik4949/TensorPipe/HEAD/testdata/test/cat/cat1.jpg -------------------------------------------------------------------------------- /testdata/test/cat/cat3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kartik4949/TensorPipe/HEAD/testdata/test/cat/cat3.jpg -------------------------------------------------------------------------------- /testdata/test/cat/cat4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kartik4949/TensorPipe/HEAD/testdata/test/cat/cat4.jpg -------------------------------------------------------------------------------- /testdata/test/dog/dog1.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kartik4949/TensorPipe/HEAD/testdata/test/dog/dog1.jpg -------------------------------------------------------------------------------- /testdata/test/dog/dog2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kartik4949/TensorPipe/HEAD/testdata/test/dog/dog2.jpg -------------------------------------------------------------------------------- /testdata/test/dog/dog3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kartik4949/TensorPipe/HEAD/testdata/test/dog/dog3.jpg -------------------------------------------------------------------------------- /testdata/train/cat/cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kartik4949/TensorPipe/HEAD/testdata/train/cat/cat.jpg -------------------------------------------------------------------------------- /testdata/train/cat/cat1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kartik4949/TensorPipe/HEAD/testdata/train/cat/cat1.jpg -------------------------------------------------------------------------------- /testdata/train/cat/cat3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kartik4949/TensorPipe/HEAD/testdata/train/cat/cat3.jpg -------------------------------------------------------------------------------- /testdata/train/cat/cat4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kartik4949/TensorPipe/HEAD/testdata/train/cat/cat4.jpg -------------------------------------------------------------------------------- /testdata/train/dog/dog.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kartik4949/TensorPipe/HEAD/testdata/train/dog/dog.jpg -------------------------------------------------------------------------------- /testdata/train/dog/dog1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kartik4949/TensorPipe/HEAD/testdata/train/dog/dog1.jpg -------------------------------------------------------------------------------- /testdata/train/dog/dog2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kartik4949/TensorPipe/HEAD/testdata/train/dog/dog2.jpg -------------------------------------------------------------------------------- /testdata/train/dog/dog3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kartik4949/TensorPipe/HEAD/testdata/train/dog/dog3.jpg -------------------------------------------------------------------------------- /testdata/validation/cat/cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kartik4949/TensorPipe/HEAD/testdata/validation/cat/cat.jpg -------------------------------------------------------------------------------- /testdata/validation/cat/cat1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kartik4949/TensorPipe/HEAD/testdata/validation/cat/cat1.jpg -------------------------------------------------------------------------------- /testdata/validation/cat/cat3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kartik4949/TensorPipe/HEAD/testdata/validation/cat/cat3.jpg -------------------------------------------------------------------------------- /testdata/validation/cat/cat4.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kartik4949/TensorPipe/HEAD/testdata/validation/cat/cat4.jpg -------------------------------------------------------------------------------- /testdata/validation/dog/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kartik4949/TensorPipe/HEAD/testdata/validation/dog/dog.jpg -------------------------------------------------------------------------------- /testdata/validation/dog/dog1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kartik4949/TensorPipe/HEAD/testdata/validation/dog/dog1.jpg -------------------------------------------------------------------------------- /testdata/validation/dog/dog2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kartik4949/TensorPipe/HEAD/testdata/validation/dog/dog2.jpg -------------------------------------------------------------------------------- /testdata/validation/dog/dog3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kartik4949/TensorPipe/HEAD/testdata/validation/dog/dog3.jpg -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow-addons==0.11.2 2 | tensorflow==2.2.0 3 | sklearn 4 | typeguard 5 | bunch 6 | dependency-injector 7 | -------------------------------------------------------------------------------- /run_tests.sh: -------------------------------------------------------------------------------- 1 | ls tests/*.py|xargs -n 1 -P 3 python &> test.log &&echo "All passed" || echo "Failed! 
Search keyword FAILED in test.log" 2 | 3 | -------------------------------------------------------------------------------- /pylint.sh: -------------------------------------------------------------------------------- 1 | echo "Checking File : $1" 2 | python -m pylint --rcfile=.pylintrc --disable=deprecated-module --const-rgx='[a-z_][a-z0-9_]{2,30}$' $1 3 | -------------------------------------------------------------------------------- /tensorpipe/containers.py: -------------------------------------------------------------------------------- 1 | from bunch import Bunch 2 | from dependency_injector import containers, providers 3 | 4 | from .augment import augment 5 | 6 | class Container(containers.DeclarativeContainer): 7 | config = providers.Configuration() 8 | external_config = providers.Factory(Bunch, config.external_config) 9 | augmenter = providers.Factory( 10 | augment.Augment, 11 | config=external_config, 12 | datatype=config.datatype 13 | ) 14 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | Contributions and patches are always welcomed, following are some few things before submitting your contriibution. 4 | 5 | ## Format 6 | 7 | Format your code in following way: 8 | Use Black formatter 9 | 10 | ``` 11 | black -l 79 file_name.py 12 | ``` 13 | 14 | ## Tests 15 | 16 | Complete all tests in tests folder by using following code: 17 | 18 | ``` 19 | ls tests/*.py|xargs -n 1 -P 3 python &> test.log &&echo "All passed" || echo "Failed! 
Search keyword FAILED in test.log" 20 | 21 | ``` 22 | 23 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | 4 | ## v1.0 5 | 6 | ### Added 7 | 8 | * Custom numpy function injection. 9 | * Type check validation. 10 | * Examples. 11 | * Config and some augmentation tests. 12 | * Access to internal properites of base class to public interface. 13 | 14 | ### Changed 15 | 16 | * Change log 17 | * Software Architecture 18 | * Internal Changes 19 | * Format changes. 20 | 21 | ## v1.0-beta.1 22 | 23 | Released on October 11, 2020. 
24 | 25 | ### Added 26 | 27 | * Released first version of tensorpipe with classification data support with some sanity checks: 28 | - Classification Support 29 | - Tests 30 | - Documentation 31 | - Code style checking 32 | - Change log 33 | - Code of Conduct 34 | - Contributing guidelines 35 | - License 36 | - README 37 | - Requirements.txt 38 | 39 | -------------------------------------------------------------------------------- /.github/workflows/codecov.yml: -------------------------------------------------------------------------------- 1 | name: workflow for Codecov 2 | on: [push] 3 | jobs: 4 | run: 5 | runs-on: ${{ matrix.os }} 6 | strategy: 7 | matrix: 8 | os: [ubuntu-latest, macos-latest] 9 | env: 10 | OS: ${{ matrix.os }} 11 | PYTHON: '3.8' 12 | steps: 13 | - uses: actions/checkout@v2 14 | - name: Setup Python 15 | uses: actions/setup-python@v2 16 | with: 17 | python-version: 3.8 18 | - name: Install dependencies 19 | run: | 20 | python -m pip install --upgrade pip 21 | pip install flake8 pytest 22 | pip install -r requirements.txt 23 | - name: Generate coverage report 24 | run: | 25 | pip install pytest 26 | pip install pytest-cov 27 | python -m pytest --cov=./ --cov-report=xml 28 | - name: Upload coverage to Codecov 29 | run: bash <(curl -s https://codecov.io/bash) 30 | -------------------------------------------------------------------------------- /tensorpipe/register/register.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2020 Kartik Sharma 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | from ..utils.registry import Registry 23 | 24 | # register for funnels 25 | FUNNEL = Registry("Funnels") 26 | 27 | # register for transformations 28 | AUG = Registry("Transformations") 29 | -------------------------------------------------------------------------------- /tensorpipe/funnels/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2020 Kartik Sharma 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | """ 16 | 17 | 18 | from os.path import dirname, basename, isfile, join 19 | import glob 20 | 21 | from .base_funnel import ALLOWED_TYPES 22 | from .funnels import * 23 | 24 | modules = glob.glob(join(dirname(__file__), "*.py")) 25 | __all__ = [ 26 | basename(f)[:-3] for f in modules if isfile(f) and not f.endswith("__init__.py") 27 | ] 28 | -------------------------------------------------------------------------------- /tensorpipe/utils/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2020 Kartik Sharma 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | """ 16 | 17 | 18 | def enforce(*types): 19 | def check_accepts(f): 20 | assert len(types) == f.__code__.co_argcount 21 | 22 | def new_f(*args, **kwds): 23 | for (a, t) in zip(args, types): 24 | assert isinstance(a, t), "arg %r does not match %s" % (a, t) 25 | return f(*args, **kwds) 26 | 27 | new_f.__name__ = f.__name__ 28 | return new_f 29 | 30 | return check_accepts 31 | -------------------------------------------------------------------------------- /.github/workflows/python-app.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python application 5 | 6 | on: 7 | push: 8 | branches: [ main ] 9 | pull_request: 10 | branches: [ main ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Set up Python 3.8 20 | uses: actions/setup-python@v2 21 | with: 22 | python-version: 3.8 23 | - name: Install dependencies 24 | run: | 25 | python -m pip install --upgrade pip 26 | pip install flake8 pytest 27 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 28 | - name: Lint with flake8 29 | run: | 30 | # stop the build if there are Python syntax errors or undefined names 31 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 32 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 33 | flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 34 | - name: Test with pytest 35 | run: | 36 | python -m pytest -s tests 37 | -------------------------------------------------------------------------------- /examples/example_classification.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2020 Kartik Sharma 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | import os 17 | import sys 18 | 19 | sys.path.append(os.getcwd()) 20 | 21 | from tensorpipe.pipe import Funnel 22 | import numpy as np 23 | 24 | from tensorpipe.pipe import Funnel 25 | 26 | """ 27 | Create a Funnel for the Pipeline! 28 | """ 29 | 30 | # Custom numpy code for injection. 
31 | def numpy_function(image, label): 32 | image = np.fliplr(image) 33 | return image, label 34 | 35 | 36 | config = { 37 | "batch_size": 2, 38 | "image_size": [512, 512], 39 | "transformations": { 40 | "flip_left_right": None, 41 | "gridmask": None, 42 | "random_rotate": None, 43 | }, 44 | "categorical_encoding": "labelencoder", 45 | "numpy_function": numpy_function, 46 | } 47 | funnel = Funnel(data_path="testdata", config=config, datatype="categorical") 48 | dataset = funnel.from_dataset(type="train") 49 | 50 | for data in dataset: 51 | print(data[0].shape) 52 | -------------------------------------------------------------------------------- /examples/example_objectdetection.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2020 Kartik Sharma 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | import os 18 | import sys 19 | 20 | sys.path.append(os.getcwd()) 21 | 22 | import numpy as np 23 | from tensorpipe.pipe import Funnel 24 | 25 | """ 26 | Create a Funnel for the Pipeline! 27 | """ 28 | 29 | # Custom numpy code for injection. 
30 | def numpy_function(image, label): 31 | """normalize image""" 32 | image = image / 255.0 33 | return image, label 34 | 35 | 36 | config = { 37 | "batch_size": 2, 38 | "image_size": [512, 512], 39 | "transformations": { 40 | "flip_left_right": None, 41 | "gridmask": None, 42 | "random_rotate": None, 43 | }, 44 | "max_instances_per_image": 100, 45 | "categorical_encoding": "labelencoder", 46 | "numpy_function": numpy_function, 47 | } 48 | funnel = Funnel(data_path="tfrecorddata", config=config, datatype="bbox") 49 | dataset = funnel.from_tfrecords(type="train") 50 | 51 | for data in dataset: 52 | print(data[1].shape) 53 | -------------------------------------------------------------------------------- /tests/test_classification.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2020 Kartik Sharma 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | from bunch import Bunch 18 | from absl import logging 19 | 20 | import tensorflow as tf 21 | 22 | from tensorpipe.pipe import Funnel 23 | 24 | 25 | class TestClassificationFunnel(tf.test.TestCase): 26 | def __init__(self, *args, **kwargs): 27 | super().__init__(*args, **kwargs) 28 | 29 | # Create an Augmentation pipeline ! 
30 | config = { 31 | "batch_size": 1, 32 | "image_size": [512, 512], 33 | "transformations": { 34 | "flip_left_right": None, 35 | "gridmask": None, 36 | "random_rotate": None, 37 | }, 38 | "categorical_encoding": "labelencoder", 39 | } 40 | self.config = Bunch(config) 41 | tf.compat.v1.random.set_random_seed(111111) 42 | 43 | def test_sanity(self): 44 | funnel = Funnel( 45 | data_path="testdata", config=self.config, datatype="categorical" 46 | ) 47 | dataset = funnel.from_dataset(type="train") 48 | data = next(iter(dataset)) 49 | images = data[0] 50 | self.assertEqual(self.config.image_size, images[0].shape[:2]) 51 | 52 | 53 | if __name__ == "__main__": 54 | logging.set_verbosity(logging.WARNING) 55 | tf.test.main() 56 | -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2020 Kartik Sharma 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | """ 16 | 17 | from bunch import Bunch 18 | from absl import logging 19 | 20 | import tensorflow as tf 21 | 22 | from tensorpipe.pipe import Funnel 23 | 24 | 25 | class ConfigTest(tf.test.TestCase): 26 | def __init__(self, *args, **kwargs): 27 | super().__init__(*args, **kwargs) 28 | 29 | config = { 30 | "batch_size": 1, 31 | "image_size": [512, 512], 32 | "transformations": { 33 | "flip_left_right": None, 34 | "gridmask": None, 35 | "random_rotate": None, 36 | }, 37 | "categorical_encoding": "labelencoder", 38 | } 39 | config = Bunch(config) 40 | self.config = config 41 | tf.compat.v1.random.set_random_seed(111111) 42 | 43 | def test_config_getter(self): 44 | """Verify config.""" 45 | funnel = Funnel( 46 | data_path="testdata", config=self.config, datatype="categorical" 47 | ) 48 | _ = funnel.from_dataset(type="train") 49 | self.assertEqual(self.config.batch_size, 1) 50 | 51 | def test_config_setter(self): 52 | """Simple test for config assignment""" 53 | self.config.batch_size = 2 54 | self.assertEqual(self.config.batch_size, 2) 55 | 56 | 57 | if __name__ == "__main__": 58 | logging.set_verbosity(logging.WARNING) 59 | tf.test.main() 60 | -------------------------------------------------------------------------------- /tests/test_augment.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2020 Kartik Sharma 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | """ 16 | 17 | 18 | """Data Pipeline simple tests.""" 19 | 20 | from bunch import Bunch 21 | from absl import logging 22 | 23 | import tensorflow as tf 24 | 25 | from tensorpipe.pipe import Funnel 26 | from tensorpipe.augment import augment 27 | 28 | 29 | class AugmentTest(tf.test.TestCase): 30 | def __init__(self, *args, **kwargs): 31 | super().__init__(*args, **kwargs) 32 | 33 | # Create an Augmentation pipeline ! 34 | config = { 35 | "batch_size": 1, 36 | "image_size": [512, 512], 37 | "transformations": { 38 | "flip_left_right": None, 39 | "gridmask": None, 40 | "random_rotate": None, 41 | }, 42 | "categorical_encoding": "labelencoder", 43 | } 44 | config = Bunch(config) 45 | 46 | self.augmentor = augment.Augment(config) 47 | tf.compat.v1.random.set_random_seed(111111) 48 | 49 | def test_augment_boxes(self): 50 | """Verify num of boxes are valid and syntax check random four images.""" 51 | images = tf.random.uniform( 52 | shape=(512, 512, 3), minval=0, maxval=255, dtype=tf.float32 53 | ) 54 | bboxes = tf.random.uniform(shape=(2, 4), minval=1, maxval=511, dtype=tf.int32) 55 | 56 | _, bbox = self.augmentor(images, bboxes) 57 | self.assertEqual(bboxes.shape[0], bbox.shape[0]) 58 | 59 | def test_image_dimensions(self): 60 | images = tf.random.uniform( 61 | shape=(512, 512, 3), minval=0, maxval=255, dtype=tf.float32 62 | ) 63 | bboxes = tf.random.uniform(shape=(2, 4), minval=1, maxval=511, dtype=tf.int32) 64 | image, bbox = self.augmentor(images, bboxes) 65 | self.assertEqual(image.shape[1], images.shape[1]) 66 | 67 | 68 | if __name__ == "__main__": 69 | logging.set_verbosity(logging.WARNING) 70 | tf.test.main() 71 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | 
build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | tfrecorddata/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | pip-wheel-metadata/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # vim 79 | .vim/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | -------------------------------------------------------------------------------- /tensorpipe/funnels/create_records.py: -------------------------------------------------------------------------------- 1 | """ Create TFrecords from json GTs """ 2 | import io 3 | from collections import namedtuple 4 | import os 5 | import logging 6 | import json 7 | from glob import glob 8 | 9 | import numpy as np 10 | import cv2 11 | from tqdm import tqdm 12 | from absl import app, flags 13 | import pandas as pd 14 | import tensorflow as tf 15 | 16 | from PIL import Image 17 | 18 | 19 | class CreateRecords: 20 | @staticmethod 21 | def int64_feature(value): 22 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 23 | 24 | @staticmethod 25 | def int64_list_feature(value): 26 | return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) 27 | 28 | @staticmethod 29 | def bytes_feature(value): 30 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 31 | 32 | @staticmethod 33 | def bytes_list_feature(value): 34 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) 35 | 36 | @staticmethod 37 | def float_list_feature(value): 38 | return tf.train.Feature(float_list=tf.train.FloatList(value=value)) 39 | 40 | def create_tf_example(self, group): 41 | """create_tf_example. 
42 | create tf example buffer 43 | 44 | Args: 45 | group: group name 46 | """ 47 | with tf.io.gfile.GFile(os.path.join(group.filename), "rb") as fid: 48 | encoded_jpg = fid.read() 49 | encoded_jpg_io = io.BytesIO(encoded_jpg) 50 | image = Image.open(encoded_jpg_io) 51 | 52 | width, height = image.size 53 | 54 | filename = group.filename.encode("utf8") 55 | image_format = b"jpg" 56 | xmins = [] 57 | xmaxs = [] 58 | ymins = [] 59 | ymaxs = [] 60 | classes_text = [] 61 | classes = [] 62 | 63 | for _, row in group.bbox.iterrows(): 64 | xmins.append(row["xmin"] / width) 65 | xmaxs.append(row["xmax"] / width) 66 | ymins.append(row["ymin"] / height) 67 | ymaxs.append(row["ymax"] / height) 68 | if ( 69 | row["xmin"] / width > 1 70 | or row["ymin"] / height > 1 71 | or row["xmax"] / width > 1 72 | or row["ymax"] / height > 1 73 | ): 74 | logging.info(row) 75 | 76 | classes_text.append(row["class"].encode("utf8")) 77 | classes.append(1) 78 | tf_example = tf.train.Example( 79 | features=tf.train.Features( 80 | feature={ 81 | "image/height": self.int64_feature(height), 82 | "image/width": self.int64_feature(width), 83 | "image/filename": self.bytes_feature(filename), 84 | "image/image_id": self.bytes_feature("0".encode("utf8")), 85 | "image/encoded": self.bytes_feature(encoded_jpg), 86 | "image/format": self.bytes_feature(image_format), 87 | "image/bbox/xmin": self.float_list_feature(xmins), 88 | "image/bbox/xmax": self.float_list_feature(xmaxs), 89 | "image/bbox/ymin": self.float_list_feature(ymins), 90 | "image/bbox/ymax": self.float_list_feature(ymaxs), 91 | "image/class/text": self.bytes_list_feature(classes_text), 92 | "image/class/label": self.int64_list_feature(classes), 93 | } 94 | ) 95 | ) 96 | return tf_example 97 | -------------------------------------------------------------------------------- /tensorpipe/utils/registry.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2020 Kartik Sharma 3 | 4 | Licensed 
under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | 23 | import inspect 24 | import warnings 25 | from functools import partial 26 | 27 | 28 | class Registry: 29 | """Registry. 30 | Registry Class which stores module references which can be used to 31 | apply pluging architecture and achieve flexiblity. 32 | """ 33 | 34 | def __init__(self, name): 35 | """__init__. 36 | 37 | Args: 38 | name: 39 | """ 40 | self._name = name 41 | self._module_dict = dict() 42 | 43 | def __len__(self): 44 | """__len__.""" 45 | return len(self._module_dict) 46 | 47 | def __contains__(self, key): 48 | """__contains__. 49 | 50 | Args: 51 | key: 52 | """ 53 | return self.get(key) is not None 54 | 55 | def __repr__(self): 56 | """__repr__.""" 57 | format_str = ( 58 | self.__class__.__name__ + f"(name={self._name}, " 59 | f"items={self._module_dict})" 60 | ) 61 | return format_str 62 | 63 | @property 64 | def name(self): 65 | """name.""" 66 | return self._name 67 | 68 | @property 69 | def module_dict(self): 70 | """module_dict.""" 71 | return self._module_dict 72 | 73 | def get(self, key): 74 | """get. 75 | 76 | Args: 77 | key: 78 | """ 79 | return self._module_dict.get(key, None) 80 | 81 | def _register_module(self, module_class, module_name=None, force=False): 82 | """_register_module. 
83 | 84 | Args: 85 | module_class: Module class to register 86 | module_name: Module name to register 87 | force: forced injection in register 88 | """ 89 | 90 | if module_name is None: 91 | module_name = module_class.__name__ 92 | if not force and module_name in self._module_dict: 93 | raise KeyError( 94 | f"{module_name} is already registered " f"in {self.name}" 95 | ) 96 | self._module_dict[module_name] = module_class 97 | 98 | def register_module(self, name=None, force=False, module=None): 99 | """register_module. 100 | Registers module passed and stores in the modules dict. 101 | 102 | Args: 103 | name: module name. 104 | force: if forced inject register module if already present. default False. 105 | module: Module Reference. 106 | """ 107 | 108 | if module is not None: 109 | self._register_module( 110 | module_class=module, module_name=name, force=force 111 | ) 112 | return module 113 | 114 | if not (name is None or isinstance(name, str)): 115 | raise TypeError(f"name must be a str, but got {type(name)}") 116 | 117 | def _register(cls): 118 | self._register_module( 119 | module_class=cls, module_name=name, force=force 120 | ) 121 | return cls 122 | 123 | return _register 124 | -------------------------------------------------------------------------------- /tensorpipe/pipe/funnel.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2020 Kartik Sharma 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import typeguard 22 | import sys 23 | import tensorflow as tf 24 | 25 | from .. import funnels 26 | from ..register.register import FUNNEL 27 | from ..augment import augment 28 | from ..containers import Container 29 | 30 | container = Container() 31 | 32 | """Singleton Design pattern""" 33 | class _singleton(type): 34 | 35 | _max_allowed_instances = 2 36 | _instances = {} 37 | 38 | def __call__(cls, *args, **kwargs): 39 | """__call__. 40 | 41 | Args: 42 | cls: Child class 43 | args: child class args 44 | kwargs: child class kwargs 45 | """ 46 | if len(cls._instances) == cls._max_allowed_instances: 47 | raise Exception( 48 | f"{cls.__name__} is allowed to have at most {cls._max_allowed_instances} instances" 49 | ) 50 | 51 | if cls not in cls._instances: 52 | cls._instances[cls] = super(_singleton, cls).__call__(*args, **kwargs) 53 | return cls._instances[cls] 54 | 55 | 56 | def singleton_pattern(cls): 57 | """singleton_pattern. 58 | Helper singleton_pattern decorater. 59 | """ 60 | return _singleton(cls.__name__, cls.__bases__, dict(cls.__dict__)) 61 | 62 | 63 | """Funnel Interface class""" 64 | 65 | 66 | @singleton_pattern 67 | class Funnel(object): 68 | """Funnel. 69 | Funnel Class which gets the required Funnel given in 70 | configuration. 71 | """ 72 | 73 | @typeguard.typechecked 74 | def __new__( 75 | cls, 76 | data_path: str, 77 | config: dict, 78 | datatype: str = "bbox", 79 | training: bool = True, 80 | ): 81 | # pylint: disable=line-too-long 82 | 83 | """__new__. 84 | 85 | Args: 86 | data_path: Data path in structured format,please see readme file 87 | for more information. 88 | config: Config passed as dict instance containing all required. 
89 | datatype: Dataset type e.g ['bbox','categorical','segmentation'], 90 | bbox - Bounding Box dataset containing object detection 91 | data. i.e x1,y1,x2,y2 92 | categorical - Categorical data i.e categorical 93 | (multi class) or binary (two class) for 94 | Classification problems. 95 | Example: 96 | ********************************************************** 97 | >> from TensorPipe.pipe import Funnel 98 | >> funnel = Funnel('testdata',config=config,datatype='categorical') 99 | # high performance with parallelism tf.data iterable. 100 | >> dataset = funnel.dataset(type = 'train') 101 | 102 | """ 103 | # pylint: enable=line-too-long 104 | if datatype not in funnels.ALLOWED_TYPES: 105 | raise TypeError( 106 | "datasettype not in ALLOWED_TYPEs, please check\ 107 | allowed dataset i.e bbox,classification labels,\ 108 | segmentation." 109 | ) 110 | container.config.external_config.from_value(config) 111 | container.config.datatype.from_value(datatype) 112 | container.wire(modules=["..funnels"]) 113 | _funnel_class = FUNNEL.get(datatype) 114 | 115 | # dependency injected 116 | return _funnel_class(data_path, datatype=datatype, training=training) 117 | -------------------------------------------------------------------------------- /tensorpipe/augment/augment.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2020 Kartik Sharma 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | """ 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import functools 22 | import inspect 23 | import typeguard 24 | from typing import List 25 | 26 | import tensorflow as tf 27 | import tensorflow_addons as tfa 28 | 29 | from ..augment import augmentations 30 | from ..register.register import AUG 31 | 32 | ALLOWED_TRANSFORMATIONS = [ 33 | "flip_left_right", 34 | "random_rotate", 35 | "gridmask", 36 | "random_rotate", 37 | "random_shear_y", 38 | "cutout", 39 | "mosaic", 40 | "random_shear_x", 41 | ] 42 | 43 | 44 | """Augment Class for interface of Augmentations.""" 45 | 46 | 47 | class Augmentation(augmentations.TransformMixin): 48 | """Augmentation. 49 | Class Augmentation which consists inhouse augmentations and builds 50 | the transformations pipeline with given transformations in config. 51 | :: 52 | 53 | Example: 54 | 55 | augment = Augmentation(config,["random_rotate","gridmask"]) 56 | # Use the pipeline and iterate over function in the pipeline. 57 | pipeline = augment._pipeline 58 | 59 | """ 60 | 61 | @typeguard.typechecked 62 | def __init__(self, config: dict, transformations: dict, type: str = "bbox"): 63 | """__init__. 64 | Augmentation class provides and builds the augmentations pipe- 65 | line required for tf.data iterable. 66 | 67 | Args: 68 | config: config file containing augmentations and kwargs required. 69 | transformations: transformations contains list of augmentations 70 | to build the pipeline one. 71 | type: type of dataset to built the pipeline for e.g bbox, 72 | keypoints,categorical,etc. 73 | """ 74 | self.config = config 75 | self.type = type 76 | self.transformations = transformations 77 | self._pipeline = [] 78 | self.image_size = config.image_size 79 | # builds the augment pipeline. 
80 | self._pipeline.append( 81 | (functools.partial(tf.image.resize, size=self.image_size), False) 82 | ) 83 | 84 | def setup(self): 85 | # set tfa attributes 86 | self._set_tfa_attrb() 87 | 88 | for transform, kwargs in self.transformations.items(): 89 | if transform not in ALLOWED_TRANSFORMATIONS and not hasattr( 90 | tf.image, transform 91 | ): 92 | raise ValueError( 93 | f"{transform} is not a valid augmentation for \ 94 | tf.image or TensorPipe,please visit readme section" 95 | ) 96 | 97 | kwargs = kwargs if isinstance(kwargs, dict) else {} 98 | 99 | if hasattr(tf.image, transform): 100 | transform = getattr(tf.image, transform) 101 | transform = functools.partial(transform, **kwargs) 102 | self._pipeline.append((transform, False)) 103 | else: 104 | transform = getattr(self, transform) 105 | transform = functools.partial(transform, **kwargs) 106 | self._pipeline.append((transform, True)) 107 | 108 | def _set_tfa_attrb(self): 109 | """_set_tfa_attrb. 110 | helper function which bounds attributes of tfa.image to self. 111 | """ 112 | _ = [ 113 | setattr(self, attrib[0], attrib[1]) 114 | for attrib in inspect.getmembers(tfa.image) 115 | if inspect.isfunction(attrib[1]) 116 | ] 117 | 118 | 119 | class Augment(Augmentation): 120 | """Augment. 121 | Augmentation Interface which performs the augmentation in pipeline 122 | in sequential manner. 123 | """ 124 | 125 | @typeguard.typechecked 126 | def __init__(self, config: dict, datatype: str = "bbox"): 127 | """__init__. 128 | 129 | Args: 130 | config: config file. 131 | datatype: dataset type i.e bbox,keypoints,caetgorical,etc. 
132 | """ 133 | self.config = config 134 | self.transformations = self.config.transformations 135 | self.dataset_type = datatype 136 | super().__init__(config, self.transformations, type=datatype) 137 | self.setup() 138 | 139 | @typeguard.typechecked 140 | def __call__( 141 | self, 142 | image: tf.Tensor, 143 | label: tf.Tensor, 144 | image_id=None, 145 | classes=None, 146 | return_image_label=True, 147 | ) -> (tf.Tensor, tf.Tensor): 148 | """__call__. 149 | Callable which is invoked in tfdata pipeline and performs the 150 | actual transformation on images and labels. 151 | 152 | Args: 153 | image: Image Tensor tf.Tensor. 154 | label: Label tensor tf.Tensor. 155 | 156 | Returns: 157 | Returns the transform image and labels. 158 | """ 159 | for transform in self._pipeline: 160 | transform_func, pass_label = transform 161 | if pass_label: 162 | image, label = transform_func(image, label) 163 | else: 164 | image = transform_func(image) 165 | if return_image_label: 166 | return image, label 167 | else: 168 | return image_id, image, label, classes 169 | -------------------------------------------------------------------------------- /tensorpipe/funnels/base_funnel.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2020 Kartik Sharma 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | """ 16 | 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import os 23 | from abc import ABC, abstractmethod, abstractproperty 24 | 25 | import tensorflow as tf 26 | import logging 27 | 28 | ALLOWED_TYPES = ["categorical", "binary", "bbox"] 29 | 30 | 31 | """Funnel Abstract Class provides essential helper functions across""" 32 | 33 | 34 | class Funnel(ABC): 35 | """Funnel. 36 | Abstract Funnel Class which acts as intterface for three supported 37 | Class of dataset, and provides helper functions. 38 | """ 39 | 40 | AUTOTUNE = tf.data.experimental.AUTOTUNE 41 | 42 | @property 43 | def allowed_dataset_types(self): 44 | return ALLOWED_TYPES 45 | 46 | @property 47 | @abstractmethod 48 | def classes(self): 49 | raise NotImplementedError 50 | 51 | def tf_path_pattern(self, path): 52 | return os.path.join(path, "*.record") 53 | 54 | @property 55 | @abstractmethod 56 | def data_path(self): 57 | return self._data_path 58 | 59 | @property 60 | @abstractmethod 61 | def datatype(self): 62 | return self._datatype 63 | 64 | @datatype.setter 65 | def datatype(self, value): 66 | if value not in self.allowed_dataset_types: 67 | msg = f"{value} is not in {self.allowed_dataset_types}" 68 | logging.error(msg) 69 | raise TypeError("Only str allowed") 70 | self._data_path = value 71 | 72 | @property 73 | def size(self): 74 | return self._size 75 | 76 | @property 77 | def optimized_options(self): 78 | options = tf.data.Options() 79 | options.experimental_deterministic = not self._training 80 | options.experimental_optimization.map_vectorization.enabled = True 81 | options.experimental_optimization.map_parallelization = True 82 | options.experimental_optimization.parallel_batch = True 83 | return options 84 | 85 | @abstractmethod 86 | def parser(self): 87 | """parser. 88 | Parser Abstract method which will act as abstract method for 89 | Base classes. 
90 | """ 91 | raise NotImplementedError( 92 | "Method parser is not implemented in class " + self.__class__.__name__ 93 | ) 94 | 95 | @abstractmethod 96 | def encoder(self): 97 | """encoder. 98 | Encoder Abstract which is abstractmethod, Encoder encodes 99 | output in required format i.e fixed data size in bbox,segmentation. 100 | """ 101 | raise NotImplementedError( 102 | "Method encoder is not implemented in class " + self.__class__.__name__ 103 | ) 104 | 105 | def _fetch_records(self, filename): 106 | """_fetch_records. 107 | Fetches record files using TfRecordDataset 108 | 109 | Args: 110 | filename: filename to be fetched 111 | """ 112 | """_fetch_records. 113 | 114 | Args: 115 | filename: 116 | """ 117 | return tf.data.TFRecordDataset(filename).prefetch(1) 118 | 119 | @staticmethod 120 | def _pad_data(data, pad_value, output_shape): 121 | """helper function which pads data to given shape.""" 122 | max_instances_per_image = output_shape[0] 123 | dimension = output_shape[1] 124 | data = tf.reshape(data, [-1, dimension]) 125 | num_instances = tf.shape(data)[0] 126 | msg = "ERROR: no. of object are more than max_instances_per_image, please increase max_instances_per_image." 127 | with tf.control_dependencies( 128 | [tf.assert_less(num_instances, max_instances_per_image, message=msg)] 129 | ): 130 | pad_length = max_instances_per_image - num_instances 131 | paddings = pad_value * tf.ones([pad_length, dimension]) 132 | padded_data = tf.concat([data, paddings], axis=0) 133 | padded_data = tf.reshape(padded_data, output_shape) 134 | return padded_data 135 | 136 | def pad_to_fixed_len(self, *args): 137 | """ 138 | Bundle inputs into a fixed length. 
139 | """ 140 | 141 | image_id, image, bboxes, classes = args 142 | return ( 143 | image_id, 144 | image, 145 | self._pad_data(bboxes, -1, [self.max_instances_per_image, 4]), 146 | self._pad_data(classes, -1, [self.max_instances_per_image, 1]), 147 | ) 148 | 149 | @staticmethod 150 | def pretraining(ds, cache=False): 151 | """pretraining. 152 | Provides post training configuration i.e prefetching,caching, 153 | batches,opitmizations. 154 | 155 | Args: 156 | ds: tf.data dataset reference 157 | cache: Cache the dataset, WARNING: use only if dataset is small 158 | enough to fit in ram, default False. 159 | """ 160 | if cache: 161 | if isinstance(cache, str): 162 | ds = ds.cache(cache) 163 | else: 164 | ds = ds.cache() 165 | return ds.prefetch(tf.data.experimental.AUTOTUNE) 166 | 167 | @abstractmethod 168 | def from_dataset(self): 169 | """from_dataset. 170 | abstractmethod for dataset, returns iterable which can be used 171 | for feed inputs to neural network. 172 | provides high performing, low latency data iterable. 173 | """ 174 | raise NotImplementedError( 175 | "Method dataset is not implemented in class " + self.__class__.__name__ 176 | ) 177 | 178 | @abstractmethod 179 | def from_tfrecords(self): 180 | """from_tfrecords. 181 | abstractmethod for fetch tfrecords, returns iterable which can be used 182 | for feed inputs to neural network. 183 | provides high performing, low latency data iterable. 184 | """ 185 | raise NotImplementedError( 186 | "Method dataset is not implemented in class " + self.__class__.__name__ 187 | ) 188 | 189 | @abstractmethod 190 | def from_remote(self): 191 | """from_remote. 192 | abstractmethod for fetch remote files, returns iterable which can be used 193 | for feed inputs to neural network. 194 | provides high performing, low latency data iterable. 
195 | """ 196 | raise NotImplementedError( 197 | "Method dataset is not implemented in class " + self.__class__.__name__ 198 | ) 199 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TensorPipe 2 | [![Library][tensorflow-shield]][tensorflow-url] 3 | [![MIT License][license-shield]][license-url] 4 | [![LinkedIn][linkedin-shield]][linkedin-url] 5 | [![codecov](https://codecov.io/gh/kartik4949/TensorPipe/branch/main/graph/badge.svg?token=RWGTCX17C1)](https://codecov.io/gh/kartik4949/TensorPipe) 6 | 7 | ![alt text](logo.png) 8 | 9 | 10 | High Performance Tensorflow Data Pipeline with State of Art Augmentations build as a wrapper aroung tensorflow datasets api, with low level optimizations. 11 | 12 | ## Requirements 13 | 14 | * Python 3.8 15 | * Tensorflow 2.2 16 | * Tensorflow addons 17 | * Sklearn 18 | * typeguard 19 | 20 | Install using: 21 | 22 | ``` 23 | pip install tensorflow-addons==0.11.2 24 | pip install tensorflow==2.2.0 25 | pip install sklearn 26 | pip install typeguard 27 | ``` 28 | 29 | ## Features 30 | 31 | - [x] High Performance tf.data pipline 32 | - [x] Core tensorflow support for high performance 33 | - [x] Classification data support 34 | - [x] Bbox data support 35 | - [ ] Keypoints data support 36 | - [ ] Segmentation data support 37 | - [x] GridMask in core tf2.x 38 | - [x] Mosiac Augmentation in core tf2.x 39 | - [x] CutOut in core tf2.x 40 | - [x] Flexible and easy configuration 41 | - [x] Gin-config support 42 | - [x] Custom numpy function injection. 43 | ## Advance Users Section: 44 | ## Example Usage 1 45 | ### Create a Data Pipeline for Training. (Categorical Data). 46 | ```python 47 | from pipe import Funnel 48 | from bunch import Bunch 49 | """ 50 | Create a Funnel for the Pipeline! 
51 | """ 52 | 53 | 54 | # Config for Funnel 55 | config = { 56 | "batch_size": 2, 57 | "image_size": [512,512], 58 | "transformations": { 59 | "flip_left_right": None, 60 | "gridmask": None, 61 | "random_rotate":None, 62 | }, 63 | "categorical_encoding":"labelencoder" 64 | } 65 | config = Bunch(config) 66 | pipeline = Funnel(data_path="testdata", config=config, datatype="categorical") 67 | pipeline = pipeline.from_dataset(type="train") 68 | 69 | # Pipline ready to use, iter over it to use. 70 | # Custom loop example. 71 | for data in pipeline: 72 | image_batch , label_batch = data[0], data[1] 73 | # you can use _loss = loss(label_batch,model.predict(image_batch)) 74 | # calculate gradients on loss and optimize the model. 75 | print(image_batch,label_batch) 76 | 77 | ``` 78 | 79 | ## Example Usage 2 80 | ### Create a Data Pipeline for Bounding Boxes with tf records. 81 | 82 | ```python 83 | import numpy as np 84 | from tensorpipe.pipe import Funnel 85 | 86 | """ 87 | Create a Funnel for the Pipeline! 88 | """ 89 | 90 | # Custom numpy code for injection. 91 | def numpy_function(image, label): 92 | """normalize image""" 93 | image = image / 255.0 94 | return image, label 95 | 96 | 97 | config = { 98 | "batch_size": 2, 99 | "image_size": [512, 512], 100 | "transformations": { 101 | "flip_left_right": None, 102 | "gridmask": None, 103 | "random_rotate": None, 104 | }, 105 | "max_instances_per_image": 100, 106 | "categorical_encoding": "labelencoder", 107 | "numpy_function": numpy_function, 108 | } 109 | funnel = Funnel(data_path="tfrecorddata", config=config, datatype="bbox") 110 | dataset = funnel.from_tfrecords(type="train") 111 | 112 | for data in dataset: 113 | print(data[1].shape) 114 | 115 | ``` 116 | # Object Detection Usage 117 | ### Now build your custom bounding box funnel with subclassing BboxFunnel. 
118 | 119 | ```python 120 | from tensorpipe.funnels import funnels 121 | class CustomObjectDetectionLoader(funnels.BboxFunnel): 122 | def __init__(self, *args): 123 | super().__init__(*args) 124 | 125 | def encoder(self,args): 126 | # encoder is overridden to give custom anchors to the model as per the need. 127 | 128 | image_id, image, bbox, classes = args 129 | # make custom anchors and encode the image and bboxes as per the model need. 130 | return image, custom_anchors, classes 131 | def decoder(self, args): 132 | # override decoder if using custom tf records and decode your custom tfrecord in this method 133 | return decoded_data 134 | 135 | funnel = CustomObjectDetectionLoader(data_path="tfrecorddata", config=config, datatype="bbox") 136 | dataset = funnel.from_tfrecords(type="train") 137 | ``` 138 | 139 | # Steps to build tfrecords and custom input loader. 140 | - use funnels.create_records script to build tfrecord or build your own tfrecords using your custom script. 141 | - if used the create_records script to build records, we don't need to override decoder, but if using custom 142 | script overriding the decoder function is mandatory. 143 | - If using anchors in the models, please override encoder with custom anchor script. 144 | 145 | 146 | ## Beginners Section. 147 | ## Keras Compatibility. 148 | ### Very simple example to use pipeline with keras model.fit as iterable. 149 | ```python 150 | import tensorflow as tf 151 | from pipe import Funnel 152 | 153 | """ 154 | Create a Funnel for the Pipeline! 155 | """ 156 | 157 | config = { 158 | "batch_size": 2, 159 | "image_size": [100, 100], 160 | "transformations": { 161 | "flip_left_right": None, 162 | "gridmask": None, 163 | "random_rotate": None, 164 | }, 165 | "categorical_encoding": "labelencoder", 166 | } 167 | pipeline = Funnel(data_path="testdata", config=config, datatype="categorical") 168 | # from dataset i.e normal dataset.
169 | pipeline = pipeline.from_dataset(type="train") 170 | 171 | # e.g from tfrecords i.e tfrecord dataset. 172 | # pipeline = pipeline.from_tfrecords(type="train") # testdata/train/*.tfrecord 173 | 174 | # Create Keras model 175 | model = tf.keras.applications.VGG16( 176 | include_top=True, weights=None,input_shape=(100,100,3), 177 | pooling=None, classes=2, classifier_activation='sigmoid' 178 | ) 179 | 180 | # compile 181 | model.compile(loss='mse', optimizer='adam') 182 | 183 | # pass pipeline as iterable 184 | model.fit(pipeline , batch_size=2,steps_per_epoch=5,verbose=1) 185 | ``` 186 | 187 | ## Config. 188 | * **image_size** - Output Image Size for the pipeline. 189 | * **batch_size** - Batch size for the pipeline. 190 | * **transformations** - Dictionary of transformations to apply with respective keyword arguments. 191 | * **categorical_encoding** - Encoding for categorical data - ('labelencoder' , 'onehotencoder'). 192 | 193 | ## Augmentations: 194 | 195 | ### GridMask 196 | Creates a gridmask on input image with rotation defined on range. 197 | * **params**: 198 | * **ratio** - grid to space ratio 199 | * **fill** - fill value 200 | * **rotate** - rotation range in degrees 201 | 202 | ### MixUp 203 | Mixes two randomly sampled images and their respective labels with given alpha. 204 | * **params**: 205 | * **alpha** - value for blend function. 206 | 207 | ### RandomErase 208 | Randomly erases a rectangular chunk which is sampled randomly on the given image. 209 | * **params**: 210 | * **prob** - probability to randomerase on image. 211 | 212 | ### CutMix 213 | Overlaps a resized randomly sampled image on the given image with complete overlay on a subset portion of the image. 214 | * **params**: 215 | * **prob** - probability to CutMix on image. 216 | 217 | ### Mosaic 218 | Creates a mosaic of 4 input images into one single image. 219 | * **params**: 220 | * **prob** - Probability to mosaic.
221 | 222 | ## CutMix, CutOut, MixUp 223 | 224 | ![alt text](https://www.researchgate.net/publication/340296142/figure/fig1/AS:874996595429376@1585626853032/Comparison-of-our-proposed-Attentive-CutMix-with-Mixup-5-Cutout-1-and-CutMix-3.png) 225 | #### source (https://www.researchgate.net/publication/340296142/figure/fig1/AS:874996595429376@1585626853032/Comparison-of-our-proposed-Attentive-CutMix-with-Mixup-5-Cutout-1-and-CutMix-3.png) 226 | 227 | ## Mosaic 228 | ![alt-text](https://hoya012.github.io/assets/img/yolov4/8.PNG) 229 | #### source (https://hoya012.github.io/assets/img/yolov4/8.PNG) 230 | 231 | ## Grid Mask 232 | ![alt-text](https://storage.googleapis.com/groundai-web-prod/media/users/user_302546/project_404544/images/x1.png) 233 | #### source (https://storage.googleapis.com/groundai-web-prod/media/users/user_302546/project_404544/images/x1.png) 234 | 235 | 236 | ## Release History 237 | * v1.0 238 | * Bbox, Keypoints, Custom Py Functions Support.(WIP) 239 | * v1.0-beta 240 | * Classification Support with gridmask and mosaic augmentations. 241 | 242 | ## Meta 243 | 244 | Kartik Sharma – [@linkedIn](https://www.linkedin.com/in/kartik-sharma-aaa021169/) – kartik4949@gmail.com 245 | 246 | Distributed under the Apache 2.0 license. See ``LICENSE`` for more information. 
247 | 248 | 249 | 250 | 251 | [tensorflow-shield]: https://img.shields.io/badge/Tensorflow-2.x-orange 252 | [tensorflow-url]: https://tensorflow.org 253 | [license-shield]: https://img.shields.io/badge/OpenSource-%E2%9D%A4%EF%B8%8F-blue 254 | [license-url]: LICENSE 255 | [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=flat-square&logo=linkedin&colorB=555 256 | [linkedin-url]: https://www.linkedin.com/in/kartik-sharma-aaa021169/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files.
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /tensorpipe/augment/augmentations.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2020 Kartik Sharma 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | """ 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import functools 21 | from absl import logging 22 | 23 | import tensorflow as tf 24 | import tensorflow_addons as tfa 25 | import numpy as np 26 | 27 | from ..register.register import AUG 28 | 29 | 30 | def radians(degree: int) -> float: 31 | """radians. 32 | helper function converts degrees to radians. 33 | Args: 34 | degree: degrees. 35 | """ 36 | pi_on_180 = 0.017453292519943295 37 | return degree * pi_on_180 38 | 39 | 40 | """Grid Masking Augmentation Reference: https://arxiv.org/abs/2001.04086""" 41 | 42 | 43 | @AUG.register_module(name="gridmask") 44 | class GridMask(object): 45 | """GridMask. 46 | Class which provides grid masking augmentation 47 | masks a grid with fill_value on the image. 48 | """ 49 | 50 | def __init__( 51 | self, 52 | image_shape, 53 | ratio=0.6, 54 | rotate=10, 55 | gridmask_size_ratio=0.5, 56 | fill=1, 57 | ): 58 | """__init__. 59 | 60 | Args: 61 | image_shape: Image shape (h,w,channels) 62 | ratio: grid mask ratio i.e if 0.5 grid and spacing will be equal 63 | rotate: Rotation of grid mesh 64 | gridmask_size_ratio: Grid mask size, grid to image size ratio. 65 | fill: Fill value for grids. 66 | """ 67 | self.h = image_shape[0] 68 | self.w = image_shape[1] 69 | self.ratio = ratio 70 | self.rotate = rotate 71 | self.gridmask_size_ratio = gridmask_size_ratio 72 | self.fill = fill 73 | 74 | @staticmethod 75 | def random_crop(mask, image_shape): 76 | """random_crop. 77 | crops in middle of mask and image corners. 
78 | 79 | Args: 80 | mask: Grid Mask 81 | image_shape: (h,w) 82 | """ 83 | hh, ww = mask.shape 84 | h, w = image_shape[:2] 85 | mask = mask[ 86 | (hh - h) // 2 : (hh - h) // 2 + h, 87 | (ww - w) // 2 : (ww - w) // 2 + w, 88 | ] 89 | return mask 90 | 91 | @tf.function 92 | def mask(self): 93 | """mask helper function for initializing grid mask of required size.""" 94 | mask_w = mask_h = int((self.gridmask_size_ratio + 1) * max(self.h, self.w)) 95 | mask = tf.zeros(shape=[mask_h, mask_w], dtype=tf.int32) 96 | gridblock = tf.random.uniform( 97 | shape=[], 98 | minval=int(min(self.h * 0.5, self.w * 0.3)), 99 | maxval=int(max(self.h * 0.5, self.w * 0.3)), 100 | dtype=tf.int32, 101 | ) 102 | 103 | if self.ratio == 1: 104 | length = tf.random.uniform( 105 | shape=[], minval=1, maxval=gridblock, dtype=tf.int32 106 | ) 107 | else: 108 | length = tf.cast( 109 | tf.math.minimum( 110 | tf.math.maximum( 111 | int(tf.cast(gridblock, tf.float32) * self.ratio + 0.5), 112 | 1, 113 | ), 114 | gridblock - 1, 115 | ), 116 | tf.int32, 117 | ) 118 | 119 | for _ in range(2): 120 | start_w = tf.random.uniform( 121 | shape=[], minval=0, maxval=gridblock, dtype=tf.int32 122 | ) 123 | for i in range(mask_w // gridblock): 124 | start = gridblock * i + start_w 125 | end = tf.math.minimum(start + length, mask_w) 126 | indices = tf.reshape(tf.range(start, end), [end - start, 1]) 127 | updates = ( 128 | tf.ones(shape=[end - start, mask_w], dtype=tf.int32) * self.fill 129 | ) 130 | mask = tf.tensor_scatter_nd_update(mask, indices, updates) 131 | mask = tf.transpose(mask) 132 | 133 | return mask 134 | 135 | def __call__(self, image, label): 136 | grid = self.mask() 137 | mask = self.__class__.random_crop(grid, image.shape) 138 | mask = tf.cast(mask, image.dtype) 139 | mask = tf.expand_dims(mask, -1) if image._rank() != mask._rank() else mask 140 | image *= mask 141 | return image, label 142 | 143 | 144 | """Mosaic augmentation.""" 145 | 146 | 147 | @AUG.register_module(name="mosaic") 148 | class 
Mosaic: 149 | """Mosaic Augmentation class. 150 | 1. Mosaic sub images will not be preserving aspect ratio of original images. 151 | 2. Tested on static graphs and eager execution. 152 | 3. This Implementation of mosaic augmentation is tested in tf2.x. 153 | """ 154 | 155 | def __init__( 156 | self, 157 | out_size=(680, 680), 158 | n_images: int = 4, 159 | _minimum_mosaic_image_dim: int = 25, 160 | ): 161 | """__init__. 162 | Args: 163 | out_size: output mosaic image size. 164 | n_images: number images to make mosaic 165 | _minimum_mosaic_image_dim: minimum percentage of out_size dimension 166 | should the mosaic be. i.e if out_size is (680,680) and 167 | _minimum_mosaic_image_dim is 25 , minimum mosaic sub images 168 | dimension will be 25 % of 680. 169 | """ 170 | # TODO(someone) #MED #use n_images to build mosaic. 171 | self._n_images = n_images 172 | self._out_size = out_size 173 | self._minimum_mosaic_image_dim = _minimum_mosaic_image_dim 174 | assert ( 175 | _minimum_mosaic_image_dim > 0 176 | ), "Minimum Mosaic image dimension should be above 0" 177 | 178 | @property 179 | def n_images(self) -> int: 180 | return self._n_images 181 | 182 | @property 183 | def out_size(self) -> int: 184 | return self._out_size 185 | 186 | def _mosaic_divide_points(self) -> (int, int): 187 | """Returns a tuple of x and y which corresponds to mosaic divide points.""" 188 | x_point = tf.random.uniform( 189 | shape=[1], 190 | minval=tf.cast( 191 | self.out_size[0] * (self._minimum_mosaic_image_dim / 100), 192 | tf.int32, 193 | ), 194 | maxval=tf.cast( 195 | self.out_size[0] * ((100 - self._minimum_mosaic_image_dim) / 100), 196 | tf.int32, 197 | ), 198 | dtype=tf.int32, 199 | ) 200 | y_point = tf.random.uniform( 201 | shape=[1], 202 | minval=tf.cast( 203 | self.out_size[1] * (self._minimum_mosaic_image_dim / 100), 204 | tf.int32, 205 | ), 206 | maxval=tf.cast( 207 | self.out_size[1] * ((100 - self._minimum_mosaic_image_dim) / 100), 208 | tf.int32, 209 | ), 210 | dtype=tf.int32, 211 
| ) 212 | return x_point, y_point 213 | 214 | @staticmethod 215 | def _scale_box(box, image, mosaic_image): 216 | """scale boxes with mosaic sub image. 217 | Args: 218 | box: mosaic image box. 219 | image: original image. 220 | mosaic_image: mosaic sub image. 221 | Returns: 222 | Scaled bounding boxes. 223 | """ 224 | return [ 225 | box[0] * tf.shape(mosaic_image)[1] / tf.shape(image)[1], 226 | box[1] * tf.shape(mosaic_image)[0] / tf.shape(image)[0], 227 | box[2] * tf.shape(mosaic_image)[1] / tf.shape(image)[1], 228 | box[-1] * tf.shape(mosaic_image)[0] / tf.shape(image)[0], 229 | ] 230 | 231 | def _scale_images(self, images, mosaic_divide_points): 232 | """Scale Sub Images. 233 | Args: 234 | images: original single images to make mosaic. 235 | mosaic_divide_points: Points to build mosaic around on given output. 236 | Returns: 237 | A tuple of scaled Mosaic sub images. 238 | """ 239 | x, y = mosaic_divide_points[0][0], mosaic_divide_points[1][0] 240 | mosaic_image_topleft = tf.image.resize(images[0], (x, y)) 241 | mosaic_image_topright = tf.image.resize(images[1], (self.out_size[0] - x, y)) 242 | mosaic_image_bottomleft = tf.image.resize(images[2], (x, self.out_size[1] - y)) 243 | mosaic_image_bottomright = tf.image.resize( 244 | images[3], (self.out_size[0] - x, self.out_size[1] - y) 245 | ) 246 | return ( 247 | mosaic_image_topleft, 248 | mosaic_image_topright, 249 | mosaic_image_bottomleft, 250 | mosaic_image_bottomright, 251 | ) 252 | 253 | @tf.function 254 | def _mosaic(self, images, boxes, mosaic_divide_points): 255 | """Builds mosaic of provided images. 256 | Args: 257 | images: original single images to make mosaic. 258 | boxes: corresponding bounding boxes to images. 259 | mosaic_divide_points: Points to build mosaic around on given output size. 260 | Returns: 261 | A tuple of mosaic Image, Mosaic Boxes merged. 
262 | """ 263 | ( 264 | mosaic_image_topleft, 265 | mosaic_image_topright, 266 | mosaic_image_bottomleft, 267 | mosaic_image_bottomright, 268 | ) = self._scale_images(images, mosaic_divide_points) 269 | 270 | ##################################################### 271 | # Scale Boxes for TOP LEFT image. 272 | # Note: Below function is complex because of TF item assignment restriction. 273 | # Map_fn is replace with vectorized_map below for optimization purpose. 274 | mosaic_box_topleft = tf.transpose( 275 | tf.vectorized_map( 276 | functools.partial( 277 | self._scale_box, 278 | image=images[0], 279 | mosaic_image=mosaic_image_topleft, 280 | ), 281 | boxes[0], 282 | ) 283 | ) 284 | 285 | # Scale and Pad Boxes for TOP RIGHT image. 286 | 287 | mosaic_box_topright = tf.vectorized_map( 288 | functools.partial( 289 | self._scale_box, 290 | image=images[1], 291 | mosaic_image=mosaic_image_topright, 292 | ), 293 | boxes[1], 294 | ) 295 | num_boxes = boxes[1].shape[0] 296 | idx_tp = tf.constant([[1], [3]]) 297 | update_tp = [ 298 | [tf.shape(mosaic_image_topleft)[0]] * num_boxes, 299 | [tf.shape(mosaic_image_topleft)[0]] * num_boxes, 300 | ] 301 | mosaic_box_topright = tf.transpose( 302 | tf.tensor_scatter_nd_add(mosaic_box_topright, idx_tp, update_tp) 303 | ) 304 | 305 | # Scale and Pad Boxes for BOTTOM LEFT image. 306 | 307 | mosaic_box_bottomleft = tf.vectorized_map( 308 | functools.partial( 309 | self._scale_box, 310 | image=images[2], 311 | mosaic_image=mosaic_image_bottomleft, 312 | ), 313 | boxes[2], 314 | ) 315 | 316 | num_boxes = boxes[2].shape[0] 317 | idx_bl = tf.constant([[0], [2]]) 318 | update_bl = [ 319 | [tf.shape(mosaic_image_topleft)[1]] * num_boxes, 320 | [tf.shape(mosaic_image_topleft)[1]] * num_boxes, 321 | ] 322 | mosaic_box_bottomleft = tf.transpose( 323 | tf.tensor_scatter_nd_add(mosaic_box_bottomleft, idx_bl, update_bl) 324 | ) 325 | 326 | # Scale and Pad Boxes for BOTTOM RIGHT image. 
327 | mosaic_box_bottomright = tf.vectorized_map( 328 | functools.partial( 329 | self._scale_box, 330 | image=images[3], 331 | mosaic_image=mosaic_image_bottomright, 332 | ), 333 | boxes[3], 334 | ) 335 | 336 | num_boxes = boxes[3].shape[0] 337 | idx_br = tf.constant([[0], [2], [1], [3]]) 338 | update_br = [ 339 | [tf.shape(mosaic_image_topright)[1]] * num_boxes, 340 | [tf.shape(mosaic_image_topright)[1]] * num_boxes, 341 | [tf.shape(mosaic_image_bottomleft)[0]] * num_boxes, 342 | [tf.shape(mosaic_image_bottomleft)[0]] * num_boxes, 343 | ] 344 | mosaic_box_bottomright = tf.transpose( 345 | tf.tensor_scatter_nd_add(mosaic_box_bottomright, idx_br, update_br) 346 | ) 347 | 348 | # Gather mosaic_sub_images and boxes. 349 | mosaic_images = [ 350 | mosaic_image_topleft, 351 | mosaic_image_topright, 352 | mosaic_image_bottomleft, 353 | mosaic_image_bottomright, 354 | ] 355 | mosaic_boxes = [ 356 | mosaic_box_topleft, 357 | mosaic_box_topright, 358 | mosaic_box_bottomleft, 359 | mosaic_box_bottomright, 360 | ] 361 | 362 | return mosaic_images, mosaic_boxes 363 | 364 | def __call__(self, images, boxes): 365 | """Builds mosaic with given images, boxes.""" 366 | if images.shape[0] != 4: 367 | err_msg = "Currently Exact 4 Images are supported by Mosaic Aug." 
368 | logging.error(err_msg) 369 | raise Exception(err_msg) 370 | 371 | x, y = self._mosaic_divide_points() 372 | mosaic_sub_images, mosaic_boxes = self._mosaic( 373 | images, boxes, mosaic_divide_points=(x, y) 374 | ) 375 | 376 | upper_stack = tf.concat([mosaic_sub_images[0], mosaic_sub_images[1]], axis=0) 377 | lower_stack = tf.concat([mosaic_sub_images[2], mosaic_sub_images[3]], axis=0) 378 | mosaic_image = tf.concat([upper_stack, lower_stack], axis=1) 379 | return mosaic_image, mosaic_boxes 380 | 381 | 382 | @AUG.register_module(name="cutout") 383 | def cut_out( 384 | image, 385 | label, 386 | p=0.5, 387 | s_l=0.02, 388 | s_h=0.4, 389 | r_1=0.3, 390 | r_2=1 / 0.3, 391 | v_l=0, 392 | v_h=255, 393 | ): 394 | img_h, img_w, img_c = image.shape 395 | p_1 = np.random.rand() 396 | 397 | if p_1 > p: 398 | return image, label 399 | 400 | while True: 401 | s = np.random.uniform(s_l, s_h) * img_h * img_w 402 | r = np.random.uniform(r_1, r_2) 403 | w = int(np.sqrt(s / r)) 404 | h = int(np.sqrt(s * r)) 405 | left = np.random.randint(0, img_w) 406 | top = np.random.randint(0, img_h) 407 | 408 | if left + w <= img_w and top + h <= img_h: 409 | break 410 | c = np.random.uniform(v_l, v_h, (h, w, img_c)) 411 | image[top : top + h, left : left + w, :] = c 412 | return image, label 413 | 414 | 415 | class TransformMixin: 416 | """ A transformations helper class mixed with augmentations class. """ 417 | 418 | @tf.function 419 | def random_rotate( 420 | self, image, label, prob=0.6, range=[-25, 25], interpolation="BILINEAR" 421 | ): 422 | """random_rotate. 423 | Randomly rotates the given image using rotation range 424 | and probablity. 425 | 426 | Args: 427 | image: Image tensor. 428 | label: label tensor i.e labels,bboxes,keypoints, etc. 429 | prob: probablity is rotation occurs. 430 | range: range of rotation in degrees. 431 | interpolation: interpolation method. 
432 | 433 | Example: 434 | **************************************************** 435 | image , label = random_rotate(image,label,prob = 1.0) 436 | visualize(image) 437 | """ 438 | occur = tf.random.uniform([], 0, 1) < prob 439 | degree = tf.random.uniform([], range[0], range[1]) 440 | image = tf.cond( 441 | occur, 442 | lambda: tfa.image.rotate( 443 | image, radians(degree), interpolation=interpolation 444 | ), 445 | lambda: image, 446 | ) 447 | return image, label 448 | 449 | @tf.function 450 | def random_shear_x(self, image, label, prob=0.2, range=[0, 1]): 451 | """random_shear_x. 452 | Randomly shears the given image using shear range 453 | and probablity in x direction. 454 | 455 | Args: 456 | image: Image tensor. 457 | label: label tensor i.e labels,bboxes,keypoints, etc. 458 | prob: probablity if shear occurs. 459 | range: range of shear (0,1). 460 | 461 | Example: 462 | **************************************************** 463 | image , label = random_shear_x(image,label,prob = 1.0) 464 | visualize(image) 465 | """ 466 | 467 | occur = tf.random.uniform([], -0.15, 0.15) < prob 468 | shearx = tf.random.uniform([], range[0], range[1]) 469 | image = tfa.image.shear_x(image, level=shearx, replace=0) if occur else image 470 | return image, label 471 | 472 | @tf.function 473 | def random_shear_y(self, image, label, prob=0.2, range=[0, 1]): 474 | """random_shear_y. 475 | Randomly shears the given image using shear range 476 | and probablity in y direction. 477 | 478 | Args: 479 | image: Image tensor. 480 | label: label tensor i.e labels,bboxes,keypoints, etc. 481 | prob: probablity of shear. 482 | range: range of shear (0,1). 
483 | 484 | Example: 485 | **************************************************** 486 | image , label = random_shear_y(image,label,prob = 1.0) 487 | visualize(image) 488 | """ 489 | 490 | occur = tf.random.uniform([], 0, 1) < prob 491 | sheary = tf.random.uniform([], range[0], range[1]) 492 | image = tfa.image.shear_y(image, level=sheary) if occur else image 493 | return image, label 494 | 495 | def gridmask( 496 | self, 497 | image, 498 | label, 499 | ratio=0.6, 500 | rotate=10, 501 | gridmask_size_ratio=0.5, 502 | fill=1, 503 | ): 504 | """gridmask. 505 | GridMask initializer function which intializes GridMask class. 506 | 507 | Args: 508 | image: Image tensor. 509 | label: label tensor i.e labels,bboxes,keypoints, etc. 510 | ratio: Ratio of grid to space. 511 | rotate: rotation range for grid. 512 | gridmask_size_ratio: grid to image_size ratio. 513 | fill: fill value default 1. 514 | """ 515 | return AUG.get("gridmask")( 516 | self.image_size, 517 | ratio=ratio, 518 | rotate=rotate, 519 | gridmask_size_ratio=gridmask_size_ratio, 520 | fill=fill, 521 | ).__call__(image, label) 522 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | 3 | # A comma-separated list of package or module names from where C extensions may 4 | # be loaded. Extensions are loading into the active Python interpreter and may 5 | # run arbitrary code. 6 | extension-pkg-whitelist= 7 | 8 | # Add files or directories to the blacklist. They should be base names, not 9 | # paths. 10 | ignore=CVS 11 | 12 | # Add files or directories matching the regex patterns to the blacklist. The 13 | # regex matches against base names, not paths. 14 | ignore-patterns= 15 | 16 | # Python code to execute, usually for sys.path manipulation such as 17 | # pygtk.require(). 18 | #init-hook= 19 | 20 | # Use multiple processes to speed up Pylint. 
Specifying 0 will auto-detect the 21 | # number of processors available to use. 22 | jobs=1 23 | 24 | # Control the amount of potential inferred values when inferring a single 25 | # object. This can help the performance when dealing with large functions or 26 | # complex, nested conditions. 27 | limit-inference-results=100 28 | 29 | # List of plugins (as comma separated values of python modules names) to load, 30 | # usually to register additional checkers. 31 | load-plugins= 32 | 33 | # Pickle collected data for later comparisons. 34 | persistent=yes 35 | 36 | # Specify a configuration file. 37 | #rcfile= 38 | 39 | # When enabled, pylint would attempt to guess common misconfiguration and emit 40 | # user-friendly hints instead of false-positive error messages. 41 | suggestion-mode=yes 42 | 43 | # Allow loading of arbitrary C extensions. Extensions are imported into the 44 | # active Python interpreter and may run arbitrary code. 45 | unsafe-load-any-extension=no 46 | 47 | 48 | [MESSAGES CONTROL] 49 | 50 | # Only show warnings with the listed confidence levels. Leave empty to show 51 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. 52 | confidence= 53 | 54 | # Disable the message, report, category or checker with the given id(s). You 55 | # can either give multiple identifiers separated by comma (,) or put this 56 | # option multiple times (only on the command line, not in the configuration 57 | # file where it should appear only once). You can also use "--disable=all" to 58 | # disable everything first and then reenable specific checks. For example, if 59 | # you want to run only the similarities checker, you can use "--disable=all 60 | # --enable=similarities". If you want to run only the classes checker, but have 61 | # no Warning level messages displayed, use "--disable=all --enable=classes 62 | # --disable=W". 
63 | disable=print-statement, 64 | parameter-unpacking, 65 | unpacking-in-except, 66 | old-raise-syntax, 67 | backtick, 68 | long-suffix, 69 | old-ne-operator, 70 | old-octal-literal, 71 | import-star-module-level, 72 | non-ascii-bytes-literal, 73 | raw-checker-failed, 74 | bad-inline-option, 75 | locally-disabled, 76 | file-ignored, 77 | suppressed-message, 78 | useless-suppression, 79 | deprecated-pragma, 80 | use-symbolic-message-instead, 81 | apply-builtin, 82 | basestring-builtin, 83 | buffer-builtin, 84 | cmp-builtin, 85 | coerce-builtin, 86 | execfile-builtin, 87 | file-builtin, 88 | long-builtin, 89 | raw_input-builtin, 90 | reduce-builtin, 91 | standarderror-builtin, 92 | unicode-builtin, 93 | xrange-builtin, 94 | coerce-method, 95 | delslice-method, 96 | getslice-method, 97 | setslice-method, 98 | no-absolute-import, 99 | old-division, 100 | dict-iter-method, 101 | dict-view-method, 102 | next-method-called, 103 | metaclass-assignment, 104 | indexing-exception, 105 | raising-string, 106 | reload-builtin, 107 | oct-method, 108 | hex-method, 109 | nonzero-method, 110 | cmp-method, 111 | input-builtin, 112 | round-builtin, 113 | intern-builtin, 114 | unichr-builtin, 115 | map-builtin-not-iterating, 116 | zip-builtin-not-iterating, 117 | range-builtin-not-iterating, 118 | filter-builtin-not-iterating, 119 | using-cmp-argument, 120 | eq-without-hash, 121 | div-method, 122 | idiv-method, 123 | rdiv-method, 124 | exception-message-attribute, 125 | invalid-str-codec, 126 | sys-max-int, 127 | bad-python3-import, 128 | deprecated-string-function, 129 | deprecated-str-translate-call, 130 | deprecated-itertools-function, 131 | deprecated-types-field, 132 | next-method-defined, 133 | dict-items-not-iterating, 134 | dict-keys-not-iterating, 135 | dict-values-not-iterating, 136 | deprecated-operator-function, 137 | deprecated-urllib-function, 138 | xreadlines-attribute, 139 | deprecated-sys-function, 140 | exception-escape, 141 | comprehension-escape 142 | 143 | # 
Enable the message, report, category or checker with the given id(s). You can 144 | # either give multiple identifier separated by comma (,) or put this option 145 | # multiple time (only on the command line, not in the configuration file where 146 | # it should appear only once). See also the "--disable" option for examples. 147 | enable=c-extension-no-member 148 | 149 | 150 | [REPORTS] 151 | 152 | # Python expression which should return a note less than 10 (10 is the highest 153 | # note). You have access to the variables errors warning, statement which 154 | # respectively contain the number of errors / warnings messages and the total 155 | # number of statements analyzed. This is used by the global evaluation report 156 | # (RP0004). 157 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 158 | 159 | # Template used to display messages. This is a python new-style format string 160 | # used to format the message information. See doc for all details. 161 | #msg-template= 162 | 163 | # Set the output format. Available formats are text, parseable, colorized, json 164 | # and msvs (visual studio). You can also give a reporter class, e.g. 165 | # mypackage.mymodule.MyReporterClass. 166 | output-format=text 167 | 168 | # Tells whether to display a full report or only the messages. 169 | reports=no 170 | 171 | # Activate the evaluation score. 172 | score=yes 173 | 174 | 175 | [REFACTORING] 176 | 177 | # Maximum number of nested blocks for function / method body 178 | max-nested-blocks=5 179 | 180 | # Complete name of functions that never returns. When checking for 181 | # inconsistent-return-statements if a never returning function is called then 182 | # it will be considered as an explicit return statement and no message will be 183 | # printed. 184 | never-returning-functions=sys.exit 185 | 186 | 187 | [VARIABLES] 188 | 189 | # List of additional names supposed to be defined in builtins. 
Remember that 190 | # you should avoid defining new builtins when possible. 191 | additional-builtins= 192 | 193 | # Tells whether unused global variables should be treated as a violation. 194 | allow-global-unused-variables=yes 195 | 196 | # List of strings which can identify a callback function by name. A callback 197 | # name must start or end with one of those strings. 198 | callbacks=cb_, 199 | _cb 200 | 201 | # A regular expression matching the name of dummy variables (i.e. expected to 202 | # not be used). 203 | dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ 204 | 205 | # Argument names that match this expression will be ignored. Default to name 206 | # with leading underscore. 207 | ignored-argument-names=_.*|^ignored_|^unused_ 208 | 209 | # Tells whether we should check for unused import in __init__ files. 210 | init-import=no 211 | 212 | # List of qualified module names which can have objects that can redefine 213 | # builtins. 214 | redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io 215 | 216 | 217 | [BASIC] 218 | 219 | # Naming style matching correct argument names. 220 | argument-naming-style=snake_case 221 | 222 | # Regular expression matching correct argument names. Overrides argument- 223 | # naming-style. 224 | #argument-rgx= 225 | 226 | # Naming style matching correct attribute names. 227 | attr-naming-style=snake_case 228 | 229 | # Regular expression matching correct attribute names. Overrides attr-naming- 230 | # style. 231 | #attr-rgx= 232 | 233 | # Bad variable names which should always be refused, separated by a comma. 234 | bad-names=foo, 235 | bar, 236 | baz, 237 | toto, 238 | tutu, 239 | tata 240 | 241 | # Naming style matching correct class attribute names. 242 | class-attribute-naming-style=any 243 | 244 | # Regular expression matching correct class attribute names. Overrides class- 245 | # attribute-naming-style. 
246 | #class-attribute-rgx= 247 | 248 | # Naming style matching correct class names. 249 | class-naming-style=PascalCase 250 | 251 | # Regular expression matching correct class names. Overrides class-naming- 252 | # style. 253 | #class-rgx= 254 | 255 | # Naming style matching correct constant names. 256 | const-naming-style=UPPER_CASE 257 | 258 | # Regular expression matching correct constant names. Overrides const-naming- 259 | # style. 260 | #const-rgx= 261 | 262 | # Minimum line length for functions/classes that require docstrings, shorter 263 | # ones are exempt. 264 | docstring-min-length=-1 265 | 266 | # Naming style matching correct function names. 267 | function-naming-style=snake_case 268 | 269 | # Regular expression matching correct function names. Overrides function- 270 | # naming-style. 271 | #function-rgx= 272 | 273 | # Good variable names which should always be accepted, separated by a comma. 274 | good-names=i, 275 | j, 276 | k, 277 | ex, 278 | Run, 279 | _ 280 | 281 | # Include a hint for the correct naming format with invalid-name. 282 | include-naming-hint=no 283 | 284 | # Naming style matching correct inline iteration names. 285 | inlinevar-naming-style=any 286 | 287 | # Regular expression matching correct inline iteration names. Overrides 288 | # inlinevar-naming-style. 289 | #inlinevar-rgx= 290 | 291 | # Naming style matching correct method names. 292 | method-naming-style=snake_case 293 | 294 | # Regular expression matching correct method names. Overrides method-naming- 295 | # style. 296 | #method-rgx= 297 | 298 | # Naming style matching correct module names. 299 | module-naming-style=snake_case 300 | 301 | # Regular expression matching correct module names. Overrides module-naming- 302 | # style. 303 | #module-rgx= 304 | 305 | # Colon-delimited sets of names that determine each other's naming style when 306 | # the name regexes allow several styles. 
307 | name-group= 308 | 309 | # Regular expression which should only match function or class names that do 310 | # not require a docstring. 311 | no-docstring-rgx=^_ 312 | 313 | # List of decorators that produce properties, such as abc.abstractproperty. Add 314 | # to this list to register other decorators that produce valid properties. 315 | # These decorators are taken in consideration only for invalid-name. 316 | property-classes=abc.abstractproperty 317 | 318 | # Naming style matching correct variable names. 319 | variable-naming-style=snake_case 320 | 321 | # Regular expression matching correct variable names. Overrides variable- 322 | # naming-style. 323 | #variable-rgx= 324 | 325 | 326 | [SPELLING] 327 | 328 | # Limits count of emitted suggestions for spelling mistakes. 329 | max-spelling-suggestions=4 330 | 331 | # Spelling dictionary name. Available dictionaries: none. To make it working 332 | # install python-enchant package.. 333 | spelling-dict= 334 | 335 | # List of comma separated words that should not be checked. 336 | spelling-ignore-words= 337 | 338 | # A path to a file that contains private dictionary; one word per line. 339 | spelling-private-dict-file= 340 | 341 | # Tells whether to store unknown words to indicated private dictionary in 342 | # --spelling-private-dict-file option instead of raising a message. 343 | spelling-store-unknown-words=no 344 | 345 | 346 | [TYPECHECK] 347 | 348 | # List of decorators that produce context managers, such as 349 | # contextlib.contextmanager. Add to this list to register other decorators that 350 | # produce valid context managers. 351 | contextmanager-decorators=contextlib.contextmanager 352 | 353 | # List of members which are set dynamically and missed by pylint inference 354 | # system, and so shouldn't trigger E1101 when accessed. Python regular 355 | # expressions are accepted. 356 | generated-members= 357 | 358 | # Tells whether missing members accessed in mixin class should be ignored. 
A 359 | # mixin class is detected if its name ends with "mixin" (case insensitive). 360 | ignore-mixin-members=yes 361 | 362 | # Tells whether to warn about missing members when the owner of the attribute 363 | # is inferred to be None. 364 | ignore-none=yes 365 | 366 | # This flag controls whether pylint should warn about no-member and similar 367 | # checks whenever an opaque object is returned when inferring. The inference 368 | # can return multiple potential results while evaluating a Python object, but 369 | # some branches might not be evaluated, which results in partial inference. In 370 | # that case, it might be useful to still emit no-member and other checks for 371 | # the rest of the inferred objects. 372 | ignore-on-opaque-inference=yes 373 | 374 | # List of class names for which member attributes should not be checked (useful 375 | # for classes with dynamically set attributes). This supports the use of 376 | # qualified names. 377 | ignored-classes=optparse.Values,thread._local,_thread._local 378 | 379 | # List of module names for which member attributes should not be checked 380 | # (useful for modules/projects where namespaces are manipulated during runtime 381 | # and thus existing member attributes cannot be deduced by static analysis. It 382 | # supports qualified module names, as well as Unix pattern matching. 383 | ignored-modules= 384 | 385 | # Show a hint with possible names when a member name was not found. The aspect 386 | # of finding the hint is based on edit distance. 387 | missing-member-hint=yes 388 | 389 | # The minimum edit distance a name should have in order to be considered a 390 | # similar match for a missing member name. 391 | missing-member-hint-distance=1 392 | 393 | # The total number of similar names that should be taken in consideration when 394 | # showing a hint for a missing member. 395 | missing-member-max-choices=1 396 | 397 | 398 | [FORMAT] 399 | 400 | # Expected format of line ending, e.g. 
empty (any line ending), LF or CRLF. 401 | expected-line-ending-format= 402 | 403 | # Regexp for a line that is allowed to be longer than the limit. 404 | ignore-long-lines=^\s*(# )??$ 405 | 406 | # Number of spaces of indent required inside a hanging or continued line. 407 | indent-after-paren=4 408 | 409 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 410 | # tab). 411 | indent-string=' ' 412 | 413 | # Maximum number of characters on a single line. 414 | max-line-length=100 415 | 416 | # Maximum number of lines in a module. 417 | max-module-lines=1000 418 | 419 | # List of optional constructs for which whitespace checking is disabled. `dict- 420 | # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. 421 | # `trailing-comma` allows a space between comma and closing bracket: (a, ). 422 | # `empty-line` allows space-only lines. 423 | no-space-check=trailing-comma, 424 | dict-separator 425 | 426 | # Allow the body of a class to be on the same line as the declaration if body 427 | # contains single statement. 428 | single-line-class-stmt=no 429 | 430 | # Allow the body of an if to be on the same line as the test if there is no 431 | # else. 432 | single-line-if-stmt=no 433 | 434 | 435 | [LOGGING] 436 | 437 | # Format style used to check logging format string. `old` means using % 438 | # formatting, while `new` is for `{}` formatting. 439 | logging-format-style=old 440 | 441 | # Logging modules to check that the string format arguments are in logging 442 | # function parameter format. 443 | logging-modules=logging 444 | 445 | 446 | [MISCELLANEOUS] 447 | 448 | # List of note tags to take in consideration, separated by a comma. 449 | notes=FIXME, 450 | XXX, 451 | TODO 452 | 453 | 454 | [SIMILARITIES] 455 | 456 | # Ignore comments when computing similarities. 457 | ignore-comments=yes 458 | 459 | # Ignore docstrings when computing similarities. 
460 | ignore-docstrings=yes 461 | 462 | # Ignore imports when computing similarities. 463 | ignore-imports=no 464 | 465 | # Minimum lines number of a similarity. 466 | min-similarity-lines=4 467 | 468 | 469 | [IMPORTS] 470 | 471 | # Allow wildcard imports from modules that define __all__. 472 | allow-wildcard-with-all=no 473 | 474 | # Analyse import fallback blocks. This can be used to support both Python 2 and 475 | # 3 compatible code, which means that the block might have code that exists 476 | # only in one or another interpreter, leading to false positives when analysed. 477 | analyse-fallback-blocks=no 478 | 479 | # Deprecated modules which should not be used, separated by a comma. 480 | deprecated-modules=optparse,tkinter.tix 481 | 482 | # Create a graph of external dependencies in the given file (report RP0402 must 483 | # not be disabled). 484 | ext-import-graph= 485 | 486 | # Create a graph of every (i.e. internal and external) dependencies in the 487 | # given file (report RP0402 must not be disabled). 488 | import-graph= 489 | 490 | # Create a graph of internal dependencies in the given file (report RP0402 must 491 | # not be disabled). 492 | int-import-graph= 493 | 494 | # Force import order to recognize a module as part of the standard 495 | # compatibility libraries. 496 | known-standard-library= 497 | 498 | # Force import order to recognize a module as part of a third party library. 499 | known-third-party=enchant 500 | 501 | 502 | [DESIGN] 503 | 504 | # Maximum number of arguments for function / method. 505 | max-args=5 506 | 507 | # Maximum number of attributes for a class (see R0902). 508 | max-attributes=7 509 | 510 | # Maximum number of boolean expressions in an if statement. 511 | max-bool-expr=5 512 | 513 | # Maximum number of branch for function / method body. 514 | max-branches=12 515 | 516 | # Maximum number of locals for function / method body. 517 | max-locals=15 518 | 519 | # Maximum number of parents for a class (see R0901). 
520 | max-parents=7 521 | 522 | # Maximum number of public methods for a class (see R0904). 523 | max-public-methods=20 524 | 525 | # Maximum number of return / yield for function / method body. 526 | max-returns=6 527 | 528 | # Maximum number of statements in function / method body. 529 | max-statements=50 530 | 531 | # Minimum number of public methods for a class (see R0903). 532 | min-public-methods=2 533 | 534 | 535 | [CLASSES] 536 | 537 | # List of method names used to declare (i.e. assign) instance attributes. 538 | defining-attr-methods=__init__, 539 | __new__, 540 | setUp 541 | 542 | # List of member names, which should be excluded from the protected access 543 | # warning. 544 | exclude-protected=_asdict, 545 | _fields, 546 | _replace, 547 | _source, 548 | _make 549 | 550 | # List of valid names for the first argument in a class method. 551 | valid-classmethod-first-arg=cls 552 | 553 | # List of valid names for the first argument in a metaclass class method. 554 | valid-metaclass-classmethod-first-arg=cls 555 | 556 | 557 | [EXCEPTIONS] 558 | 559 | # Exceptions that will emit a warning when being caught. Defaults to 560 | # "Exception". 561 | overgeneral-exceptions=Exception 562 | 563 | -------------------------------------------------------------------------------- /tensorpipe/funnels/funnels.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2020 Kartik Sharma 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import os 22 | import inspect 23 | import typeguard 24 | from typing import Optional 25 | 26 | import tensorflow as tf 27 | import numpy as np 28 | from dependency_injector.wiring import Provide, inject 29 | import logging 30 | 31 | from .base_funnel import Funnel 32 | from ..augment import augment 33 | from ..register.register import FUNNEL 34 | from ..containers import Container 35 | 36 | __all__ = ["BboxFunnel", "CategoricalTensorFunnel"] 37 | 38 | """Bbox Funnel for bounding box dataset.""" 39 | 40 | 41 | class TFDecoderMixin: 42 | """Tensorflow decoder.""" 43 | 44 | KEYS_TO_FEATURES = { 45 | "image/encoded": tf.io.FixedLenFeature((), tf.string), 46 | "image/source_id": tf.io.FixedLenFeature((), tf.string, ""), 47 | "image/height": tf.io.FixedLenFeature((), tf.int64, -1), 48 | "image/width": tf.io.FixedLenFeature((), tf.int64, -1), 49 | "image/object/bbox/xmin": tf.io.VarLenFeature(tf.float32), 50 | "image/object/bbox/xmax": tf.io.VarLenFeature(tf.float32), 51 | "image/object/bbox/ymin": tf.io.VarLenFeature(tf.float32), 52 | "image/object/bbox/ymax": tf.io.VarLenFeature(tf.float32), 53 | "image/object/class/label": tf.io.VarLenFeature(tf.int64), 54 | "image/object/area": tf.io.VarLenFeature(tf.float32), 55 | "image/object/is_crowd": tf.io.VarLenFeature(tf.int64), 56 | } 57 | 58 | def _decode_image(self, parsed_tensors): 59 | """Decodes the image""" 60 | image = tf.io.decode_image(parsed_tensors["image/encoded"], channels=3) 61 | image.set_shape([None, None, 3]) 62 | return image 63 | 64 | def _decode_boxes(self, parsed_tensors): 65 | """Concat box coordinates in the format of [ymin, xmin, ymax, xmax].""" 66 | xmin = parsed_tensors["image/object/bbox/xmin"] 67 | xmax = parsed_tensors["image/object/bbox/xmax"] 
68 | ymin = parsed_tensors["image/object/bbox/ymin"] 69 | ymax = parsed_tensors["image/object/bbox/ymax"] 70 | return tf.stack([ymin, xmin, ymax, xmax], axis=-1) 71 | 72 | def decode(self, serialized_example): 73 | """Decode the serialized example.""" 74 | parsed_tensors = tf.io.parse_single_example( 75 | serialized_example, self.KEYS_TO_FEATURES 76 | ) 77 | for k in parsed_tensors: 78 | if isinstance(parsed_tensors[k], tf.SparseTensor): 79 | if parsed_tensors[k].dtype == tf.string: 80 | parsed_tensors[k] = tf.sparse.to_dense( 81 | parsed_tensors[k], default_value="" 82 | ) 83 | else: 84 | parsed_tensors[k] = tf.sparse.to_dense( 85 | parsed_tensors[k], default_value=0 86 | ) 87 | 88 | image = self._decode_image(parsed_tensors) 89 | boxes = self._decode_boxes(parsed_tensors) 90 | decode_image_shape = tf.logical_or( 91 | tf.equal(parsed_tensors["image/height"], -1), 92 | tf.equal(parsed_tensors["image/width"], -1), 93 | ) 94 | image_shape = tf.cast(tf.shape(image), dtype=tf.int64) 95 | 96 | parsed_tensors["image/height"] = tf.where( 97 | decode_image_shape, image_shape[0], parsed_tensors["image/height"] 98 | ) 99 | parsed_tensors["image/width"] = tf.where( 100 | decode_image_shape, image_shape[1], parsed_tensors["image/width"] 101 | ) 102 | 103 | decoded_tensors = { 104 | "image": image, 105 | "height": parsed_tensors["image/height"], 106 | "width": parsed_tensors["image/width"], 107 | "groundtruth_classes": parsed_tensors["image/object/class/label"], 108 | "groundtruth_boxes": boxes, 109 | } 110 | return decoded_tensors 111 | 112 | 113 | @FUNNEL.register_module(name="bbox") 114 | class BboxFunnel(Funnel, TFDecoderMixin): 115 | """BboxFunnel. 116 | BboxFunnel Class for Bbox dataset,This class will provide 117 | data iterable with images,bboxs or images,targets with required 118 | augmentations. 
@FUNNEL.register_module(name="bbox")
class BboxFunnel(Funnel, TFDecoderMixin):
    """BboxFunnel.

    Funnel class for bounding-box (object detection) datasets. Provides a
    tf.data iterable of (images, bboxes) or (images, targets) with the
    configured augmentations applied in training mode.

    Example::
        funnel = Funnel(config=config, datatype="bbox")
        data = next(iter(funnel.from_tfrecords("tfrecord_data/", type="train")))

        # Subclass and override `encoder` when the model needs anchors:
        class CustomFunnel(BboxFunnel):
            def encoder(self, args):
                image_id, image, bbox, classes = args
                # build anchors / encode image and bboxes for the model.
                return image, custom_anchors, classes
    """

    # Decorated like CategoricalTensorFunnel: without @inject the
    # Provide[...] defaults are never wired by dependency_injector.
    @typeguard.typechecked
    @inject
    def __init__(
        self,
        data_path: str,
        datatype="bbox",
        training=True,
        augmenter: augment.Augment = Provide[Container.augmenter],
        config: dict = Provide[Container.external_config],
    ):
        """__init__.

        Args:
            data_path: Dataset path; must follow the structure described in
                the readme.
            datatype: Dataset type tag (defaults to "bbox").
            training: Whether the pipeline runs in training mode.
            augmenter: Augmentation pipeline (injected by default).
            config: Configuration dict for the data pipeline (injected).

        Raises:
            TypeError: If ``data_path`` is not a string or does not exist.
        """
        if not isinstance(data_path, str):
            msg = f"datapath should be str but passed {type(data_path)}."
            logging.error(msg)
            raise TypeError("Only str allowed")
        if not os.path.exists(data_path):
            # Include the offending path (previous message was an f-string
            # with no placeholder). TypeError kept for caller compatibility.
            msg = f"path {data_path} doesnt exists"
            logging.error(msg)
            raise TypeError("Path doesnt exists")

        self._datatype = datatype  # previously hardcoded "bbox"; honor the arg
        self._data_path = data_path
        self.config = config
        self._training = training
        self._drop_remainder = self.config.get("drop_remainder", True)
        self.augmenter = augmenter
        self._per_shard = self.config.get("shard", 10)  # hardcoded shard size
        self.max_instances_per_image = self.config.get("max_instances_per_image", 100)
        # Assigned once (was duplicated before).
        self.numpy_function = self.config.get("numpy_function", None)

        if self.numpy_function:
            assert callable(self.numpy_function), "numpy_function should be a callable."
            # NOTE(review): only checks the arg list is non-empty, although
            # the pipeline always calls it with exactly (image, bbox).
            assert len(
                inspect.getfullargspec(self.numpy_function).args
            ), "py_function should be having two arguments."

    @property
    def datatype(self):
        """Dataset type tag."""
        return self._datatype

    @property
    def classes(self):
        # NOTE(review): `_classes` is never assigned anywhere in this class,
        # so reading this property raises AttributeError — confirm intent.
        return self._classes

    @property
    def data_path(self):
        """Root folder of the dataset."""
        return self._data_path

    def parser(self, dataset_folder):
        """Build the shuffled/interleaved TFRecord file dataset for a subset."""
        dataset = tf.data.Dataset.list_files(
            self.tf_path_pattern(os.path.join(self.data_path, dataset_folder)),
            shuffle=self._training,
        )
        if self._training:
            dataset = dataset.repeat()
        dataset = dataset.interleave(
            self._fetch_records, num_parallel_calls=self.AUTOTUNE
        )
        dataset = dataset.with_options(self.optimized_options)
        if self._training:
            dataset = dataset.shuffle(self._per_shard)
        return dataset

    def encoder(self):
        """Anchor/target encoder hook; expected to be overridden by the user."""
        raise NotImplementedError()

    def decoder(self, value):
        """Helper decoder: wraps the TFRecord decoder.

        Returns:
            Tuple ``(image_id, image, boxes, classes)`` where classes is a
            float32 column vector.
        """
        data = self.decode(value)
        image_id = 1.0  # placeholder id; the record carries no numeric id here
        image = data["image"]
        boxes = data["groundtruth_boxes"]
        classes = data["groundtruth_classes"]
        classes = tf.reshape(tf.cast(classes, dtype=tf.float32), [-1, 1])
        return (image_id, image, boxes, classes)

    def from_tfrecords(self, type="train"):
        """Return an iterable tf.data dataset built from TFRecords.

        The dataset is configured from the injected config with the required
        augmentations, optional user numpy_function, optional encoder,
        fixed-length padding and batching.

        Args:
            type: Subset folder to read, e.g. "train".
        """
        dataset = self.parser(type)
        # Map the bound decoder directly (the old identity lambda added nothing).
        dataset = dataset.map(self.decoder, num_parallel_calls=self.AUTOTUNE)
        dataset = dataset.prefetch(self.config.batch_size)

        # Custom numpy function injected into the pipeline via tf.numpy_function.
        def _numpy_function(img_id, img, bbox, classes):
            _output = tf.numpy_function(
                func=self.numpy_function,
                inp=[img, bbox],
                Tout=(tf.float32, tf.float32),
            )
            return img_id, _output[0], _output[1], classes

        if self._training:
            dataset = dataset.map(
                lambda image_id, image, bbox, classes: self.augmenter(
                    image, bbox, image_id, classes, return_image_label=False
                )
            )
        # Applied independently of training mode, mirroring
        # CategoricalTensorFunnel.from_dataset.
        if self.numpy_function:
            dataset = dataset.map(_numpy_function, num_parallel_calls=self.AUTOTUNE)

        # Probe whether a user encoder is implemented; otherwise give raw output.
        try:
            self.encoder()
        except NotImplementedError:
            logging.info("Encoder is not implemented,giving raw output.")
        else:
            dataset = dataset.map(lambda *args: self.encoder(*args))

        # Pad to fixed length so examples batch cleanly.
        dataset = dataset.map(
            self.pad_to_fixed_len,
            num_parallel_calls=self.AUTOTUNE,
        )

        # Make batches.
        dataset = dataset.batch(
            self.config.batch_size, drop_remainder=self._drop_remainder
        )
        dataset = self.pretraining(dataset)
        return dataset

    def from_dataset(self, tfrecord_path: str = None):
        # TODO(kartik4949): read raw data folders directly.
        raise NotImplementedError

    def from_remote(self, remote_path: str = None):
        # TODO(kartik4949): fetch remote files.
        raise NotImplementedError
@FUNNEL.register_module(name="categorical")
class CategoricalTensorFunnel(Funnel):
    # pylint: disable=line-too-long
    """CategoricalTensorFunnel.

    Tensor funnel for categorical (classification) data. Builds a tf.data
    pipeline according to the injected config, with the required
    augmentations applied in training mode.

    Example:
        funnel = CategoricalTensorFunnel('testdata', config=config, datatype='categorical')
        iterable = funnel.from_dataset(type='train')

    Note: This class can only be used for categorical datasets,
    i.e. either multiclass or binary.
    """
    # pylint: enable=line-too-long

    @typeguard.typechecked
    @inject
    def __init__(
        self,
        data_path: str,
        datatype="categorical",
        training=True,
        augmenter: augment.Augment = Provide[Container.augmenter],
        config: dict = Provide[Container.external_config],
    ):
        """__init__.

        Args:
            data_path: Dataset path; must follow the folder structure
                described in the readme (subset/class_name/image files).
            datatype: Dataset type tag (defaults to "categorical").
            training: Whether the pipeline runs in training mode.
            augmenter: Augmentation pipeline (injected by default).
            config: Configuration dict for the data pipeline (injected).

        Raises:
            TypeError: If ``data_path`` is not a string.
        """
        if not isinstance(data_path, str):
            msg = f"datapath should be str but passed {type(data_path)}."
            logging.error(msg)
            raise TypeError("Only str allowed")

        self._datatype = datatype
        self._data_path = data_path
        self.config = config
        self._training = training
        self._shuffle_buffer = None
        self._batch_size = self.config.get("batch_size", 32)
        self._image_size = self.config.get("image_size", [512, 512])
        self._drop_remainder = self.config.get("drop_remainder", True)
        self.augmenter = augmenter
        self.numpy_function = self.config.get("numpy_function", None)

        if self.numpy_function:
            assert callable(self.numpy_function), "numpy_function should be a callable."
            # NOTE(review): only checks the arg list is non-empty, although
            # the pipeline always calls it with exactly (image, label).
            assert len(
                inspect.getfullargspec(self.numpy_function).args
            ), "py_function should be having two arguments."

    def categorical_encoding(self, labels):
        """Encode labels with the scheme configured in the config file.

        Args:
            labels: Array-like of shape (n, 1) holding raw label strings.

        Returns:
            Encoded labels: one-hot by default, otherwise integer labels.
        """
        # Use dict-style access like every other config read in this class;
        # attribute access fails on a plain dict when the key is absent.
        scheme = self.config.get("categorical_encoding") or "onehot"
        if scheme == "onehot":
            from sklearn.preprocessing import (
                OneHotEncoder,
            )  # pylint: disable=g-import-not-at-top

            # NOTE(review): `sparse=` is deprecated in favor of
            # `sparse_output=` since scikit-learn 1.2 — kept for compat.
            encoder = OneHotEncoder(drop="if_binary", sparse=False)
        else:
            from sklearn.preprocessing import (
                LabelEncoder,
            )  # pylint: disable=g-import-not-at-top

            encoder = LabelEncoder()
        return encoder.fit_transform(labels)

    @property
    def get_id_to_imagefile(self):
        """Mapping of generated integer ids to image file paths."""
        return self._get_id_to_imagefile

    @get_id_to_imagefile.setter
    def get_id_to_imagefile(self, value):
        if not isinstance(value, dict):
            msg = "Only dict assign is allowed"
            logging.error(msg)
            raise TypeError(msg)
        self._get_id_to_imagefile = value

    @property
    def classes(self):
        """Distinct class-folder names (populated by `_get_file_labels`)."""
        return self._classes

    @property
    def data_path(self):
        """Root folder of the dataset."""
        return self._data_path

    @property
    def datatype(self):
        """Dataset type tag."""
        return self._datatype

    def resize(self, image):
        """Bilinear-resize an image tensor to the configured image size."""
        return tf.image.resize(
            image,
            self._image_size,
            method=tf.image.ResizeMethod.BILINEAR,
            preserve_aspect_ratio=False,
            antialias=False,
            name=None,
        )

    def _generate_ids(self, image_files):
        """Generate integer ids for the image files.

        The ids are later used to look up which image file to read.

        Args:
            image_files: List of image file names.

        Returns:
            List of the generated ids (0..n-1).
        """
        # TODO: (HIGH) make get_id_to_imagefile a dataclass.
        # dict(enumerate(...)) replaces the old side-effecting comprehension.
        self._get_id_to_imagefile = dict(enumerate(image_files))
        return list(self._get_id_to_imagefile.keys())

    def _get_file_labels(self, subset):
        """Collect image paths and encoded labels for one subset folder.

        Walks ``data_path/subset/<class>/<image>`` and encodes the class
        folder names via `categorical_encoding`.
        """
        _images = []
        _labels = []
        subset_dir = os.path.join(self.data_path, subset)
        for label_folder in os.listdir(subset_dir):
            label_dir = os.path.join(subset_dir, label_folder)
            for image_name in os.listdir(label_dir):
                _images.append(os.path.join(label_dir, image_name))
                _labels.append(label_folder)

        self._classes = set(_labels)
        _labels = np.reshape(np.asarray(_labels), (-1, 1))
        _labels = self.categorical_encoding(_labels)
        # NOTE(review): reshaping one-hot output to (-1, 1) only keeps the
        # row count intact for binary data (drop="if_binary"); a multiclass
        # one-hot matrix would trip the length assert below — confirm.
        _labels = np.reshape(np.asarray(_labels), (-1, 1))
        self._size = len(_images)
        assert len(_images) == len(_labels), "Length of Images and Labels didnt match"
        return _images, _labels

    @typeguard.typechecked
    def parser(self, subset: str) -> tf.data:
        """Build the shuffled dataset of (image, label) tensors for a subset."""
        dataset = tf.data.Dataset.from_tensor_slices(self._get_file_labels(subset))

        if self._training:
            dataset = dataset.shuffle(self._size)
            dataset = dataset.repeat()

        dataset = dataset.map(self._read, num_parallel_calls=self.AUTOTUNE)
        return dataset

    @tf.function
    def _read(self, image, label):
        """Read, decode, convert to float32 in [0, 1], and resize one image."""
        image = tf.io.read_file(image)
        image = tf.io.decode_jpeg(image, try_recover_truncated=True)
        image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        image = self.resize(image)
        return image, label

    def encoder(self, dataset):
        """Identity encoder hook; override to add target encoding."""
        return dataset

    @typeguard.typechecked
    def from_dataset(self, type: str = "train") -> tf.data:
        """Return a high-performance tf.data iterable of (image, label).

        Iterate over the returned iterable in a custom training loop or
        pass it to keras ``model.fit``.

        Args:
            type: Subset of the dataset, i.e. train, val, test, validation.

        Raises:
            ValueError: If ``type`` names an unknown subset.
        """
        if type.lower() not in ("train", "val", "test", "validation"):
            # ValueError (still an Exception) instead of the bare Exception.
            raise ValueError("Subset Data you asked is not a valid portion")

        dataset = self.parser(type)
        dataset = dataset.prefetch(self._batch_size)

        # Custom numpy function injected into the pipeline via tf.numpy_function.
        def _numpy_function(img, lbl):
            _output = tf.numpy_function(
                func=self.numpy_function,
                inp=[img, lbl],
                Tout=(tf.float32, tf.int64),
            )
            return _output[0], _output[1]

        if self._training:
            dataset = dataset.map(self.augmenter, num_parallel_calls=self.AUTOTUNE)
        if self.numpy_function:
            dataset = dataset.map(_numpy_function, num_parallel_calls=self.AUTOTUNE)
        dataset = dataset.batch(self._batch_size, drop_remainder=self._drop_remainder)
        dataset = self.pretraining(dataset)
        return dataset

    def from_tfrecords(self, tfrecord_path: str = None):
        # TODO(kartik4949): fetch tf_records.
        raise NotImplementedError

    def from_remote(self, remote_path: str = None):
        # TODO(kartik4949): fetch remote files.
        raise NotImplementedError