├── noisify
    ├── __init__.py
    ├── tests
    │   ├── __init__.py
    │   ├── ecosystem
    │   │   ├── __init__.py
    │   │   ├── test_image.jpeg
    │   │   ├── test_pandas.py
    │   │   ├── test_numpy.py
    │   │   └── test_pillow.py
    │   ├── faults
    │   │   ├── __init__.py
    │   │   ├── test_faults.py
    │   │   ├── test_attribute_faults.py
    │   │   └── test_report_faults.py
    │   ├── helpers
    │   │   ├── __init__.py
    │   │   ├── test_init_saving.py
    │   │   └── test_fallible.py
    │   ├── recipes
    │   │   ├── __init__.py
    │   │   ├── test_human_error.py
    │   │   └── test_machine_error.py
    │   ├── reporters
    │   │   ├── __init__.py
    │   │   ├── test_series.py
    │   │   └── test_reports.py
    │   └── attributes
    │   │   ├── __init__.py
    │   │   ├── test_object_attribute_introspection.py
    │   │   └── test_attribute.py
    ├── recipes
    │   ├── __init__.py
    │   └── default_recipes.py
    ├── reporters
    │   ├── __init__.py
    │   ├── report.py
    │   ├── series.py
    │   └── reporter.py
    ├── helpers
    │   ├── __init__.py
    │   ├── saved_init_statement.py
    │   ├── multi_dispatch.py
    │   └── fallible.py
    ├── faults
    │   ├── __init__.py
    │   ├── utilities.py
    │   ├── report_faults.py
    │   ├── fault.py
    │   └── attribute_faults.py
    └── attribute_readers
    │   ├── __init__.py
    │   ├── inspection_strategies.py
    │   └── attribute_readers.py
├── docs
    ├── _static
    │   ├── dstl.jpg
    │   ├── noisify.jpg
    │   └── noisy_noisify.jpg
    ├── noisify.recipes.rst
    ├── api.rst
    ├── community
    │   ├── support.rst
    │   ├── updates.rst
    │   └── release-process.rst
    ├── noisify.reporters.rst
    ├── noisify.helpers.rst
    ├── noisify.attributes.rst
    ├── basics
    │   ├── install.rst
    │   ├── introduction.rst
    │   ├── advanced.rst
    │   └── quickstart.rst
    ├── Makefile
    ├── noisify.faults.rst
    ├── index.rst
    └── conf.py
├── setup.py
├── HISTORY.md
├── LICENSE.txt
└── README.md


/noisify/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | .. Dstl (c) Crown Copyright 2019
3 | """


--------------------------------------------------------------------------------
/noisify/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | .. Dstl (c) Crown Copyright 2019
3 | """


--------------------------------------------------------------------------------
/noisify/tests/ecosystem/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | .. Dstl (c) Crown Copyright 2019
3 | """


--------------------------------------------------------------------------------
/noisify/tests/faults/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | .. Dstl (c) Crown Copyright 2019
3 | """


--------------------------------------------------------------------------------
/noisify/tests/helpers/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | .. Dstl (c) Crown Copyright 2019
3 | """


--------------------------------------------------------------------------------
/noisify/tests/recipes/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | .. Dstl (c) Crown Copyright 2019
3 | """


--------------------------------------------------------------------------------
/noisify/tests/reporters/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | .. Dstl (c) Crown Copyright 2019
3 | """


--------------------------------------------------------------------------------
/noisify/tests/attributes/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | .. Dstl (c) Crown Copyright 2019
3 | """


--------------------------------------------------------------------------------
/docs/_static/dstl.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dstl/Noisify/HEAD/docs/_static/dstl.jpg


--------------------------------------------------------------------------------
/docs/_static/noisify.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dstl/Noisify/HEAD/docs/_static/noisify.jpg


--------------------------------------------------------------------------------
/docs/_static/noisy_noisify.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dstl/Noisify/HEAD/docs/_static/noisy_noisify.jpg


--------------------------------------------------------------------------------
/noisify/recipes/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | .. Dstl (c) Crown Copyright 2019
3 | """
4 | from .default_recipes import *
5 | 


--------------------------------------------------------------------------------
/noisify/tests/ecosystem/test_image.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dstl/Noisify/HEAD/noisify/tests/ecosystem/test_image.jpeg


--------------------------------------------------------------------------------
/noisify/reporters/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | .. Dstl (c) Crown Copyright 2019
3 | """
4 | from .reporter import Reporter
5 | from .series import Noisifier
6 | 


--------------------------------------------------------------------------------
/noisify/helpers/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | .. Dstl (c) Crown Copyright 2019
3 | """
4 | from .saved_init_statement import SavedInitStatement
5 | from .fallible import Fallible
6 | 


--------------------------------------------------------------------------------
/noisify/faults/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | .. Dstl (c) Crown Copyright 2019
3 | """
4 | from .fault import Fault
5 | from .attribute_faults import *
6 | from .report_faults import *
7 | 


--------------------------------------------------------------------------------
/docs/noisify.recipes.rst:
--------------------------------------------------------------------------------
1 | noisify.recipes package
2 | =======================
3 | 
4 | .. automodule:: noisify.recipes.default_recipes
5 |     :members:
6 |     :show-inheritance:
7 | 


--------------------------------------------------------------------------------
/noisify/attribute_readers/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | .. Dstl (c) Crown Copyright 2019
3 | """
4 | from .attribute_readers import *
5 | from noisify.attribute_readers.inspection_strategies import *
6 | 


--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
 1 | .. _api:
 2 | 
 3 | API reference
 4 | =============
 5 | 
 6 | .. toctree::
 7 | 
 8 |     noisify.attributes
 9 |     noisify.faults
10 |     noisify.helpers
11 |     noisify.recipes
12 |     noisify.reporters
13 | 


--------------------------------------------------------------------------------
/docs/community/support.rst:
--------------------------------------------------------------------------------
 1 | .. _support:
 2 | 
 3 | Support
 4 | =======
 5 | 
 6 | File an Issue
 7 | -------------
 8 | 
 9 | If you spot a bug in noisify, or would like to suggest an additional feature, you can
10 | `use our issue tracker on GitHub <https://github.com/dstl/noisify/issues>`_.


--------------------------------------------------------------------------------
/docs/community/updates.rst:
--------------------------------------------------------------------------------
 1 | .. _updates:
 2 | 
 3 | 
 4 | Community Updates
 5 | =================
 6 | 
 7 | GitHub
 8 | ------
 9 | 
10 | The latest information on the status of the project is available on
11 | `the GitHub repo <https://github.com/dstl/noisify>`_.
12 | 
13 | 
14 | Release and Version History
15 | ---------------------------
16 | 
17 | .. include:: ../../HISTORY.md
18 | 


--------------------------------------------------------------------------------
/docs/noisify.reporters.rst:
--------------------------------------------------------------------------------
 1 | noisify.reporters package
 2 | =========================
 3 | 
 4 | noisify.reporters.reporter module
 5 | ---------------------------------
 6 | 
 7 | .. automodule:: noisify.reporters.reporter
 8 |     :members:
 9 |     :show-inheritance:
10 | 
11 | noisify.reporters.series module
12 | -------------------------------
13 | 
14 | .. automodule:: noisify.reporters.series
15 |     :members:
16 |     :show-inheritance:
17 | 


--------------------------------------------------------------------------------
/docs/noisify.helpers.rst:
--------------------------------------------------------------------------------
 1 | noisify.helpers package
 2 | =======================
 3 | 
 4 | noisify.helpers.fallible module
 5 | -------------------------------
 6 | 
 7 | .. automodule:: noisify.helpers.fallible
 8 |     :members:
 9 |     :show-inheritance:
10 | 
11 | noisify.helpers.saved\_init\_statement module
12 | ---------------------------------------------
13 | 
14 | .. automodule:: noisify.helpers.saved_init_statement
15 |     :members:
16 |     :show-inheritance:
17 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. Dstl (c) Crown Copyright 2019
 3 | """
 4 | from setuptools import setup, find_packages
 5 | 
 6 | setup(name='noisify',
 7 |       version='1.0',
 8 |       description='Framework for creating synthetic data with realistic errors for refining data science pipelines.',
 9 |       url='',
10 |       author='Declan Crew',
11 |       author_email='dcrew@dstl.gov.uk',
12 |       license='MIT',
13 |       packages=find_packages(),
14 |       install_requires=[],
15 |       test_suite='noisify.tests',
16 |       test_requires=['numpy', 'Pillow', 'pandas'])
17 | 


--------------------------------------------------------------------------------
/docs/noisify.attributes.rst:
--------------------------------------------------------------------------------
 1 | noisify.attribute_readers package
 2 | =================================
 3 | 
 4 | noisify.attribute_readers.attribute_readers module
 5 | --------------------------------------------------
 6 | 
 7 | .. automodule:: noisify.attribute_readers.attribute_readers
 8 |     :members:
 9 |     :show-inheritance:
10 | 
11 | noisify.attribute_readers.inspection_strategies module
12 | ------------------------------------------------------
13 | 
14 | .. automodule:: noisify.attribute_readers.inspection_strategies
15 |     :members:
16 |     :show-inheritance:
17 | 


--------------------------------------------------------------------------------
/docs/basics/install.rst:
--------------------------------------------------------------------------------
 1 | .. _install:
 2 | 
 3 | Installation
 4 | ========================
 5 | 
 6 | 
 7 | Noisify is hosted on `the PyPI central repo <https://pypi.org/>`_ and can be installed as follows::
 8 | 
 9 |     $ pip install noisify
10 | 
11 | The only dependency is Python 3.5+!
12 | 
13 | Build from Source
14 | -------------------
15 | 
16 | If you would prefer to install the latest version of the code, or perhaps to modify or contribute to it, the project
17 | is `hosted on GitHub <https://github.com/dstl/noisify>`_.
18 | 
19 |     $ git clone https://github.com/dstl/noisify.git
20 | 
21 |     $ cd noisify
22 | 
23 |     $ pip install .
24 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | SOURCEDIR     = .
 8 | BUILDDIR      = _build
 9 | 
10 | # Put it first so that "make" without argument is like "make help".
11 | help:
12 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
13 | 
14 | .PHONY: help Makefile
15 | 
16 | # Catch-all target: route all unknown targets to Sphinx using the new
17 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
18 | %: Makefile
19 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


--------------------------------------------------------------------------------
/noisify/reporters/report.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. Dstl (c) Crown Copyright 2019
 3 | """
 4 | 
 5 | 
 6 | class Report:
 7 |     """Report class, stores the noisified data with the faults and ground truth
 8 |     for future use. Delegates all methods and attribute_readers to the observed item."""
 9 |     def __init__(self, identifier, truth, triggered_faults, observed):
10 |         self.identifier = identifier
11 |         self.truth = truth
12 |         self.triggered_faults = triggered_faults
13 |         self.observed = observed
14 | 
15 |     def __getattr__(self, item):
16 |         return getattr(self.observed, item)
17 | 
18 |     def __getitem__(self, item):
19 |         return self.observed[item]
20 | 


--------------------------------------------------------------------------------
/docs/basics/introduction.rst:
--------------------------------------------------------------------------------
 1 | .. _introduction:
 2 | 
 3 | Introduction
 4 | ============
 5 | 
 6 | 
 7 | Background
 8 | ----------
 9 | 
10 | Noisify is a project by Dstl (the Defence Science and Technology Laboratory). We are an executive agency of the UK
11 | Ministry of Defence.
12 | 
13 | Noisify was developed in part to expand the work done in image augmentation to other forms of data, and also to help
14 | test and perfect data cleaning and processing pipelines.
15 | 
16 | Copyright and usage information
17 | -------------------------------
18 | 
19 | Crown Copyright 2019
20 | 
21 | 
22 | Noisify is released under the terms of the `MIT licence`_.
23 | 
24 | .. _`MIT licence`: https://opensource.org/licenses/MIT
25 | 


--------------------------------------------------------------------------------
/docs/noisify.faults.rst:
--------------------------------------------------------------------------------
 1 | noisify.faults package
 2 | ======================
 3 | 
 4 | noisify.faults.fault module
 5 | ---------------------------
 6 | 
 7 | .. automodule:: noisify.faults.fault
 8 |     :members:
 9 | 
10 | noisify.faults.attribute\_faults module
11 | ---------------------------------------
12 | 
13 | .. automodule:: noisify.faults.attribute_faults
14 |     :members:
15 | 
16 | 
17 | noisify.faults.report\_faults module
18 | ------------------------------------
19 | 
20 | .. automodule:: noisify.faults.report_faults
21 |     :members:
22 | 
23 | noisify.faults.utilities module
24 | -------------------------------
25 | 
26 | .. automodule:: noisify.faults.utilities
27 |     :members:
28 | 


--------------------------------------------------------------------------------
/noisify/helpers/saved_init_statement.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. Dstl (c) Crown Copyright 2019
 3 | """
 4 | import inspect
 5 | 
 6 | 
 7 | class SavedInitStatement:
 8 |     """Init statement saving mixin, introspects on object instantiation arguments and
 9 |     saves them to the final object"""
10 | 
11 |     def __init__(self, *args, **kwargs):
12 |         frame = inspect.currentframe()
13 |         _, _, _, values = inspect.getargvalues(frame)
14 |         value_set = [str(values['args'])]
15 |         if kwargs:
16 |             if values['args']:
17 |                 value_set += [str(values['kwargs'])]
18 |             else:
19 |                 value_set = [str(values['kwargs'])]
20 |         self.init_statement = '-'.join(value_set)
21 |         pass
22 | 


--------------------------------------------------------------------------------
/HISTORY.md:
--------------------------------------------------------------------------------
 1 | v1.0
 2 | ----
 3 | - Initial release!
 4 | 
 5 | v0.9
 6 | ----
 7 | - Looping behaviour for infinite generation
 8 | - Initial documentation
 9 | 
10 | v0.8
11 | ----
12 | - Ecosystem support for pandas, pil etc.
13 | 
14 | v0.7
15 | ----
16 | - Type annotation dispatch added to priority dispatch mechanism
17 | 
18 | v0.6
19 | ----
20 | - Renamed to Noisify
21 | - First recipes
22 | 
23 | v0.5
24 | ----
25 | - Priority dispatch mechanism first built
26 | 
27 | v0.4
28 | ----
29 | - First reporter level faults
30 | - Attribute introspection
31 | 
32 | v0.3
33 | ----
34 | - Add composability by overloading addition to faults and reporters
35 | 
36 | v0.2
37 | ----
38 | - Major rewrite from 0.1, focuses purely on fault generation.
39 | 
40 | v0.1
41 | ----
42 | - Simulation and data augmentation together, too messy
43 | 


--------------------------------------------------------------------------------
/noisify/tests/helpers/test_init_saving.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. Dstl (c) Crown Copyright 2019
 3 | """
 4 | import unittest
 5 | from noisify.helpers import SavedInitStatement
 6 | from ast import literal_eval
 7 | 
 8 | 
 9 | class TestInitSaving(unittest.TestCase):
10 |     def test_args(self):
11 |         new_object = SavedInitStatement('a', 'b', 'c')
12 |         self.assertEqual(literal_eval(new_object.init_statement), ('a', 'b', 'c'))
13 |         pass
14 | 
15 |     def test_kwargs(self):
16 |         new_object = SavedInitStatement(a='1', b='2', c='3')
17 |         self.assertEqual(literal_eval(new_object.init_statement), {'a': '1', 'b': '2', 'c': '3'})
18 |         pass
19 | 
20 |     def test_both(self):
21 |         new_object = SavedInitStatement('a', 'b', c=1, d=2)
22 |         args, kwargs = new_object.init_statement.split('-')
23 |         self.assertEqual(literal_eval(args), ('a', 'b'))
24 |         self.assertEqual(literal_eval(kwargs), {'c': 1, 'd': 2})
25 |         pass
26 | 


--------------------------------------------------------------------------------
/noisify/tests/faults/test_faults.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. Dstl (c) Crown Copyright 2019
 3 | """
 4 | import unittest
 5 | from noisify.faults import Fault
 6 | 
 7 | 
class AddOneFault(Fault):
    """Fault fixture exposing two implementations registered at different priorities."""

    # NOTE(review): register_implementation is not imported in this module; it is
    # presumably injected into the class body by Fault's metaclass - confirm.
    @register_implementation(priority=1)
    def add_to_int_string(self, integer_string_object):
        """Return the input concatenated with the string "1"."""
        return integer_string_object + "1"

    @register_implementation(priority=2)
    def make_uppercase(self, lowercase_string):
        """Return the result of calling ``upper()`` on the input."""
        return lowercase_string.upper()
16 | 
17 | 
class TestGeneralFaultBehaviour(unittest.TestCase):
    """Exercises how a Fault subclass behaves when ``impact`` is called."""

    def test_missing_implementation(self):
        # An object none of the implementations can handle should come back as-is.
        class UselessClass:
            pass

        fault = AddOneFault()
        target = UselessClass()
        self.assertEqual(target, fault.impact(target))

    def test_fault_priority(self):
        # Both implementations accept a str; the priority=2 one is expected to win.
        fault = AddOneFault()
        sample = 'this is a test'
        self.assertEqual(fault.impact(sample), 'THIS IS A TEST')
34 | 


--------------------------------------------------------------------------------
/noisify/tests/attributes/test_object_attribute_introspection.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. Dstl (c) Crown Copyright 2019
 3 | """
 4 | import unittest
 5 | from noisify.attribute_readers import dictionary_lookup, object_attributes_lookup
 6 | 
 7 | 
 8 | class TestObjectAttributeIntrospection(unittest.TestCase):
 9 |     def test_dict_to_attributes(self):
10 |         test_object = {'test1': 1, 'test2': 2}
11 |         attributes = [a for a in dictionary_lookup(test_object)]
12 |         attribute_names = set(a.attribute_identifier for a in attributes)
13 |         expected_names = {'test1', 'test2'}
14 |         self.assertEqual(attribute_names, expected_names)
15 |         pass
16 | 
17 |     def test_object_to_attributes(self):
18 |         class Tester:
19 |             test1 = 1
20 |             test2 = 2
21 |             pass
22 |         test_object = Tester()
23 |         attributes = [a for a in object_attributes_lookup(test_object)]
24 |         attribute_names = set(a.attribute_identifier for a in attributes)
25 |         expected_names = {'test1', 'test2'}
26 |         self.assertEqual(attribute_names, expected_names)
27 |         pass
28 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | The MIT License
 2 | 
 3 | Dstl (c) Crown Copyright 2019
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.


--------------------------------------------------------------------------------
/noisify/tests/recipes/test_human_error.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. Dstl (c) Crown Copyright 2019
 3 | """
 4 | import unittest
 5 | from noisify.recipes.default_recipes import human_error
 6 | 
 7 | 
 8 | class TestHumanError(unittest.TestCase):
 9 |     def test_multiple_series(self):
10 |         test_noisifier = human_error(0)
11 |         test_input = [{'test1': 1}, {'test2': 2}]
12 |         test_output = [i.observed for i in test_noisifier(test_input)]
13 |         for input_term, output_term in zip(test_input, test_output):
14 |             self.assertEqual(input_term, output_term)
15 |         pass
16 | 
17 |     def test_single(self):
18 |         test_noisifier = human_error(0)
19 |         test_input = {'test1': 1}
20 |         test_output = [i.observed for i in test_noisifier(test_input)][0]
21 |         self.assertEqual(test_input, test_output)
22 |         pass
23 | 
24 |     def test_noise(self):
25 |         test_noisifier = human_error(10)
26 |         test_input = {'test1': 'hello', 'test2': 'world'}
27 |         test_output = [i.observed for i in test_noisifier(test_input)][0]
28 |         self.assertNotEqual(test_input, test_output)
29 |         pass
30 | 


--------------------------------------------------------------------------------
/noisify/tests/recipes/test_machine_error.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. Dstl (c) Crown Copyright 2019
 3 | """
 4 | import unittest
 5 | from noisify.recipes.default_recipes import machine_error
 6 | 
 7 | 
 8 | class TestMachineError(unittest.TestCase):
 9 |     def test_multiple_series(self):
10 |         test_noisifier = machine_error(0)
11 |         test_input = [{'test1': 1}, {'test2': 2}]
12 |         test_output = [i.observed for i in test_noisifier(test_input)]
13 |         for input_term, output_term in zip(test_input, test_output):
14 |             self.assertEqual(input_term, output_term)
15 |         pass
16 | 
17 |     def test_single(self):
18 |         test_noisifier = machine_error(0)
19 |         test_input = {'test1': 1}
20 |         test_output = [i.observed for i in test_noisifier(test_input)][0]
21 |         self.assertEqual(test_input, test_output)
22 |         pass
23 | 
24 |     def test_noise(self):
25 |         test_noisifier = machine_error(100)  # Always interrupts
26 |         test_input = {'test1': 'hello', 'test2': 'world'}
27 |         test_output = [i.observed for i in test_noisifier(test_input)][0]
28 |         self.assertNotEqual(test_input, test_output)
29 |         pass
30 | 


--------------------------------------------------------------------------------
/noisify/attribute_readers/inspection_strategies.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. Dstl (c) Crown Copyright 2019
 3 | Inspection strategies are used by reporters to create attribute_readers for given objects when none are specified.
 4 | """
 5 | from noisify.attribute_readers import DictValue, ObjectAttribute
 6 | 
 7 | 
 8 | def dictionary_lookup(unknown_dictionary, attribute_faults=None):
 9 |     """
10 |     Generates attribute_readers for each key/value pair of a given dictionary, enables
11 |     reporters to map faults across dictionaries without further specification.
12 |     """
13 |     if hasattr(unknown_dictionary, 'keys'):
14 |         for identifier in unknown_dictionary.keys():
15 |             yield DictValue(identifier, faults=attribute_faults)
16 | 
17 | 
def object_attributes_lookup(unknown_object, attribute_faults=None):
    """
    Yield an ObjectAttribute reader for each public, non-callable attribute of
    the given object, passing ``attribute_faults`` to each reader as its faults.

    Names starting with '_' and attributes whose value is callable (methods)
    are skipped.
    """
    for name in dir(unknown_object):
        if name.startswith('_'):
            continue
        # Test the attribute's value: dir() returns names, and a str name is
        # never callable, so checking the name would let every method through.
        # A name that cannot be read falls back to None and is kept.
        if callable(getattr(unknown_object, name, None)):
            continue
        yield ObjectAttribute(name, faults=attribute_faults)


--------------------------------------------------------------------------------
/noisify/recipes/default_recipes.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. Dstl (c) Crown Copyright 2019
 3 | Default recipes. These are extremely simple and mainly serve as examples for developing your own code.
 4 | """
 5 | from noisify.faults import TypographicalFault, ScrambleAttributes, GaussianNoise, InterruptionFault
 6 | from noisify.reporters import Noisifier, Reporter
 7 | 
 8 | 
 9 | def human_error(scale):
10 |     """
11 |     Simple example Noisifier recipe, applies typos and attribute scrambling to the input depending
12 |     on the scale given, recommended scale range from 1-10
13 |     """
14 |     return Noisifier(
15 |         reporter=Reporter(
16 |             faults=[TypographicalFault(likelihood=min(1, 0.1*scale), severity=0.1*scale),
17 |                     ScrambleAttributes(likelihood=0.1 * scale)]
18 |         ),
19 |         faults=None
20 |     )
21 | 
22 | 
def machine_error(scale):
    """
    Simple example Noisifier recipe combining gaussian noise with occasional
    interruptions, both growing with ``scale``; the recommended scale range
    is 1-10.
    """
    noise = GaussianNoise(sigma=0.1*scale)
    # The interruption likelihood is capped at 1 (reached at scale 100).
    interruption = InterruptionFault(likelihood=min(1, 0.01*scale))
    reporter = Reporter(faults=[noise, interruption])
    return Noisifier(reporter=reporter, faults=None)
35 | 


--------------------------------------------------------------------------------
/noisify/tests/ecosystem/test_pandas.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. Dstl (c) Crown Copyright 2019
 3 | """
 4 | import unittest
 5 | import pandas as pd
 6 | import pandas.testing as pd_testing
 7 | from noisify.faults import GaussianNoise, InterruptionFault, CalibrationFault
 8 | 
 9 | 
class TestPandas(unittest.TestCase):
    """Checks that faults applied to a pandas DataFrame change its contents."""

    def _assert_impact_changes_frame(self, fault):
        # The frames count as different when assert_frame_equal raises.
        original = pd.DataFrame({'col1': range(5), 'col2': range(5)})
        impacted = fault.impact(original)
        with self.assertRaises(AssertionError):
            pd_testing.assert_frame_equal(original, impacted)

    def test_gaussian(self):
        self._assert_impact_changes_frame(GaussianNoise(1))

    def test_interruption(self):
        self._assert_impact_changes_frame(InterruptionFault(0.5))

    def test_calibration(self):
        self._assert_impact_changes_frame(CalibrationFault(1.5))
34 | 


--------------------------------------------------------------------------------
/noisify/tests/ecosystem/test_numpy.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. Dstl (c) Crown Copyright 2019
 3 | """
 4 | import unittest
 5 | import numpy as np
 6 | from noisify.faults import *
 7 | from noisify.recipes.default_recipes import human_error
 8 | 
 9 | class TestNumpy(unittest.TestCase):
10 |     def test_gaussian(self):
11 |         noise = GaussianNoise(sigma=1)
12 |         test_array = np.array(range(10))
13 |         out_array = noise.impact(test_array)
14 |         self.assertNotEqual(test_array.all(), out_array.all())
15 |         self.assertNotEqual(test_array[0]-out_array[0], test_array[1]-out_array[1])
16 |         pass
17 | 
18 |     def test_interruption(self):
19 |         interrupt = InterruptionFault(0.5)
20 |         test_array = np.array(range(10))
21 |         out_array = interrupt.impact(test_array)
22 |         self.assertNotEqual(test_array.tolist(), out_array.tolist())
23 |         pass
24 | 
25 |     def test_calibration(self):
26 |         calibrate = CalibrationFault(0.5)
27 |         test_array = np.array(range(10))
28 |         out_array = calibrate.impact(test_array)
29 |         self.assertNotEqual(test_array.tolist(), out_array.tolist())
30 |         pass
31 | 
32 |     def test_human_error(self):
33 |         error = human_error(10)
34 |         test_array = np.array(range(10))
35 |         out_array = list(error(test_array))[0].observed
36 |         self.assertNotEqual(test_array.tolist(), out_array.tolist())
37 |         pass


--------------------------------------------------------------------------------
/noisify/helpers/multi_dispatch.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. Dstl (c) Crown Copyright 2019
 3 | """
 4 | 
 5 | 
 6 | def register_implementation(priority=-1):
 7 |     """
 8 |     Decorator for use with MultipleDispatch derived classes, accepts a priority numeric. It does not need to be
 9 |     imported.
10 |     """
11 |     def wrap(func):
12 |         func._priority = priority
13 |         return func
14 |     return wrap
15 | 
16 | 
class MultipleDispatch(type):
    """
    Metaclass for providing a priority queue of methods under cls._implementations which can be used for multiple
    dispatch in a duck typing fashion. Larger priorities take precedence.

    ``_implementations`` is a list of ``(method, priority)`` tuples sorted by descending priority. It is only
    rebuilt for classes that register at least one implementation of their own; such classes also receive the
    entries of their direct bases. Classes without own implementations rely on ordinary attribute inheritance.
    """
    @classmethod
    def __prepare__(mcs, name, bases):
        # Seed the class body namespace so the decorator can be used there without importing it.
        return {'register_implementation': register_implementation}

    def __new__(cls, name, base, attrs):
        implementations = [(method, method._priority) for method in attrs.values() if hasattr(method, '_priority')]
        if implementations:
            for parent in base:
                implementations.extend(getattr(parent, '_implementations', None) or [])
            # list.sort is stable (also with reverse=True), so on equal priority the class's own
            # implementations stay ahead of inherited ones.
            implementations.sort(key=lambda entry: entry[1], reverse=True)
            attrs['_implementations'] = implementations
        # The decorator is scaffolding for the class body only. Always drop it, otherwise classes without
        # registered implementations would expose it as a stray attribute.
        attrs.pop('register_implementation', None)
        return super(MultipleDispatch, cls).__new__(cls, name, base, attrs)
36 | 


--------------------------------------------------------------------------------
/docs/community/release-process.rst:
--------------------------------------------------------------------------------
 1 | Release Process and Rules
 2 | =========================
 3 | 
 4 | All code that adds new features will be required to implement unit tests to ensure that it does not introduce
 5 | unexpected behaviour.
 6 | 
 7 | Pull requests that add new features will be very gladly accepted! Try to keep them small if possible. Larger requests
 8 | will naturally take longer for us to review. Please avoid adding any dependencies. If you're adding support for an extra
 9 | library, make sure this extra support is optional (importing a library inside an implementation causes that
10 | implementation to be skipped if the library is not installed; please use this for ecosystem support).
11 | 
12 | Most importantly however, thank you for contributing back to Noisify!
13 | 
14 | Versioning follows the `Semantic Versioning <https://semver.org/>`_ framework.
15 | 
16 | Major Releases
17 | --------------
18 | 
19 | The first number in the version number is the major release (i.e. ``vX.0.0``). Changes to the core
20 | API that are not backwards compatible will result in a new major release version.
21 | Releases of this nature will be infrequent.
22 | 
23 | Minor Releases
24 | --------------
25 | 
26 | Minor releases will change the second number of the version number (i.e. ``v0.Y.0``),
27 | these releases will add new features, but will be fully backwards compatible with
28 | prior versions.
29 | 
30 | Hotfix Releases
31 | ---------------
32 | 
33 | Hotfix releases will change the final number of the version (i.e. ``v0.0.Z``),
34 | these releases will consist of bug fixes between versions.
35 | 


--------------------------------------------------------------------------------
/noisify/tests/faults/test_attribute_faults.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. Dstl (c) Crown Copyright 2019
 3 | """
 4 | import unittest
 5 | from noisify.faults.attribute_faults import *
 6 | 
 7 | 
 8 | class TestBasicFaults(unittest.TestCase):
 9 |     def test_gaussian(self):
10 |         p_fault = GaussianNoise(sigma=0.1)
11 |         true_value = 100
12 |         observed = p_fault.impact(true_value)
13 |         error_amount = abs(true_value - observed) / true_value
14 |         self.assertLess(error_amount, 0.15)
15 |         self.assertNotEqual(true_value, observed)
16 |         pass
17 | 
18 |     def test_unit_fault(self):
19 |         u_fault = UnitFault(unit_modifier=lambda x : x+10)
20 |         true_value = 100
21 |         observed = u_fault.impact(true_value)
22 |         self.assertNotEqual(true_value, observed)
23 |         self.assertEqual(true_value+10, observed)
24 |         pass
25 | 
26 |     def test_calibration_fault(self):
27 |         c_fault = CalibrationFault(offset=10)
28 |         true_value = 100
29 |         observed = c_fault.impact(true_value)
30 |         self.assertNotEqual(true_value, observed)
31 |         self.assertEqual(true_value+10, observed)
32 |         pass
33 | 
34 |     def test_interruption(self):
35 |         i_fault = InterruptionFault(likelihood=1)
36 |         true_value = 100
37 |         observed = i_fault.impact(true_value)
38 |         self.assertEqual(observed, None)
39 |         pass
40 | 
41 |     def test_typo(self):
42 |         t_fault = TypographicalFault(likelihood=1, severity=1)
43 |         true_value = "Hello World"
44 |         observed = t_fault.impact(true_value)
45 |         self.assertNotEqual(true_value, observed)
46 |         pass
47 | 


--------------------------------------------------------------------------------
/noisify/tests/helpers/test_fallible.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. Dstl (c) Crown Copyright 2019
 3 | """
 4 | import unittest
 5 | from noisify.faults import GaussianNoise
 6 | from noisify.helpers.fallible import Fallible, evaluate_faults
 7 | from ast import literal_eval
 8 | 
 9 | 
class TestFaultEvaluation(unittest.TestCase):
    """Checks that evaluate_faults normalises every accepted input form into a list of faults."""

    def _assert_gaussian(self, fault, sigma):
        # Shared check: the fault is a GaussianNoise whose saved init statement records the given sigma.
        self.assertIsInstance(fault, GaussianNoise)
        self.assertEqual(literal_eval(fault.init_statement), {'sigma': sigma})

    def test_strategy(self):
        """A callable returning a list of faults is called and its result used."""
        def fault_strategy():
            return [GaussianNoise(sigma=0.1)]
        evaluated = evaluate_faults(fault_strategy)
        self.assertEqual(len(evaluated), 1)
        self._assert_gaussian(evaluated[0], 0.1)

    def test_single_fault(self):
        """A bare fault is wrapped into a one-element list."""
        evaluated = evaluate_faults(GaussianNoise(sigma=0.1))
        self.assertEqual(len(evaluated), 1)
        self._assert_gaussian(evaluated[0], 0.1)

    def test_fault_collection(self):
        """A list of faults is returned with order and parameters intact."""
        evaluated = evaluate_faults([GaussianNoise(sigma=0.1), GaussianNoise(sigma=0.2)])
        self.assertEqual(len(evaluated), 2)
        for fault in evaluated:
            self.assertIsInstance(fault, GaussianNoise)
        first, second = evaluated
        self.assertEqual(literal_eval(first.init_statement), {'sigma': 0.1})
        self.assertEqual(literal_eval(second.init_statement), {'sigma': 0.2})
35 | 
36 | 
class TestFallible(unittest.TestCase):
    """Checks that a Fallible object applies its faults and reports which ones were triggered."""

    def test_fault_application(self):
        """A single gaussian fault must be reported as applied and must change the value."""
        constitutively_fallible_object = Fallible(GaussianNoise(sigma=0.1))
        faults, result = constitutively_fallible_object.apply_all_faults(100)
        self.assertEqual(len(faults), 1)
        self.assertIsInstance(faults[0], GaussianNoise)
        self.assertEqual(literal_eval(faults[0].init_statement), {'sigma': 0.1})
        # Compare by value: an identity check against the literal 100 would also pass for an
        # unchanged result that merely is a different object (e.g. 100.0).
        self.assertNotEqual(100, result)
46 | 


--------------------------------------------------------------------------------
/noisify/tests/attributes/test_attribute.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. Dstl (c) Crown Copyright 2019
 3 | """
 4 | import unittest
 5 | from noisify.attribute_readers import AttributeReader, DictValue, ObjectAttribute
 6 | from noisify.faults import GaussianNoise, InterruptionFault
 7 | 
 8 | 
 9 | class TestAttributeLookup(unittest.TestCase):
10 |     def test_object_attribute_lookup(self):
11 |         test_attribute = ObjectAttribute('test')
12 | 
13 |         class Foo:
14 |             test = 'test_attribute'
15 |         bar = Foo()
16 |         self.assertIs(test_attribute.get_value(bar), 'test_attribute')
17 |         pass
18 | 
19 |     def test_dict_lookup(self):
20 |         test_attribute = DictValue('test')
21 |         bar = {'test': 'test_attribute'}
22 |         self.assertIs(test_attribute.get_value(bar), 'test_attribute')
23 |         pass
24 | 
25 |     def test_dict_fail(self):
26 |         test_attribute = DictValue('test')
27 |         with self.assertRaises(TypeError):
28 |             test_attribute.get_value(None)
29 |         pass
30 | 
31 |     def test_attribute_fail(self):
32 |         test_attribute = ObjectAttribute('test')
33 |         with self.assertRaises(AttributeError):
34 |             test_attribute.get_value(None)
35 |         pass
36 | 
37 |     def test_addition(self):
38 |         test_attribute1 = AttributeReader('test', faults=GaussianNoise(sigma=1))
39 |         test_attribute2 = AttributeReader('test', faults=InterruptionFault(likelihood=1))
40 |         new_attribute = test_attribute1 + test_attribute2
41 |         self.assertIs(len(new_attribute.faults), 2)
42 |         self.assertIsInstance(new_attribute.faults[0], GaussianNoise)
43 |         self.assertIsInstance(new_attribute.faults[1], InterruptionFault)
44 |         pass
45 | 
46 |     def test_invalid_addition(self):
47 |         test_attribute1 = AttributeReader('test')
48 |         test_attribute2 = AttributeReader('test2')
49 |         with self.assertRaises(TypeError):
50 |             test_attribute1 + test_attribute2
51 |         pass
52 | 


--------------------------------------------------------------------------------
/noisify/helpers/fallible.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. Dstl (c) Crown Copyright 2019
 3 | """
 4 | import copy
 5 | 
 6 | 
 7 | class Fallible:
 8 |     """
 9 |     Fallible mixin, adds faults to an object as well as getters and setters.
10 |     Also provides methods for applying faults to an object.
11 |     """
12 |     def __init__(self, faults):
13 |         if faults:
14 |             self.faults = evaluate_faults(faults)
15 |         else:
16 |             self.faults = []
17 | 
18 |     def add_fault(self, fault):
19 |         """
20 |         Add a fault to the fallible object
21 | 
22 |         :param fault:
23 |         :return:
24 |         """
25 |         self.faults.append(fault)
26 |         return self
27 | 
28 |     def apply_all_faults(self, incompletely_flawed_object):
29 |         """
30 |         Runs through the fallible objects faults and applies them to an object, returns
31 |         activated faults as well as the finished object
32 | 
33 |         :param incompletely_flawed_object:
34 |         :return:
35 |         """
36 |         applied_faults = []
37 |         for fault in self.faults:
38 |             applied_fault, result = fault.apply(incompletely_flawed_object)
39 |             if applied_fault:
40 |                 incompletely_flawed_object = result
41 |                 applied_faults.append(applied_fault)
42 |         return applied_faults, incompletely_flawed_object
43 | 
44 |     def __add__(self, other):
45 |         clone = copy.deepcopy(self)
46 |         clone.faults += other.faults
47 |         return clone
48 | 
49 | 
def evaluate_faults(faults):
    """
    Normalise the ``faults`` argument of a fallible object into a list of faults.

    Enables faults to be given as a single fault, or a collection of faults,
    or a function to generate a fault or collection of faults (evaluated recursively).
    Items of a collection that are not Fault instances are dropped.

    :param faults: a Fault, an iterable of Faults, or a zero-argument callable producing either
    :return: list of Fault instances
    """
    # Imported at call time rather than at module level -- presumably to avoid a circular import; TODO confirm.
    from noisify.faults import Fault
    if isinstance(faults, Fault):
        return [faults]
    # Check callability explicitly instead of calling and catching TypeError: the catch-all also
    # swallowed TypeErrors raised inside the strategy itself and replaced them with an unrelated
    # "object is not iterable" error.
    if callable(faults):
        return evaluate_faults(faults())
    return [i for i in faults if isinstance(i, Fault)]
66 | 


--------------------------------------------------------------------------------
/noisify/faults/utilities.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. Dstl (c) Crown Copyright 2019
 3 | 
 4 | Fault utility functions, general purpose code that is used by multiple functions.
 5 | """
 6 | import random
 7 | 
 8 | 
 9 | def scramble(collection, scrambledness, confusion_range):
10 |     """
11 |     Scrambles the order of objects in a collection using a gaussian distribution, can lead to
12 |     duplicate objects
13 | 
14 |     :param collection:
15 |     :param scrambledness: How likely two objects are to be switched
16 |     :param confusion_range: How far apart objects can be confused with one another
17 |     :return:
18 |     """
19 |     new_collection = []
20 |     visited_indices = set()
21 |     for index, item in enumerate(collection):
22 |         if random.random() <= scrambledness:
23 |             index = int(abs(index + (confusion_range * (random.random() - 0.5))))
24 |         if index not in visited_indices and index < len(collection):
25 |             new_collection.append(collection[index])
26 |             visited_indices.add(index)
27 |         else:
28 |             closest_remaining = float('inf')
29 |             closest_index = None
30 |             for unvisited_index in range(len(collection)):
31 |                 if unvisited_index in visited_indices:
32 |                     continue
33 |                 difference = abs(unvisited_index - index)
34 |                 if difference < closest_remaining:
35 |                     closest_remaining = difference
36 |                     closest_index = unvisited_index
37 |             new_collection.append(collection[closest_index])
38 |     return new_collection
39 | 
40 | 
def dropped_scramble(collection, scrambledness, confusion_range):
    """
    Scramble a collection and then randomly lose some of the scrambled objects.

    Each scrambled item is kept only when a fresh random draw exceeds a tenth of ``scrambledness``.

    :param collection: collection to scramble
    :param scrambledness: How likely two objects are to be switched
    :param confusion_range: How far apart objects can be confused with one another
    :return: list of the surviving scrambled items
    """
    shuffled = scramble(collection, scrambledness, confusion_range)
    survivors = []
    for item in shuffled:
        if random.random() > scrambledness / 10:
            survivors.append(item)
    return survivors
51 | 


--------------------------------------------------------------------------------
/noisify/tests/faults/test_report_faults.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. Dstl (c) Crown Copyright 2019
 3 | """
 4 | import unittest
 5 | from noisify.faults import ScrambleAttributes, ConfuseSpecificAttributes, GaussianNoise
 6 | from noisify.reporters import Reporter
 7 | from noisify.attribute_readers import DictValue
 8 | 
 9 | 
class TestReportFaults(unittest.TestCase):
    """Checks faults that act on a whole report rather than on one attribute."""

    def test_attribute_scrambling(self):
        """Scrambling must change the values while keeping the same set of keys."""
        readers = [DictValue('att%d' % number) for number in range(50)]
        reporter = Reporter(attributes=readers)
        truth = {'att%d' % number: number for number in range(50)}
        observed = reporter(truth).observed
        scrambler = ScrambleAttributes(likelihood=100)
        scrambled = scrambler.impact_dictionary(observed)
        self.assertNotEqual(truth, scrambled)
        self.assertEqual(set(truth.keys()), set(scrambled.keys()))

    def test_specific_attribute_confusion(self):
        """With likelihood=1 the two named attributes must swap their values."""
        confusion = ConfuseSpecificAttributes('att1', 'att2', likelihood=1)
        reporter = Reporter(attributes=[DictValue('att1'), DictValue('att2')],
                            faults=confusion)
        truth = {'att1': 1, 'att2': 2}
        observed = reporter(truth).observed
        self.assertEqual(truth['att1'], observed['att2'])
        self.assertEqual(truth['att2'], observed['att1'])
        self.assertNotEqual(truth, observed)

    def test_attribute_fault_mapping(self):
        """An attribute fault given to the reporter must alter the explicitly listed attributes."""
        reporter = Reporter(attributes=[DictValue('att1'), DictValue('att2')], faults=GaussianNoise(sigma=1))
        truth = {'att1': 1, 'att2': 2}
        observed = reporter(truth).observed
        self.assertNotEqual(truth, observed)

    def test_introspected_attribute_fault_mapping(self):
        """Without listed attributes the reporter must still alter the data and keep its keys."""
        reporter = Reporter(faults=GaussianNoise(sigma=1))
        truth = {'att1': 1, 'att2': 2}
        observed = reporter(truth).observed
        self.assertNotEqual(truth, observed)
        self.assertEqual(truth.keys(), observed.keys())
46 | 


--------------------------------------------------------------------------------
/noisify/tests/reporters/test_series.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. Dstl (c) Crown Copyright 2019
 3 | """
 4 | import unittest
 5 | from noisify.reporters import Noisifier, Reporter
 6 | from noisify.attribute_readers import DictValue, ObjectAttribute
 7 | from noisify.faults import GaussianNoise
 8 | 
 9 | 
class TestSeries(unittest.TestCase):
    """Checks that a Noisifier pipes a series of truths through its reporter."""

    def test_dict_series_call(self):
        """Each dictionary must yield one report: the noiseless value intact, the noisy value changed."""
        new_prototype = Reporter(attributes=[DictValue('noisy', faults=GaussianNoise(sigma=0.1)),
                                             DictValue('noiseless')])
        series_builder = Noisifier(reporter=new_prototype)
        data = [{'noisy': 100, 'noiseless': 100},
                {'noisy': 10, 'noiseless': 100},
                {'noisy': 100, 'noiseless': 10}]
        # Materialise the series once: it can be a one-shot generator, and counting its items first
        # would leave nothing for the per-report assertions below to iterate over.
        result = list(series_builder(data))
        self.assertEqual(len(result), 3)
        for truth, new in zip(data, result):
            self.assertEqual(truth['noiseless'], new['noiseless'])
            self.assertNotEqual(truth['noisy'], new['noisy'])

    def test_object_series_call(self):
        """Same as the dictionary case, but reading attributes from plain objects."""
        class Foo:
            def __init__(self, noisy, noiseless):
                self.noisy = noisy
                self.noiseless = noiseless

        new_prototype = Reporter(attributes=[ObjectAttribute('noisy', faults=GaussianNoise(sigma=0.1)),
                                             ObjectAttribute('noiseless')])
        series_builder = Noisifier(reporter=new_prototype)

        data = [Foo(100, 100), Foo(10, 100), Foo(100, 10)]
        # See test_dict_series_call: keep the reports in a list so they can be both counted and inspected.
        result = list(series_builder(data))

        self.assertEqual(len(result), 3)
        for truth, new in zip(data, result):
            self.assertEqual(truth.noiseless, new.noiseless)
            self.assertNotEqual(truth.noisy, new.noisy)

    def test_looping(self):
        """With loop=True the series must wrap around: the fifth report comes from the second truth."""
        new_prototype = Reporter(attributes=[DictValue('noisy', faults=GaussianNoise(sigma=0.1)),
                                             DictValue('noiseless')])
        series_builder = Noisifier(reporter=new_prototype)
        data = [{'noisy': 100, 'noiseless': 100},
                {'noisy': 10, 'noiseless': 50},
                {'noisy': 100, 'noiseless': 10}]
        result = series_builder(data, loop=True)
        for index, value in enumerate(result):
            if index == 4:
                break
        self.assertEqual(value['noiseless'], 50)
57 | 


--------------------------------------------------------------------------------
/noisify/reporters/series.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. Dstl (c) Crown Copyright 2019
 3 | """
 4 | from noisify.helpers import Fallible
 5 | from .reporter import Reporter
 6 | from pprint import pformat
 7 | import itertools
 8 | 
 9 | 
def is_atom(unknown_object):
    """
    Decide whether an object should be handled as one item (atom) or as a collection of items.

    Anything exposing ``shape`` is an atom. Otherwise an object is a collection only when it
    has ``__len__`` but no ``keys``; everything else is an atom.
    """
    return (
        hasattr(unknown_object, 'shape')
        or not hasattr(unknown_object, '__len__')
        or hasattr(unknown_object, 'keys')
    )
17 | 
18 | 
class Noisifier(Fallible):
    """The Noisifier class handles pipelining objects through an underlying reporter class,
    it can also be configured to apply faults at the pipeline level, such as confusing elements from one
    object to another."""
    def __init__(self, reporter=None, faults=None):
        self.reports = []
        Fallible.__init__(self, faults)
        # Fall back to a default Reporter when none (or a falsy one) is supplied.
        self.reporter = reporter if reporter else Reporter()

    def get_series(self, source_truths, key=None, loop=False):
        """
        Run one object or a series of objects through the reporter. Calling the noisifier
        object directly delegates to this method.

        :param source_truths: a series of objects (or a single object, which is wrapped in a list)
        :param key: function which will extract a name from each object to be used as an
        identifier for the resultant report.
        :param loop: whether to generate indefinitely by looping over the source truths
        :return: a report generator when the noisifier has no faults of its own; otherwise the
        value returned by apply_all_faults for the list of all reports

        NOTE(review): with faults configured the return value comes straight from apply_all_faults
        rather than being a report generator, and the reports are first collected into a list, which
        cannot complete for an endless loop=True cycle -- confirm both are intended.
        """
        truths = [source_truths] if is_atom(source_truths) else source_truths
        if loop:
            truths = itertools.cycle(truths)
        if not self.faults:
            return self._generate_reports(truths, key=key)
        return self.apply_all_faults(self._create_reports(truths, key=key))

    def _create_reports(self, source_truths, key=None):
        # Eager variant of _generate_reports: collects every report into a list.
        return list(self._generate_reports(source_truths, key=key))

    def _generate_reports(self, source_truths, key=None):
        # Lazily yields one report per truth, passing an identifier only when a key function is given.
        for truth in source_truths:
            if not key:
                yield self.reporter(truth)
                continue
            yield self.reporter(truth, identifier=key(truth))

    def __add__(self, other):
        # Combine the pipeline level faults first, then merge the two reporters.
        merged = Fallible.__add__(self, other)
        merged.reporter += other.reporter
        return merged

    def __call__(self, *args, **kwargs):
        return self.get_series(*args, **kwargs)

    def __repr__(self):
        description = {'Noisifier': self.reporter}
        return pformat(description)
68 | 


--------------------------------------------------------------------------------
/noisify/tests/reporters/test_reports.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. Dstl (c) Crown Copyright 2019
 3 | """
 4 | import unittest
 5 | from noisify.reporters import Reporter
 6 | from noisify.attribute_readers import DictValue
 7 | from noisify.faults import GaussianNoise, InterruptionFault
 8 | 
 9 | 
class TestReporters(unittest.TestCase):
    """Checks report creation, identifier numbering and addition of Reporter objects."""

    def test_attribute_faults(self):
        """Only the attribute carrying a fault may change, and only its fault may be recorded as triggered."""
        new_prototype = Reporter(attributes=[DictValue('noisy', faults=GaussianNoise(sigma=0.1)),
                                             DictValue('noiseless')])
        report0 = new_prototype({'noisy': 100, 'noiseless': 100})
        self.assertEqual(report0.truth, {'noisy': 100, 'noiseless': 100})
        self.assertEqual(report0.observed['noiseless'], 100)
        self.assertNotEqual(report0.observed['noisy'], 100)
        self.assertEqual(len(report0.triggered_faults['reporter']), 0)
        self.assertEqual(len(report0.triggered_faults['noiseless']), 0)
        self.assertEqual(len(report0.triggered_faults['noisy']), 1)
        self.assertIsInstance(report0.triggered_faults['noisy'][0], GaussianNoise)

    def test_auto_increment(self):
        """Reports created without an explicit identifier must be numbered 0, 1, ..."""
        new_prototype = Reporter()
        report0 = new_prototype.create_report({})
        report1 = new_prototype.create_report({})
        self.assertEqual(report0.identifier, 0)
        self.assertEqual(report1.identifier, 1)

    def test_addition(self):
        """Adding reporters must merge attributes by identifier and pool the faults of shared ones."""
        prototype1 = Reporter(attributes=[DictValue('noisy', faults=GaussianNoise(sigma=0.1)),
                                          DictValue('noisier', faults=GaussianNoise(sigma=0.2))])
        prototype2 = Reporter(attributes=[DictValue('noised', faults=GaussianNoise(sigma=0.1)),
                                          DictValue('noisier', faults=InterruptionFault(likelihood=0.1))])
        new_prototype = prototype1 + prototype2
        # assertEqual rather than assertIs: integer identity only holds thanks to small-int caching.
        self.assertEqual(len(new_prototype.attributes), 3)
        self.assertEqual(len(new_prototype.get_attribute_by_id('noisier').faults), 2)
41 | 
42 | 
class TestReports(unittest.TestCase):
    """Checks the dictionary-like conveniences offered by a report."""

    @staticmethod
    def _make_report():
        # Report over one noisy and one noiseless dictionary value, both starting at 100.
        reporter = Reporter(attributes=[DictValue('noisy', faults=GaussianNoise(sigma=0.1)),
                                        DictValue('noiseless')])
        return reporter({'noisy': 100, 'noiseless': 100})

    def test_method_delegation(self):
        """keys() on the report must list the reported attributes."""
        report = self._make_report()
        self.assertEqual(set(report.keys()), {'noisy', 'noiseless'})

    def test_slicing(self):
        """Item access on the report must return the observed value."""
        report = self._make_report()
        self.assertEqual(report['noiseless'], 100)
57 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Noisify
 2 | 
 3 | [![Documentation Status](https://readthedocs.org/projects/noisify/badge/?version=latest)](https://noisify.readthedocs.io/en/latest/?badge=latest)
 4 | 
 5 | Noisify is a simple light-weight library for augmenting and modifying data by adding realistic noise.
 6 |  
 7 | ## Introduction
 8 | 
 9 | Add some human noise (typos, things in the wrong boxes etc.)
10 | 
11 |     >>> from noisify.recipes import human_error
12 |     >>> test_data = {'this': 1.0, 'is': 2, 'a': 'test!'}
13 |     >>> human_noise = human_error(5)
14 |     >>> print(list(human_noise(test_data)))
15 |     [{'a': 'tset!', 'this': 2, 'is': 1.0}]
16 |     >>> print(list(human_noise(test_data)))
17 |     [{'a': 0.0, 'this': 'test!', 'is': 2}]
18 | 
19 | Add some machine noise (gaussian noise, data collection interruptions etc.)
20 | 
21 |     >>> from noisify.recipes import machine_error
22 |     >>> machine_noise = machine_error(5)
23 |     >>> print(list(machine_noise(test_data)))
24 |     [{'this': 1.12786393038729, 'is': 2.1387080616716307, 'a': 'test!'}]
25 | 
26 | If you want both, just add them together
27 | 
28 |     >>> combined_noise = machine_error(5) + human_error(5)
29 |     >>> print(list(combined_noise(test_data)))
30 |     [{'this': 1.23854334573554, 'is': 20.77848220943227, 'a': 'tst!'}]
31 | 
32 | Add noise to numpy arrays
33 | 
34 |     >>> import numpy as np
35 |     >>> test_array = np.arange(10)
36 |     >>> print(test_array)
37 |     [0 1 2 3 4 5 6 7 8 9]
38 |     >>> print(list(combined_noise(test_array)))
39 |     [[0.09172393 2.52539794 1.38823741 2.85571154 2.85571154 6.37596668
40 |                       4.7135771  7.28358719 6.83600156 9.40973018]]
41 | 
42 | Read an image
43 | 
44 |     >>> from PIL import Image
45 |     >>> test_image = Image.open('noisify.jpg')
46 |     >>> test_image.show()
47 | 
48 | 
49 | ![alt text](docs/_static/noisify.jpg "Original Image")
50 | 
51 | And now with noise
52 | 
53 |     >>> from noisify.recipes import human_error, machine_error
54 |     >>> combined_noise = machine_error(5) + human_error(5)
55 |     >>> for out_image in combined_noise(test_image):
56 |     ...     out_image.show()
57 | 
58 | ![alt text](docs/_static/noisy_noisify.jpg "Noisy Image")
59 | 
60 | *Noisify* allows you to build flexible data augmentation pipelines for arbitrary objects.
61 | All pipelines are built from simple high level objects, plugged together like lego.
62 | Use noisify to stress test application interfaces, verify data cleaning pipelines, and to make your ML algorithms more
63 | robust to real world conditions.
64 | 
65 | ## Installation
66 | 
67 | #### Prerequisites
68 | Noisify relies on Python 3.5+
69 |  
70 | #### Installation from PyPI
71 |     $ pip install noisify
72 | 
73 | ## Additional Information
74 | 
75 | Full documentation is available at [ReadTheDocs](https://noisify.readthedocs.io/en/latest/).
76 | ## Licence
77 | 
78 | Dstl (c) Crown Copyright 2019
79 | 
80 | Noisify is released under the MIT licence
81 | 


--------------------------------------------------------------------------------
/noisify/tests/ecosystem/test_pillow.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. Dstl (c) Crown Copyright 2019
 3 | """
 4 | import unittest
 5 | from PIL import Image
 6 | import hashlib
 7 | import numpy as np
 8 | from noisify.faults import GaussianNoise, InterruptionFault, CalibrationFault, ScrambleAttributes
 9 | import pkg_resources
10 | 
11 | 
class TestPillow(unittest.TestCase):
    """Checks that faults applied to a Pillow image change its pixels but keep its size and mode."""

    @staticmethod
    def _open_test_image():
        # The sample image is resolved relative to this test module.
        return Image.open(str(pkg_resources.resource_filename(__name__, 'test_image.jpeg')))

    @staticmethod
    def _pixel_digest(image):
        # SHA-512 over the image converted to a numpy array.
        hasher = hashlib.sha512()
        hasher.update(np.array(image))
        return hasher.digest()

    def _check_fault(self, apply_fault):
        # Shared scenario: hash the image, apply the fault, hash again, then compare.
        source = self._open_test_image()
        digest_before = self._pixel_digest(source)
        altered = apply_fault(source)
        digest_after = self._pixel_digest(altered)
        self.assertNotEqual(digest_before, digest_after)
        self.assertEqual(source.size, altered.size)
        self.assertEqual(source.mode, altered.mode)

    def test_gaussian(self):
        """Gaussian noise must alter the pixel data."""
        self._check_fault(GaussianNoise(30).impact)

    def test_interruption(self):
        """An interruption fault must alter the pixel data."""
        self._check_fault(InterruptionFault(0.5).impact)

    def test_calibration(self):
        """A calibration fault must alter the pixel data."""
        self._check_fault(CalibrationFault(25).impact)

    def test_scramble(self):
        """Attribute scrambling, via its pillow_image implementation, must alter the pixel data."""
        self._check_fault(ScrambleAttributes(1).pillow_image)
64 | 


--------------------------------------------------------------------------------
/noisify/attribute_readers/attribute_readers.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. Dstl (c) Crown Copyright 2019
 3 | 
 4 | Attribute Readers allow faults to be directed to specific attributes of an input object. These do
 5 | not need to be literal attributes, they can be values in a dictionary or columns in a database for
 6 | example, as long as they can be accessed via a key.
 7 | """
 8 | from noisify.helpers import Fallible
 9 | from pprint import pformat
10 | 
11 | 
class AttributeReader(Fallible):
    """
    Interface for reading a single keyed value from an object and writing a new value back.

    Subclasses supply ``get_value`` and ``update_value``; this base class adds fault
    application (``measure``), addition of readers and a printable representation.
    """
    def __init__(self, attribute_identifier, faults=None):
        """
        Store the key of the attribute to read and the faults to apply to it.

        :param attribute_identifier: key used by subclasses to locate the value
        :param faults: passed straight through to ``Fallible.__init__``
        """
        Fallible.__init__(self, faults)
        self.attribute_identifier = attribute_identifier

    def get_value(self, truth_object):
        """(Part of the interface) Subclasses return the ground truth value of this attribute"""
        raise NotImplementedError

    def measure(self, truth_object):
        """Reads the ground truth via ``get_value`` and returns the result of ``apply_all_faults`` on it"""
        ground_truth = self.get_value(truth_object)
        return self.apply_all_faults(ground_truth)

    def update_value(self, output_object, new_value):
        """(Part of the interface) Subclasses write new_value into output_object at this attribute"""
        raise NotImplementedError

    def __add__(self, other):
        """Combine two readers via ``Fallible.__add__``; both must share one attribute_identifier"""
        identifiers_match = self.attribute_identifier == other.attribute_identifier
        if not identifiers_match:
            raise TypeError('Attribute addition requires attribute_readers of the same type')
        return Fallible.__add__(self, other)

    def __repr__(self):
        """Pretty-printed ``(identifier, {'faults': [...]})`` tuple"""
        fault_summary = {'faults': list(self.faults)}
        return pformat((self.attribute_identifier, fault_summary))
48 | 
49 | 
class DictValue(AttributeReader):
    """
    Attribute reader backed by item access (``obj[key]``), e.g. for dictionaries.
    """
    def get_value(self, truth_object):
        """Returns ``truth_object[attribute_identifier]``"""
        key = self.attribute_identifier
        return truth_object[key]

    def update_value(self, output_object, new_value):
        """Assigns new_value at ``output_object[attribute_identifier]`` and returns output_object"""
        key = self.attribute_identifier
        output_object[key] = new_value
        return output_object
62 | 
63 | 
class ObjectAttribute(AttributeReader):
    """
    Attribute reader backed by real object attributes (``getattr`` / ``setattr``).
    """
    def get_value(self, truth_object):
        """Returns the attribute named attribute_identifier from truth_object"""
        name = self.attribute_identifier
        return getattr(truth_object, name)

    def update_value(self, output_object, new_value):
        """Sets the attribute named attribute_identifier on output_object and returns output_object"""
        name = self.attribute_identifier
        setattr(output_object, name, new_value)
        return output_object
76 | 


--------------------------------------------------------------------------------
/docs/basics/advanced.rst:
--------------------------------------------------------------------------------
 1 | .. _advanced:
 2 | 
 3 | Advanced Usage
 4 | ==============
 5 | This guide covers more advanced topics in noisify.
 6 | 
 7 | Defining Faults
 8 | ---------------
 9 | Faults are defined by subclassing the base Fault class:
10 | 
11 |     >>> from noisify.faults import Fault
12 |     >>> import random
13 |     >>> class AddOneFault(Fault):
14 |     ...     def __init__(self, likelihood=1.0):
15 |     ...         self.likelihood = likelihood
16 |     ...
17 |     ...     @register_implementation(priority=1)
18 |     ...     def add_to_string(self, string_object):
19 |     ...         return string_object + "1"
20 |     ...
21 | 
22 | Let's unpack this definition.
23 | 
We have the constructor, which behaves as expected; in this case it adds a likelihood attribute to the object.
25 | 
26 | We also have an implementation. This describes how a fault will act on the data it is given.
27 | 
28 | Implementations And The Dispatch Queue
29 | --------------------------------------
30 | 
31 | The power of noisify lies in its ability to take a large variety of different data types and intelligently apply noise.
32 | This mechanism is managed through the Dispatch Queue.
33 | 
34 | When an implementation is written for a given fault, it is decorated using the @register_implementation(priority=x)
35 | decorator. This gives the implementation its place within the queue. When a fault is called upon an unknown object it
36 | will attempt to apply each implementation in the queue to it in sequence. If all fail it will return the original object
37 | unaffected. Bigger numbers come first in the queue, so in the below example numpy_array will be called before
38 | python_numeric.
39 | 
Let's look at some source code for an example:
41 | 
42 | 
43 |     >>> class GaussianNoise(AttributeFault):
44 |     ...     def __init__(self, sigma=0):
45 |     ...         AttributeFault.__init__(self, sigma=sigma)
46 |     ...         self.sigma = sigma
47 |     ...         pass
48 |     ...
49 |     ...     @register_implementation(priority=10)
50 |     ...     def numpy_array(self, array_like_object):
51 |     ...         import numpy as np
52 |     ...         noise_mask = np.random.normal(scale=self.sigma, size=array_like_object.size)
53 |     ...         return array_like_object + noise_mask
54 |     ...
55 |     ...     @register_implementation(priority=1)
56 |     ...     def python_numeric(self, python_numeric_object):
57 |     ...         return random.gauss(python_numeric_object, self.sigma)
58 | 
This fault will apply a gaussian noise filter to the input data. If the python_numeric implementation is called on a
numpy array then a single random value will be added to the entire array, which is not the desired behaviour. To fix
this, a second implementation with higher priority kicks in for numpy array-like objects; this adds a separate offset
to each value independently.
63 | 
64 | Dispatch Through Type Annotations
65 | ---------------------------------
66 | 
Dispatch should be handled through duck typing where possible. However, we recognise that cases exist where explicit
dispatch on type is needed; this can be done through type annotations on the relevant implementations as follows.
69 | 
70 | >>> class TypographicalFault(AttributeFault):
71 | ...     @register_implementation(priority=1)
72 | ...     def impact_string(self, string_object: str):
73 | ...         return typo(string_object, self.severity)
74 | ...
75 | ...     @register_implementation(priority=1)
76 | ...     def impact_int(self, int_object: int):
77 | ...         return int(self.impact_string(str(int_object)) or 0)
78 | 
79 | Implementation Dispatch And Inheritance
80 | ---------------------------------------
81 | 
82 | Implementations are passed down through inheritance. The main example of this is the AttributeFault fault type,
83 | which adds a single implementation which will attempt to map the fault onto all elements of the input object. This can
84 | be given to a Reporter to cause it to apply the fault to all of its attributes. Negative priorities can be used in base
85 | class implementations to ensure that they are resolved last. Negative priorities should not be used in normal fault
86 | implementation annotation.
87 | 
88 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
  1 | .. Noisify documentation master file, created by
  2 |    sphinx-quickstart on Wed Feb 13 09:46:40 2019.
  3 |    You can adapt this file completely to your liking, but it should at least
  4 |    contain the root `toctree` directive.
  5 | 
  6 | Noisify: All purpose data augmentation
  7 | ======================================
  8 | 
  9 | Release v\ |version|. (:ref:`Installation <install>`)
 10 | 
 11 | 
**Noisify** is a simple, lightweight library for augmenting and modifying data by adding 'noise'.
 13 | 
 14 | 
 15 | -------------------
 16 | 
**Let's make some noise**
 18 | 
 19 | Add some human noise (typos, things in the wrong boxes etc.)
 20 | 
 21 |     >>> from noisify.recipes import human_error
 22 |     >>> test_data = {'this': 1.0, 'is': 2, 'a': 'test!'}
 23 |     >>> human_noise = human_error(5)
 24 |     >>> print(list(human_noise(test_data)))
 25 |     [{'a': 'tset!', 'this': 2, 'is': 1.0}]
 26 |     >>> print(list(human_noise(test_data)))
 27 |     [{'a': 0.0, 'this': 'test!', 'is': 2}]
 28 | 
 29 | Add some machine noise (gaussian noise, data collection interruptions etc.)
 30 | 
 31 |     >>> from noisify.recipes import machine_error
 32 |     >>> machine_noise = machine_error(5)
 33 |     >>> print(list(machine_noise(test_data)))
 34 |     [{'this': 1.12786393038729, 'is': 2.1387080616716307, 'a': 'test!'}]
 35 | 
 36 | If you want both, just add them together
 37 | 
 38 |     >>> combined_noise = machine_error(5) + human_error(5)
 39 |     >>> print(list(combined_noise(test_data)))
 40 |     [{'this': 1.23854334573554, 'is': 20.77848220943227, 'a': 'tst!'}]
 41 | 
 42 | Add noise to numpy arrays
 43 | 
 44 |     >>> import numpy as np
 45 |     >>> test_array = np.arange(10)
 46 |     >>> print(test_array)
 47 |     [0 1 2 3 4 5 6 7 8 9]
 48 |     >>> print(list(combined_noise(test_array)))
 49 |     [[0.09172393 2.52539794 1.38823741 2.85571154 2.85571154 6.37596668
 50 |                       4.7135771  7.28358719 6.83600156 9.40973018]]
 51 | 
 52 | Read an image
 53 | 
 54 |    >>> from PIL import Image
   >>> test_image = Image.open('noisify.jpg')
 56 |    >>> test_image.show()
 57 | 
 58 | .. image:: _static/noisify.jpg
 59 |    :width: 339px
 60 |    :height: 158px
 61 |    :scale: 70 %
 62 |    :alt: unchanged image
 63 |    :align: center
 64 | 
 65 | And now with noise
 66 | 
 67 |    >>> from noisify.recipes import human_error, machine_error
 68 |    >>> combined_noise = machine_error(5) + human_error(5)
 69 |    >>> for out_image in combined_noise(test_image):
 70 |    ...     out_image.show()
 71 | 
 72 | .. image:: _static/noisy_noisify.jpg
 73 |    :width: 339px
 74 |    :height: 158px
 75 |    :scale: 70 %
 76 |    :alt: image with random noise
 77 |    :align: center
 78 | 
 79 | **Noisify** allows you to build flexible data augmentation pipelines for arbitrary objects.
 80 | All pipelines are built from simple high level objects, plugged together like lego.
 81 | Use noisify to stress test application interfaces, verify data cleaning pipelines, and to make your ML algorithms more
 82 | robust to real world conditions.
 83 | 
 84 | Features
 85 | ----------------
 86 | 
 87 | Noisify provides data augmentation through a simple high level abstraction
 88 | 
 89 | - Build reporters to apply augmentation to any object, images, dataframes, database interfaces etc.
 90 | - Compose augmentations from configurable flaw objects
 91 | - Build recipes to deploy pipelines simply
 92 | - Everything is composable, everything is polymorphic
 93 | 
 94 | Noisify is built for Python 3+.
 95 | 
 96 | The Basics
 97 | ----------
 98 | 
 99 | A brief high level guide of how to use noisify, mostly prose with illustrative examples.
100 | 
101 | .. toctree::
102 |    :maxdepth: 2
103 | 
104 |    basics/introduction
105 |    basics/install
106 |    basics/quickstart
107 |    basics/advanced
108 | 
109 | 
110 | The Community Guide
111 | -------------------
112 | 
113 | Our release process and community support process.
114 | 
115 | .. toctree::
116 |    :maxdepth: 2
117 | 
118 |    community/support
119 |    community/updates
120 |    community/release-process
121 | 
122 | The API Documentation / Guide
123 | -----------------------------
124 | 
125 | Full documentation of the noisify API
126 | 
127 | .. toctree::
128 |    :maxdepth: 2
129 | 
130 |    api
131 | 
132 | 


--------------------------------------------------------------------------------
/noisify/faults/report_faults.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. Dstl (c) Crown Copyright 2019
 3 | 
 4 | Report level faults typically comprise faults that depend on multiple attributes. For example switching attribute
 5 | values.
 6 | """
 7 | from .fault import Fault
 8 | from .utilities import scramble
 9 | import random
10 | import copy
11 | 
12 | 
class ScrambleAttributes(Fault):
    """Switches the values of different attribute_readers within the object. By default it will
    apply to all attribute_readers."""
    def __init__(self, likelihood=0.1, attribute_identifiers=None):
        """
        Swaps the values of different attribute_readers in an object, can be restricted to a subset
        of all the attribute_readers using the optional attribute_identifiers keyword argument

        :param likelihood: forwarded to ``Fault.__init__`` as the activation likelihood and also
            stored as ``scrambledness``, which controls how much is scrambled
        :param attribute_identifiers: optional collection of keys; when given, only these keys
            take part in dictionary scrambling. ``None`` means every key.
        """
        self.scrambledness = likelihood
        self.attribute_identifiers = attribute_identifiers
        Fault.__init__(self, likelihood=likelihood, attribute_identifiers=attribute_identifiers)

    @register_implementation(priority=1)
    def numpy_array(self, array_like):
        """Swaps random cells in a numpy array-like object, returns a modified copy"""
        import numpy as np
        indexed_values = list(np.ndenumerate(array_like))
        out_array = array_like.copy()
        # NOTE(review): the meaning of the trailing 3 is defined by utilities.scramble - confirm there.
        scrambled_values = scramble([value for _, value in indexed_values], self.scrambledness, 3)
        for (coordinate, _), value in zip(indexed_values, scrambled_values):
            out_array[coordinate] = value
        return out_array

    @register_implementation(priority=5)
    def pillow_image(self, pillow_image):
        """Swaps random pixels in a PIL Image, returns a modified copy"""
        x_size, y_size = pillow_image.size
        out_image = pillow_image.copy()
        pixels = out_image.load()
        # Number of swaps scales with the pixel count; the scrambledness factor is capped at 1.0.
        swap_count = int(x_size * y_size * min(float(self.scrambledness)/10, 1.0) / 4)
        for _ in range(swap_count):
            x1 = random.randint(0, x_size-1)
            x2 = random.randint(0, x_size-1)
            y1 = random.randint(0, y_size-1)
            y2 = random.randint(0, y_size-1)
            # Exchange both pixels so that no pixel value is lost (a genuine swap,
            # rather than overwriting one pixel with the other).
            pixels[x1, y1], pixels[x2, y2] = pixels[x2, y2], pixels[x1, y1]
        return out_image

    @register_implementation(priority=10)
    def impact_dictionary(self, dictionary_object):
        """Swaps random values in a dictionary, honouring attribute_identifiers if it was given.

        Keys outside the requested subset keep their original values.
        """
        all_identifiers = list(dictionary_object.keys())
        if self.attribute_identifiers is None:
            confusable_attribute_identifiers = all_identifiers
        else:
            confusable_attribute_identifiers = [key for key in all_identifiers
                                                if key in self.attribute_identifiers]
        new_attribute_order = scramble(confusable_attribute_identifiers, self.scrambledness, 3)
        # Start from a shallow copy so that untouched keys survive with their original values.
        output = dict(dictionary_object)
        for expected_attribute, found_attribute in zip(confusable_attribute_identifiers, new_attribute_order):
            output[expected_attribute] = dictionary_object[found_attribute]
        return output
62 | 
63 | 
class ConfuseSpecificAttributes(Fault):
    """Exchanges the values held under one specific pair of keys in a given object"""
    def __init__(self, attribute1, attribute2, likelihood=0):
        """Stores the two keys whose values are to be exchanged, and forwards them together
        with the likelihood of the swap taking place to ``Fault.__init__``"""
        self.attribute1 = attribute1
        self.attribute2 = attribute2
        Fault.__init__(self, attribute1, attribute2, likelihood=likelihood)

    @register_implementation(priority=10)
    def impact_dictionary(self, dictionary_object):
        """Support for dictionary like objects, returns a deep copy with the two values exchanged"""
        first_key, second_key = self.attribute1, self.attribute2
        swapped = copy.deepcopy(dictionary_object)
        # Read both values from the input before writing, so a missing key fails up front.
        value_for_first = dictionary_object[second_key]
        value_for_second = dictionary_object[first_key]
        swapped[first_key] = value_for_first
        swapped[second_key] = value_for_second
        return swapped
80 | 
81 | 
class LoseEntireReport(Fault):
    """
    Replaces the whole report with None whenever the fault activates.
    """
    def __init__(self, likelihood=0):
        """
        Instantiate with likelihood of interruption as a 0-1 float.

        :param likelihood: forwarded to ``Fault.__init__``
        """
        Fault.__init__(self, likelihood=likelihood)

    @register_implementation(priority=10)
    def impact_truth(self, truth):
        """Discards the input and returns None"""
        lost_report = None
        return lost_report
99 | 


--------------------------------------------------------------------------------
/noisify/reporters/reporter.py:
--------------------------------------------------------------------------------
  1 | """
  2 | .. Dstl (c) Crown Copyright 2019
  3 | """
  4 | from noisify.helpers import Fallible
  5 | from noisify.attribute_readers import dictionary_lookup
  6 | from pprint import pformat
  7 | from copy import deepcopy
  8 | 
  9 | from noisify.reporters.report import Report
 10 | 
 11 | 
 12 | class Reporter(Fallible):
 13 |     """The most important class in Noisify!
 14 | 
 15 |     Reporters define how objects should be changed. They can be as specific or a general
 16 |     as needed.
 17 |     """
 18 |     def __init__(self, attributes=None, attribute_type=dictionary_lookup, faults=None):
 19 |         self.attributes = attributes or []
 20 |         self.attribute_introspection_strategy = attribute_type
 21 |         Fallible.__init__(self, faults=faults)
 22 |         self.report_index = 0
 23 | 
 24 |     def create_report(self, truth_object, identifier=None):
 25 |         """
 26 |         Calling the reporter object directly on an object will call this method.
 27 | 
 28 |         :param truth_object: Anything
 29 |         :param identifier: Optional identifier for the output report, defaults to a serial integer
 30 |         :return: A report for the given input object
 31 |         """
 32 |         identifier = identifier or self.report_index
 33 |         self.report_index += 1
 34 |         triggered_faults, measures = self._measure(truth_object)
 35 |         return Report(identifier, self._get_truth(truth_object), triggered_faults, measures)
 36 | 
 37 |     def _measure(self, truth_object):
 38 |         """
 39 |         :param truth_object: object to apply flaws to
 40 |         :return: a dictionary of faults triggered for different attribute_readers, and the final noised object
 41 |         """
 42 |         measurement, triggered_faults = self._get_attribute_measurements(truth_object)
 43 |         applied_faults, flawed_measurement = self.apply_all_faults(measurement)
 44 |         triggered_faults['reporter'] = applied_faults
 45 |         return triggered_faults, flawed_measurement
 46 | 
 47 |     def _get_attribute_measurements(self, truth_object):
 48 |         output_object = deepcopy(truth_object)
 49 |         triggered_faults = {}
 50 |         attributes = self._get_or_introspect_attributes(truth_object)
 51 |         if not attributes:
 52 |             return truth_object, {}
 53 |         for attribute in attributes:
 54 |             faults, new_value = attribute.measure(truth_object)
 55 |             attribute.update_value(output_object, new_value)
 56 |             triggered_faults[attribute.attribute_identifier] = faults
 57 |         return output_object, triggered_faults
 58 | 
 59 |     def _get_or_introspect_attributes(self, truth_object):
 60 |         return self.attributes or list(self.attribute_introspection_strategy(truth_object))
 61 | 
 62 |     def _get_truth(self, truth_object):
 63 |         attributes = self._get_or_introspect_attributes(truth_object)
 64 |         if not attributes:
 65 |             return truth_object
 66 |         truth = {}
 67 |         for attribute in attributes:
 68 |             truth[attribute.attribute_identifier] = attribute.get_value(truth_object)
 69 |         return truth
 70 | 
 71 |     def get_attribute_by_id(self, attribute_identifier):
 72 |         """Getter method for report attribute_readers"""
 73 |         for attribute in self.attributes:
 74 |             if attribute.attribute_identifier == attribute_identifier:
 75 |                 return attribute
 76 | 
 77 |     def __call__(self, *args, **kwargs):
 78 |         return self.create_report(*args, **kwargs)
 79 | 
 80 |     def __add__(self, other):
 81 |         output = Fallible.__add__(self, other)
 82 |         new_attributes = self.merge_attributes(other, output)
 83 |         output.attributes = new_attributes
 84 |         return output
 85 | 
 86 |     @staticmethod
 87 |     def merge_attributes(report1, report2):
 88 |         """Merges attribute_readers between two reporters, used for reporter addition"""
 89 |         report1_attributes = set(a.attribute_identifier for a in report1.attributes)
 90 |         report2_attributes = set(a.attribute_identifier for a in report2.attributes)
 91 |         new_attributes = []
 92 |         for attribute_id in report1_attributes & report2_attributes:
 93 |             local = report1.get_attribute_by_id(attribute_id) + report2.get_attribute_by_id(attribute_id)
 94 |             new_attributes.append(local)
 95 |         for attribute_id in report1_attributes - report2_attributes:
 96 |             new_attributes.append(report1.get_attribute_by_id(attribute_id))
 97 |         for attribute_id in report2_attributes - report1_attributes:
 98 |             new_attributes.append(report2.get_attribute_by_id(attribute_id))
 99 |         return new_attributes
100 | 
101 |     def __repr__(self):
102 |         return pformat({'Reporter':
103 |                     {'Attributes': [a for a in self.attributes],
104 |                      'Faults': [f for f in self.faults]}
105 |                 })
106 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # Configuration file for the Sphinx documentation builder.
  4 | #
  5 | # This file does only contain a selection of the most common options. For a
  6 | # full list see the documentation:
  7 | # http://www.sphinx-doc.org/en/master/config
  8 | 
  9 | # -- Path setup --------------------------------------------------------------
 10 | 
 11 | # If extensions (or modules to document with autodoc) are in another directory,
 12 | # add these directories to sys.path here. If the directory is relative to the
 13 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 14 | #
 15 | import os
 16 | import sys
 17 | sys.path.insert(0, os.path.abspath('..'))
 18 | 
 19 | 
 20 | # -- Project information -----------------------------------------------------
 21 | 
 22 | project = 'Noisify'
 23 | copyright = 'Dstl (c) Crown Copyright 2019'
 24 | author = 'Declan Crew'
 25 | 
 26 | import noisify
 27 | 
 28 | # The short X.Y version
 29 | version = '1.0.0'
 30 | #version = noisify.__version__
 31 | 
 32 | # The full version, including alpha/beta/rc tags.
 33 | release = '1.0.0'
 34 | #release = noisify.__version__
 35 | 
 36 | 
 37 | # -- General configuration ---------------------------------------------------
 38 | 
 39 | # If your documentation needs a minimal Sphinx version, state it here.
 40 | #
 41 | # needs_sphinx = '1.0'
 42 | 
 43 | # Add any Sphinx extension module names here, as strings. They can be
 44 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 45 | # ones.
 46 | extensions = [
 47 |     'sphinx.ext.autodoc',
 48 |     'sphinx.ext.coverage',
 49 |     'sphinx.ext.imgmath',
 50 |     'sphinx.ext.viewcode',
 51 |     'sphinx.ext.githubpages',
 52 | ]
 53 | 
 54 | # Add any paths that contain templates here, relative to this directory.
 55 | templates_path = ['_templates']
 56 | 
 57 | # The suffix(es) of source filenames.
 58 | # You can specify multiple suffix as a list of string:
 59 | #
 60 | # source_suffix = ['.rst', '.md']
 61 | source_suffix = '.rst'
 62 | 
 63 | # The master toctree document.
 64 | master_doc = 'index'
 65 | 
 66 | # The language for content autogenerated by Sphinx. Refer to documentation
 67 | # for a list of supported languages.
 68 | #
 69 | # This is also used if you do content translation via gettext catalogs.
 70 | # Usually you set "language" from the command line for these cases.
 71 | language = None
 72 | 
 73 | # List of patterns, relative to source directory, that match files and
 74 | # directories to ignore when looking for source files.
 75 | # This pattern also affects html_static_path and html_extra_path.
 76 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
 77 | 
 78 | # The name of the Pygments (syntax highlighting) style to use.
 79 | pygments_style = None
 80 | 
 81 | 
 82 | # -- Options for HTML output -------------------------------------------------
 83 | 
 84 | # The theme to use for HTML and HTML Help pages.  See the documentation for
 85 | # a list of builtin themes.
 86 | #
 87 | html_theme = 'alabaster'
 88 | 
 89 | # Theme options are theme-specific and customize the look and feel of a theme
 90 | # further.  For a list of options available for each theme, see the
 91 | # documentation.
 92 | #
 93 | # html_theme_options = {}
 94 | 
 95 | # Add any paths that contain custom static files (such as style sheets) here,
 96 | # relative to this directory. They are copied after the builtin static files,
 97 | # so a file named "default.css" will overwrite the builtin "default.css".
 98 | html_static_path = ['_static']
 99 | 
100 | # Custom sidebar templates, must be a dictionary that maps document names
101 | # to template names.
102 | #
103 | # The default sidebars (for documents that don't match any pattern) are
104 | # defined by theme itself.  Builtin themes are using these templates by
105 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
106 | # 'searchbox.html']``.
107 | #
108 | # html_sidebars = {}
109 | 
110 | 
111 | # -- Options for HTMLHelp output ---------------------------------------------
112 | 
113 | # Output file base name for HTML help builder.
114 | htmlhelp_basename = 'Noisifydoc'
115 | 
116 | 
117 | # -- Options for LaTeX output ------------------------------------------------
118 | 
119 | latex_elements = {
120 |     # The paper size ('letterpaper' or 'a4paper').
121 |     #
122 |     # 'papersize': 'letterpaper',
123 | 
124 |     # The font size ('10pt', '11pt' or '12pt').
125 |     #
126 |     # 'pointsize': '10pt',
127 | 
128 |     # Additional stuff for the LaTeX preamble.
129 |     #
130 |     # 'preamble': '',
131 | 
132 |     # Latex figure (float) alignment
133 |     #
134 |     # 'figure_align': 'htbp',
135 | }
136 | 
137 | # Grouping the document tree into LaTeX files. List of tuples
138 | # (source start file, target name, title,
139 | #  author, documentclass [howto, manual, or own class]).
140 | latex_documents = [
141 |     (master_doc, 'Noisify.tex', 'Noisify Documentation',
142 |      'Declan Crew', 'manual'),
143 | ]
144 | 
145 | latex_logo = '_static/dstl.jpg'
146 | html_logo = '_static/dstl.jpg'
147 | 
148 | # -- Options for manual page output ------------------------------------------
149 | 
150 | # One entry per manual page. List of tuples
151 | # (source start file, name, description, authors, manual section).
152 | man_pages = [
153 |     (master_doc, 'noisify', 'Noisify Documentation',
154 |      [author], 1)
155 | ]
156 | 
157 | 
158 | # -- Options for Texinfo output ----------------------------------------------
159 | 
160 | # Grouping the document tree into Texinfo files. List of tuples
161 | # (source start file, target name, title, author,
162 | #  dir menu entry, description, category)
163 | texinfo_documents = [
164 |     (master_doc, 'Noisify', 'Noisify Documentation',
165 |      author, 'Noisify', 'One line description of project.',
166 |      'Miscellaneous'),
167 | ]
168 | 
169 | 
170 | # -- Options for Epub output -------------------------------------------------
171 | 
172 | # Bibliographic Dublin Core info.
173 | epub_title = project
174 | 
175 | # The unique identifier of the text. This can be a ISBN number
176 | # or the project homepage.
177 | #
178 | # epub_identifier = ''
179 | 
180 | # A unique identification for the text.
181 | #
182 | # epub_uid = ''
183 | 
184 | # A list of files that should not be packed into the epub file.
185 | epub_exclude_files = ['search.html']
186 | 
187 | 
188 | # -- Extension configuration -------------------------------------------------
189 | 
def skip_decorator(app, what, name, obj, skip, options):
    """Handler for the 'autodoc-skip-member' event.

    Forces members named 'register_implementation' to be skipped; for every
    other member the incoming ``skip`` decision is returned unchanged.
    """
    return True if name == 'register_implementation' else skip
194 | 
def setup(app):
    """Sphinx setup hook: registers skip_decorator for the 'autodoc-skip-member' event."""
    event_name = 'autodoc-skip-member'
    app.connect(event_name, skip_decorator)
197 | 


--------------------------------------------------------------------------------
/noisify/faults/fault.py:
--------------------------------------------------------------------------------
  1 | """
  2 | .. Dstl (c) Crown Copyright 2019
  3 | 
  4 | The base classes for faults.
  5 | """
  6 | from noisify.helpers import SavedInitStatement
  7 | from typing import get_type_hints
  8 | import random
  9 | from noisify.helpers.multi_dispatch import MultipleDispatch
 10 | 
 11 | 
 12 | class Fault(SavedInitStatement, metaclass=MultipleDispatch):
 13 |     """
 14 |     Fault base class.
 15 | 
 16 |     Requires implementations to be registered in its subclasses.
 17 |     Subclasses register implementations with the "register_implementation(priority=x)" decorator.
 18 | 
 19 |     All implementations will be attempted using a try except loop which will except Type, Attribute and Import errors.
 20 |     If no implementations succeed, the Fault will return the original object, unchanged.
 21 | 
 22 |     By default faults are constitutively active, this can be overridden at instantiation by providing a
 23 |     'likelihood' keyword argument with a probability of activation as a float.
 24 | 
 25 |     Example Usage:
 26 | 
 27 |         >>> class AddOneFault(Fault):
 28 |         ...     def condition(self, triggering_object):
 29 |         ...         return True
 30 |         ...
 31 |         ...     @register_implementation(priority=2)
 32 |         ...     def make_uppercase(self, lowercase_string):
 33 |         ...         return lowercase_string.upper()
 34 |         ...
 35 |         ...     @register_implementation(priority=1)
 36 |         ...     def add_to_int_string(self, integer_object):
 37 |         ...         return int(str(integer_object) + "1")
 38 |         ...
 39 |         >>> adder = AddOneFault()
 40 |         >>> adder.impact("testing priority")
 41 |         'TESTING PRIORITY'
 42 |         >>> adder.impact(1234)
 43 |         12341
 44 | 
 45 |     This decorator will also honour any type hints in the decorated function.
 46 | 
 47 |     Example:
 48 | 
 49 |         >>> class AddOneFault(Fault):
 50 |         ...     @register_implementation(priority=1)
 51 |         ...     def make_uppercase(self, lowercase_string: str):
 52 |         ...         print('Called uppercase function')
 53 |         ...         return lowercase_string.upper()
 54 |         ...
 55 |         ...     @register_implementation(priority=2)
 56 |         ...     def add_to_int_string(self, integer_object: int):
 57 |         ...         print('Called integer adding function')
 58 |         ...         return int(str(integer_object) + "1")
 59 |         ...
 60 |         >>> adder = AddOneFault()
 61 |         >>> adder.impact("testing annotation")
 62 |         Called uppercase function
 63 |         'TESTING ANNOTATION'
 64 |         >>> adder.impact(1234)
 65 |         Called integer adding function
 66 |         12341
 67 | 
 68 |     """
 69 |     def __init__(self, *args, **kwargs):
 70 |         SavedInitStatement.__init__(self, *args, **kwargs)
 71 |         if 'likelihood' in kwargs:
 72 |             self.likelihood = kwargs['likelihood']
 73 |         else:
 74 |             self.likelihood = 1.0
 75 |         pass
 76 | 
 77 |     def condition(self, triggering_object):
 78 |         """
 79 |         Base condition method, applies fault either constitutively or according to a likelihood argument at
 80 |         instantiation.
 81 | 
 82 |         :param triggering_object: Can be used to create object-type dependant activation in overridden methods
 83 |         :return: Boolean of whether or not the fault applies
 84 |         """
 85 | 
 86 |         return random.random() < self.likelihood
 87 | 
 88 |     def apply(self, not_faulted_object):
 89 |         """
 90 |         Applies the fault to an object, returns self and the new object if the activation condition is met.
 91 | 
 92 |         :param not_faulted_object:
 93 |         :return: self or None, impacted_object
 94 |         """
 95 |         if self.condition(not_faulted_object):
 96 |             new_observation = self.impact(not_faulted_object)
 97 |             return self, new_observation
 98 |         return None, not_faulted_object
 99 | 
100 |     def impact(self, impacted_object):
101 |         """
102 |         Attempts to apply the fault to an object, cycles through all implementations until one succesfully executes.
103 |         If none execute it will return the original object, unharmed.
104 | 
105 |         :param impacted_object:
106 |         :return:
107 |         """
108 |         for implementation, priority in self._implementations:
109 |             type_hints = get_type_hints(implementation)
110 |             if type_hints:
111 |                 accepted_type = [i for i in type_hints.values()][0]
112 |                 if isinstance(impacted_object, accepted_type):
113 |                     return implementation(self, impacted_object)
114 |                 else:
115 |                     continue
116 |             try:
117 |                 return implementation(self, impacted_object)
118 |             except TypeError:
119 |                 continue
120 |             except AttributeError:
121 |                 continue
122 |             except ImportError:
123 |                 continue
124 |         return impacted_object
125 | 
126 |     @property
127 |     def name(self):
128 |         return type(self).__name__
129 | 
130 |     def __repr__(self):
131 |         return 'Fault: %s %s' % (self.name, self.init_statement)
132 | 
133 | 
class AttributeFault(Fault):
    """
    Derived base class for attribute level faults, adds mapping behaviour which enables attribute faults to be
    added at higher levels of data representation.

    For example:

        >>> from noisify.faults import GaussianNoise
        >>> noise = GaussianNoise(sigma=0.5)
        >>> noise.impact(100)
        100.66812113455995
        >>> noise.impact({'A group': 100, 'of numbers': 123})
        {'of numbers': 122.83439465953323, 'A group': 99.69284150349345}
    """

    def condition(self, triggering_object):
        """
        Overrides the condition method to be constitutively active for dicts, so that the likelihood is
        evaluated once per value at the mapping stage. Any other object defers to Fault.condition.

        :param triggering_object: Object the fault is about to be applied to
        :return: Boolean of whether or not the fault applies
        """
        if not isinstance(triggering_object, dict):
            return Fault.condition(self, triggering_object)
        return True

    @register_implementation(priority=0)
    def map_fault(self, truth_object):
        """
        Attempts to apply the fault to all subitems of the given object, in practice this means
        calling the fault on all values of a dict.

        Each value that passes the activation condition is replaced by its impacted version; the replacement
        is written back into truth_object itself, so the argument is modified in place.

        :param truth_object: Object exposing items() and item assignment
        :return: The same truth_object, after mapping
        :raises TypeError: in place of any AttributeError raised while mapping, e.g. when the object has no
            items() method
        """
        try:
            for key, item in truth_object.items():
                if not self.condition(item):
                    continue
                truth_object[key] = self.impact(item)
        except AttributeError:
            raise TypeError
        return truth_object
178 | 


--------------------------------------------------------------------------------
/noisify/faults/attribute_faults.py:
--------------------------------------------------------------------------------
  1 | """
  2 | .. Dstl (c) Crown Copyright 2019
  3 | Basic attribute level faults, mostly basic numeric manipulations. A good place to get started.
  4 | """
  5 | from noisify.faults.utilities import dropped_scramble
  6 | from .fault import AttributeFault
  7 | import random
  8 | 
  9 | 
 10 | class GaussianNoise(AttributeFault):
 11 |     """
 12 |     Applies a gaussian noise to a numeric object. 
 13 | 
 14 |     >>> noise = GaussianNoise(sigma=0.5)
 15 |     >>> noise.impact(27)
 16 |     28.08656007204934
 17 | 
 18 |     Numpy arrays like objects apply noise separately to each element.
 19 | 
 20 |     >>> import numpy as np
 21 |     >>> test = np.arange(5)
 22 |     >>> noise.impact(test)
 23 |     array([0.56983913, 0.92835482, 2.36240306, 2.87398093, 3.92371237])
 24 |     """
 25 |     def __init__(self, sigma=0):
 26 |         """
 27 |         Instantiate with sigma, mu is set to the value of the passed in object.
 28 |         :param sigma:
 29 |         """
 30 |         AttributeFault.__init__(self, sigma=sigma)
 31 |         self.sigma = sigma
 32 |         pass
 33 | 
 34 |     @register_implementation(priority=15)
 35 |     def pil_image(self, image_object):
 36 |         """Support for PIL image objects, undetectable unless high sigma given"""
 37 |         from PIL import Image
 38 |         import numpy as np
 39 | 
 40 |         input_size = image_size(image_object)
 41 |         noise_mask = np.random.normal(scale=self.sigma, size=input_size)
 42 |         image_array = np.array(image_object)
 43 |         output = Image.fromarray(np.uint8(np.clip(image_array + noise_mask, 0, 255)))
 44 |         return output
 45 | 
 46 |     @register_implementation(priority=12)
 47 |     def pandas_df(self, data_frame):
 48 |         """Support for pandas dataframes"""
 49 |         import numpy as np
 50 |         noise_mask = np.random.normal(scale=self.sigma, size=data_frame.shape)
 51 |         return data_frame.add(noise_mask)
 52 | 
 53 |     @register_implementation(priority=10)
 54 |     def numpy_array(self, array_like_object):
 55 |         """Support for numpy arrays"""
 56 |         import numpy as np
 57 |         noise_mask = np.random.normal(scale=self.sigma, size=array_like_object.size)
 58 |         return array_like_object + noise_mask
 59 | 
 60 |     @register_implementation(priority=1)
 61 |     def python_numeric(self, python_numeric_object):
 62 |         """Support for basic Python numeric types"""
 63 |         return random.gauss(python_numeric_object, self.sigma)
 64 | 
 65 | 
 66 | 
 67 | class UnitFault(AttributeFault):
 68 |     """
 69 |     Applies a user defined adjustment to the input numeric object. Useful for modelling unit errors.
 70 | 
 71 |     >>> def celsius_to_kelvin(celsius_value):
 72 |     ...     return celsius_value + 273.15
 73 |     ...
 74 |     >>> kelvin_fault = UnitFault(unit_modifier=celsius_to_kelvin)
 75 |     >>> kelvin_fault.impact(21)
 76 |     294.15
 77 |     """
 78 |     def __init__(self, likelihood=1.0, unit_modifier=None):
 79 |         """
 80 |         Instantiate with a function or lambda to apply the necessary unit conversion to a numeric
 81 |         :param unit_modifier:
 82 |         """
 83 |         if not unit_modifier:
 84 |             raise NotImplementedError('You need to provide a function to convert the units')
 85 |         AttributeFault.__init__(self, likelihood=likelihood, unit_modifier=unit_modifier)
 86 |         self.unit_modifier = unit_modifier
 87 |         pass
 88 | 
 89 |     @register_implementation(priority=15)
 90 |     def pil_image(self, image_object):
 91 |         """Support for PIL images"""
 92 |         from PIL import Image
 93 |         import numpy as np
 94 | 
 95 |         input_size = image_size(image_object)
 96 |         image_array = np.array(image_object)
 97 |         output = Image.fromarray(np.uint8(np.clip(self.unit_modifier(image_array), 0, 255)))
 98 |         return output
 99 | 
100 |     @register_implementation(priority=1)
101 |     def numeric(self, numeric_object):
102 |         """Support for basic numeric types, including dataframes and numpy arrays"""
103 |         return self.unit_modifier(numeric_object)
104 | 
105 | 
class CalibrationFault(UnitFault):
    """
    Subclass of UnitFault, adds a constant offset to the input numeric.

    >>> calibration_fault = CalibrationFault(10)
    >>> calibration_fault.impact(200)
    210
    """
    def __init__(self, offset=0):
        """
        :param offset: Numeric, added to every value the fault is applied to
        """
        def offsetter(reading):
            # Closure over offset; handed to UnitFault as its unit_modifier.
            return reading + offset

        UnitFault.__init__(self, unit_modifier=offsetter)
122 | 
123 | 
class InterruptionFault(AttributeFault):
    """
    Replaces input with None, activates according to set likelihood.
    Array-like inputs (numpy arrays, dataframes, PIL images) instead get individual entries set to 0.

    >>> interrupt = InterruptionFault(1.0)
    >>> interrupt.impact('This can be anything')

    >>>
    """
    def __init__(self, likelihood=0):
        """
        :param likelihood: Probability as 0-1 float
        """
        AttributeFault.__init__(self, likelihood=likelihood)

    @register_implementation(priority=15)
    def pil_image(self, image_object):
        """Support for PIL images"""
        from PIL import Image
        import numpy as np

        # The result is not needed, but image_size reads the image's mode/height/width attributes and so
        # raises AttributeError for anything that is not image-like.
        image_size(image_object)
        interrupted_pixels = self.numpy_array(np.array(image_object))
        return Image.fromarray(np.uint8(interrupted_pixels))

    @register_implementation(priority=12)
    def numpy_array(self, array_like_object):
        """Support numpy arrays and pandas dataframes"""
        import numpy as np
        draws = np.random.uniform(size=array_like_object.shape)
        # Work on a copy so the caller's array is left untouched.
        interrupted = array_like_object.copy()
        interrupted[draws < self.likelihood] = 0
        return interrupted

    @register_implementation(priority=-1)
    def impact_truth(self, truth):
        """Basic behaviour, just returns None!"""
        return None
164 | 
165 | 
class TypographicalFault(AttributeFault):
    """
    Applies a rough misspelling to the input using typo(), which is built on faults.utilities.dropped_scramble

    >>> from noisify.faults import TypographicalFault
    >>> typo_fault = TypographicalFault(1.0, 1)
    >>> typo_fault.impact('This is the original text')
    'Thhiisith heiginal etxt'
    """
    def __init__(self, likelihood=0, severity=0):
        """
        Instantiate with a likelihood of making a typo, and a severity metric, severities significantly larger than 1
        can lead to unstable behaviours
        :param likelihood: Probability as 0-1 float
        :param severity: Passed on to typo() for every impacted value
        """
        AttributeFault.__init__(self, likelihood=likelihood, severity=severity)
        self.severity = severity

    @register_implementation(priority=1)
    def impact_string(self, string_object: str):
        """Scrambles strings"""
        return typo(string_object, self.severity)

    @register_implementation(priority=1)
    def impact_int(self, int_object: int):
        """
        Scrambles ints.

        The number is scrambled as text and converted back. If the scrambled text is no longer a valid
        integer (for example the minus sign of a negative number has moved), the surviving digits are
        salvaged instead of letting the ValueError escape.

        :param int_object: Integer to scramble
        :return: Scrambled integer, 0 if no digit survived
        """
        scrambled = self.impact_string(str(int_object))
        try:
            return int(scrambled or 0)
        except ValueError:
            return int(_salvage_number(scrambled, allow_point=False))

    @register_implementation(priority=1)
    def impact_float(self, float_object: float):
        """
        Scrambles floats, ensures still valid before returning.

        Duplicate decimal points are dropped first. If the text still does not convert (a moved sign or
        exponent marker, the letters of 'nan'/'inf', a lone point), the surviving digits and first point
        are salvaged instead of letting the ValueError escape.

        :param float_object: Float to scramble
        :return: Scrambled float, 0.0 if no digit survived
        """
        scrambled_float = self.impact_string(str(float_object))
        point_found = False
        clean_float = []
        for char in scrambled_float:
            if char == '.':
                if point_found:
                    continue
                point_found = True
            clean_float.append(char)
        candidate = ''.join(clean_float)
        try:
            return float(candidate or 0)
        except ValueError:
            return float(_salvage_number(candidate, allow_point=True))


def _salvage_number(text, allow_point):
    """
    Builds a parseable numeric literal from scrambled text.

    Keeps the ASCII digits in order, plus the first '.' when allow_point is set, and restores a leading
    minus sign if a '-' survived anywhere in the text.

    :param text: Scrambled text that int()/float() rejected
    :param allow_point: Whether a decimal point may be kept
    :return: String accepted by int() (or float() when allow_point is set); '0' if no digit survived
    """
    kept = []
    point_kept = False
    has_digit = False
    for char in text:
        # Explicit ASCII check: str.isdigit() also accepts characters such as superscripts that int() rejects.
        if char in '0123456789':
            kept.append(char)
            has_digit = True
        elif char == '.' and allow_point and not point_kept:
            kept.append(char)
            point_kept = True
    if not has_digit:
        return '0'
    sign = '-' if '-' in text else ''
    return sign + ''.join(kept)
208 | 
209 | 
def typo(string, severity):
    """
    Produces a mistyped version of a string: roughly rearranged, with the occasional missed character.
    The work is done by dropped_scramble, based on applying a gaussian noise filter to the string character
    indexes and then rounding to the closest index.

    :param string: Text to scramble
    :param severity: Converted to float and passed to dropped_scramble
    :return: mistyped string
    """
    # NOTE(review): the meaning of the constant 3 is defined by dropped_scramble, not visible here.
    scrambled_characters = dropped_scramble(string, float(severity), 3)
    return ''.join(scrambled_characters)
220 | 
221 | 
def get_mode_size(mode):
    """
    Converts a PIL image mode string into a dimension cardinality (number of bands).

    Bands are counted as the upper-case letters of the mode name ("RGB" -> 3, "YCbCr" -> 3). Anything after
    a ';' describes the storage format rather than extra bands ("I;16B" is a single band), and a trailing
    lower-case 'a' is a premultiplied alpha band ("RGBa" -> 4).

    :param mode: PIL mode string
    :return: Number of bands; 0 for the bilevel mode "1", which contains no letters
    """
    band_names = mode.split(';', 1)[0]
    bands = sum(1 for letter in band_names if letter.isupper())
    if band_names.endswith('a'):
        bands += 1
    return bands
225 | 
226 | 
def image_size(image_object):
    """
    Returns the array shape matching an image: (height, width, channels) when get_mode_size reports more
    than one channel for the image's mode, otherwise (height, width).

    :param image_object: Object exposing mode, height and width attributes
    :return: Tuple of ints
    """
    channel_count = get_mode_size(image_object.mode)
    plane = (image_object.height, image_object.width)
    return (plane + (channel_count,)) if channel_count > 1 else plane


--------------------------------------------------------------------------------
/docs/basics/quickstart.rst:
--------------------------------------------------------------------------------
  1 | .. _quickstart:
  2 | 
  3 | Quickstart
  4 | ==========
  5 | 
  6 | If Noisify is :ref:`installed <install>` we can get to work with some examples!
  7 | 
  8 | 
  9 | Augmenting with recipes
 10 | -----------------------
 11 | 
 12 | Basic augmentation can be done very simply using basic recipes.
 13 | 
 14 |     >>> from noisify.recipes import *
 15 | 
 16 | The built in recipes are designed to work with a wide variety of different object types. Let's give it a go with
 17 | a simple Python dict.
 18 | 
 19 |     >>> test_data = {'this': 1.0, 'is': 2, 'a': 'test!'}
 20 |     >>> human_noise = human_error(5)
 21 |     >>> print(human_noise(test_data))
 22 |     <generator object Noisifier.generate_reports at 0x7f2d67e0f570>
 23 | 
 24 | Recipes create Noisifier objects; these objects then generate observations based on what they are given. To get a simple
 25 | list, cast to list. The built in recipes take a single 'severity' argument. Bigger numbers lead to bigger effects on
 26 | the data.
 27 | 
 28 |     >>> print(list(human_noise(test_data)))
 29 |     [{'a': 'tset!', 'this': 2, 'is': 1.0}]
 30 | 
 31 | You can also use a noisifier on a list of data.
 32 | 
 33 |     >>> test_data = [{'test%d' % (index): "This is test run number %d" % index} for index in range(5)]
 34 |     >>> test_data
 35 |     [{'test0': 'This is test run number 0'},
 36 |      {'test1': 'This is test run number 1'},
 37 |      {'test2': 'This is test run number 2'},
 38 |      {'test3': 'This is test run number 3'},
 39 |      {'test4': 'This is test run number 4'}]
 40 |     >>> print(list(human_noise(test_data)))
 41 |     [{'test0': 'This is test run number 0'},
 42 |      {'test1': 'This is test run number 1'},
 43 |      {'test2': 'hT iis testt unn umber2'},
 44 |      {'test3': 'This is test run number 3'},
 45 |      {'test4': 'This is test run number 4'}]
 46 | 
 47 | Let's have a closer look at what human_noise does.
 48 | 
 49 |     >>> print(human_noise)
 50 |     {'Noisifier': {'Reporter': {'Attributes': [],
 51 |                    'Faults': [Fault: TypographicalFault {'likelihood': 0.5, 'severity': 0.5},
 52 |                               Fault: ScrambleAttributes {'likelihood': 0.5, 'attribute_identifiers': None}]}}}
 53 | 
 54 | That's a lot of information!
 55 | The main thing to focus on is the 'Reporter' entry. This contains attributes (which we'll get to later) and Faults.
 56 | Faults are the methods used to modify the incoming data stream. Here you can see two being used: typographical faults,
 57 | which scramble text and numbers, and attribute scrambling, which swaps values between keys in incoming dictionaries.
 58 | 
 59 | Let's have a look at another recipe.
 60 | 
 61 |     >>> print(machine_error(5))
 62 |     {'Noisifier': {'Reporter': {'Attributes': [],
 63 |                    'Faults': [Fault: GaussianNoise {'sigma': 0.5},
 64 |                               Fault: InterruptionFault {'likelihood': 0.05}]}}}
 65 | 
 66 | Gaussian Noise is pretty self-explanatory, Interruption Fault leads to loss of data. Some values will be replaced with
 67 | None.
 68 | 
 69 | Applying Gaussian noise to a string doesn't make much sense. That's no issue here though: if noisify doesn't know how to
 70 | apply a given fault to a value, it leaves the value unchanged.
 71 | 
 72 |     >>> print(list(machine_error(5)(test_data)))
 73 |     [{'test0': 'This is test run number 0'},
 74 |      {'test1': 'This is test run number 1'},
 75 |      {'test2': None},
 76 |      {'test3': 'This is test run number 3'},
 77 |      {'test4': 'This is test run number 4'}]
 78 | 
 79 | 
 80 | Custom Noisifiers
 81 | -----------------
 82 | 
 83 | Imagine we have a series of medical records, people's height and weight are generally measured in metres and kilograms.
 84 | Occasionally however somebody has their weight entered in pounds and their height in inches.
 85 | Let's say we've built a mechanism to find these wrongly entered values and we want to test it, how do we create this
 86 | data?
 87 | And more importantly, how do we tell when the noisifier has actually changed these values?
 88 | 
 89 | We need to create a custom noisifier.
 90 | 
 91 | First let's create some data.
 92 | 
 93 |     >>> import random
 94 |     >>> def build_patient_record():
 95 |     ...     return {'height': random.gauss(1.7, 0.1), 'weight': random.gauss(85, 10)}
 96 |     >>> build_patient_record()
 97 |     {'weight': 79.0702693462696, 'height': 1.690377702784025}
 98 | 
 99 | Now let's create some conversion functions for metric to imperial.
100 | 
101 |     >>> def kilo_to_pounds(weight):
102 |     ...     return weight * 2.205
103 |     ...
104 |     >>> def metres_to_inches(height):
105 |     ...     return height * 39.37
106 |     ...
107 |     >>>
108 | 
109 | Now let's create our *attributes*, this enables us to associate specific faults with specific values of the record.
110 | There are many different ways attributes can be looked up and modified, in this case we're using dictionary lookups.
111 | 
112 |     >>> from noisify.attributes import DictValue
113 |     >>> from noisify.faults import UnitFault
114 |     >>> height = DictValue('height', faults=UnitFault(likelihood=0.25, unit_modifier=metres_to_inches))
115 |     >>> weight = DictValue('weight', faults=UnitFault(likelihood=0.25, unit_modifier=kilo_to_pounds))
116 | 
117 | Attributes take an identifier, this can be a key to a dictionary, or an attribute name of an object.
118 | 
119 | Now we build the reporter.
120 | 
121 |     >>> from noisify.reporters import Reporter
122 |     >>> patient_reporter = Reporter(attributes=[height, weight])
123 | 
124 | That was easy! The reporter can be called on individual records, but won't accept data series.
125 | 
126 |     >>> patient_reporter(build_patient_record())
127 |     {'height': 1.8157596382670191, 'weight': 199.97545102729777}
128 | 
129 | To apply more generally, create a noisifier.
130 | 
131 |     >>> from noisify.recipes import Noisifier
132 |     >>> patient_noise = Noisifier(reporter=patient_reporter)
133 | 
134 | Let's build some data and noisify it.
135 | 
136 |     >>> true_patients = [build_patient_record() for i in range(5)]
137 |     >>> true_patients
138 |     [{'height': 1.7831797462380368, 'weight': 84.70459461136014},
139 |      {'height': 1.7661108421633465, 'weight': 87.20572747494349},
140 |      {'height': 1.5047252739096044, 'weight': 102.7315276194823},
141 |      {'height': 1.9371269447064758, 'weight': 78.54807087351945},
142 |      {'height': 1.7624795973113694, 'weight': 76.47383227872784}]
143 |     >>> processed_patients = list(patient_noise(true_patients))
144 |     >>> processed_patients
145 |     [{'height': 1.7831797462380368, 'weight': 84.70459461136014},
146 |      {'height': 1.7661108421633465, 'weight': 192.2886290822504},
147 |      {'height': 59.24103403382112, 'weight': 102.7315276194823},
148 |      {'height': 76.26468781309394, 'weight': 78.54807087351945},
149 |      {'height': 1.7624795973113694, 'weight': 76.47383227872784}]
150 | 
151 | Report objects
152 | --------------
153 | 
154 | Noisify reporters return report objects. These contain the observation made, but they also contain other information.
155 | These are stored as additional attributes on the object.
156 | 
157 | The faults triggered on an object can be retrieved through the triggered_faults attribute. Continuing from our example
158 | above:
159 | 
160 |     >>> for patient in processed_patients:
161 |     ...     print(patient.triggered_faults)
162 |     {'reporter': [], 'height': [], 'weight': []}
163 |     {'reporter': [], 'height': [], 'weight': [Fault: UnitFault {'unit_modifier': <function kilo_to_pounds at 0x7f0b1fd17400>}]}
164 |     {'reporter': [], 'height': [Fault: UnitFault {'unit_modifier': <function metres_to_inches at 0x7f0b1fd17488>}], 'weight': []}
165 |     {'reporter': [], 'height': [Fault: UnitFault {'unit_modifier': <function metres_to_inches at 0x7f0b1fd17488>}], 'weight': []}
166 |     {'reporter': [], 'height': [], 'weight': []}
167 | 
168 | The ground truth is also stored.
169 | 
170 |     >>> for patient in processed_patients:
171 |     ...     print(patient.truth)
172 |     {'height': 1.7831797462380368, 'weight': 84.70459461136014}
173 |     {'height': 1.7661108421633465, 'weight': 87.20572747494349}
174 |     {'height': 1.5047252739096044, 'weight': 102.7315276194823}
175 |     {'height': 1.9371269447064758, 'weight': 78.54807087351945}
176 |     {'height': 1.7624795973113694, 'weight': 76.47383227872784}
177 | 
178 | Recipes
179 | -------
180 | 
181 | Recipes are simply factory functions for noisifiers. Consider the built in 'human_error' recipe.
182 | 
183 | 
184 |     >>> def human_error(scale):
185 |     ...     return Noisifier(
186 |     ...         reporter=Reporter(
187 |     ...             faults=[TypographicalFault(likelihood=min(1, 0.1*scale), severity=0.1*scale),
188 |     ...                     ScrambleAttributes(scrambledness=0.1*scale)]
189 |     ...         ),
190 |     ...         faults=None
191 |     ...     )
192 |     >>>
193 | 
194 | 
195 | Combining reporters and noisifiers
196 | ----------------------------------
197 | 
198 | The addition operator will combine reporters/noisifiers into composites which will apply all faults from both original
199 | reporters.
200 | 
201 |     >>> from noisify.recipes import machine_error, human_error
202 |     >>> print(machine_error(5))
203 |     {'Noisifier': {'Reporter': {'Attributes': [],
204 |                   'Faults': [Fault: GaussianNoise {'sigma': 0.5},
205 |                              Fault: InterruptionFault {'likelihood': 0.05}]}}}
206 |     >>> print(human_error(5))
207 |     {'Noisifier': {'Reporter': {'Attributes': [],
208 |                   'Faults': [Fault: TypographicalFault {'likelihood': 0.5, 'severity': 0.5},
209 |                              Fault: ScrambleAttributes {'likelihood': 0.5, 'attribute_identifiers': None}]}}}
210 |     >>> print(machine_error(5) + human_error(5))
211 |     {'Noisifier': {'Reporter': {'Attributes': [],
212 |                   'Faults': [Fault: GaussianNoise {'sigma': 0.5},
213 |                              Fault: InterruptionFault {'likelihood': 0.05},
214 |                              Fault: TypographicalFault {'likelihood': 0.5, 'severity': 0.5},
215 |                              Fault: ScrambleAttributes {'likelihood': 0.5, 'attribute_identifiers': None}]}}}
216 | 
217 | 
218 | 
219 | For custom faults and adding new datatype handlers to faults, see the :ref:`advanced <advanced>` section.
220 | 


--------------------------------------------------------------------------------