License: " in l:
41 | return lines[i].replace("License: ", "").replace('
', '')
42 |
43 | raise ValueError("Unable to find license in html page")
44 |
45 |
# Accumulators for the license audit; filled by the classification loop below.
unknown_licenses = []        # libraries whose PyPI page could not be parsed
library_license_dict = {}    # library name -> license string scraped from PyPI
accepted_libraries = []      # license matched args.accepted_licenses only
refused_libraries = []       # license matched args.forbidden_licenses only
maybe_libraries = []         # ambiguous: matched both lists, or neither
51 |
52 |
def is_license_in_list(license, license_list):
    """Return True if any entry of ``license_list`` occurs, case-insensitively,
    as a substring of ``license``."""
    lowered = license.lower()
    return any(candidate.lower() in lowered for candidate in license_list)
58 |
59 |
# Classify every dependency by the license text scraped from its PyPI page.
for library_name in python_dependencies:
    try:
        library_license = parse_html(pypi_pages[library_name])
        library_license_dict[library_name] = library_license
        print(f"{library_name}: {library_license}")
        # A library is only auto-classified when exactly one of the two
        # configured lists matches; anything ambiguous (both matched, or
        # neither matched) goes to the manual-review "maybe" list.

        is_forbidden = is_license_in_list(library_license, args.forbidden_licenses)
        is_accepted = is_license_in_list(library_license, args.accepted_licenses)
        if is_forbidden and not is_accepted:
            refused_libraries.append(library_name)
        elif is_accepted and not is_forbidden:
            accepted_libraries.append(library_name)
        else:
            maybe_libraries.append(library_name)

    except Exception as e:
        # parse_html raises ValueError when no "License:" line is found; any
        # other failure (e.g. a missing page entry) lands here as well.
        print(f"{library_name}: {e}")
        unknown_licenses.append(library_name)
79 |
80 |
def plurial(lst, _if='s', _else=''):
    """Return ``_if`` when ``lst`` has more than one element, else ``_else``.

    Used to pluralize words in the report messages below.
    """
    if len(lst) > 1:
        return _if
    return _else
83 |
# --- Report ----------------------------------------------------------------
if len(unknown_licenses) > 0:
    print(f"Couldn't find the license{plurial(unknown_licenses)} of the following dependencies: {unknown_licenses}")

# Bug fix: plurial() returns its FIRST extra argument when len(lst) > 1, so
# the plural suffix ("ies") must come first. The previous calls had the
# arguments swapped (printing "dependency" for many libraries) and used an
# inconsistent "dependencie" stem that produced "dependenciey".
print(f"\nThe following dependenc{plurial(accepted_libraries, 'ies', 'y')} have an accepted license: {accepted_libraries}")

if len(refused_libraries) > 0:
    print(f"The following dependenc{plurial(refused_libraries, 'ies', 'y')} have forbidden license(s):")
    for library_name in refused_libraries:
        print(f"    {library_name}: {library_license_dict[library_name]}")

if len(maybe_libraries) > 0:
    print(f"The following dependenc{plurial(maybe_libraries, 'ies', 'y')} have license which needs to be reviewed: ")
    for library_name in maybe_libraries:
        print(f"    {library_name}: {library_license_dict[library_name]}")


# Fail the CI job (non-zero exit) unless every dependency was explicitly accepted.
assert len(refused_libraries) == 0 and len(maybe_libraries) == 0 and len(unknown_licenses) == 0
101 |
--------------------------------------------------------------------------------
/.github/workflows/python_license_checker.yml:
--------------------------------------------------------------------------------
1 | on:
2 | push:
3 | branches:
4 | - master
5 | - dev
6 | pull_request:
7 | branches:
8 | - master
9 | - dev
10 | jobs:
11 | check-license:
12 | runs-on: ubuntu-latest
13 | steps:
14 | - name: Checkout
15 | uses: actions/checkout@v1
16 | - name: Set up Python
17 | uses: actions/setup-python@v2
18 | with:
19 | python-version: 3.8.11
20 | - name: Install dependencies
21 | run: python -m pip install urllib3 && python -m pip install -r requirements.txt --no-cache-dir
22 | - name: Check license
23 | run: python .github/workflows/license_checker_v2.py --dependencies $(cut -d '=' -f 1 <<< $(pip freeze))
24 |
--------------------------------------------------------------------------------
/.github/workflows/testpythonpackage.yml:
--------------------------------------------------------------------------------
1 | name: Test Python Package
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 | - dev
8 | pull_request:
9 | branches:
10 | - master
11 | - dev
12 |
13 | jobs:
14 | build:
15 |
16 | runs-on: ubuntu-latest
17 | strategy:
18 | max-parallel: 4
19 | fail-fast: false
20 | matrix:
21 | python-version: [3.7, 3.8, 3.9]
22 |
23 | steps:
24 | - uses: actions/checkout@v1
25 | - name: Set up Python ${{ matrix.python-version }}
26 | uses: actions/setup-python@v1
27 | with:
28 | python-version: ${{ matrix.python-version }}
29 | - name: Install dependencies
30 | run: |
31 | python -m pip install --upgrade pip
32 | pip install -r requirements.txt
33 | python setup.py install
34 | - name: Lint with flake8
35 | run: |
36 | pip install flake8
37 | # stop the build if there are Python syntax errors or undefined names
38 | flake8 neuraxle testing_neuraxle --count --select=E9,F63,F7,F82 --show-source --statistics
39 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
40 | flake8 neuraxle testing_neuraxle --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
41 | - name: Test with pytest
42 | run: |
43 | python setup.py test
44 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 | prof/
50 |
51 | # Translations
52 | *.mo
53 | *.pot
54 |
55 | # Django stuff:
56 | *.log
57 | local_settings.py
58 | db.sqlite3
59 |
60 | # Flask stuff:
61 | instance/
62 | .webassets-cache
63 |
64 | # Scrapy stuff:
65 | .scrapy
66 |
67 | # Sphinx documentation
68 | docs/_build/
69 |
70 | # PyBuilder
71 | target/
72 |
73 | # Jupyter Notebook
74 | .ipynb_checkpoints
75 |
76 | # pyenv
77 | .python-version
78 |
79 | # celery beat schedule file
80 | celerybeat-schedule
81 |
82 | # SageMath parsed files
83 | *.sage.py
84 |
85 | # Environments
86 | venv
87 | .env
88 | .venv
89 | env/
90 | venv/
91 | ENV/
92 | env.bak/
93 | venv.bak/
94 |
95 | # Spyder project settings
96 | .spyderproject
97 | .spyproject
98 |
99 | # Rope project settings
100 | .ropeproject
101 |
102 | # mkdocs documentation
103 | /site
104 |
105 | # mypy
106 | .mypy_cache/
107 |
108 | # IDEs
109 | .idea
110 | .vscode/settings.json
111 | .style.yapf
112 | *-py.js
113 | *pmap.yml
114 | tmp
115 |
116 | # Other
117 | .DS_Store
118 | ___*
119 | todo.txt
120 | **cache/**
121 | **caching/**
122 | cache/**
123 | caching/**
124 | testing_neuraxle/examples/cache/**
125 | testing_neuraxle/cache/**
126 | testing_neuraxle/cache/*
127 | cov.xml
128 | profile.sh
129 |
130 |
--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "0.2.0",
3 | "configurations": [
4 | {
5 | "name": "Python: Current File with /Neuraxle workdir",
6 | "type": "python",
7 | "justMyCode": false,
8 | "request": "launch",
9 | "program": "${file}",
10 | "console": "integratedTerminal",
11 | "cwd": "${workspaceFolder}",
12 | "env": {
13 | "PYTHONPATH": "${cwd}"
14 | },
15 | "redirectOutput": true,
16 | }
17 | ]
18 | }
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to Neuraxle
2 |
3 |
4 | ## First steps
5 |
6 | For contributing, first, read the README.
7 |
8 | We'd love to see you comment in an issue if you want to work on it.
9 |
10 | You can as well suggest new features by creating new issues. Don't hesitate to bring new ideas.
11 |
12 |
13 | ## Before coding
14 |
15 | New contributor? Follow this checklist to get started right on track:
16 |
17 | - [ ] Your local Git username is set to your GitHub username, and your local Git email is set to your [GitHub email](https://github.com/settings/emails). This is important to avoid breaking the cla-bot and for your contributions to be linked to your profile. If at least 1 contribution is not commited properly using the good credentials, the cla-bot will break until your [re-commit it](https://stackoverflow.com/questions/20002557/how-to-remove-a-too-large-file-in-a-commit-when-my-branch-is-ahead-of-master-by/39768343#39768343). Before commiting, [change your name and email](https://stackoverflow.com/a/42167480/2476920) to the good ones.
18 | - [ ] Use the PyCharm IDE with PyTest to test your code. Reformatting your code at every file save is a good idea, using [PyCharm's `Ctrl+Alt+L` shortcut](https://www.jetbrains.com/help/pycharm/reformat-and-rearrange-code.html). You may reorganize imports automatically as well, as long as your project root is well configured. Run the tests to see if everything works, and always ensure that all tests run before opening a pull request as well.
19 | - [ ] We recommend letting PyCharm manage the virtual environment by [creating a new one just for this project](https://www.jetbrains.com/help/pycharm/creating-virtual-environment.html#existing-environment), and [using PyTest as a test runner in PyCharm](https://www.jetbrains.com/help/pycharm/pytest.html#pytest-fixtures). This is not required, but should help in getting you started.
20 | - [ ] Please [make your pull request(s) editable](https://docs.github.com/en/github/collaborating-with-pull-requests/working-with-forks/allowing-changes-to-a-pull-request-branch-created-from-a-fork), such as for us to add you to the list of contributors if you didn't add the entry, for example.
21 | - [ ] To contribute, first fork the project, then do your changes, and then [open a pull request in the main repository](https://docs.github.com/en/github/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork).
22 | - [ ] Sign the [Contributor License Agreement (CLA)](https://docs.google.com/forms/d/e/1FAIpQLSfDP3eCQoV0tMq296OfbOpNn-QkHwfJQLkS0MVjSHiZQXPw2Q/viewform) to allow Neuraxio to use and publish your contributions under the Apache 2.0 license, in order for everyone to be able to use your open-source contributions. Follow the instructions of the cla-bot upon opening the pull request.
23 |
24 |
25 | ## Pull Requests
26 |
27 | You will then be able to open pull requests. The instructions in the [pull request template](https://www.neuraxle.org/stable/Neuraxle/.github/pull_request_template.html) will be shown to you upon creating each pull request.
28 |
29 |
30 | ## Code Reviews
31 |
We do code review. We expect most of what we suggest to be fixed. This is a machine learning framework. This means that it is the basis for several other projects. Therefore, the code **must** be clean, understandable (easy to read), and documented, as many people will read and use what you have coded. Please respect PEP 8 as much as possible, and try as much as possible to create clean code with a good object-oriented programming (OOP) design. It is normal and expected that your pull requests have lots of review comments.
33 |
34 |
35 | ## Reviewing other's code
36 |
37 | We love that contributors review each other's code as well.
38 |
39 |
40 | ## Publishing project to PyPI
41 |
**For official project maintainers only:** you may follow these instructions to upload a new version of the package to PyPI:
43 | - https://github.com/Neuraxio/Neuraxle/wiki/How-to-deploy-a-new-package-(or-version-of-package)-to-PyPI
44 |
--------------------------------------------------------------------------------
/assets/images/La-Cite-LP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/assets/images/La-Cite-LP.png
--------------------------------------------------------------------------------
/assets/images/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/assets/images/favicon.ico
--------------------------------------------------------------------------------
/assets/images/kimoby.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/assets/images/kimoby.png
--------------------------------------------------------------------------------
/assets/images/neuraxio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/assets/images/neuraxio.png
--------------------------------------------------------------------------------
/assets/images/neuraxle_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/assets/images/neuraxle_logo.png
--------------------------------------------------------------------------------
/assets/images/solution_nexam_io.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/assets/images/solution_nexam_io.jpg
--------------------------------------------------------------------------------
/assets/images/umaneo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/assets/images/umaneo.png
--------------------------------------------------------------------------------
/coverage.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Run lint, then the full test suite with coverage (HTML + XML reports).
./flake8.sh
# -n 7: run tests across 7 parallel workers (pytest-xdist).
pytest -n 7 --cov-report html --cov-report xml:cov.xml --cov-config=.coveragerc --cov=neuraxle testing_neuraxle
# pytest --cov-report html --cov=neuraxle testing_neuraxle; open htmlcov/index.html
5 |
6 |
--------------------------------------------------------------------------------
/examples/README.txt:
--------------------------------------------------------------------------------
1 | \>\>\> Practical Examples
2 | ============================
3 |
4 | Navigate the hands-on examples.
5 |
--------------------------------------------------------------------------------
/examples/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/examples/__init__.py
--------------------------------------------------------------------------------
/examples/_images/neuraxle_handler_methods.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/examples/_images/neuraxle_handler_methods.png
--------------------------------------------------------------------------------
/examples/_images/neuraxle_machine_learning_lifecycle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/examples/_images/neuraxle_machine_learning_lifecycle.png
--------------------------------------------------------------------------------
/examples/_images/neuraxle_time_series_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/examples/_images/neuraxle_time_series_data.png
--------------------------------------------------------------------------------
/examples/auto_ml/README.txt:
--------------------------------------------------------------------------------
1 | .. _real_world_examples:
2 |
3 | AutoML
4 | -------------------------------------
5 |
6 | This demonstrates how to use the AutoML loop, and the auto ml steps that are available in Neuraxle.
7 |
--------------------------------------------------------------------------------
/examples/caching/README.txt:
--------------------------------------------------------------------------------
1 | .. _real_world_examples:
2 |
3 | Caching
4 | -------------------------------------
5 |
6 | This demonstrates how to use checkpoints in Neuraxle.
7 |
--------------------------------------------------------------------------------
/examples/deployment/README.txt:
--------------------------------------------------------------------------------
1 | .. _real_world_examples:
2 |
3 | REST API Model Serving
4 | ---------------------------------------------------
5 |
6 | This demonstrates an easy way to deploy your Neuraxle model or pipeline to a REST API.
7 |
--------------------------------------------------------------------------------
/examples/deployment/plot_easy_rest_api_serving.py:
--------------------------------------------------------------------------------
1 | """
2 | Easy REST API Model Serving with Neuraxle
3 | ================================================
4 |
5 | This demonstrates an easy way to deploy your Neuraxle model or pipeline to a REST API.
6 |
7 | ..
8 | Copyright 2019, Neuraxio Inc.
9 |
10 | Licensed under the Apache License, Version 2.0 (the "License");
11 | you may not use this file except in compliance with the License.
12 | You may obtain a copy of the License at
13 |
14 | http://www.apache.org/licenses/LICENSE-2.0
15 |
16 | Unless required by applicable law or agreed to in writing, software
17 | distributed under the License is distributed on an "AS IS" BASIS,
18 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 | See the License for the specific language governing permissions and
20 | limitations under the License.
21 |
22 | ..
23 | Thanks to Umaneo Technologies Inc. for their contributions to this Machine Learning
24 | project, visit https://www.umaneo.com/ for more information on Umaneo Technologies Inc.
25 |
26 | """
27 |
28 | import numpy as np
29 | from flask import Flask
30 | from sklearn.cluster import KMeans
31 | from sklearn.datasets import load_boston
32 | from sklearn.decomposition import PCA, FastICA
33 | from sklearn.ensemble import GradientBoostingRegressor
34 | from sklearn.metrics import r2_score
35 | from sklearn.model_selection import train_test_split
36 | from sklearn.utils import shuffle
37 |
38 | from neuraxle.rest.flask import FlaskRestApiWrapper, JSONDataBodyDecoder, JSONDataResponseEncoder
39 | from neuraxle.pipeline import Pipeline
40 | from neuraxle.steps.sklearn import RidgeModelStacking
41 | from neuraxle.union import AddFeatures
42 |
43 |
def main():
    """Fit a stacked regression pipeline on the Boston dataset, then wrap it
    into a Flask REST API that serves predictions over HTTP."""
    # NOTE(review): sklearn's load_boston was deprecated in 1.0 and removed in
    # 1.2 -- this example requires an older scikit-learn; confirm the pin.
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

    # Augment the features with 2 PCA + 2 ICA components, then stack a
    # gradient boosting regressor and a KMeans under a ridge model.
    pipeline = Pipeline([
        AddFeatures([
            PCA(n_components=2),
            FastICA(n_components=2),
        ]),
        RidgeModelStacking([
            GradientBoostingRegressor(),
            KMeans(),
        ]),
    ])

    print("Fitting on train:")
    pipeline = pipeline.fit(X_train, y_train)
    print("")
    print("Transforming train and test:")
    y_train_predicted = pipeline.transform(X_train)
    y_test_predicted = pipeline.transform(X_test)
    print("")
    print("Evaluating transformed train:")
    score = r2_score(y_train_predicted, y_train)
    print('R2 regression score:', score)
    print("")
    print("Evaluating transformed test:")
    score = r2_score(y_test_predicted, y_test)
    print('R2 regression score:', score)
    print("Deploying the application by routing data to the transform method:")

    class CustomJSONDecoderFor2DArray(JSONDataBodyDecoder):
        """This is a custom JSON decoder class that precedes the pipeline's transformation."""

        def decode(self, data_inputs):
            """
            Transform a JSON list object into an np.array object.

            :param data_inputs: json object
            :return: np array for data inputs
            """
            return np.array(data_inputs)

    class CustomJSONEncoderOfOutputs(JSONDataResponseEncoder):
        """This is a custom JSON response encoder class for converting the pipeline's transformation outputs."""

        def encode(self, data_inputs) -> dict:
            """
            Convert predictions to a dict for creating a JSON Response object.

            :param data_inputs: transformed outputs of the pipeline
            :return: JSON-serializable dict holding the predictions
            """
            return {
                'predictions': list(data_inputs)
            }

    # Wrap the fitted pipeline: incoming POST bodies are JSON-decoded,
    # transformed by the pipeline, and re-encoded as a JSON response.
    app = FlaskRestApiWrapper(
        json_decoder=CustomJSONDecoderFor2DArray(),
        wrapped=pipeline,
        json_encoder=CustomJSONEncoderOfOutputs()
    ).get_app()

    print("Finally, run the app by uncommenting this next line of code:")

    # app.run(debug=False, port=5000)

    print("You can now call your pipeline over HTTP with a (JSON) REST API.")

    # test_predictictions = requests.post(
    #     url='http://127.0.0.1:5000/',
    #     json=X_test.tolist()
    # )
    # print(test_predictictions)
    # print(test_predictictions.content)

    assert isinstance(app, Flask)

    return app
125 |
126 |
127 | if __name__ == "__main__":
128 | main()
129 |
--------------------------------------------------------------------------------
/examples/getting_started/README.txt:
--------------------------------------------------------------------------------
1 | .. _real_world_examples:
2 |
3 | Getting started
4 | -------------------------------------
5 |
6 | This demonstrates how to build basic pipelines with Neuraxle.
7 |
--------------------------------------------------------------------------------
/examples/getting_started/plot_force_handle_mixin.py:
--------------------------------------------------------------------------------
1 | """
2 | Create Pipeline Steps that require implementing only handler methods
3 | ========================================================================================================================
4 |
5 | If a pipeline step only needs to implement handler methods, then you can inherit from the
6 | ForceHandleMixin as demonstrated here. Handler methods are useful when :
7 |
8 | - You need to change the shape of the data container passed to the following steps, or the wrapped steps.
9 | - You want to apply side effects based on the data container, and the execution context.
10 | - You want to change the pipeline execution flow based on the data container, and the execution context.
11 |
12 | ..
13 | Copyright 2022, Neuraxio Inc.
14 |
15 | Licensed under the Apache License, Version 2.0 (the "License");
16 | you may not use this file except in compliance with the License.
17 | You may obtain a copy of the License at
18 |
19 | http://www.apache.org/licenses/LICENSE-2.0
20 |
21 | Unless required by applicable law or agreed to in writing, software
22 | distributed under the License is distributed on an "AS IS" BASIS,
23 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24 | See the License for the specific language governing permissions and
25 | limitations under the License.
26 |
27 | ..
28 | Thanks to Umaneo Technologies Inc. for their contributions to this Machine Learning
29 | project, visit https://www.umaneo.com/ for more information on Umaneo Technologies Inc.
30 |
31 | """
32 | from typing import Tuple
33 |
34 | import numpy as np
35 | from neuraxle.base import DACT, BaseStep
36 | from neuraxle.base import ExecutionContext as CX
37 | from neuraxle.base import ForceHandleMixin
38 |
39 |
class ForceHandleMixinStep(ForceHandleMixin, BaseStep):
    """
    Please make your steps inherit from ForceHandleMixin when they only implement handle_methods, but also
    when you want to make impossible the use of regular fit, transform, and fit_transform methods.
    Also, make sure that BaseStep is the last step you inherit from (the mixin comes first in the MRO).
    """

    def __init__(self):
        # Initialize the base step first; the mixin is layered on top of it.
        BaseStep.__init__(self)
        ForceHandleMixin.__init__(self)

    def _fit_data_container(self, data_container: DACT, context: CX) -> BaseStep:
        """
        Change the shape of the data container.
        and/or
        Apply any side effects based on the data container
        And/or
        Change the execution flow of the pipeline
        """
        context.logger.info("Handling the 'fit' with handler method!")
        return self

    def _transform_data_container(self, data_container: DACT, context: CX) -> DACT:
        """
        Change the shape of the data container.
        and/or
        Apply any side effects based on the data container
        And/or
        Change the execution flow of the pipeline
        """
        context.logger.info("Handling the 'transform' with handler method!")
        return data_container

    def _fit_transform_data_container(
        self, data_container: DACT, context: CX
    ) -> Tuple[BaseStep, DACT]:
        """
        Change the shape of the data container.
        and/or
        Apply any side effects based on the data container
        And/or
        Change the execution flow of the pipeline
        """
        context.logger.info("Handling the 'fit_transform' with handler method!")
        return self, data_container
85 |
86 |
def main():
    """Demonstrate that fit/transform on a ForceHandleMixin step are routed
    through the handler methods."""
    step = ForceHandleMixinStep()
    data_inputs = np.array([0, 1])
    expected_outputs = np.array([0, 1])

    step = step.fit(data_inputs, expected_outputs)
    outputs = step.transform(data_inputs)
94 |
95 |
96 | if __name__ == '__main__':
97 | main()
98 |
--------------------------------------------------------------------------------
/examples/getting_started/plot_inverse_transform.py:
--------------------------------------------------------------------------------
1 | """
2 | Inverse Transforms in Neuraxle: How to Reverse a Prediction
3 | ============================================================
4 |
5 | This demonstrates how to make a prediction, and then to undo the prediction to get back the original inputs or an
6 | estimate of the original inputs. Not every pipeline steps have an inverse transform method, because not every operation
7 | is reversible.
8 |
9 | ..
10 | Copyright 2019, Neuraxio Inc.
11 |
12 | Licensed under the Apache License, Version 2.0 (the "License");
13 | you may not use this file except in compliance with the License.
14 | You may obtain a copy of the License at
15 |
16 | http://www.apache.org/licenses/LICENSE-2.0
17 |
18 | Unless required by applicable law or agreed to in writing, software
19 | distributed under the License is distributed on an "AS IS" BASIS,
20 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21 | See the License for the specific language governing permissions and
22 | limitations under the License.
23 |
24 | ..
25 | Thanks to Umaneo Technologies Inc. for their contributions to this Machine Learning
26 | project, visit https://www.umaneo.com/ for more information on Umaneo Technologies Inc.
27 |
28 | """
29 |
30 | import numpy as np
31 |
32 | from neuraxle.pipeline import Pipeline
33 | from neuraxle.steps.numpy import MultiplyByN
34 |
35 |
def main():
    """Show that inverse_transform undoes the doubling applied by MultiplyByN."""
    pipeline = Pipeline([MultiplyByN(multiply_by=2)])

    data_inputs = np.array([1, 2])
    generated_outputs = pipeline.transform(data_inputs)
    regenerated_inputs = pipeline.inverse_transform(generated_outputs)

    assert np.array_equal(regenerated_inputs, data_inputs)
    assert np.array_equal(generated_outputs, 2 * data_inputs)
45 |
46 |
47 | if __name__ == "__main__":
48 | main()
49 |
--------------------------------------------------------------------------------
/examples/getting_started/plot_label_encoder_across_multiple_columns.py:
--------------------------------------------------------------------------------
1 | """
2 | Create label encoder across multiple columns
3 | ==================================================
4 |
5 | You can apply label encoder to all columns using the ColumnTransformer step.
6 |
7 | This demonstrates how to use properly transform columns using neuraxle.
8 |
9 | For more info, see the `thread here `__.
10 |
11 | ..
12 | Copyright 2019, Neuraxio Inc.
13 |
14 | Licensed under the Apache License, Version 2.0 (the "License");
15 | you may not use this file except in compliance with the License.
16 | You may obtain a copy of the License at
17 |
18 | http://www.apache.org/licenses/LICENSE-2.0
19 |
20 | Unless required by applicable law or agreed to in writing, software
21 | distributed under the License is distributed on an "AS IS" BASIS,
22 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23 | See the License for the specific language governing permissions and
24 | limitations under the License.
25 |
26 | """
27 |
28 | import numpy as np
29 | import pandas
30 | from sklearn.preprocessing import LabelEncoder
31 |
32 | from neuraxle.steps.column_transformer import ColumnTransformer
33 | from neuraxle.steps.loop import FlattenForEach
34 |
# Discussion:
# https://stackoverflow.com/questions/24458645/label-encoding-across-multiple-columns-in-scikit-learn
# Toy frame with three categorical columns, used by both demos below.
df = pandas.DataFrame({
    'pets': ['cat', 'dog', 'cat', 'monkey', 'dog', 'dog'],
    'owner': ['Champ', 'Ron', 'Brick', 'Champ', 'Veronica', 'Ron'],
    'location': ['San_Diego', 'New_York', 'New_York', 'San_Diego', 'San_Diego', 'New_York']
})
42 |
43 |
def _apply_same_encoder_to_all_columns():
    """
    One shared LabelEncoder will be applied on all the data to encode it.
    """
    # FlattenForEach flattens the 2D values into one sequence so a single
    # LabelEncoder sees every column, then restores the original shape.
    p = FlattenForEach(LabelEncoder(), then_unflatten=True)

    p, predicted_output = p.fit_transform(df.values)

    # All three columns share one label space, hence codes spanning 0..8.
    expected_output = np.array([
        [6, 7, 6, 8, 7, 7],
        [1, 3, 0, 1, 5, 3],
        [4, 2, 2, 4, 4, 2]
    ]).transpose()
    assert np.array_equal(predicted_output, expected_output)
58 |
59 |
def _apply_different_encoders_to_columns():
    """
    One standalone LabelEncoder will be applied on the pets,
    and another one will be shared for the columns owner and location.
    """
    p = ColumnTransformer([
        # A different encoder will be used for column 0 with name "pets":
        (0, FlattenForEach(LabelEncoder(), then_unflatten=True)),
        # A shared encoder will be used for column 1 and 2, "owner" and "location":
        ([1, 2], FlattenForEach(LabelEncoder(), then_unflatten=True)),
    ], n_dimension=2)

    p, predicted_output = p.fit_transform(df.values)

    # Column 0 now gets its own 0..2 codes; columns 1 and 2 share one encoder.
    expected_output = np.array([
        [0, 1, 0, 2, 1, 1],
        [1, 3, 0, 1, 5, 3],
        [4, 2, 2, 4, 4, 2]
    ]).transpose()
    assert np.array_equal(predicted_output, expected_output)
80 |
81 |
def main():
    """Run both label-encoding demos in order."""
    for demo in (_apply_same_encoder_to_all_columns, _apply_different_encoders_to_columns):
        demo()
85 |
86 |
87 | if __name__ == "__main__":
88 | main()
89 |
--------------------------------------------------------------------------------
/examples/getting_started/plot_nested_pipelines.py:
--------------------------------------------------------------------------------
1 | """
2 | Create Nested Pipelines in Neuraxle
3 | ================================================
4 |
5 | You can create pipelines within pipelines using the composition design pattern.
6 |
7 | This demonstrates how to create pipelines within pipelines, and how to access the steps and their
8 | attributes in the nested pipelines.
9 |
10 | For more info, see the `thread here `__.
11 |
12 | ..
13 | Copyright 2019, Neuraxio Inc.
14 |
15 | Licensed under the Apache License, Version 2.0 (the "License");
16 | you may not use this file except in compliance with the License.
17 | You may obtain a copy of the License at
18 |
19 | http://www.apache.org/licenses/LICENSE-2.0
20 |
21 | Unless required by applicable law or agreed to in writing, software
22 | distributed under the License is distributed on an "AS IS" BASIS,
23 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24 | See the License for the specific language governing permissions and
25 | limitations under the License.
26 |
27 | ..
28 | Thanks to Umaneo Technologies Inc. for their contributions to this Machine Learning
29 | project, visit https://www.umaneo.com/ for more information on Umaneo Technologies Inc.
30 |
31 | """
32 |
33 | import numpy as np
34 | from sklearn.decomposition import PCA
35 | from sklearn.preprocessing import StandardScaler
36 |
37 | from neuraxle.base import Identity
38 | from neuraxle.pipeline import Pipeline
39 |
40 |
def main():
    """
    Build a pipeline nested three levels deep and read the fitted PCA
    components back out through nested step indexing.
    """
    np.random.seed(42)
    data = np.random.randint(5, size=(100, 5))

    # Identity steps do nothing; they are only here to illustrate nesting.
    innermost = Pipeline([
        Identity(),
        PCA(n_components=2)
    ])
    inner = Pipeline([
        Identity(),
        Identity(),
        Identity(),
        innermost
    ])
    pipeline = Pipeline([
        StandardScaler(),
        Identity(),
        inner
    ])

    # Fit and transform in one call; fit_transform returns (fitted_pipeline, output).
    pipeline, _ = pipeline.fit_transform(data)

    # Steps are addressable by class name; [-1] is the PCA wrapper at the
    # bottom of the innermost pipeline.
    pca_components = pipeline["Pipeline"]["Pipeline"][-1].get_wrapped_sklearn_predictor().components_
    assert pca_components.shape == (2, 5)

    # Discussion:
    # https://stackoverflow.com/questions/28822756/getting-model-attributes-from-scikit-learn-pipeline/58359509#58359509
67 |
68 |
69 | if __name__ == "__main__":
70 | main()
71 |
--------------------------------------------------------------------------------
/examples/getting_started/plot_non_fittable_mixin.py:
--------------------------------------------------------------------------------
1 | """
2 | Create Pipeline Steps in Neuraxle that don't fit or transform
3 | ================================================================
4 |
5 | If a pipeline step doesn't need to be fitted and only transforms data (e.g.: taking the logarithm of the data),
6 | then you can inherit from the NonFittableMixin as demonstrated here, which will override the fit method properly
7 | for you. You can also use a NonTransformableMixin if your step doesn't transform anything, which is rarer. If your step
8 | simply just does nothing to the data, then you could even use the Identity class of Neuraxle, which is simply a class
9 | that inherits from both the NonFittableMixin, the NonTransformableMixin, and BaseStep.
10 |
11 | Mixins are an old Object Oriented Programming (OOP) design pattern that resurfaces when designing
12 | Machine Learning Pipelines. Those are add-ons to classes to implement some methods in some specific ways already.
13 | A mixin doesn't inherit from BaseStep itself, because we can combine many of them in one class. However, a mixin must
14 | suppose that the object that inherits from the mixin also inherits from its base class. Here, our base class is the
15 | BaseStep class.
16 |
17 | ..
18 | Copyright 2019, Neuraxio Inc.
19 |
20 | Licensed under the Apache License, Version 2.0 (the "License");
21 | you may not use this file except in compliance with the License.
22 | You may obtain a copy of the License at
23 |
24 | http://www.apache.org/licenses/LICENSE-2.0
25 |
26 | Unless required by applicable law or agreed to in writing, software
27 | distributed under the License is distributed on an "AS IS" BASIS,
28 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
29 | See the License for the specific language governing permissions and
30 | limitations under the License.
31 |
32 | ..
33 | Thanks to Umaneo Technologies Inc. for their contributions to this Machine Learning
34 | project, visit https://www.umaneo.com/ for more information on Umaneo Technologies Inc.
35 |
36 | """
37 | import numpy as np
38 |
39 | from neuraxle.base import NonTransformableMixin, Identity, BaseStep, NonFittableMixin
40 | from neuraxle.pipeline import Pipeline
41 |
42 |
class NonFittableStep(NonFittableMixin, BaseStep):
    """
    A step that only transforms: the NonFittableMixin supplies a no-op fit.
    Inherit from NonFittableMixin whenever a step needs no fitting, and keep
    BaseStep last in the inheritance list. An inverse_transform method could
    also be defined on this object if needed.
    """

    def __init__(self):
        BaseStep.__init__(self)
        NonFittableMixin.__init__(self)

    def transform(self, data_inputs):
        # Replace this print with the real transformation code.
        print("NonFittableStep: I transformed.")
        return data_inputs
58 |
59 |
class NonTransformableStep(NonTransformableMixin, BaseStep):
    """
    A step that only fits: the NonTransformableMixin supplies a transform
    that returns the data inputs untouched. Inherit from it whenever a step
    needs no transformation, and keep BaseStep last in the inheritance list.
    """

    def __init__(self):
        BaseStep.__init__(self)
        NonTransformableMixin.__init__(self)

    def fit(self, data_inputs, expected_outputs=None) -> 'NonTransformableStep':
        # Replace this print with the real fitting code.
        print("NonTransformableStep: I fitted.")
        return self
74 |
75 |
def main():
    """Demonstrate steps that skip fitting, skip transforming, or skip both."""
    pipeline = Pipeline([
        NonFittableStep(),
        NonTransformableStep(),
        Identity()  # Identity combines both mixins and does nothing at all.
    ])

    data = np.array([0, 1])

    pipeline = pipeline.fit(data)
    # Prints:
    #   NonFittableStep: I transformed.
    #   NonTransformableStep: I fitted.

    result = pipeline.transform(data)
    # Prints:
    #   NonFittableStep: I transformed.

    # The only real transform is a pass-through, so the data is unchanged.
    assert np.array_equal(result, data)
95 |
96 |
97 | if __name__ == "__main__":
98 | main()
99 |
--------------------------------------------------------------------------------
/examples/hyperparams/README.txt:
--------------------------------------------------------------------------------
1 | .. _real_world_examples:
2 |
3 | Hyperparameters
4 | -------------------------------------
5 |
6 | This demonstrates how to add hyperparameters to Neuraxle pipelines.
7 |
--------------------------------------------------------------------------------
/examples/hyperparams/plot_hyperparams.py:
--------------------------------------------------------------------------------
1 | """
2 | Manipulate Hyperparameter Spaces for Hyperparameter Tuning
3 | ===========================================================
4 |
5 | This demonstrates how to manipulate hyperparameters and hyperparameter spaces.
6 |
7 | ..
8 | Copyright 2019, Neuraxio Inc.
9 |
10 | Licensed under the Apache License, Version 2.0 (the "License");
11 | you may not use this file except in compliance with the License.
12 | You may obtain a copy of the License at
13 |
14 | http://www.apache.org/licenses/LICENSE-2.0
15 |
16 | Unless required by applicable law or agreed to in writing, software
17 | distributed under the License is distributed on an "AS IS" BASIS,
18 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 | See the License for the specific language governing permissions and
20 | limitations under the License.
21 |
22 | ..
23 | Thanks to Umaneo Technologies Inc. for their contributions to this Machine Learning
24 | project, visit https://www.umaneo.com/ for more information on Umaneo Technologies Inc.
25 |
26 | """
27 |
28 | from sklearn.decomposition import PCA
29 |
30 | from neuraxle.base import Identity
31 | from neuraxle.hyperparams.distributions import RandInt
32 | from neuraxle.hyperparams.space import HyperparameterSpace
33 | from neuraxle.pipeline import Pipeline
34 | from neuraxle.steps.numpy import MultiplyByN
35 |
36 |
def main():
    """
    Build a pipeline with named steps, define a hyperparameter space with a
    flat double-underscore dict, sample from it, and verify the samples.
    """
    p = Pipeline([
        ('step1', MultiplyByN()),
        ('step2', MultiplyByN()),
        Pipeline([
            Identity(),
            Identity(),
            PCA(n_components=4)
        ])
    ])

    # Keys use the flat `step__hyperparam` notation; nested steps chain names.
    p.set_hyperparams_space({
        'step1__multiply_by': RandInt(42, 50),
        'step2__multiply_by': RandInt(-10, 0),
        'Pipeline__PCA__n_components': RandInt(2, 3)
    })

    # Draw one random sample from the space and apply it to the pipeline.
    samples = p.get_hyperparams_space().rvs()
    p.set_hyperparams(samples)

    # Read the hyperparameters back and check they fall within the ranges.
    samples = p.get_hyperparams()
    assert 42 <= samples['step1__multiply_by'] <= 50
    assert -10 <= samples['step2__multiply_by'] <= 0
    assert samples['Pipeline__PCA__n_components'] in [2, 3]
    assert p['Pipeline']['PCA'].get_wrapped_sklearn_predictor().n_components in [2, 3]
62 |
63 |
64 | if __name__ == "__main__":
65 | main()
66 |
--------------------------------------------------------------------------------
/examples/operations/plot_apply_method.py:
--------------------------------------------------------------------------------
1 | """
2 | Apply recursive operations to a pipeline
3 | ===========================================================
4 |
5 | This demonstrates how to apply a method to each pipeline step.
6 |
7 | ..
8 | Copyright 2019, Neuraxio Inc.
9 |
10 | Licensed under the Apache License, Version 2.0 (the "License");
11 | you may not use this file except in compliance with the License.
12 | You may obtain a copy of the License at
13 |
14 | http://www.apache.org/licenses/LICENSE-2.0
15 |
16 | Unless required by applicable law or agreed to in writing, software
17 | distributed under the License is distributed on an "AS IS" BASIS,
18 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 | See the License for the specific language governing permissions and
20 | limitations under the License.
21 |
22 | """
23 | import json
24 |
25 | from scipy.stats import randint
26 |
27 | from neuraxle.base import Identity
28 | from neuraxle.hyperparams.space import RecursiveDict, HyperparameterSamples, HyperparameterSpace
29 | from neuraxle.pipeline import Pipeline
30 |
31 |
class IdentityWithRvs(Identity):
    # Extends Identity with a private _rvs so that `pipeline.apply('_rvs')`
    # can sample this step's hyperparameter space by method name.
    def _rvs(self):
        # Sample one concrete set of hyperparameters from this step's space.
        return HyperparameterSamples(self.hyperparams_space.rvs())
35 |
36 |
def rvs(step) -> RecursiveDict:
    """Sample a step's hyperparameter space; used with `pipeline.apply(rvs)`."""
    return HyperparameterSamples(step.hyperparams_space.rvs())
39 |
40 |
def main():
    """Show two equivalent ways to apply a sampling operation to every step."""
    pipeline = Pipeline([
        IdentityWithRvs().set_hyperparams_space(HyperparameterSpace({
            'a': randint(low=2, high=5)
        })),
        IdentityWithRvs().set_hyperparams_space(HyperparameterSpace({
            'b': randint(low=100, high=400)
        })),
    ])

    # Apply a free function recursively to every step in the hierarchy:
    samples: HyperparameterSamples = pipeline.apply(rvs)
    print('p.apply(rvs) ==>')
    print(json.dumps(samples, indent=4))

    # Or name a method that each step defines:
    samples: HyperparameterSamples = pipeline.apply('_rvs')
    print("p.apply('_rvs') ==>")
    print(json.dumps(samples, indent=4))
60 |
61 |
62 | if __name__ == '__main__':
63 | main()
64 |
--------------------------------------------------------------------------------
/examples/parallel/README.txt:
--------------------------------------------------------------------------------
1 | .. _real_world_examples:
2 |
3 | Parallel
4 | -------------------------------------
5 |
6 | This demonstrates how parallel processing works in Neuraxle.
--------------------------------------------------------------------------------
/examples/parallel/plot_streaming_pipeline.py:
--------------------------------------------------------------------------------
1 | """
2 | Parallel processing in Neuraxle
3 | ===================================================================
4 |
5 | This demonstrates how to stream data in parallel in a Neuraxle pipeline.
6 | The pipeline steps' parallelism here will be obvious.
7 |
8 | The pipeline has two steps:
9 | 1. Preprocessing: the step that processes the data simply sleeps.
10 | 2. Model: the model simply multiplies the data by two.
11 |
12 | This can be used with scikit-learn as well to transform things in parallel,
13 | and any other library such as tensorflow.
14 |
15 | Pipelines benchmarked:
16 | 1. We first use a classical pipeline and evaluate the time.
17 | 2. Then we use a minibatched pipeline and we evaluate the time.
18 | 3. Then we use a parallel pipeline and we evaluate the time.
19 |
20 | We expect the parallel pipeline to be faster due to having more workers
21 | in parallel, as well as starting the model's transformations at the same
22 | time that other batches are being preprocessed, using queues.
23 |
24 |
25 | ..
26 | Copyright 2022, Neuraxio Inc.
27 |
28 | Licensed under the Apache License, Version 2.0 (the "License");
29 | you may not use this file except in compliance with the License.
30 | You may obtain a copy of the License at
31 |
32 | http://www.apache.org/licenses/LICENSE-2.0
33 |
34 | Unless required by applicable law or agreed to in writing, software
35 | distributed under the License is distributed on an "AS IS" BASIS,
36 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
37 | See the License for the specific language governing permissions and
38 | limitations under the License.
39 |
40 |
41 | """
42 | import time
43 | import numpy as np
44 | from neuraxle.base import ExecutionContext as CX
45 |
46 | from neuraxle.distributed.streaming import SequentialQueuedPipeline
47 | from neuraxle.pipeline import BasePipeline, Pipeline, MiniBatchSequentialPipeline
48 | from neuraxle.steps.loop import ForEach
49 | from neuraxle.steps.misc import Sleep
50 | from neuraxle.steps.numpy import MultiplyByN
51 |
52 |
def eval_run_time(pipeline: BasePipeline):
    """
    Time a single transform of the integers 0..99 through the given pipeline.

    :param pipeline: the pipeline to set up and benchmark.
    :return: (elapsed_seconds, transformed_output) tuple.
    """
    pipeline.setup(CX())
    start = time.time()
    output = pipeline.transform(list(range(100)))
    elapsed = time.time() - start
    return elapsed, output
60 |
61 |
def main():
    """
    The task is to sleep 0.02 seconds for each data input and then multiply by 2.

    Benchmarks three equivalent pipelines on 100 data points and asserts that
    the parallel queued pipeline beats the sequential minibatched one.
    """
    sleep_time = 0.02
    # Shared steps: per-item sleep (the "preprocessing"), then the "model".
    preprocessing_and_model_steps = [ForEach(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]

    # Classical pipeline - all at once with one big batch:
    p = Pipeline(preprocessing_and_model_steps)
    time_vanilla_pipeline, output_classical = eval_run_time(p)
    print(f"Classical 'Pipeline' execution time: {time_vanilla_pipeline} seconds.")

    # Classical minibatch pipeline - minibatch size 5:
    p = MiniBatchSequentialPipeline(preprocessing_and_model_steps,
                                    batch_size=5)
    time_minibatch_pipeline, output_minibatch = eval_run_time(p)
    print(f"Minibatched 'MiniBatchSequentialPipeline' execution time: {time_minibatch_pipeline} seconds.")

    # Parallel pipeline - minibatch size 5 with 4 parallel workers per step that
    # have a max queue size of 10 batches between preprocessing and the model:
    p = SequentialQueuedPipeline(preprocessing_and_model_steps,
                                 n_workers_per_step=4, max_queued_minibatches=10, batch_size=5)
    time_parallel_pipeline, output_parallel = eval_run_time(p)
    print(f"Parallel 'SequentialQueuedPipeline' execution time: {time_parallel_pipeline} seconds.")

    # All three must produce identical outputs; only the timing differs.
    assert np.array_equal(output_classical, output_minibatch)
    assert np.array_equal(output_classical, output_parallel)
    assert time_parallel_pipeline < time_minibatch_pipeline, str((time_parallel_pipeline, time_vanilla_pipeline))
90 |
91 |
92 | if __name__ == '__main__':
93 | main()
94 |
--------------------------------------------------------------------------------
/examples/sklearn/README.txt:
--------------------------------------------------------------------------------
1 | .. _real_world_examples:
2 |
3 | Neuraxle scikit-learn examples
4 | -------------------------------------
5 |
6 | This demonstrates how to use sklearn classes in a Neuraxle pipeline.
7 |
--------------------------------------------------------------------------------
/examples/sklearn/plot_boston_housing_meta_optimization.py:
--------------------------------------------------------------------------------
1 | """
2 | Boston Housing Regression with Meta Optimization
3 | ================================================
4 |
5 | This is an automatic machine learning example. It is more sophisticated than the other simple regression example.
6 | Not only a pipeline is defined, but also an hyperparameter space is defined for the pipeline. Then, a random search is
7 | performed to find the best possible combination of hyperparameters by sampling randomly in the hyperparameter space.
8 |
9 | ..
10 | Copyright 2022, Neuraxio Inc.
11 |
12 | Licensed under the Apache License, Version 2.0 (the "License");
13 | you may not use this file except in compliance with the License.
14 | You may obtain a copy of the License at
15 |
16 | http://www.apache.org/licenses/LICENSE-2.0
17 |
18 | Unless required by applicable law or agreed to in writing, software
19 | distributed under the License is distributed on an "AS IS" BASIS,
20 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21 | See the License for the specific language governing permissions and
22 | limitations under the License.
23 |
24 | ..
25 | Thanks to Umaneo Technologies Inc. for their contributions to this Machine Learning
26 | project, visit https://www.umaneo.com/ for more information on Umaneo Technologies Inc.
27 |
28 | """
29 |
30 | import numpy as np
31 | from sklearn.cluster import KMeans
32 | from sklearn.datasets import load_boston
33 | from sklearn.decomposition import PCA, FastICA
34 | from sklearn.ensemble import GradientBoostingRegressor
35 | from sklearn.linear_model import Ridge
36 | from sklearn.metrics import r2_score, mean_squared_error
37 | from sklearn.model_selection import train_test_split
38 | from sklearn.utils import shuffle
39 |
40 | from neuraxle.hyperparams.distributions import RandInt, LogUniform, Boolean
41 | from neuraxle.hyperparams.space import HyperparameterSpace
42 | from neuraxle.metaopt.auto_ml import AutoML, ValidationSplitter
43 | from neuraxle.metaopt.callbacks import MetricCallback
44 | from neuraxle.pipeline import Pipeline
45 | from neuraxle.steps.numpy import NumpyTranspose
46 | from neuraxle.steps.sklearn import SKLearnWrapper
47 | from neuraxle.union import AddFeatures, ModelStacking
48 |
49 |
def main(tmpdir):
    """
    Random-search hyperparameters of a stacked regression pipeline on the
    Boston Housing dataset with AutoML, then report train/test R2 scores.

    :param tmpdir: cache directory name; not used in the visible body —
        presumably kept for parity with other examples (TODO confirm).
    """
    # NOTE(review): sklearn.datasets.load_boston was deprecated and later
    # removed from scikit-learn — confirm the pinned sklearn version still has it.
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

    # Note that the hyperparameter spaces are defined here during the pipeline definition, but it could be already set
    # within the classes at their definition if using custom classes, or also it could be defined after declaring the
    # pipeline using a flat dict or a nested dict.

    p = Pipeline([
        # Concatenate the original features with PCA and FastICA projections:
        AddFeatures([
            SKLearnWrapper(
                PCA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
            SKLearnWrapper(
                FastICA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
        ]),
        # Stack a boosted regressor and KMeans cluster features under a Ridge judge:
        ModelStacking([
            SKLearnWrapper(
                GradientBoostingRegressor(),
                HyperparameterSpace({
                    "n_estimators": RandInt(50, 300), "max_depth": RandInt(1, 4),
                    "learning_rate": LogUniform(0.07, 0.7)
                })
            ),
            SKLearnWrapper(
                KMeans(),
                HyperparameterSpace({"n_clusters": RandInt(5, 10)})
            ),
        ],
            joiner=NumpyTranspose(),
            judge=SKLearnWrapper(
                Ridge(),
                HyperparameterSpace({"alpha": LogUniform(0.7, 1.4), "fit_intercept": Boolean()})),
        )
    ])

    print("Meta-fitting on train:")
    # ValidationSplitter holds out 20% of the train data to score each trial.
    auto_ml = AutoML(
        p,
        validation_splitter=ValidationSplitter(0.20),
        n_trials=10,
        epochs=1,  # 1 epoch here due to using sklearn models that just fit once.
        callbacks=[MetricCallback('mse', metric_function=mean_squared_error, higher_score_is_better=False)],
    )

    fitted_random_search = auto_ml.fit(X_train, y_train)
    print("")

    print("Transforming train and test:")
    y_train_predicted = fitted_random_search.predict(X_train)
    y_test_predicted = fitted_random_search.predict(X_test)

    print("")

    print("Evaluating transformed train:")
    score_transform = r2_score(y_train_predicted, y_train)
    print('R2 regression score:', score_transform)

    print("")

    print("Evaluating transformed test:")
    score_test = r2_score(y_test_predicted, y_test)
    print('R2 regression score:', score_test)
118 |
119 |
120 | if __name__ == "__main__":
121 | main('cache')
122 |
--------------------------------------------------------------------------------
/examples/sklearn/plot_boston_housing_regression_with_model_stacking.py:
--------------------------------------------------------------------------------
1 | """
2 | Boston Housing Regression
3 | ==========================
4 |
5 | This example solves a regression problem using a pipeline with the following steps:
6 |
7 | - Feature augmentation with PCA and Fast ICA,
8 | - A Pre-regression using an ensemble containing gradient boosted regressors, and a KMeans clustering for even more features in the stacking,
9 | - The model stacking using a ridge regression.
10 |
11 | This example also prints the shapes of the objects between the pipeline elements.
12 |
13 | ..
14 | Copyright 2019, Neuraxio Inc.
15 |
16 | Licensed under the Apache License, Version 2.0 (the "License");
17 | you may not use this file except in compliance with the License.
18 | You may obtain a copy of the License at
19 |
20 | http://www.apache.org/licenses/LICENSE-2.0
21 |
22 | Unless required by applicable law or agreed to in writing, software
23 | distributed under the License is distributed on an "AS IS" BASIS,
24 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 | See the License for the specific language governing permissions and
26 | limitations under the License.
27 |
28 | ..
29 | Thanks to Umaneo Technologies Inc. for their contributions to this Machine Learning
30 | project, visit https://www.umaneo.com/ for more information on Umaneo Technologies Inc.
31 |
32 | """
33 |
34 | import numpy as np
35 | from sklearn.cluster import KMeans
36 | from sklearn.datasets import load_boston
37 | from sklearn.decomposition import PCA, FastICA
38 | from sklearn.ensemble import GradientBoostingRegressor
39 | from sklearn.metrics import r2_score
40 | from sklearn.model_selection import train_test_split
41 | from sklearn.utils import shuffle
42 |
43 | from neuraxle.pipeline import Pipeline
44 | from neuraxle.steps.numpy import NumpyShapePrinter
45 | from neuraxle.steps.sklearn import RidgeModelStacking
46 | from neuraxle.union import AddFeatures
47 |
48 |
def main():
    """
    Fit a feature-augmented stacking pipeline on Boston Housing and return
    the train/test predictions together with their R2 scores.
    """
    # NOTE(review): sklearn.datasets.load_boston was deprecated and later
    # removed from scikit-learn — confirm the pinned sklearn version still has it.
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

    p = Pipeline([
        NumpyShapePrinter(),
        # Concatenate the original features with PCA and FastICA projections:
        AddFeatures([
            PCA(n_components=2),
            FastICA(n_components=2),
        ]),
        NumpyShapePrinter(),
        # Stack several regressors (plus KMeans cluster features) under a Ridge judge:
        RidgeModelStacking([
            GradientBoostingRegressor(),
            GradientBoostingRegressor(n_estimators=500),
            GradientBoostingRegressor(max_depth=5),
            KMeans(),
        ]),
        NumpyShapePrinter(),
    ])

    print("Fitting on train:")
    p = p.fit(X_train, y_train)
    print("")
    print("Transforming train and test:")
    y_train_predicted = p.predict(X_train)
    y_test_predicted = p.predict(X_test)
    print("")
    print("Evaluating transformed train:")
    score_train = r2_score(y_train_predicted, y_train)
    print('R2 regression score:', score_train)
    print("")
    print("Evaluating transformed test:")
    score_test = r2_score(y_test_predicted, y_test)
    print('R2 regression score:', score_test)

    # 506 samples split 75/25 -> 379 train / 127 test predictions.
    assert y_train_predicted.shape == (379,)
    assert y_test_predicted.shape == (127,)
    assert isinstance(score_train, float)
    assert isinstance(score_test, float)

    return y_train_predicted, y_test_predicted, score_train, score_test
92 |
93 |
94 | if __name__ == "__main__":
95 | main()
96 |
--------------------------------------------------------------------------------
/flake8.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Lint only for errors that break execution: E9 (syntax/indentation errors),
# F63 (invalid comparisons), F7 (syntax statements), F82 (undefined names).
# Prints counts, statistics, and the offending source lines.
flake8 neuraxle testing_neuraxle --count --max-line-length=120 --select=E9,F63,F7,F82 --statistics --show-source
4 |
--------------------------------------------------------------------------------
/neuraxle/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.8.1"
2 |
--------------------------------------------------------------------------------
/neuraxle/distributed/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/neuraxle/distributed/__init__.py
--------------------------------------------------------------------------------
/neuraxle/hyperparams/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/neuraxle/hyperparams/__init__.py
--------------------------------------------------------------------------------
/neuraxle/logging/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/neuraxle/logging/__init__.py
--------------------------------------------------------------------------------
/neuraxle/logging/warnings.py:
--------------------------------------------------------------------------------
1 | """
2 | Neuraxle's Deprecation Warnings
3 | ====================================
4 | Code evolves through time. When updating Neuraxle, you may find
5 | that some old arguments you were using or some classes you were
6 | using changed. Warnings will be printed using the methods here.
7 |
8 | ..
9 | Copyright 2019, Neuraxio Inc.
10 |
11 | Licensed under the Apache License, Version 2.0 (the "License");
12 | you may not use this file except in compliance with the License.
13 | You may obtain a copy of the License at
14 |
15 | http://www.apache.org/licenses/LICENSE-2.0
16 |
17 | Unless required by applicable law or agreed to in writing, software
18 | distributed under the License is distributed on an "AS IS" BASIS,
19 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 | See the License for the specific language governing permissions and
21 | limitations under the License.
22 |
23 | """
24 |
25 | import warnings
26 |
27 |
28 | SILENCE_DEPRECATION_WARNING = False
29 |
30 |
def silence_all_deprecation_warnings():
    """
    Turn off all of the neuraxle.logging.warnings for deprecations.
    """
    # Module-level flag read by warn_deprecated_class / warn_deprecated_arg.
    global SILENCE_DEPRECATION_WARNING
    SILENCE_DEPRECATION_WARNING = True
37 |
38 |
def warn_deprecated_class(self, replacement_class: type = None, as_per_version: str = None):
    """
    Emit a DeprecationWarning pointing at `replacement_class`, then return
    `self` so the call can be chained (e.g. inside `__init__`).

    No warning is emitted when warnings are silenced or when no replacement
    class is given.
    """
    global SILENCE_DEPRECATION_WARNING
    if not SILENCE_DEPRECATION_WARNING and replacement_class is not None:
        warnings.warn(
            _deprecated_class_msg(replacement_class, as_per_version) + _deact_msg_instructions()
        )
    return self
46 |
47 |
class RaiseDeprecatedClass:
    """
    Inherit from this class and call its `__init__` method to raise a
    DeprecationWarning pointing at the good class to use instead.
    """

    def __init__(self, replacement_class: type = None, since_version: str = None) -> None:
        # Always raises: constructing a deprecated class is a hard error here.
        raise_deprecated_class(replacement_class, since_version)
56 |
57 |
def raise_deprecated_class(replacement_class: type = None, since_version: str = None):
    """Raise a DeprecationWarning naming the replacement class and version."""
    raise DeprecationWarning(_deprecated_class_msg(replacement_class, since_version))
60 |
61 |
62 | def _deprecated_class_msg(self, replacement_class: type = None, since_version: str = None) -> str:
63 | return (
64 | f"The class `{self.__class__.__name__}` is deprecated"
65 | f" since version `neuraxle>={since_version}`." if since_version is not None else "."
66 | f" Please consider using the class `{replacement_class.__name__}` instead: visit https://www.neuraxle.org/stable/search.html?q={replacement_class.__name__} for more information" if (
67 | hasattr(replacement_class, "__name__") and replacement_class.__name__ is not None) else " Visit https://www.neuraxle.org/stable/api.html for more information."
68 | )
69 |
70 |
def warn_deprecated_arg(self, arg_name, default_value, value, replacement_argument_name, replacement_class: type = None):
    """
    Emit a DeprecationWarning when a deprecated argument was explicitly set
    (i.e. differs from its default), then return `self` for chaining.

    :param arg_name: name of the deprecated argument.
    :param default_value: the argument's default value; no warning if unchanged.
    :param value: the value the caller actually passed.
    :param replacement_argument_name: the argument to suggest instead.
    :param replacement_class: optional class to suggest instead.

    Bugfix: the warning text previously mixed a conditional expression into a
    run of implicitly-concatenated f-strings; Python parsed the whole run as
    one ternary, so when `replacement_class` was None the message collapsed
    to just "instead. ...". The message is now assembled explicitly.
    """
    global SILENCE_DEPRECATION_WARNING
    if not SILENCE_DEPRECATION_WARNING and default_value != value:
        if isinstance(replacement_class, type):
            replacement_class = replacement_class.__name__
        msg = (
            f"Argument `{arg_name}={value}` for class `{self.__class__.__name__}` is deprecated. "
            f"Please consider using `{replacement_argument_name}` "
        )
        if replacement_class is not None:
            msg += f"or the class `{replacement_class}` "
        msg += f"instead. {_deact_msg_instructions()}"
        warnings.warn(msg)
    return self
84 |
85 |
86 | def _deact_msg_instructions() -> str:
87 | return (
88 | " If you want to disable these warnings,"
89 | " call `neuraxle.logging.warnings.silence_all_deprecation_warnings()`."
90 | )
91 |
--------------------------------------------------------------------------------
/neuraxle/metaopt/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/neuraxle/metaopt/__init__.py
--------------------------------------------------------------------------------
/neuraxle/metaopt/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/neuraxle/metaopt/data/__init__.py
--------------------------------------------------------------------------------
/neuraxle/metaopt/hyperopt/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/neuraxle/metaopt/hyperopt/__init__.py
--------------------------------------------------------------------------------
/neuraxle/metaopt/repositories/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/neuraxle/metaopt/repositories/__init__.py
--------------------------------------------------------------------------------
/neuraxle/rest/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/neuraxle/rest/__init__.py
--------------------------------------------------------------------------------
/neuraxle/rest/flask.py:
--------------------------------------------------------------------------------
1 | """
2 | Neuraxle's Flask Wrapper classes
3 | ====================================
4 | The flask wrapper classes are used to easily serve pipeline predictions using a flask rest api.
5 |
6 | ..
7 | Copyright 2019, Neuraxio Inc.
8 |
9 | Licensed under the Apache License, Version 2.0 (the "License");
10 | you may not use this file except in compliance with the License.
11 | You may obtain a copy of the License at
12 |
13 | http://www.apache.org/licenses/LICENSE-2.0
14 |
15 | Unless required by applicable law or agreed to in writing, software
16 | distributed under the License is distributed on an "AS IS" BASIS,
17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | See the License for the specific language governing permissions and
19 | limitations under the License.
20 |
21 | ..
22 | Thanks to Umaneo Technologies Inc. for their contributions to this Machine Learning
23 | project, visit https://www.umaneo.com/ for more information on Umaneo Technologies Inc.
24 |
25 | """
26 | from abc import ABC, abstractmethod
27 |
28 | import numpy as np
29 | from flask import Response
30 |
31 | from neuraxle.base import BaseStep
32 | from neuraxle.pipeline import Pipeline
33 |
34 |
class JSONDataBodyDecoder(BaseStep, ABC):
    """
    Abstract step that turns a parsed JSON request body into pipeline data inputs.

    Subclass this and implement :func:`decode` to use it as the first step of a
    :class:`FlaskRestApiWrapper`.
    """

    def transform(self, data_inputs):
        # Delegate the actual conversion to the subclass-provided decode method.
        decoded_data = self.decode(data_inputs)
        return decoded_data

    @abstractmethod
    def decode(self, data_inputs: dict):
        """
        Convert the parsed JSON body into a data structure the wrapped
        pipeline can consume (e.g.: arrays).

        :param data_inputs: dict parsed from the request's JSON body
        :return: data inputs compatible with the wrapped pipeline
        """
        raise NotImplementedError("TODO: inherit from the `JSONDataBodyDecoder` class and implement this method.")
52 |
53 |
class JSONDataResponseEncoder(BaseStep, ABC):
    """
    Abstract step that turns pipeline outputs into a JSON flask response.

    Subclass this and implement :func:`encode` to use it as the last step of a
    :class:`FlaskRestApiWrapper`.
    """

    def transform(self, data_inputs) -> Response:
        """
        Encode processed data inputs and wrap them into a flask Response object.

        :param data_inputs: outputs of the wrapped pipeline
        :return: flask response object
        """
        from flask import jsonify
        encoded = self.encode(data_inputs)
        return jsonify(encoded)

    @abstractmethod
    def encode(self, data_inputs) -> dict:
        """
        Convert data_inputs to a dict or a compatible data structure for jsonification.

        :param data_inputs: a data structure outputted by the pipeline after a transform
        :return: encoded data_inputs (jsonifiable dict)
        """
        raise NotImplementedError("TODO: inherit from the `JSONDataResponseEncoder` class and implement this method.")
78 |
79 |
class FlaskRestApiWrapper(Pipeline):
    """
    Wrap a pipeline to deploy it as a Flask REST API.

    Provide a JSON body decoder step, the pipeline to serve, and a JSON
    response encoder step.

    Usage example:

    ```
    class CustomJSONDecoderFor2DArray(JSONDataBodyDecoder):
        '''This is a custom JSON decoder class that precedes the pipeline's transformation.'''

        def decode(self, data_inputs: dict):
            values_in_json_2d_arr: List[List[int]] = data_inputs["values"]
            return np.array(values_in_json_2d_arr)

    class CustomJSONEncoderOfOutputs(JSONDataResponseEncoder):
        '''This is a custom JSON response encoder class for converting the pipeline's transformation outputs.'''

        def encode(self, data_inputs) -> dict:
            return {
                'predictions': list(data_inputs)
            }

    app = FlaskRestApiWrapper(
        json_decoder=CustomJSONDecoderFor2DArray(),
        wrapped=Pipeline(...),
        json_encoder=CustomJSONEncoderOfOutputs(),
    ).get_app()

    app.run(debug=False, port=5000)
    ```
    """

    def __init__(
            self,
            json_decoder: JSONDataBodyDecoder,
            wrapped: BaseStep,
            json_encoder: JSONDataResponseEncoder,
            route='/'):
        # The served pipeline is simply: decode JSON -> wrapped step -> encode JSON.
        steps = [json_decoder, wrapped, json_encoder]
        Pipeline.__init__(self, steps)
        self.route: str = route

    def get_app(self):
        """
        Build and return a Flask app serving this pipeline at ``self.route``.

        :return: a Flask app (as given by `app = Flask(__name__)` and then configured).
        """
        from flask import Flask, request
        from flask_restful import Api, Resource

        flask_app = Flask(__name__)
        rest_api = Api(flask_app)
        this_pipeline = self  # captured by the resource's closure below

        class RESTfulRes(Resource):
            def get(self):
                # Run the request's JSON body through the whole pipeline.
                return this_pipeline.transform(request.get_json())

        rest_api.add_resource(RESTfulRes, self.route)

        return flask_app
148 |
--------------------------------------------------------------------------------
/neuraxle/steps/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/neuraxle/steps/__init__.py
--------------------------------------------------------------------------------
/neuraxle/steps/features.py:
--------------------------------------------------------------------------------
1 | """
2 | Featurization Steps
3 | ==========================================================
4 | You can find here steps that featurize your data.
5 |
6 | ..
7 | Copyright 2019, Neuraxio Inc.
8 |
9 | Licensed under the Apache License, Version 2.0 (the "License");
10 | you may not use this file except in compliance with the License.
11 | You may obtain a copy of the License at
12 |
13 | http://www.apache.org/licenses/LICENSE-2.0
14 |
15 | Unless required by applicable law or agreed to in writing, software
16 | distributed under the License is distributed on an "AS IS" BASIS,
17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | See the License for the specific language governing permissions and
19 | limitations under the License.
20 |
21 | """
22 | from neuraxle.pipeline import Pipeline
23 | from neuraxle.steps.flow import ChooseOneOrManyStepsOf
24 | from neuraxle.steps.numpy import NumpyFFT, NumpyAbs, NumpyFlattenDatum, NumpyConcatenateInnerFeatures, NumpyMean, \
25 | NumpyMedian, NumpyMin, NumpyMax, NumpyArgMax
26 | from neuraxle.union import FeatureUnion
27 |
28 |
class FFTPeakBinWithValue(FeatureUnion):
    """
    Feature union that extracts, for each FFT spectrum, the peak bin index (int)
    and the peak magnitude value (float), concatenated together.
    This is intended to be used only after a NumpyFFT absolute step.

    .. seealso::
        :class:`~neuraxle.base.BaseStep`,
        :class:`~neuraxle.base.NonFittableMixin`,
        :class:`~neuraxle.steps.numpy.NumpyFFT`,
        :class:`Cheap3DTo2DTransformer`
    """
    def __init__(self):
        # Both branches reduce over the frequency axis (-2): one keeps the
        # argmax (peak bin index), the other keeps the max (peak magnitude).
        peak_features = [
            NumpyArgMax(axis=-2),
            NumpyMax(axis=-2)
        ]
        super().__init__(peak_features, joiner=NumpyConcatenateInnerFeatures())
45 |
46 |
class Cheap3DTo2DTransformer(ChooseOneOrManyStepsOf):
    """
    Prebuilt step that featurizes 3D data into 2D data, e.g. for simple
    classification or regression tasks.

    Each feature extractor can be toggled through hyperparams:

    .. code-block:: python

        step = Cheap3DTo2DTransformer().set_hyperparams(hyperparams={
            'FFT__enabled': True,
            'NumpyMean__enabled': True,
            'NumpyMedian__enabled': True,
            'NumpyMin__enabled': True,
            'NumpyMax__enabled': True
        })

    .. seealso::
        :class:`~neuraxle.steps.flow.ChooseOneOrManyStepsOf`,
        :class:`~neuraxle.steps.numpy.NumpyFFT`,
        :class:`~neuraxle.steps.numpy.NumpyAbs`,
        :class:`~neuraxle.steps.numpy.NumpyFlattenDatum`,
        :class:`FFTPeakBinWithValue`,
        :class:`~neuraxle.steps.numpy.NumpyConcatenateInnerFeatures`,
        :class:`~neuraxle.steps.numpy.NumpyMean`,
        :class:`~neuraxle.steps.numpy.NumpyMedian`,
        :class:`~neuraxle.steps.numpy.NumpyMin`,
        :class:`~neuraxle.steps.numpy.NumpyMax`
    """

    def __init__(self):
        # FFT branch: absolute FFT, then concatenate a flattened spectrum with
        # its peak bin/magnitude features on the inner feature axis.
        fft_branch = Pipeline([
            NumpyFFT(),
            NumpyAbs(),
            FeatureUnion([
                NumpyFlattenDatum(),  # flatten everything except the batch axis
                FFTPeakBinWithValue()  # peak FFT bin index and magnitude
            ], joiner=NumpyConcatenateInnerFeatures())
        ]).set_name('FFT')

        # Statistical reductions are the remaining selectable feature steps.
        super().__init__([
            fft_branch,
            NumpyMean(),
            NumpyMedian(),
            NumpyMin(),
            NumpyMax()
        ])
91 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | setuptools
2 | pytest>=6.0.0
3 | pytest-cov>=2.6.1
4 | numpy>=1.16.2
5 | matplotlib==3.3.4
6 | scikit-learn>=0.24.1
7 | scipy>=1.4.1
8 | pandas>=1.3.5
9 | joblib>=0.13.2
10 | flask==1.1.4
11 | flask-restful>=0.3.9
12 | SQLAlchemy==1.4.26
13 | markupsafe==2.0.1
14 | pytest-timeout>=2.1.0
15 |
--------------------------------------------------------------------------------
/run_quick_tests.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Run the fast test suite in parallel, skipping the slow TPE and examples tests.
# Extra arguments are forwarded to pytest via "$@": unlike the former
# `$1 $2 $3 $4`, this is not capped at 4 args and survives args with spaces.
py.test testing_neuraxle/ -n 10 --ignore=testing_neuraxle/metaopt/test_tpe.py --ignore=testing_neuraxle/examples/test_examples.py --disable-pytest-warnings --durations=10 --timeout=100 "$@"
3 |
4 |
--------------------------------------------------------------------------------
/run_slow_tests.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | py.test -n 10 testing_neuraxle/metaopt/test_tpe.py testing_neuraxle/examples/test_examples.py --disable-pytest-warnings --durations=10 --timeout=100
3 |
4 |
--------------------------------------------------------------------------------
/run_tests.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | py.test -n 10 testing_neuraxle/ --disable-pytest-warnings --durations=10 --timeout=100
3 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [aliases]
2 | test=pytest
3 |
4 |
5 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | """
2 | Neuraxle setup file
3 | ====================================
4 | Setup file specifying the python version and so forth.
5 |
6 | ..
7 | Copyright 2019, Neuraxio Inc.
8 |
9 | Licensed under the Apache License, Version 2.0 (the "License");
10 | you may not use this file except in compliance with the License.
11 | You may obtain a copy of the License at
12 |
13 | http://www.apache.org/licenses/LICENSE-2.0
14 |
15 | Unless required by applicable law or agreed to in writing, software
16 | distributed under the License is distributed on an "AS IS" BASIS,
17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | See the License for the specific language governing permissions and
19 | limitations under the License.
20 |
21 | """
22 |
23 | from setuptools import setup, find_packages
24 |
25 | from neuraxle import __version__ as _VERSION
26 |
# Load the README as the PyPI long description.
with open('README.rst') as _f:
    _README = _f.read()

setup(
    name='neuraxle',
    version=_VERSION,  # single-sourced from neuraxle/__init__.py
    description='Neuraxle is a Machine Learning (ML) library for building neat pipelines, providing the right '
                'abstractions to both ease research, development, and deployment of your ML applications.',
    long_description=_README,
    # PyPI trove classifiers, see https://pypi.org/classifiers/
    classifiers=[
        "Development Status :: 4 - Beta",
        "Intended Audience :: Developers",
        "Intended Audience :: Education",
        "Intended Audience :: Financial and Insurance Industry",
        "Intended Audience :: Healthcare Industry",
        "Intended Audience :: Information Technology",
        "Intended Audience :: Manufacturing",
        "Intended Audience :: Science/Research",
        "Intended Audience :: System Administrators",
        "Intended Audience :: Telecommunications Industry",
        'License :: OSI Approved :: Apache Software License',
        "Natural Language :: English",
        "Operating System :: OS Independent",
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        "Topic :: Adaptive Technologies",
        "Topic :: Office/Business",
        "Topic :: Scientific/Engineering",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "Topic :: Scientific/Engineering :: Artificial Life",
        "Topic :: Scientific/Engineering :: Bio-Informatics",
        "Topic :: Scientific/Engineering :: Image Recognition",
        "Topic :: Scientific/Engineering :: Information Analysis",
        "Topic :: Scientific/Engineering :: Interface Engine/Protocol Translator",
        "Topic :: Scientific/Engineering :: Mathematics",
        "Topic :: Scientific/Engineering :: Medical Science Apps.",
        "Topic :: Scientific/Engineering :: Physics",
        "Topic :: Software Development",
        "Topic :: Software Development :: Assemblers",
        "Topic :: Software Development :: Build Tools",
        "Topic :: Software Development :: Libraries",
        "Topic :: Software Development :: Libraries :: Python Modules",
        "Topic :: Software Development :: Object Brokering",
        "Topic :: Software Development :: Pre-processors",
        "Topic :: Software Development :: Quality Assurance",
        "Topic :: Software Development :: Testing",
        "Topic :: System",
        "Topic :: System :: Clustering",
        "Topic :: System :: Distributed Computing",
        "Topic :: System :: Networking",
        # Topic :: System :: Systems Administration,
        "Topic :: Text Processing",
        "Topic :: Text Processing :: Filters",
        "Topic :: Text Processing :: Linguistic",
        "Topic :: Utilities",
        "Typing :: Typed"
    ],
    url='https://github.com/Neuraxio/Neuraxle',
    download_url='https://github.com/Neuraxio/Neuraxle/tarball/{}'.format(
        _VERSION),
    author='Neuraxio Inc.',
    author_email='guillaume.chevalier@neuraxio.com',
    packages=find_packages(include=['neuraxle*']),
    test_suite="testing_neuraxle",
    setup_requires=["pytest-runner"],
    # Runtime dependencies; keep in sync with requirements.txt.
    install_requires=[
        'numpy>=1.16.2',
        'scipy>=1.4.1',
        'scikit-learn>=0.24.1',
        'matplotlib==3.3.4',
        'joblib>=0.13.2',
        'Flask>=1.1.4',
        'Flask-RESTful>=0.3.9',
        'markupsafe==2.0.1',
        'pandas>=1.3.5',
    ],
    # Test-only dependencies.
    tests_require=[
        "pytest",
        "pytest-cov",
        "pytest-timeout>=2.1.0",
        "scikit-learn>=0.24.1"
    ],
    include_package_data=True,
    license='Apache 2.0',
    keywords='pipeline pipelines data science machine learning deep learning neuraxle sklearn scikit-learn scipy numpy pandas tensorflow'
)
114 |
115 | print("""
116 | ____________________________________________________________________
117 |
118 | Thank you for installing
119 | _ _ __
120 | | \ | | | |
121 | | \| | ___ _ _ _ __ ___ __ __ | | ___
122 | | . ` |/ _ \| | | || ' _||__ \\\\ \/ / | | / _ \\
123 | | |\ || __|| |_| | | | / _ | > < | | | __|
124 | |_| \_|\___| \__,_||___| \_,_|/_/\_\ |__|\___|
125 |
126 |
127 | Learn more:
128 | - https://www.neuraxle.org/stable/index.html
129 |
130 | Contribute:
131 | - https://gitter.im/Neuraxle/community
132 |
133 | Open issue:
134 | - https://github.com/Neuraxio/Neuraxle
135 |
136 | Ask questions:
137 | - https://stackoverflow.com/questions/tagged/neuraxle
138 | ____________________________________________________________________
139 | """)
140 |
--------------------------------------------------------------------------------
/testing_neuraxle/__init__.py:
--------------------------------------------------------------------------------
from neuraxle.logging.warnings import silence_all_deprecation_warnings


# Keep the test suite's output clean: silence deprecation warnings globally
# as soon as the testing package is imported.
silence_all_deprecation_warnings()
5 |
--------------------------------------------------------------------------------
/testing_neuraxle/api/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/testing_neuraxle/api/__init__.py
--------------------------------------------------------------------------------
/testing_neuraxle/api/test_flask.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests for Flask.
3 | ============================================
4 |
5 | ..
6 | Copyright 2019, Neuraxio Inc.
7 |
8 | Licensed under the Apache License, Version 2.0 (the "License");
9 | you may not use this file except in compliance with the License.
10 | You may obtain a copy of the License at
11 |
12 | http://www.apache.org/licenses/LICENSE-2.0
13 |
14 | Unless required by applicable law or agreed to in writing, software
15 | distributed under the License is distributed on an "AS IS" BASIS,
16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | See the License for the specific language governing permissions and
18 | limitations under the License.
19 |
20 | """
21 |
22 | import numpy as np
23 |
24 | from neuraxle.rest.flask import JSONDataResponseEncoder, JSONDataBodyDecoder, FlaskRestApiWrapper
25 | from neuraxle.base import BaseTransformer
26 |
27 |
def setup_api():
    """
    Build a Flask test client serving a minimal pipeline that doubles its inputs.

    :return: a Flask test client wired to the REST-wrapped pipeline
    """
    class Decoder(JSONDataBodyDecoder):
        """Custom JSON decoder that precedes the pipeline's transformation."""

        def decode(self, data_inputs):
            # The JSON body is a (nested) list; turn it into a numpy array.
            return np.array(data_inputs)

    class Encoder(JSONDataResponseEncoder):
        """Custom JSON response encoder for the pipeline's transformation outputs."""

        def encode(self, data_inputs) -> dict:
            # Lists are JSON-serializable; numpy arrays are not.
            return {
                'predictions': np.array(data_inputs).tolist()
            }

    class Multiplier(BaseTransformer):
        """Trivial step that doubles whatever passes through it."""

        def transform(self, data_inputs):
            return data_inputs * 2

    rest_app = FlaskRestApiWrapper(
        json_decoder=Decoder(),
        wrapped=Multiplier(),
        json_encoder=Encoder()
    ).get_app()
    rest_app.testing = True

    return rest_app.test_client()
70 |
71 |
def test_api_wrapper_works():
    """GETting JSON data through the API should return the doubled values."""
    client = setup_api()
    data_inputs = [
        [0, 1, 2],
        [3, 4, 5],
    ]

    response = client.get('/', json=data_inputs)

    actual_predictions = np.array(response.json["predictions"])
    expected_predictions = np.array(data_inputs) * 2
    assert np.array_equal(actual_predictions, expected_predictions)
84 |
--------------------------------------------------------------------------------
/testing_neuraxle/examples/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/testing_neuraxle/examples/__init__.py
--------------------------------------------------------------------------------
/testing_neuraxle/examples/test_examples.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from py._path.local import LocalPath
3 |
4 |
# Each test below imports one documentation example and runs its main() entry
# point, asserting only that it completes without raising. Examples that
# persist artifacts receive pytest's tmpdir fixture as their working folder.
def test_auto_ml_loop_clean_kata(tmpdir: LocalPath):
    from examples.auto_ml.plot_automl_loop_clean_kata import main
    main(tmpdir)


def test_easy_rest_api_serving():
    from examples.deployment.plot_easy_rest_api_serving import main
    main()


def test_force_handle_mixin():
    from examples.getting_started.plot_force_handle_mixin import main
    main()


def test_inverse_transform():
    from examples.getting_started.plot_inverse_transform import main
    main()


def test_label_encoder_across_multiple_columns():
    from examples.getting_started.plot_label_encoder_across_multiple_columns import main
    main()


def test_nested_pipelines():
    from examples.getting_started.plot_nested_pipelines import main
    main()


def test_non_fittable_mixin():
    from examples.getting_started.plot_non_fittable_mixin import main
    main()


def test_hyperparams():
    from examples.hyperparams.plot_hyperparams import main
    main()


def test_apply():
    from examples.operations.plot_apply_method import main
    main()


def test_parallel_streaming():
    from examples.parallel.plot_streaming_pipeline import main
    main()


def test_boston_housing_meta_optimization(tmpdir: LocalPath):
    from examples.sklearn.plot_boston_housing_meta_optimization import main
    main(tmpdir)


def test_boston_housing_regression_with_model_stacking():
    from examples.sklearn.plot_boston_housing_regression_with_model_stacking import main
    main()


def test_cyclical_feature_engineering():
    # This example exposes a module-level `predictions` rather than a main().
    from examples.sklearn.plot_cyclical_feature_engineering import predictions
    print(predictions)
    assert predictions is not None
69 |
--------------------------------------------------------------------------------
/testing_neuraxle/hyperparams/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/testing_neuraxle/hyperparams/__init__.py
--------------------------------------------------------------------------------
/testing_neuraxle/hyperparams/test_space.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests for Hyperparameters Distribution Spaces
3 | =============================================
4 |
5 | ..
6 | Copyright 2019, Neuraxio Inc.
7 |
8 | Licensed under the Apache License, Version 2.0 (the "License");
9 | you may not use this file except in compliance with the License.
10 | You may obtain a copy of the License at
11 |
12 | http://www.apache.org/licenses/LICENSE-2.0
13 |
14 | Unless required by applicable law or agreed to in writing, software
15 | distributed under the License is distributed on an "AS IS" BASIS,
16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | See the License for the specific language governing permissions and
18 | limitations under the License.
19 |
20 | """
21 | import copy
22 | from collections import OrderedDict
23 |
24 | import pytest
25 | import scipy
26 | from neuraxle.hyperparams.distributions import (Boolean, Choice,
27 | FixedHyperparameter,
28 | HyperparameterDistribution,
29 | LogNormal, LogUniform, Normal,
30 | PriorityChoice, Quantized,
31 | RandInt, Uniform)
32 | from neuraxle.hyperparams.scipy_distributions import Gaussian, Poisson
33 | from neuraxle.hyperparams.space import (FlatDict, HyperparameterSamples,
34 | HyperparameterSpace, RecursiveDict)
35 |
# Pairs of (flat, nested) hyperparameter dicts that must be equivalent:
# the flat form uses "__" as the separator between nesting levels.
HYPERPARAMS_FLAT_AND_DICT_PAIRS = [(
    # Pair 1:
    {
        "a__learning_rate": 7
    },
    {
        "a": {
            "learning_rate": 7
        }
    }),
    # Pair 2:
    ({
        "b__a__learning_rate": 7,
        "b__learning_rate": 9
    },
    {
        "b": {
            "a": {
                "learning_rate": 7
            },
            "learning_rate": 9
        }
    }),
]
60 |
61 |
@pytest.mark.parametrize("class_to_test", [RecursiveDict, HyperparameterSamples])
@pytest.mark.parametrize("flat,expected_dic", HYPERPARAMS_FLAT_AND_DICT_PAIRS)
def test_flat_to_dict_hyperparams(flat: dict, expected_dic: dict, class_to_test):
    """Flat and nested constructions must compare equal and round-trip both ways."""
    from_flat_dic = class_to_test(flat)
    from_nested_dic = class_to_test(expected_dic)

    assert from_flat_dic == from_nested_dic
    assert from_flat_dic.to_flat_dict() == flat
    assert from_nested_dic.to_flat_dict() == flat
    assert from_nested_dic.to_nested_dict() == expected_dic
    assert from_flat_dic.to_nested_dict() == expected_dic
73 |
74 |
# A representative space covering every distribution type, with nested
# ("__"-separated) names crafted to exercise wildcard compression.
HYPE_SPACE = HyperparameterSpace(OrderedDict({
    "a__b__c": PriorityChoice([0, 1, False, "Test"]),
    "a__b__q__c": Quantized(Uniform(-10, 10)),
    "a__b__q__q": Quantized(Uniform(-10, 10)),
    "a__c": Choice([0, 1, False, "Test"]),
    "a__e__q__c": Choice([0, 1, False, "Test"]),
    "a__test": Boolean(),
    "d__param": RandInt(-10, 10),
    "d__u": Uniform(-10, 10),
    "e__alpha": Normal(0.0, 1.0),
    "e__f__g": LogNormal(0.0, 2.0),
    "e__other": LogUniform(0.001, 10),
    "p__could_also_be_as_fixed": FixedHyperparameter("also hey"),
    "scipy__gaussian": Gaussian(-1, 1),
    "scipy__poisson": Poisson(1.0, 2.0),
    "scipy__scipy__gaussian": scipy.stats.randint(0, 10)
}))
92 |
93 |
def test_hyperparams_space_rvs_outputs_samples():
    """Sampling a space yields HyperparameterSamples with one value per dimension."""
    space = copy.deepcopy(HYPE_SPACE)

    samples = space.rvs()

    assert isinstance(samples, HyperparameterSamples)
    assert len(samples) == len(space)
    for k, v in samples.iter_flat():
        assert k in space
        # Sampled values must be concrete values, not distributions.
        assert not isinstance(v, HyperparameterDistribution)


@pytest.mark.parametrize("hd", list(HYPE_SPACE.to_flat_dict().values()))
def test_hyperparams_space_rvs_outputs_in_range(hd: HyperparameterDistribution):
    """Repeated samples from each distribution must fall within its domain."""
    for _ in range(20):

        sample = hd.rvs()

        assert sample in hd
113 |
114 |
def test_wildcards():
    """Wildcard names must compress shared prefixes while values stay unchanged."""
    EXPECTED_WILDCARDS = [
        "*b__c",
        "*b*c",
        "*q",
        "a__c",
        "*e*c",
        "*test",
        "*param",
        "*u",
        "*alpha",
        "*g",
        "*other",
        "*could_also_be_as_fixed",
        "scipy__gaussian",
        "*poisson",
        "*scipy__gaussian",
    ]

    wildcards: FlatDict = HYPE_SPACE.to_wildcards()

    # Keys are compressed to wildcards; order must follow the space's order.
    for wc, ewc in zip(wildcards.keys(), EXPECTED_WILDCARDS):
        assert wc == ewc, f"{wc} != {ewc}, but should be equal as expected."
    # Values (the distributions themselves) must be untouched.
    for wv, ewv in zip(wildcards.values(), HYPE_SPACE.to_flat_dict().values()):
        assert wv == ewv, f"{str(wv)} != {str(ewv)}, but should remain the same."
140 |
--------------------------------------------------------------------------------
/testing_neuraxle/metaopt/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/testing_neuraxle/metaopt/__init__.py
--------------------------------------------------------------------------------
/testing_neuraxle/metaopt/test_automl_redesign.py:
--------------------------------------------------------------------------------
1 | import os
2 | from typing import Callable, Optional
3 |
4 | import numpy as np
5 | import pytest
6 | from neuraxle.base import BaseStep
7 | from neuraxle.base import ExecutionContext as CX
8 | from neuraxle.base import Identity, NonFittableMixin
9 | from neuraxle.data_container import DataContainer as DACT
10 | from neuraxle.hyperparams.distributions import Boolean, Choice, LogUniform, RandInt
11 | from neuraxle.hyperparams.space import HyperparameterSpace
12 | from neuraxle.metaopt.auto_ml import AutoML, RandomSearchSampler
13 | from neuraxle.metaopt.callbacks import EarlyStoppingCallback, MetricCallback, ScoringCallback
14 | from neuraxle.metaopt.context import AutoMLContext
15 | from neuraxle.metaopt.data.vanilla import ScopedLocation
16 | from neuraxle.metaopt.repositories.repo import HyperparamsRepository, VanillaHyperparamsRepository
17 | from neuraxle.metaopt.validation import ValidationSplitter
18 | from neuraxle.pipeline import Pipeline
19 | from neuraxle.steps.numpy import NumpyRavel
20 | from neuraxle.steps.output_handlers import OutputTransformerWrapper
21 | from neuraxle.steps.sklearn import SKLearnWrapper
22 | from sklearn.linear_model import LogisticRegression
23 | from sklearn.metrics import accuracy_score, mean_squared_error
24 | from sklearn.preprocessing import StandardScaler
25 | from testing_neuraxle.metaopt.test_automl_repositories import CX_WITH_REPO_CTORS, TmpDir
26 |
27 |
28 | def _create_data_source():
29 | data_inputs = np.random.random((25, 50)).astype(np.float32)
30 | expected_outputs = (np.random.random((25,)) > 0.5).astype(np.int32)
31 | return data_inputs, expected_outputs
32 |
33 |
class SetNoneEO(Identity):
    """Identity step that replaces the data container's expected outputs with None."""

    def __init__(self):
        Identity.__init__(self)

    def _will_process(self, dact: DACT, cx: CX):
        # Pass through as Identity would, then drop the expected outputs.
        dact, cx = Identity._will_process(self, dact, cx)
        dact = dact.with_eo(None)
        return dact, cx
43 |
44 |
class FailingStep(NonFittableMixin, BaseStep):
    """A step that always raises, used to test AutoML error handling and logging."""

    def __init__(self):
        BaseStep.__init__(self)
        NonFittableMixin.__init__(self)

    def _will_process(self, dact: DACT, cx: CX):
        # Fail unconditionally. The `return dact, cx` that used to follow this
        # raise was unreachable dead code and has been removed.
        raise ValueError("This error should be found in the logs of the test.")
54 |
55 |
def _create_pipeline(has_failing_step=False):
    """
    Build the classification pipeline under AutoML test.

    :param has_failing_step: when True, insert a FailingStep that always raises
    :return: a scaler + logistic regression pipeline with a tunable hyperparameter space
    """
    return Pipeline([
        StandardScaler(),
        # Ravel expected outputs to 1D, as sklearn classifiers expect.
        OutputTransformerWrapper(NumpyRavel()),
        SKLearnWrapper(
            LogisticRegression(),
            HyperparameterSpace({
                'C': LogUniform(0.01, 10.0),
                'fit_intercept': Boolean(),
                'penalty': Choice(['none', 'l2']),
                'max_iter': RandInt(20, 200)
            })
        ),
        FailingStep() if has_failing_step else Identity(),
        SetNoneEO(),
    ])
72 |
73 |
@pytest.mark.parametrize('cx_repo_ctor', CX_WITH_REPO_CTORS)
@pytest.mark.parametrize('has_failing_step', [False, True])
def test_automl_api_entry_point(tmpdir, cx_repo_ctor: Callable[[Optional[TmpDir]], AutoMLContext], has_failing_step: bool):
    """Run a short AutoML loop end-to-end: failures must be logged, successes refittable."""
    data_inputs, expected_outputs = _create_data_source()
    dact = DACT(data_inputs=data_inputs, expected_outputs=expected_outputs)
    pipeline = _create_pipeline(has_failing_step=has_failing_step)
    # TODO: # HyperbandControllerLoop(), ClusteringParallelFor() ?

    a: AutoML = AutoML(
        pipeline=pipeline,
        validation_splitter=ValidationSplitter(0.20),
        hyperparams_optimizer=RandomSearchSampler(),
        hyperparams_repository=VanillaHyperparamsRepository(cache_folder=os.path.join(tmpdir, "hp")),
        scoring_callback=ScoringCallback(mean_squared_error),
        callbacks=[
            MetricCallback('accuracy', metric_function=accuracy_score, higher_score_is_better=False),
            EarlyStoppingCallback(max_epochs_without_improvement=3)
        ],
        continue_loop_on_error=True,  # a failing trial must not abort the loop
        n_trials=4,
        epochs=5,
        refit_best_trial=False,
    )
    cx: CX = cx_repo_ctor()
    repo: HyperparamsRepository = cx.repo

    a = a.handle_fit(dact, cx)

    if has_failing_step:
        # The FailingStep's error must have been caught and written to the logs.
        assert 'ValueError("This error should be found in the logs of the test.")' in repo.get_log_from_logging_handler(
            cx.logger, ScopedLocation())
    else:
        # A successful run must be refittable and produce outputs.
        a, _out = a.to_force_refit_best_trial().handle_fit_transform(dact, cx)
        assert _out is not None
108 |
--------------------------------------------------------------------------------
/testing_neuraxle/metaopt/test_automl_reports.py:
--------------------------------------------------------------------------------
1 |
2 | from typing import List
3 |
4 | import pytest
5 | from neuraxle.metaopt.data.reporting import (BaseReport, ClientReport,
6 | MetricResultsReport,
7 | ProjectReport, RoundReport,
8 | TrialReport, TrialSplitReport,
9 | dataclass_2_report)
10 | from neuraxle.metaopt.data.vanilla import BaseDataclass
11 | from testing_neuraxle.metaopt.test_automl_dataclasses import (
12 | ALL_DATACLASSES, HYPERPARAMS_DIMS_WILDCARDS, SOME_CLIENT_DATACLASS,
13 | SOME_METRIC_NAME, SOME_PROJECT_DATACLASS, SOME_ROUND_DATACLASS, SOME_TRIAL_DATACLASS)
14 |
15 |
16 | def test_project_report_to_clients_with_best_scores_df():
17 | pr = ProjectReport(SOME_PROJECT_DATACLASS)
18 |
19 | df = pr.to_clients_with_best_scores_df()
20 |
21 | assert ClientReport.CLIENT_ID_COLUMN_NAME in df.columns
22 |
23 |
24 | def test_client_report_to_rounds_with_best_scores_df():
25 | cr = ClientReport(SOME_CLIENT_DATACLASS)
26 |
27 | df = cr.to_rounds_with_best_scores_df()
28 |
29 | assert RoundReport.ROUND_ID_COLUMN_NAME in df.columns
30 |
31 |
32 | def test_round_dc_to_scatterplot_df():
33 | rr = RoundReport(SOME_ROUND_DATACLASS)
34 |
35 | df = rr.to_round_scatterplot_df(SOME_METRIC_NAME, HYPERPARAMS_DIMS_WILDCARDS)
36 |
37 | assert SOME_METRIC_NAME in df.columns
38 | assert TrialReport.TRIAL_ID_COLUMN_NAME in df.columns
39 | for d in HYPERPARAMS_DIMS_WILDCARDS:
40 | assert d in df.columns
41 |
42 |
def test_round_dc_to_scores_over_time_df():
    """Scores-over-time DF adds an epoch column on top of the scatterplot columns."""
    report = RoundReport(SOME_ROUND_DATACLASS)

    scores_df = report.to_scores_over_time_df(SOME_METRIC_NAME, HYPERPARAMS_DIMS_WILDCARDS)

    expected_columns = [
        SOME_METRIC_NAME,
        TrialReport.TRIAL_ID_COLUMN_NAME,
        MetricResultsReport.EPOCH_COLUMN_NAME,
    ] + list(HYPERPARAMS_DIMS_WILDCARDS)
    for column in expected_columns:
        assert column in scores_df.columns
54 |
def test_round_metric_names():
    """The round should report exactly the single metric it was built with."""
    report = RoundReport(SOME_ROUND_DATACLASS)

    assert report.get_metric_names() == [SOME_METRIC_NAME]
60 |
@pytest.mark.parametrize("discard_singles,expected_hp_dims", ([False, HYPERPARAMS_DIMS_WILDCARDS], [True, []]))
def test_round_hp_wildcards_scenario(discard_singles: bool, expected_hp_dims: List[str]):
    """Wildcard listing drops single-valued dimensions only when asked to."""
    report = RoundReport(SOME_ROUND_DATACLASS)

    wildcards = report.list_hyperparameters_wildcards(discard_singles=discard_singles)

    assert wildcards == expected_hp_dims
69 |
@pytest.mark.parametrize('dc', ALL_DATACLASSES[1:])
def test_reports_has_sufficient_dc_info(dc: BaseDataclass):
    """info_df must list every dataclass attribute except three meta fields
    (class name, id, and the sub-dataclasses collection)."""
    report: dataclass_2_report[dc.__class__] = BaseReport.from_dc(dc)
    info = report.info_df()

    assert len(dc.to_dict()) - 3 == len(info.index), (
        f"Dataclass dc={dc} should have rows for each attribute that isn't the "
        f"class name, id, or subdataclasses collections. Got df={info.to_string()}."
    )
80 |
def test_trial_report_to_scores_over_time_df():
    """Per-trial scores DF has split id, epoch, train/val flag and the metric."""
    report = TrialReport(SOME_TRIAL_DATACLASS)

    scores_df = report.to_scores_over_time_df(SOME_METRIC_NAME)

    for column in (
        TrialSplitReport.TRIAL_SPLIT_ID_COLUMN_NAME,
        MetricResultsReport.EPOCH_COLUMN_NAME,
        MetricResultsReport.TRAIN_VAL_COLUMN_NAME,
        SOME_METRIC_NAME,
    ):
        assert column in scores_df.columns
--------------------------------------------------------------------------------
/testing_neuraxle/metaopt/test_automl_sequence_validation_splitter.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | import numpy as np
4 | from neuraxle.base import ExecutionContext as CX
5 | from neuraxle.data_container import DataContainer as DACT
6 | from neuraxle.hyperparams.distributions import RandInt
7 | from neuraxle.hyperparams.space import FlatDict, HyperparameterSpace
8 | from neuraxle.metaopt.auto_ml import AutoML
9 | from neuraxle.metaopt.callbacks import MetricCallback
10 | from neuraxle.metaopt.context import AutoMLContext
11 | from neuraxle.metaopt.data.aggregates import Round
12 | from neuraxle.metaopt.data.vanilla import ScopedLocation
13 | from neuraxle.metaopt.optimizer import (GridExplorationSampler,
14 | RandomSearchSampler)
15 | from neuraxle.metaopt.validation import (KFoldCrossValidationSplitter,
16 | ValidationSplitter)
17 | from neuraxle.pipeline import Pipeline
18 | from neuraxle.steps.numpy import MultiplyByN
19 | from sklearn.metrics import mean_squared_error
20 |
21 |
def test_automl_sequence_splitter(tmpdir):
    """K-fold AutoML search on a linear toy problem reaches a reasonably low MSE."""
    # Fixed seed keeps the random search reproducible.
    np.random.seed(68)

    # Given: a linear mapping and a 3-stage multiplication pipeline to tune.
    data_inputs = np.array(range(100))
    expected_outputs = np.array(range(100, 200))

    space = HyperparameterSpace({
        'multiplication_1__multiply_by': RandInt(1, 3),
        'multiplication_2__multiply_by': RandInt(1, 3),
        'multiplication_3__multiply_by': RandInt(1, 3),
    })

    tunable_pipeline = Pipeline([
        ('multiplication_1', MultiplyByN()),
        ('multiplication_2', MultiplyByN()),
        ('multiplication_3', MultiplyByN())
    ]).set_hyperparams_space(space)

    auto_ml = AutoML(
        pipeline=tunable_pipeline,
        hyperparams_optimizer=RandomSearchSampler(),
        validation_splitter=KFoldCrossValidationSplitter(k_fold=4),
        callbacks=[MetricCallback("MSE", mean_squared_error, False)],
    )

    # When: fit through handler methods, then predict on the whole dataset.
    auto_ml = auto_ml.handle_fit(
        DACT(data_inputs=data_inputs, expected_outputs=expected_outputs), CX(tmpdir))
    predicted_outputs = auto_ml.transform(data_inputs)

    # Then: the tuned pipeline should not be wildly off target.
    actual_mse = ((predicted_outputs - expected_outputs) ** 2).mean()
    assert actual_mse < 20000
57 |
58 |
def test_automl_validation_splitter(tmpdir):
    """Grid exploration on a y = 2x mapping should find the exact optimum."""
    # Seeded for reproducibility.
    np.random.seed(75)
    # Given: data whose best fit is multiplying inputs by 2 overall.
    cx = AutoMLContext.from_context()
    data_inputs = np.array(range(1000, 1020))
    expected_outputs = np.array(range(2020, 2040))
    space = HyperparameterSpace({
        'multiplication_1__multiply_by': RandInt(1, 3),
        'multiplication_2__multiply_by': RandInt(1, 3),
    })
    tunable_pipeline = Pipeline([
        ('multiplication_1', MultiplyByN()),
        ('multiplication_2', MultiplyByN()),
    ]).set_hyperparams_space(space)

    hp_search = AutoML(
        pipeline=tunable_pipeline,
        validation_splitter=ValidationSplitter(validation_size=0.2),
        scoring_callback=MetricCallback("MSE", mean_squared_error, False),
        hyperparams_optimizer=GridExplorationSampler(9),
        n_trials=8,
    ).with_context(cx)

    # When
    hp_search = hp_search.fit(data_inputs, expected_outputs)
    predicted_outputs = hp_search.transform(data_inputs)

    # Then: the search must reach the same MSE as the ideal *2 multiplier.
    optimal_mse = mean_squared_error(expected_outputs, data_inputs * 2)
    actual_mse = mean_squared_error(expected_outputs, predicted_outputs)
    assert actual_mse == optimal_mse
92 |
def test_grid_exploration_sampler_can_try_everything():
    """GridExplorationSampler should cover the full 3x3x3 grid with no duplicates."""
    space = HyperparameterSpace({
        'a': RandInt(1, 3),
        'b': RandInt(1, 3),
        'c': RandInt(1, 3),
    })
    n_combinations = 3 * 3 * 3
    sampler = GridExplorationSampler(n_combinations)
    round_scope: Round = Round.from_context(
        AutoMLContext.from_context(loc=ScopedLocation.default(0)))
    round_scope.with_optimizer(sampler, space)

    # Open exactly as many trials as there are grid points.
    for _ in range(n_combinations):
        with round_scope.new_rvs_trial():
            pass

    sampled: List[FlatDict] = round_scope.report.get_all_hyperparams(as_flat=True)
    distinct = set(tuple(hps.items()) for hps in sampled)
    assert len(distinct) == n_combinations
111 |
--------------------------------------------------------------------------------
/testing_neuraxle/metaopt/test_database_repo.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 |
4 | import pytest
5 | from neuraxle.metaopt.data.vanilla import (DEFAULT_CLIENT, DEFAULT_PROJECT,
6 | ClientDataclass, ProjectDataclass,
7 | RootDataclass, ScopedLocation)
8 | from neuraxle.metaopt.repositories.db import (Base, ClientNode, DataClassNode,
9 | ProjectNode,
10 | ScopedLocationTreeNode,
11 | SQLLiteHyperparamsRepository)
12 | from sqlalchemy import and_, create_engine
13 | from sqlalchemy.orm import sessionmaker
14 | from testing_neuraxle.metaopt.test_automl_dataclasses import \
15 | SOME_PROJECT_DATACLASS
16 |
17 |
def get_sqlite_session_with_root(tmpdir):
    """Create a fresh SQLite-backed SQLAlchemy session with one committed root node.

    Returns a ``(session, root)`` tuple where ``root`` is the
    ScopedLocationTreeNode wrapping a RootDataclass (parent=None).
    """
    db_path = os.path.join(tmpdir, "sqlite.db")
    engine = create_engine(f"sqlite:///{db_path}", echo=True, future=True)
    session_factory = sessionmaker()
    session_factory.configure(bind=engine)
    session = session_factory()
    # Create the schema before inserting anything.
    Base.metadata.create_all(engine)
    session.commit()

    root = ScopedLocationTreeNode(DataClassNode(RootDataclass()), None)
    session.add(root)

    session.commit()
    return session, root
33 |
34 |
def test_sqlalchemy_sqllite_nodes_star_shema_joins(tmpdir):
    """Tree nodes must inherit their ancestors' scope columns (project, client)."""
    session, root = get_sqlite_session_with_root(tmpdir)

    # Insert a project under the root...
    project = ScopedLocationTreeNode(
        ProjectNode(ProjectDataclass(project_name="def_proj")), parent=root)
    session.add(project)
    session.commit()

    # ...then a client under that project.
    client = ScopedLocationTreeNode(
        ClientNode(ClientDataclass(client_name="def_client")), parent=project)
    session.add(client)
    session.commit()

    session.expunge_all()
    rows = session.query(
        ScopedLocationTreeNode.project_name, ScopedLocationTreeNode.client_name
    )
    # Scope columns fill in progressively down the tree.
    assert rows[0] == (None, None)
    assert rows[1] == ("def_proj", None)
    assert rows[2] == ("def_proj", "def_client")
55 |
56 |
def test_root_db_node_can_be_queried(tmpdir):
    """The root node is the unique row with every scope attribute set to NULL."""
    session = get_sqlite_session_with_root(tmpdir)[0]

    # `== None` (not `is None`) is required here: it builds a SQL "IS NULL" test.
    null_filters = [
        getattr(ScopedLocationTreeNode, attr) == None
        for attr in ScopedLocation.__dataclass_fields__
    ]
    root_tree_node = session.query(ScopedLocationTreeNode).filter(
        and_(*null_filters)
    ).one()

    assert root_tree_node.project_name is None
    assert root_tree_node.client_name is None
    assert root_tree_node.round_number is None
70 |
71 |
@pytest.mark.parametrize("deep", [True, False])
def test_can_use_sqlite_db_repo_to_save_and_load_and_overwrite_simple_project(tmpdir, deep):
    """A project dataclass must survive a save / load / overwrite round trip."""
    repo = SQLLiteHyperparamsRepository(tmpdir)
    project: ProjectDataclass = SOME_PROJECT_DATACLASS
    loc = ScopedLocation.default().at_dc(project)

    repo.save(project, loc, deep=deep)
    reloaded = repo.load(loc, deep=deep)
    # Saving the reloaded copy again exercises the overwrite path.
    repo.save(reloaded, loc, deep=deep)
81 |
--------------------------------------------------------------------------------
/testing_neuraxle/metaopt/test_validation_splitter.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from neuraxle.base import ExecutionContext as CX
3 | from neuraxle.data_container import DataContainer as DACT
4 | from neuraxle.hyperparams.space import HyperparameterSpace
5 | from neuraxle.metaopt.auto_ml import Trainer
6 | from neuraxle.metaopt.callbacks import MetricCallback
7 | from neuraxle.metaopt.data.aggregates import Round
8 | from neuraxle.metaopt.optimizer import GridExplorationSampler
9 | from neuraxle.metaopt.validation import ValidationSplitter
10 | from neuraxle.steps.misc import FitTransformCallbackStep, TapeCallbackFunction
11 | from sklearn.metrics import mean_squared_error
12 |
13 |
def test_validation_splitter_handler_methods_should_split_data(tmpdir):
    """Trainer + ValidationSplitter(0.1) must fit on the first 90% of the data,
    validate on the remaining 10%, and record a validation score on the trial.
    """
    transform_callback = TapeCallbackFunction()
    fit_callback = TapeCallbackFunction()
    pipeline = FitTransformCallbackStep(
        transform_callback_function=transform_callback,
        fit_callback_function=fit_callback,
        transform_function=lambda di: di * 2
    )
    metric: MetricCallback = MetricCallback("MSE", mean_squared_error, False)
    validation_split_wrapper = Trainer(
        callbacks=[metric],
        validation_splitter=ValidationSplitter(validation_size=0.1),
        n_epochs=1,
    )

    data_inputs = np.random.randint(low=1, high=100, size=(100, 5))
    expected_outputs = np.random.randint(low=1, high=100, size=(100, 5))
    dact = DACT(di=data_inputs, eo=expected_outputs)

    # A dummy round is saved shallowly so a trial can be scoped under it.
    round_scope: Round = Round.dummy().with_metric(metric.name).save(deep=False)
    with round_scope.with_optimizer(GridExplorationSampler(), HyperparameterSpace()).new_rvs_trial() as trial_scope:
        trained_pipeline: FitTransformCallbackStep = validation_split_wrapper.train(
            pipeline, dact, trial_scope, return_trained_pipelines=True)[0]

        predicted_outputs = trained_pipeline.predict(data_inputs)
        # Rebind to the tapes of the *trained copy*, not the originals above.
        fit_callback = trained_pipeline.fit_callback_function
        transform_callback = trained_pipeline.transform_callback_function

        assert np.array_equal(predicted_outputs, data_inputs * 2)

        # should fit on train split
        assert np.array_equal(fit_callback.data[0][0], data_inputs[0:90])
        assert np.array_equal(fit_callback.data[0][1], expected_outputs[0:90])

        # should transform on test split
        assert np.array_equal(transform_callback.data[0], data_inputs[0:90])
        assert np.array_equal(transform_callback.data[1], data_inputs[90:])

        # should predict on all data at the end
        assert np.array_equal(transform_callback.data[2], data_inputs)

    with round_scope.last_trial() as trial_scope:
        # The averaged validation score must exist once the trial has closed.
        assert trial_scope.get_avg_validation_score(metric.name) is not None
57 |
--------------------------------------------------------------------------------
/testing_neuraxle/mocks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/testing_neuraxle/mocks/__init__.py
--------------------------------------------------------------------------------
/testing_neuraxle/mocks/step_mocks.py:
--------------------------------------------------------------------------------
1 | from neuraxle.base import BaseStep, TruncableSteps, MetaStep, BaseTransformer
2 | from neuraxle.hyperparams.distributions import LogUniform, Quantized, RandInt, Boolean
3 | from neuraxle.hyperparams.space import HyperparameterSpace, HyperparameterSamples
4 |
# A space of plausible neural-network hyperparameter distributions, used by the
# mock steps below to exercise hyperparameter-space handling in tests.
HYPERPARAMETERS_SPACE = HyperparameterSpace({
    'learning_rate': LogUniform(0.0001, 0.1),
    'l2_weight_reg': LogUniform(0.0001, 0.1),
    'momentum': LogUniform(0.01, 1.0),
    'hidden_size': Quantized(LogUniform(16, 512)),  # integer-quantized log-uniform
    'num_layers': RandInt(1, 4),
    'num_lstm_layers': RandInt(1, 2),
    'use_xavier_init': Boolean(),
    'use_max_pool_else_avg_pool': Boolean(),
    'dropout_drop_proba': LogUniform(0.3, 0.7)
})

# One concrete sample consistent with the space above.
HYPERPARAMETERS = HyperparameterSamples({
    'learning_rate': 0.1,
    'l2_weight_reg': 0.001,
    'hidden_size': 32,
    'num_layers': 3,
    'num_lstm_layers': 1,
    'use_xavier_init': True,
    'use_max_pool_else_avg_pool': True,
    'dropout_drop_proba': 0.5,
    'momentum': 0.1
})

# Sentinel payloads used by the mock transformers below.
AN_INPUT = "I am an input"
AN_EXPECTED_OUTPUT = "I am an expected output"
31 |
32 |
class SomeStep(BaseTransformer):
    """Transformer stub mapping every input item to one fixed output value."""

    def __init__(self, hyperparams_space: HyperparameterSpace = None, output=AN_EXPECTED_OUTPUT):
        super().__init__(hyperparams=None, hyperparams_space=hyperparams_space)
        self.output = output

    def transform(self, data_inputs):
        # One copy of the constant output per input item.
        return [self.output for _ in data_inputs]
40 |
41 |
class SomeStepWithHyperparams(BaseStep):
    """Do-nothing step preloaded with the module's sample hyperparams and space."""

    def __init__(self):
        super().__init__(
            hyperparams=HYPERPARAMETERS,
            hyperparams_space=HYPERPARAMETERS_SPACE,
            name="MockStep"
        )

    def transform(self, data_inputs):
        """Intentionally a no-op mock."""

    def fit(self, data_inputs, expected_outputs=None):
        """Intentionally a no-op mock."""
55 |
56 |
class SomeMetaStepWithHyperparams(MetaStep):
    """MetaStep mock wrapping a SomeStepWithHyperparams instance."""

    def __init__(self):
        super().__init__(wrapped=SomeStepWithHyperparams())

    def transform(self, data_inputs):
        """Intentionally a no-op mock."""

    def fit(self, data_inputs, expected_outputs=None):
        """Intentionally a no-op mock."""
66 |
67 |
class SomeTruncableStep(TruncableSteps):
    """TruncableSteps mock holding two hyperparameterized sub-steps."""

    def __init__(self):
        super().__init__(
            hyperparams=HYPERPARAMETERS,
            hyperparams_space=HYPERPARAMETERS_SPACE,
            steps_as_tuple=(SomeStepWithHyperparams(), SomeStepWithHyperparams())
        )

    def transform(self, data_inputs):
        """Intentionally a no-op mock."""

    def fit(self, data_inputs, expected_outputs=None):
        """Intentionally a no-op mock."""
81 |
82 |
class SomeSplitStep(BaseStep):
    """Minimal BaseStep stub whose transform does nothing."""

    def transform(self, data_inputs):
        """Intentionally a no-op mock."""
86 |
--------------------------------------------------------------------------------
/testing_neuraxle/steps/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/testing_neuraxle/steps/__init__.py
--------------------------------------------------------------------------------
/testing_neuraxle/steps/neuraxle_test_case.py:
--------------------------------------------------------------------------------
1 | """
2 | Neuraxle Test Case Class
3 | ========================================
4 |
5 | ..
6 | Copyright 2019, Neuraxio Inc.
7 |
8 | Licensed under the Apache License, Version 2.0 (the "License");
9 | you may not use this file except in compliance with the License.
10 | You may obtain a copy of the License at
11 |
12 | http://www.apache.org/licenses/LICENSE-2.0
13 |
14 | Unless required by applicable law or agreed to in writing, software
15 | distributed under the License is distributed on an "AS IS" BASIS,
16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | See the License for the specific language governing permissions and
18 | limitations under the License.
19 |
20 | """
21 | import numpy as np
22 |
23 | from neuraxle.base import ExecutionMode
24 |
25 |
class NeuraxleTestCase:
    """Bundle of a pipeline, its tape callbacks, and the expected results of one run.

    ``execute()`` runs the pipeline in the configured ExecutionMode; the
    ``assert_*`` helpers then compare recorded callback tapes and processed
    outputs against the expected values.
    """

    def __init__(
            self,
            pipeline,
            callbacks,
            expected_callbacks_data,
            hyperparams_space=None,
            hyperparams=None,
            expected_processed_outputs=None,
            execution_mode=None,
            more_arguments=None,
            data_inputs=None,
            expected_outputs=None
    ):
        self.pipeline = pipeline
        self.callbacks = callbacks
        self.expected_callbacks_data = expected_callbacks_data
        self.hyperparams_space = hyperparams_space
        self.hyperparams = hyperparams
        self.expected_processed_outputs = expected_processed_outputs
        self.execution_mode = execution_mode
        self.more_arguments = more_arguments
        self.data_inputs = data_inputs
        self.expected_outputs = expected_outputs

    def assert_callback_data_is_as_expected(self):
        """Compare each callback's recorded tape to its expected data, pairwise."""
        for callback, expected in zip(self.callbacks, self.expected_callbacks_data):
            if not callback.data:
                # Nothing recorded: the expectation must be empty too.
                assert np.array_equal(np.array([]), np.array(expected))
            elif isinstance(callback.data[0], tuple):
                # Fit tapes record (data_inputs, expected_outputs) pairs.
                for (exp_di, exp_eo), (act_di, act_eo) in zip(expected, callback.data):
                    assert np.array_equal(exp_di, act_di)
                    assert np.array_equal(exp_eo, act_eo)
            else:
                assert np.array_equal(np.array(callback.data), expected)

    def assert_expected_processed_outputs(self, processed_outputs):
        """Check processed outputs, except in FIT mode where there are none."""
        if self.execution_mode != ExecutionMode.FIT:
            assert np.array_equal(processed_outputs, self.expected_processed_outputs)

    def execute(self):
        """Reset callbacks, run the pipeline in the configured mode, return outputs."""
        for callback in self.callbacks:
            callback.reset()

        processed_outputs = None
        # Modes are mutually exclusive, so an if/elif chain is equivalent.
        if self.execution_mode == ExecutionMode.TRANSFORM:
            processed_outputs = self.pipeline.transform(self.data_inputs)
        elif self.execution_mode == ExecutionMode.FIT_TRANSFORM:
            self.pipeline, processed_outputs = self.pipeline.fit_transform(self.data_inputs, self.expected_outputs)
        elif self.execution_mode == ExecutionMode.FIT:
            self.pipeline = self.pipeline.fit(self.data_inputs, self.expected_outputs)

        return processed_outputs
86 |
--------------------------------------------------------------------------------
/testing_neuraxle/steps/test_assertion_steps.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 |
3 | import numpy as np
4 | import pytest
5 | from neuraxle.base import AssertExpectedOutputIsNone, BaseStep
6 | from neuraxle.base import ExecutionContext as CX
7 | from neuraxle.base import ExecutionPhase, HandleOnlyMixin, NonFittableMixin
8 | from neuraxle.data_container import DataContainer as DACT
9 | from neuraxle.pipeline import Pipeline
10 |
11 |
class SomeAssertionStep(NonFittableMixin, HandleOnlyMixin, BaseStep):
    """Step asserting that data inputs equal expected outputs (when present)."""

    def __init__(self):
        BaseStep.__init__(self)
        HandleOnlyMixin.__init__(self)

    def _transform_data_container(self, data_container: DACT, context: CX) -> DACT:
        _, di, eo = data_container.tolist().unpack()
        # Only assert when expected outputs were actually provided.
        if eo is not None:
            self._assert_equals(di, eo, "Assertion failed", context)
        return di
22 |
23 |
class TestAssertionMethodInSteps(TestCase):
    """Step assertions must always log, and must raise only outside PROD phase."""

    def test_assertion_step_logs_and_raises_with_pipeline(self):
        di = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
        dact = DACT(di, None, di * 2)
        pipeline = Pipeline([SomeAssertionStep()])

        with self.assertLogs() as captured:
            with pytest.raises(AssertionError):
                pipeline.handle_fit_transform(dact, context=CX())

        self.assertIn("Assertion failed", captured.output[0])

    def test_assertion_step_just_logs_with_pipeline_in_prod(self):
        di = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
        dact = DACT(di, None, di * 2)
        pipeline = Pipeline([SomeAssertionStep()])
        context = CX(execution_phase=ExecutionPhase.PROD)
        # Fitting may raise regardless of phase; swallow to reach prediction.
        try:
            pipeline = pipeline.handle_fit(dact, context=context)
        except AssertionError:
            pass

        with self.assertLogs() as captured:
            pipeline.handle_predict(dact, context=context)

        # assert that the log still at least contains the expected message:
        self.assertIn("Assertion failed", captured.output[0])
54 |
55 |
def test_expectedoutputnull_raise_exception_when_notnull(tmpdir):
    """AssertExpectedOutputIsNone must raise when expected outputs are provided."""
    data_inputs = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    expected_outputs = data_inputs * 2

    p = Pipeline([AssertExpectedOutputIsNone()])

    # The raise itself is the whole check; the previous unused
    # `as error_info` binding was removed.
    with pytest.raises(AssertionError):
        p.fit_transform(data_inputs, expected_outputs)
64 |
65 |
def test_expectedoutputnull_is_fine_when_null(tmpdir):
    """AssertExpectedOutputIsNone must pass silently when no EOs are given."""
    data_inputs = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

    p = Pipeline([AssertExpectedOutputIsNone()])

    # Must not raise.
    p.fit_transform(data_inputs, None)
72 |
--------------------------------------------------------------------------------
/testing_neuraxle/steps/test_column_selector_2d.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 |
4 | from neuraxle.steps.column_transformer import ColumnSelector2D, ColumnsSelectorND, NumpyColumnSelector2D
5 |
6 |
@pytest.mark.parametrize('column_selector_2d_class', [ColumnSelector2D, NumpyColumnSelector2D])
def test_column_selector_2d_should_select_range(column_selector_2d_class):
    """Selecting range(0, 10) keeps exactly the first ten columns."""
    selector = column_selector_2d_class(range(0, 10))
    data_inputs, _ = _create_data_source((20, 20))

    outputs = selector.transform(data_inputs)

    assert np.array_equal(outputs, data_inputs[..., :10])
16 |
@pytest.mark.parametrize('column_selector_2d_class', [ColumnSelector2D, NumpyColumnSelector2D])
def test_column_selector_2d_should_select_int(column_selector_2d_class):
    """Selecting a single int keeps that one column with its axis preserved."""
    selector = column_selector_2d_class(10)
    data_inputs, _ = _create_data_source((20, 20))

    outputs = selector.transform(data_inputs)

    expected = np.expand_dims(data_inputs[..., 10], axis=-1)
    assert np.array_equal(outputs, expected)
26 |
27 |
@pytest.mark.parametrize('column_selector_2d_class', [ColumnSelector2D, NumpyColumnSelector2D])
def test_column_selector_2d_should_select_slice(column_selector_2d_class):
    """Selecting slice(0, 10, 1) keeps exactly the first ten columns."""
    selector = column_selector_2d_class(slice(0, 10, 1))
    data_inputs, _ = _create_data_source((20, 20))

    outputs = selector.transform(data_inputs)

    assert np.array_equal(outputs, data_inputs[..., :10])
36 |
37 |
@pytest.mark.parametrize('column_selector_2d_class', [ColumnSelector2D, NumpyColumnSelector2D])
def test_column_selector_2d_should_select_list_of_indexes(column_selector_2d_class):
    """Selecting [0, 1, 2] keeps exactly the first three columns."""
    selector = column_selector_2d_class([0, 1, 2])
    data_inputs, _ = _create_data_source((20, 20))

    outputs = selector.transform(data_inputs)

    assert np.array_equal(outputs, data_inputs[..., :3])
46 |
47 |
def test_column_selector_nd_should_transform_with_column_selector_2d():
    """An ND selector with n_dimension=2 behaves like a 2D single-column select."""
    selector = ColumnsSelectorND(0, n_dimension=2)
    data_inputs, _ = _create_data_source((20, 20))

    outputs = selector.transform(data_inputs)

    assert np.array_equal(outputs, np.expand_dims(data_inputs[..., 0], axis=-1))
55 |
56 |
def _create_data_source(shape):
    """Return random float32 (data_inputs, expected_outputs) arrays of `shape`."""
    return (
        np.random.random(shape).astype(np.float32),
        np.random.random(shape).astype(np.float32),
    )
61 |
--------------------------------------------------------------------------------
/testing_neuraxle/steps/test_concatenate_data_container.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from neuraxle.base import ExecutionContext as CX
4 | from neuraxle.data_container import DataContainer as DACT
5 | from neuraxle.pipeline import Pipeline
6 | from neuraxle.steps.data import InnerConcatenateDataContainer, ZipBatchDataContainer
7 |
# Shared dimensions for the synthetic data containers built in these tests.
TIMESTEPS = 10
FEATURES = 5
VALIDATION_SIZE = 0.1
BATCH_SIZE = 32
N_EPOCHS = 10
SHAPE_3D = (BATCH_SIZE, TIMESTEPS, FEATURES)  # (batch, time, features)
SHAPE_2D = (BATCH_SIZE, TIMESTEPS)  # (batch, time)
SHAPE_1D = BATCH_SIZE  # one value per batch item
16 |
17 |
def test_inner_concatenate_data_should_merge_3d_with_3d():
    """Inner-concatenating two 3D containers doubles the feature axis."""
    # Given: a main 3D container carrying a second 3D container as sub-data.
    main_di, main_eo = _create_data_source(SHAPE_3D)
    extra_di, extra_eo = _create_data_source(SHAPE_3D)
    extra_dact = DACT(data_inputs=extra_di,
                      expected_outputs=extra_eo)
    dact = DACT(data_inputs=main_di, expected_outputs=main_eo) \
        .add_sub_data_container('2d', extra_dact)

    # When
    pipeline = Pipeline([
        InnerConcatenateDataContainer(sub_data_container_names=['2d'])
    ])
    dact = pipeline.handle_transform(dact, CX())

    # Then: the feature axis doubled and the sub-container fills the tail half.
    doubled_shape = (SHAPE_3D[0], SHAPE_3D[1], SHAPE_3D[2] * 2)
    assert dact.data_inputs.shape == doubled_shape
    assert dact.expected_outputs.shape == doubled_shape
    assert np.array_equal(dact.data_inputs[..., -SHAPE_3D[2]:], extra_dact.data_inputs)
    assert np.array_equal(dact.expected_outputs[..., -SHAPE_3D[2]:],
                          extra_dact.expected_outputs)
40 |
41 |
def test_inner_concatenate_data_should_merge_2d_with_3d():
    """A 2D sub-container becomes one extra feature on a 3D main container."""
    # Given
    main_di, main_eo = _create_data_source(SHAPE_3D)
    extra_di, extra_eo = _create_data_source(SHAPE_2D)
    extra_dact = DACT(data_inputs=extra_di, expected_outputs=extra_eo)
    dact = DACT(data_inputs=main_di, expected_outputs=main_eo) \
        .add_sub_data_container('2d', extra_dact)

    # When
    pipeline = Pipeline([
        InnerConcatenateDataContainer(sub_data_container_names=['2d'])
    ])
    dact = pipeline.handle_transform(dact, CX())

    # Then: one feature added, holding the 2D values.
    grown_shape = (SHAPE_3D[0], SHAPE_3D[1], SHAPE_3D[2] + 1)
    assert dact.data_inputs.shape == grown_shape
    assert dact.expected_outputs.shape == grown_shape
    assert np.array_equal(dact.data_inputs[..., -1], extra_dact.data_inputs)
    assert np.array_equal(dact.expected_outputs[..., -1], extra_dact.expected_outputs)
62 |
63 |
def test_inner_concatenate_data_should_merge_1d_with_3d():
    """A 1D sub-container is broadcast across timesteps and appended as a feature."""
    # Given
    main_di, main_eo = _create_data_source(SHAPE_3D)
    extra_di, extra_eo = _create_data_source(SHAPE_1D)
    extra_dact = DACT(data_inputs=extra_di, expected_outputs=extra_eo)
    dact = DACT(data_inputs=main_di, expected_outputs=main_eo) \
        .add_sub_data_container('1d', extra_dact)

    # When
    pipeline = Pipeline([
        InnerConcatenateDataContainer(sub_data_container_names=['1d'])
    ])
    dact = pipeline.handle_transform(dact, CX())

    # Then: each 1D value is repeated along the timestep axis.
    expected_di = np.broadcast_to(np.expand_dims(extra_dact.data_inputs, axis=-1),
                                  shape=(SHAPE_3D[0], SHAPE_3D[1]))
    expected_eo = np.broadcast_to(np.expand_dims(extra_dact.expected_outputs, axis=-1),
                                  shape=(SHAPE_3D[0], SHAPE_3D[1]))

    assert np.array_equal(dact.data_inputs[..., -1], expected_di)
    assert np.array_equal(dact.expected_outputs[..., -1], expected_eo)

    grown_shape = (SHAPE_3D[0], SHAPE_3D[1], SHAPE_3D[2] + 1)
    assert dact.data_inputs.shape == grown_shape
    assert dact.expected_outputs.shape == grown_shape
90 |
91 |
def test_inner_concatenate_data_should_merge_1d_with_2d():
    """A 1D sub-container becomes one extra column on a 2D main container."""
    # Given
    main_di, main_eo = _create_data_source(SHAPE_2D)
    extra_di, extra_eo = _create_data_source(SHAPE_1D)
    extra_dact = DACT(data_inputs=extra_di, expected_outputs=extra_eo)
    dact = DACT(data_inputs=main_di, expected_outputs=main_eo) \
        .add_sub_data_container('1d', extra_dact)

    # When
    pipeline = Pipeline([
        InnerConcatenateDataContainer(sub_data_container_names=['1d'])
    ])
    dact = pipeline.handle_transform(dact, CX())

    # Then: one column added, holding the 1D values.
    grown_shape = (SHAPE_2D[0], SHAPE_2D[1] + 1)
    assert dact.data_inputs.shape == grown_shape
    assert dact.expected_outputs.shape == grown_shape
    assert np.array_equal(dact.data_inputs[..., -1], extra_dact.data_inputs)
    assert np.array_equal(dact.expected_outputs[..., -1], extra_dact.expected_outputs)
112 |
113 |
def test_outer_concatenate_data_should_merge_2d_with_3d():
    """ZipBatchDataContainer pairs main and sub containers item by item."""
    # Given
    main_di, main_eo = _create_data_source(SHAPE_3D)
    extra_di, extra_eo = _create_data_source(SHAPE_2D)
    extra_dact = DACT(data_inputs=extra_di, expected_outputs=extra_eo)
    dact = DACT(data_inputs=main_di, expected_outputs=main_eo) \
        .add_sub_data_container('2d', extra_dact)

    # When
    pipeline = Pipeline([
        ZipBatchDataContainer(sub_data_container_names=['2d'])
    ])
    dact = pipeline.handle_transform(dact, CX())

    # Then: each batch item becomes a (main, sub) pair, for DIs and EOs alike.
    for i, (main_item, extra_item) in enumerate(zip(main_di, extra_di)):
        assert np.array_equal(dact.data_inputs[i][0], main_item)
        assert np.array_equal(dact.data_inputs[i][1], extra_item)

    for i, (main_item, extra_item) in enumerate(zip(main_eo, extra_eo)):
        assert np.array_equal(dact.expected_outputs[i][0], main_item)
        assert np.array_equal(dact.expected_outputs[i][1], extra_item)
137 |
138 |
def _create_data_source(shape):
    """Return random float32 (data_inputs, expected_outputs) arrays of `shape`."""
    return (
        np.random.random(shape).astype(np.float32),
        np.random.random(shape).astype(np.float32),
    )
143 |
--------------------------------------------------------------------------------
/testing_neuraxle/steps/test_data_shuffling.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from neuraxle.pipeline import Pipeline
4 | from neuraxle.steps.data import DataShuffler
5 | from neuraxle.steps.misc import TapeCallbackFunction, FitTransformCallbackStep
6 |
7 |
def test_data_shuffling_should_shuffle_data_inputs_and_expected_outputs():
    """DataShuffler must reorder both the data inputs and the expected outputs."""
    callback_fit = TapeCallbackFunction()
    callback_transform = TapeCallbackFunction()
    data_shuffler = Pipeline([
        DataShuffler(seed=42, increment_seed_after_each_fit=True),
        FitTransformCallbackStep(callback_transform, callback_fit)
    ])
    data_inputs = np.array(range(10))
    expected_outputs = np.array(range(10, 20))

    # fit_transform returns a (fitted_step, outputs) tuple (same convention as
    # the other tests in this suite): unpack it so `outputs` really holds the
    # shuffled data. Previously the whole tuple was compared to an array,
    # which made the first assertion vacuously true.
    data_shuffler, outputs = data_shuffler.fit_transform(data_inputs, expected_outputs)

    assert not np.array_equal(outputs, data_inputs)
    assert not np.array_equal(callback_fit.data[0][0], data_inputs)
    assert not np.array_equal(callback_fit.data[0][1], expected_outputs)
    assert not np.array_equal(callback_transform.data, data_inputs)
24 |
--------------------------------------------------------------------------------
/testing_neuraxle/steps/test_epochs_repeater.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 |
4 | from neuraxle.base import ExecutionMode
5 | from neuraxle.pipeline import Pipeline
6 | from neuraxle.steps.data import EpochRepeater
7 | from neuraxle.steps.misc import TapeCallbackFunction, FitTransformCallbackStep
8 | from testing_neuraxle.steps.neuraxle_test_case import NeuraxleTestCase
9 |
# Shared input/output fixtures for the parametrized EpochRepeater test cases below.
DATA_INPUTS = np.array(range(10))
EXPECTED_OUTPUTS = np.array(range(10, 20))

# NOTE(review): these tapes are shared by every parametrized case below, so they
# accumulate calls across cases in declaration order — confirm that is intended.
callback_fit = TapeCallbackFunction()
callback_transform = TapeCallbackFunction()
EPOCHS = 2
16 |
17 |
@pytest.mark.parametrize("test_case", [
    # FIT_TRANSFORM: the wrapped step is fit once per epoch (EPOCHS = 2 pairs
    # recorded on the fit tape), but transformed only once.
    NeuraxleTestCase(
        pipeline=Pipeline([
            EpochRepeater(FitTransformCallbackStep(callback_transform, callback_fit), epochs=EPOCHS)
        ]),
        callbacks=[callback_fit, callback_transform],
        expected_callbacks_data=[
            [(DATA_INPUTS, EXPECTED_OUTPUTS), (DATA_INPUTS, EXPECTED_OUTPUTS)],
            [DATA_INPUTS]
        ],
        data_inputs=DATA_INPUTS,
        expected_outputs=EXPECTED_OUTPUTS,
        expected_processed_outputs=DATA_INPUTS,
        execution_mode=ExecutionMode.FIT_TRANSFORM
    ),
    # TRANSFORM: no fitting happens; a single transform pass is recorded.
    NeuraxleTestCase(
        pipeline=Pipeline([
            EpochRepeater(FitTransformCallbackStep(callback_transform, callback_fit), epochs=EPOCHS)
        ]),
        callbacks=[callback_fit, callback_transform],
        expected_callbacks_data=[
            [],
            [DATA_INPUTS]
        ],
        data_inputs=DATA_INPUTS,
        expected_outputs=EXPECTED_OUTPUTS,
        expected_processed_outputs=DATA_INPUTS,
        execution_mode=ExecutionMode.TRANSFORM
    ),
    # TRANSFORM with the pipeline explicitly set to test mode (set_train(False)):
    # behaves the same as the plain TRANSFORM case.
    NeuraxleTestCase(
        pipeline=Pipeline([
            EpochRepeater(FitTransformCallbackStep(callback_transform, callback_fit), epochs=EPOCHS)
        ]).set_train(False),
        callbacks=[callback_fit, callback_transform],
        expected_callbacks_data=[
            [],
            [DATA_INPUTS]
        ],
        data_inputs=DATA_INPUTS,
        expected_outputs=EXPECTED_OUTPUTS,
        expected_processed_outputs=DATA_INPUTS,
        execution_mode=ExecutionMode.TRANSFORM
    ),
    # FIT: fitting repeats once per epoch, and no transform pass is recorded.
    NeuraxleTestCase(
        pipeline=Pipeline([
            EpochRepeater(FitTransformCallbackStep(callback_transform, callback_fit), epochs=EPOCHS)
        ]),
        callbacks=[callback_fit, callback_transform],
        expected_callbacks_data=[
            [(DATA_INPUTS, EXPECTED_OUTPUTS), (DATA_INPUTS, EXPECTED_OUTPUTS)],
            []
        ],
        data_inputs=DATA_INPUTS,
        expected_outputs=EXPECTED_OUTPUTS,
        execution_mode=ExecutionMode.FIT
    )
])
def test_epoch_repeater(test_case):
    """EpochRepeater should repeat fitting `epochs` times but transform only once."""
    processed_outputs = test_case.execute()

    test_case.assert_expected_processed_outputs(processed_outputs)
    test_case.assert_callback_data_is_as_expected()
80 |
--------------------------------------------------------------------------------
/testing_neuraxle/steps/test_expand_dim.py:
--------------------------------------------------------------------------------
1 | from typing import List, Iterable
2 |
3 | import numpy as np
4 |
5 | from neuraxle.hyperparams.space import HyperparameterSamples
6 | from neuraxle.pipeline import Pipeline
7 | from neuraxle.steps.flow import ExpandDim
8 | from neuraxle.steps.misc import HandleCallbackStep, TapeCallbackFunction
9 |
10 |
def test_expand_dim_transform():
    """ExpandDim should wrap data in an extra dimension before calling the wrapped step."""
    di = np.array(range(10))
    eo = None
    fit_cb = TapeCallbackFunction()
    transform_cb = TapeCallbackFunction()
    fit_transform_cb = TapeCallbackFunction()
    p = Pipeline([
        ExpandDim(HandleCallbackStep(fit_cb, transform_cb, fit_transform_cb))
    ])

    outputs = p.transform(di)

    assert np.array_equal(outputs, di)
    # Only the transform handler fires, and it sees the expanded (wrapped) data.
    assert fit_cb.data == []
    assert fit_transform_cb.data == []
    assert np.array_equal(np.array(transform_cb.data[0][0].di), np.array([di]))
    assert np.array_equal(np.array(transform_cb.data[0][0].eo), np.array([eo]))
35 |
36 |
def test_expand_dim_fit():
    """ExpandDim should expand both data inputs and expected outputs on fit."""
    fit_cb, transform_cb, fit_transform_cb = (
        TapeCallbackFunction(), TapeCallbackFunction(), TapeCallbackFunction())
    p = Pipeline([
        ExpandDim(HandleCallbackStep(fit_cb, transform_cb, fit_transform_cb))
    ])
    di = np.array(range(10))
    eo = np.array(range(10))

    p = p.fit(di, eo)

    # Only the fit handler fires, and it sees the expanded data containers.
    assert transform_cb.data == []
    assert fit_transform_cb.data == []
    assert np.array_equal(np.array(fit_cb.data[0][0].data_inputs), np.array([di]))
    assert np.array_equal(np.array(fit_cb.data[0][0].expected_outputs), np.array([eo]))
63 |
64 |
def test_expand_dim_fit_transform():
    """ExpandDim should expand both streams on fit_transform and return unexpanded outputs."""
    fit_cb, transform_cb, fit_transform_cb = (
        TapeCallbackFunction(), TapeCallbackFunction(), TapeCallbackFunction())
    p = Pipeline([
        ExpandDim(HandleCallbackStep(fit_cb, transform_cb, fit_transform_cb))
    ])
    di = np.array(range(10))
    eo = np.array(range(10))

    p, outputs = p.fit_transform(di, eo)

    assert np.array_equal(outputs, di)
    # Only the fit_transform handler fires, seeing the expanded data containers.
    assert transform_cb.data == []
    assert fit_cb.data == []
    assert np.array_equal(np.array(fit_transform_cb.data[0][0].data_inputs), np.array([di]))
    assert np.array_equal(np.array(fit_transform_cb.data[0][0].expected_outputs), np.array([eo]))
92 |
--------------------------------------------------------------------------------
/testing_neuraxle/steps/test_features.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests for features steps
3 | ========================================
4 |
5 | ..
6 | Copyright 2019, Neuraxio Inc.
7 |
8 | Licensed under the Apache License, Version 2.0 (the "License");
9 | you may not use this file except in compliance with the License.
10 | You may obtain a copy of the License at
11 |
12 | http://www.apache.org/licenses/LICENSE-2.0
13 |
14 | Unless required by applicable law or agreed to in writing, software
15 | distributed under the License is distributed on an "AS IS" BASIS,
16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | See the License for the specific language governing permissions and
18 | limitations under the License.
19 |
20 | """
21 | import pytest
22 |
23 | from neuraxle.hyperparams.space import HyperparameterSamples
24 | from neuraxle.steps.features import Cheap3DTo2DTransformer, FFTPeakBinWithValue
25 | import numpy as np
26 |
27 |
def test_fft_peak_bin_with_values():
    """FFTPeakBinWithValue should flatten a 3D batch into a fixed-width 2D feature matrix."""
    step = FFTPeakBinWithValue()
    data_inputs = np.random.random((4, 5, 2))

    outputs = step.transform(data_inputs)

    # 4 items in, 4 feature columns out — presumably (peak bin + peak value)
    # per input feature; confirm against the step's implementation.
    assert outputs.shape == (4, 4)
35 |
36 |
# Names of the feature sub-steps toggled by the hyperparameter flags below.
_FEATURE_STEPS = ('FFT', 'NumpyMean', 'NumpyMedian', 'NumpyMin', 'NumpyMax')


def _enabled_flags(disabled=()):
    """Build hyperparams enabling every feature step except those named in *disabled*."""
    return HyperparameterSamples({
        f'{step}__enabled': step not in disabled for step in _FEATURE_STEPS
    })


@pytest.mark.parametrize("hyperparams, expected_feature_count", [
    (_enabled_flags(), 18),
    (_enabled_flags(disabled=('FFT',)), 8),
    (_enabled_flags(disabled=('NumpyMean',)), 16),
    (_enabled_flags(disabled=('NumpyMedian',)), 16),
    (_enabled_flags(disabled=('NumpyMin',)), 16),
    (_enabled_flags(disabled=('NumpyMax',)), 16),
])
def test_cheap_3D_to_2D_transformer(hyperparams: HyperparameterSamples, expected_feature_count: int):
    """Disabling a feature sub-step should shrink the flattened feature count accordingly."""
    step = Cheap3DTo2DTransformer()
    step.set_hyperparams(hyperparams=hyperparams)
    data_inputs = np.random.random((7, 5, 2))

    outputs = step.transform(data_inputs)

    assert outputs.shape == (7, expected_feature_count)
89 |
--------------------------------------------------------------------------------
/testing_neuraxle/steps/test_flatten_for_each.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from neuraxle.base import ExecutionContext as CX
4 | from neuraxle.data_container import DataContainer as DACT
5 | from neuraxle.pipeline import Pipeline
6 | from neuraxle.steps.loop import FlattenForEach
7 | from neuraxle.steps.numpy import MultiplyByN
8 | from neuraxle.steps.output_handlers import OutputTransformerWrapper
9 |
# A small 2D data shape and its fully-flattened counterpart, used by the tests below.
DATA_SHAPE = (3, 4)
FLAT_DATA_SHAPE = (3 * 4, )
12 |
13 |
def test_flatten_for_each_unflatten_should_transform_data_inputs():
    """With then_unflatten=True the output is restored to the original shape."""
    step = FlattenForEach(MultiplyByN(2), then_unflatten=True)
    data_inputs, _ = _create_random_of_shape(DATA_SHAPE)

    outputs = step.transform(data_inputs)

    assert np.array(outputs).shape == DATA_SHAPE
    assert np.array_equal(outputs, data_inputs * 2)
22 |
23 |
def test_flatten_for_each_should_transform_data_inputs():
    """With then_unflatten=False the output stays flat."""
    step = FlattenForEach(MultiplyByN(2), then_unflatten=False)
    data_inputs, _ = _create_random_of_shape(DATA_SHAPE)

    outputs = step.transform(data_inputs)

    assert np.array(outputs).shape == FLAT_DATA_SHAPE
    assert np.array_equal(outputs.flatten(), data_inputs.flatten() * 2)
32 |
33 |
def test_flatten_for_each_should_transform_data_inputs_and_expected_outputs():
    """FlattenForEach should flatten, transform and unflatten both data streams."""
    p = FlattenForEach(Pipeline([
        MultiplyByN(2),
        OutputTransformerWrapper(MultiplyByN(3))
    ]))
    # TODO: should use a tape here and ensure that the MultiplyByN received a flat 12 shape only once and not 3*4 things
    data_inputs, expected_outputs = _create_random_of_shape(DATA_SHAPE)

    p, outputs = p.handle_fit_transform(
        DACT(data_inputs=data_inputs, expected_outputs=expected_outputs), CX())

    # Data inputs are doubled; expected outputs are tripled; both keep their shape.
    for actual, source, factor in (
            (outputs.data_inputs, data_inputs, 2),
            (outputs.expected_outputs, expected_outputs, 3)):
        assert np.array(actual).shape == DATA_SHAPE
        assert np.array_equal(actual, source * factor)
49 |
50 |
51 | def _create_random_of_shape(shape):
52 | data_inputs = np.random.random(shape).astype(np.float32)
53 | expected_outputs = np.random.random(shape).astype(np.float32)
54 | return data_inputs, expected_outputs
55 |
--------------------------------------------------------------------------------
/testing_neuraxle/steps/test_for_each.py:
--------------------------------------------------------------------------------
1 | from neuraxle.pipeline import Pipeline
2 | from neuraxle.steps.loop import ForEach
3 | from neuraxle.steps.misc import TransformCallbackStep, TapeCallbackFunction, FitCallbackStep, \
4 | FitTransformCallbackStep
5 |
6 |
def test_fit_for_each_should_fit_all_steps_for_each_data_inputs_expected_outputs():
    """ForEach should fit every inner step once per (data input, expected output) pair."""
    tape = TapeCallbackFunction()
    p = Pipeline([
        ForEach(Pipeline([
            FitCallbackStep(tape.callback, ["1"]),
            FitCallbackStep(tape.callback, ["2"]),
        ]))
    ])
    data_inputs = [[0, 1], [1, 2]]
    expected_outputs = [[2, 3], [4, 5]]

    p = p.fit(data_inputs, expected_outputs)

    assert isinstance(p, Pipeline)
    # Both inner steps see the first pair, then both see the second pair.
    assert tape.get_name_tape() == ["1", "2", "1", "2"]
    expected_tape_data = [
        pair for pair in zip(data_inputs, expected_outputs) for _ in range(2)
    ]
    assert tape.data == expected_tape_data
23 |
24 |
def test_fit_transform_should_fit_transform_all_steps_for_each_data_inputs_expected_outputs():
    """ForEach should fit_transform every inner step once per data pair, in order."""
    tape = TapeCallbackFunction()
    tape_fit = TapeCallbackFunction()
    p = Pipeline([
        ForEach(Pipeline([
            FitTransformCallbackStep(tape.callback, tape_fit, ["1"]),
            FitTransformCallbackStep(tape.callback, tape_fit, ["2"]),
        ]))
    ])
    data_inputs = [[0, 1], [1, 2]]
    expected_outputs = [[2, 3], [4, 5]]

    p, outputs = p.fit_transform(data_inputs, expected_outputs)

    # Both transform and fit tapes interleave steps "1" and "2" per data pair.
    assert tape.get_name_tape() == ["1", "2", "1", "2"]
    assert tape_fit.get_name_tape() == ["1", "2", "1", "2"]
    expected_fit_data = [
        pair for pair in zip(data_inputs, expected_outputs) for _ in range(2)
    ]
    assert tape_fit.data == expected_fit_data
42 |
43 |
def test_transform_should_transform_all_steps_for_each_data_inputs_expected_outputs():
    """ForEach should transform every inner step once per data input, in order."""
    tape = TapeCallbackFunction()
    inner_steps = [TransformCallbackStep(tape.callback, [name]) for name in ("1", "2")]
    p = Pipeline([ForEach(Pipeline(inner_steps))])
    data_inputs = [[0, 1], [1, 2]]

    outputs = p.transform(data_inputs)

    assert tape.get_name_tape() == ["1", "2", "1", "2"]
57 |
--------------------------------------------------------------------------------
/testing_neuraxle/steps/test_if_execution_phase_is_then_do.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 | from pytest import skip
4 |
5 | from neuraxle.base import CX, ExecutionPhase
6 | from neuraxle.data_container import DataContainer as DACT
7 | from neuraxle.steps.flow import IfExecutionPhaseIsThen, ExecutionPhaseSwitch
8 | from testing_neuraxle.test_forcehandle_mixin import ForceHandleIdentity
9 |
10 |
class SomeStep(ForceHandleIdentity):
    """Identity step that records whether it was executed, via the _did_process hook."""

    def __init__(self):
        ForceHandleIdentity.__init__(self)
        # Flipped to True the first time this step processes a data container.
        self.did_process = False

    def _did_process(self, data_container: DACT, context: CX) -> DACT:
        self.did_process = True
        return data_container
19 |
20 |
def test_ifexecphase_same_then_execute_step(tmpdir):
    """The wrapped step must run when the context phase matches TRAIN."""
    _run(tmpdir, phase=ExecutionPhase.TRAIN, expected=True)
23 |
24 |
def test_ifexecphase_different_then_skip_step(tmpdir):
    """The wrapped step must be skipped when the context phase does not match TRAIN."""
    _run(tmpdir, phase=ExecutionPhase.TEST, expected=False)
27 |
28 |
def _run(tmpdir, phase, expected):
    """Fit-transform a SomeStep guarded by IfExecutionPhaseIsThen(TRAIN) under *phase*,
    then check whether the guarded step actually ran."""
    context = CX(root=tmpdir, execution_phase=phase)
    data_inputs = np.arange(11)

    guarded_step = SomeStep()
    wrapper = IfExecutionPhaseIsThen(ExecutionPhase.TRAIN, guarded_step).with_context(context)

    wrapper.fit_transform(data_inputs)
    assert guarded_step.did_process is expected
39 |
40 |
def test_ifexecphase_raise_exception_when_unspecified(tmpdir):
    """IfExecutionPhaseIsThen should raise a ValueError when the context's execution
    phase was never specified, and must not execute the wrapped step."""
    context = CX(root=tmpdir)
    data_inputs = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

    some_step = SomeStep()
    p = IfExecutionPhaseIsThen(ExecutionPhase.TRAIN, some_step)
    p = p.with_context(context)

    with pytest.raises(ValueError):
        p.fit_transform(data_inputs)
    # Bug fix: this assert used to sit inside the `with` block, after the raising
    # call, so it never executed. It must run after the exception is caught.
    assert some_step.did_process is False
52 |
53 |
def test_execswitch(tmpdir):
    """ExecutionPhaseSwitch should run only the step mapped to the active phase,
    and raise KeyError for an unmapped (UNSPECIFIED) phase."""
    context = CX(root=tmpdir, execution_phase=ExecutionPhase.TRAIN)
    data_inputs = np.arange(11)

    phases = (ExecutionPhase.PRETRAIN, ExecutionPhase.TRAIN, ExecutionPhase.TEST)
    phase_to_step = {phase: SomeStep() for phase in phases}
    switch = ExecutionPhaseSwitch(phase_to_step)

    switch.with_context(context).fit_transform(data_inputs)
    # Only the TRAIN-mapped step must have executed.
    for phase in phases:
        assert phase_to_step[phase].did_process is (phase is ExecutionPhase.TRAIN)

    unspecified_context = context.set_execution_phase(ExecutionPhase.UNSPECIFIED)
    with pytest.raises(KeyError):
        switch.with_context(unspecified_context).fit_transform(data_inputs)
70 |
--------------------------------------------------------------------------------
/testing_neuraxle/steps/test_numpy_steps.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests for NumPy Steps
3 | ========================================
4 |
5 | ..
6 | Copyright 2019, Neuraxio Inc.
7 |
8 | Licensed under the Apache License, Version 2.0 (the "License");
9 | you may not use this file except in compliance with the License.
10 | You may obtain a copy of the License at
11 |
12 | http://www.apache.org/licenses/LICENSE-2.0
13 |
14 | Unless required by applicable law or agreed to in writing, software
15 | distributed under the License is distributed on an "AS IS" BASIS,
16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | See the License for the specific language governing permissions and
18 | limitations under the License.
19 |
20 | """
21 |
22 | import numpy as np
23 | from neuraxle.steps.numpy import (NumpyConcatenateInnerFeatures,
24 | NumpyFlattenDatum, NumpyShapePrinter,
25 | NumpyTranspose, NumpyFFT, NumpyRavel)
26 |
27 |
def test_flatten_datum():
    """NumpyFlattenDatum should collapse each datum's inner dimensions to one axis."""
    step = NumpyFlattenDatum()
    data = np.random.random((10, 4, 5, 2))  # 4D array (could be ND with N>=2).

    step, received_data = step.fit_transform(data)

    expected_data = data.reshape(10, -1)  # 2D array.
    assert (received_data == expected_data).all()
36 |
37 |
def test_concat_features():
    """NumpyConcatenateInnerFeatures should concatenate inputs along the last axis."""
    concat = NumpyConcatenateInnerFeatures()
    # ND arrays differing only on the innermost (feature) axis.
    data1 = np.random.random((10, 4, 5, 2))
    data2 = np.random.random((10, 4, 5, 10))

    concat, received_all_data = concat.fit_transform([data1, data2])

    expected_all_data = np.concatenate([data1, data2], axis=-1)
    assert received_all_data.shape == expected_all_data.shape
    assert (received_all_data == expected_all_data).all()
49 |
50 |
def test_numpy_transpose():
    """NumpyTranspose should transpose a 2D array."""
    step = NumpyTranspose()
    data = np.random.random((10, 7))

    step, received_data = step.fit_transform(data)

    assert (received_data == data.T).all()
59 |
60 |
def test_numpy_shape_printer():
    """Smoke test: NumpyShapePrinter should pass data through without raising."""
    NumpyShapePrinter().fit_transform(np.ones((10, 11)))
64 |
65 |
def test_numpy_fft():
    """Smoke test: NumpyFFT should run on a 2D input without raising."""
    NumpyFFT().fit_transform(np.ones((10, 11)))
69 |
70 |
def test_numpy_ravel():
    """NumpyRavel should flatten its input to 1D."""
    step, out = NumpyRavel().fit_transform(np.ones((10, 11)))
    assert out.shape == (110,)
75 |
--------------------------------------------------------------------------------
/testing_neuraxle/steps/test_one_hot.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 |
4 | from neuraxle.steps.numpy import OneHotEncoder
5 |
6 |
@pytest.mark.parametrize("n_dims", [1, 2, 3])
@pytest.mark.parametrize("no_columns", [10])
def test_one_hot_encode_should_encode_data_inputs(n_dims, no_columns):
    """OneHotEncoder should emit 0/1 vectors of width no_columns for inputs of any rank."""
    encoder = OneHotEncoder(nb_columns=no_columns, name='one_hot')
    data_shape = list(range(100, 200))[:n_dims]
    # Values in [0, no_columns); randint with only `low` given samples [0, low).
    data_inputs = np.random.randint(low=no_columns, size=data_shape)
    data_inputs[0] = 0
    data_inputs[1] = no_columns - 1
    data_inputs[-2] = -1  # or nan or inf.

    outputs = encoder.transform(data_inputs)

    assert outputs.shape[-1] == no_columns
    assert np.isin(outputs, (0, 1)).all()

    if n_dims >= 2:
        # Known values map to their one-hot column; out-of-range maps to all zeros.
        assert (outputs[0, ..., 0] == 1).all()
        assert (outputs[1, ..., -1] == 1).all()
        assert (outputs[-2, ...] == 0).all()
26 |
--------------------------------------------------------------------------------
/testing_neuraxle/steps/test_output_transformer_wrapper.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Tuple
2 |
3 | from neuraxle.base import BaseTransformer
4 | from neuraxle.base import ExecutionContext as CX
5 | from neuraxle.data_container import DataContainer as DACT
6 | from neuraxle.hyperparams.space import HyperparameterSamples, HyperparameterSpace
7 | from neuraxle.pipeline import Pipeline
8 | from neuraxle.steps.output_handlers import IdsAndInputAndOutputTransformerMixin
9 |
10 |
class MultiplyBy2OutputTransformer(IdsAndInputAndOutputTransformerMixin, BaseTransformer):
    """Test transformer that doubles both the data inputs and the expected outputs."""

    def __init__(
            self,
            hyperparams: HyperparameterSamples = None,
            hyperparams_space: HyperparameterSpace = None,
            name: str = None
    ):
        BaseTransformer.__init__(self, hyperparams, hyperparams_space, name)
        IdsAndInputAndOutputTransformerMixin.__init__(self)

    def transform(self, data_inputs) -> Tuple[Any, Any]:
        # The mixin passes an (ids, data inputs, expected outputs) triple here.
        ids, dis, eos = data_inputs
        doubled_pairs = [(di * 2, eo * 2) for di, eo in zip(dis, eos)]
        new_dis = [di for di, _ in doubled_pairs]
        new_eos = [eo for _, eo in doubled_pairs]
        return ids, new_dis, new_eos
31 |
32 |
def test_output_transformer_should_zip_data_input_and_expected_output_in_the_transformed_output():
    """The output transformer should write doubled inputs and outputs back into the container."""
    pipeline = Pipeline([MultiplyBy2OutputTransformer()])
    input_container = DACT(data_inputs=[1, 2, 3], ids=[0, 1, 2], expected_outputs=[2, 3, 4])

    pipeline, new_data_container = pipeline.handle_fit_transform(input_container, CX())

    assert new_data_container.data_inputs == [2, 4, 6]
    assert new_data_container.expected_outputs == [4, 6, 8]
45 |
--------------------------------------------------------------------------------
/testing_neuraxle/steps/test_reversible_preprocessing_wrapper.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 |
4 | from neuraxle.base import ExecutionMode
5 | from neuraxle.pipeline import Pipeline
6 | from neuraxle.steps.flow import ReversiblePreprocessingWrapper
7 | from neuraxle.steps.misc import TapeCallbackFunction, CallbackWrapper
8 | from neuraxle.steps.numpy import MultiplyByN, AddN
9 | from testing_neuraxle.steps.neuraxle_test_case import NeuraxleTestCase
10 |
DATA_INPUTS = np.array(range(5))
EXPECTED_OUTPUTS = np.array(range(5, 10))
# ((x * 2) + 10) / 2 == x + 5 for x in 0..4: the preprocessing (MultiplyByN(2)) is
# applied, then the postprocessing (AddN(10)), then the preprocessing is inverted.
EXPECTED_PROCESSED_OUTPUTS = np.array([5.0, 6.0, 7.0, 8.0, 9.0])

# NOTE(review): these tapes are shared across all parametrized cases below and
# accumulate every recorded call in case order.
tape_transform_preprocessing = TapeCallbackFunction()
tape_fit_preprocessing = TapeCallbackFunction()
tape_transform_postprocessing = TapeCallbackFunction()
tape_fit_postprocessing = TapeCallbackFunction()
tape_inverse_transform_preprocessing = TapeCallbackFunction()
20 |
21 |
@pytest.mark.parametrize('test_case', [
    # TRANSFORM: preprocess, postprocess, then inverse-transform the preprocessing;
    # no fit tape records anything.
    # Consistency fix: the preprocessing CallbackWrapper's fit callback used to be
    # tape_fit_postprocessing here (a typo — cases below use tape_fit_preprocessing).
    # Harmless only because fit never runs in TRANSFORM mode.
    NeuraxleTestCase(
        pipeline=Pipeline([
            ReversiblePreprocessingWrapper(
                preprocessing_step=CallbackWrapper(MultiplyByN(2), tape_transform_preprocessing, tape_fit_preprocessing, tape_inverse_transform_preprocessing),
                postprocessing_step=CallbackWrapper(AddN(10), tape_transform_postprocessing, tape_fit_postprocessing)
            )]
        ),
        callbacks=[tape_transform_preprocessing, tape_fit_preprocessing, tape_transform_postprocessing, tape_fit_postprocessing, tape_inverse_transform_preprocessing],
        expected_callbacks_data=[
            [DATA_INPUTS],
            [],
            [DATA_INPUTS * 2],
            [],
            [(DATA_INPUTS * 2) + 10]
        ],
        data_inputs=DATA_INPUTS,
        expected_processed_outputs=EXPECTED_PROCESSED_OUTPUTS,
        execution_mode=ExecutionMode.TRANSFORM
    ),
    # FIT_TRANSFORM: both fit and transform tapes record, and the inverse
    # transform of the preprocessing step still runs at the end.
    NeuraxleTestCase(
        pipeline=Pipeline([
            ReversiblePreprocessingWrapper(
                preprocessing_step=CallbackWrapper(MultiplyByN(2), tape_transform_preprocessing, tape_fit_preprocessing, tape_inverse_transform_preprocessing),
                postprocessing_step=CallbackWrapper(AddN(10), tape_transform_postprocessing, tape_fit_postprocessing)
            )]
        ),
        callbacks=[tape_transform_preprocessing, tape_fit_preprocessing, tape_transform_postprocessing, tape_fit_postprocessing, tape_inverse_transform_preprocessing],
        expected_callbacks_data=[
            [DATA_INPUTS],
            [(DATA_INPUTS, EXPECTED_OUTPUTS)],
            [DATA_INPUTS * 2],
            [(DATA_INPUTS * 2, EXPECTED_OUTPUTS)],
            [(DATA_INPUTS * 2) + 10]
        ],
        data_inputs=DATA_INPUTS,
        expected_outputs=EXPECTED_OUTPUTS,
        expected_processed_outputs=EXPECTED_PROCESSED_OUTPUTS,
        execution_mode=ExecutionMode.FIT_TRANSFORM
    ),
    # FIT: only the fit tapes and the preprocessing transform record; neither the
    # postprocessing transform nor the inverse transform runs.
    NeuraxleTestCase(
        pipeline=Pipeline([
            ReversiblePreprocessingWrapper(
                preprocessing_step=CallbackWrapper(MultiplyByN(2), tape_transform_preprocessing, tape_fit_preprocessing, tape_inverse_transform_preprocessing),
                postprocessing_step=CallbackWrapper(AddN(10), tape_transform_postprocessing, tape_fit_postprocessing)
            )]
        ),
        callbacks=[tape_transform_preprocessing, tape_fit_preprocessing, tape_transform_postprocessing, tape_fit_postprocessing, tape_inverse_transform_preprocessing],
        expected_callbacks_data=[
            [DATA_INPUTS],
            [(DATA_INPUTS, EXPECTED_OUTPUTS)],
            [],
            [(DATA_INPUTS * 2, EXPECTED_OUTPUTS)],
            []
        ],
        data_inputs=DATA_INPUTS,
        expected_outputs=EXPECTED_OUTPUTS,
        execution_mode=ExecutionMode.FIT
    )
])
def test_reversible_preprocessing_wrapper(test_case):
    """ReversiblePreprocessingWrapper should preprocess, postprocess, then invert
    the preprocessing, calling the expected tapes per execution mode."""
    processed_outputs = test_case.execute()

    test_case.assert_expected_processed_outputs(processed_outputs)
    test_case.assert_callback_data_is_as_expected()
87 |
--------------------------------------------------------------------------------
/testing_neuraxle/steps/test_sklearn_wrapper.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | import numpy as np
4 | import pytest
5 | from neuraxle.base import Identity
6 | from neuraxle.hyperparams.distributions import RandInt, Uniform
7 | from neuraxle.hyperparams.space import (HyperparameterSamples,
8 | HyperparameterSpace)
9 | from neuraxle.metaopt.auto_ml import AutoML, RandomSearchSampler
10 | from neuraxle.metaopt.callbacks import ScoringCallback
11 | from neuraxle.metaopt.repositories.json import HyperparamsOnDiskRepository
12 | from neuraxle.metaopt.validation import KFoldCrossValidationSplitter
13 | from neuraxle.pipeline import Pipeline
14 | from neuraxle.steps.data import DataShuffler
15 | from neuraxle.steps.flow import TrainOnlyWrapper
16 | from neuraxle.steps.sklearn import SKLearnWrapper
17 | from sklearn.decomposition import PCA
18 | from sklearn.ensemble import BaggingRegressor, GradientBoostingRegressor
19 | from sklearn.linear_model import LinearRegression, SGDClassifier, SGDRegressor
20 | from sklearn.metrics import median_absolute_error
21 |
22 |
def test_sklearn_wrapper_with_an_invalid_step():
    """SKLearnWrapper must reject a step that is not an sklearn estimator."""
    with pytest.raises(ValueError):
        SKLearnWrapper(Identity())
26 |
27 |
def test_sklearn_wrapper_fit_transform_with_predict():
    """fit_transform on a wrapped predictor should return its predictions."""
    wrapper = SKLearnWrapper(LinearRegression())
    data_inputs = np.arange(10).reshape(-1, 1)
    expected_outputs = np.arange(10, 20).reshape(-1, 1)

    wrapper, outputs = wrapper.fit_transform(data_inputs, expected_outputs)

    # A perfect linear relation should be recovered exactly.
    assert np.array_equal(outputs, expected_outputs)
36 |
37 |
def test_sklearn_wrapper_transform_with_predict():
    """transform after fit on a wrapped predictor should return its predictions."""
    wrapper = SKLearnWrapper(LinearRegression())
    data_inputs = np.arange(10).reshape(-1, 1)
    expected_outputs = np.arange(10, 20).reshape(-1, 1)

    wrapper = wrapper.fit(data_inputs, expected_outputs)
    outputs = wrapper.transform(data_inputs)

    # A perfect linear relation should be recovered exactly.
    assert np.array_equal(outputs, expected_outputs)
47 |
48 |
def test_sklearn_wrapper_fit_transform_with_transform():
    """fit_transform on a wrapped transformer (PCA) should return the reduced features."""
    n_components = 2
    n_samples, n_features = 10, 10
    wrapper = SKLearnWrapper(PCA(n_components=n_components))
    data_inputs, expected_outputs = _create_data_source((n_samples, n_features))

    wrapper, outputs = wrapper.fit_transform(data_inputs, expected_outputs)

    assert outputs.shape == (n_samples, n_components)
59 |
60 |
def test_sklearn_wrapper_transform_partial_fit_with_predict():
    """Repeated partial_fit epochs should converge the SGD regressor on a linear mapping."""
    model = SKLearnWrapper(SGDRegressor(learning_rate='adaptive', eta0=0.05), use_partial_fit=True)
    p = Pipeline([TrainOnlyWrapper(DataShuffler()), model])
    data_inputs = np.arange(10).reshape(-1, 1) / 10
    expected_outputs = np.arange(10, 20) / 10

    # Each fit call is one partial_fit epoch over the (shuffled) training data.
    for _ in range(30):
        p = p.fit(data_inputs, expected_outputs)
    outputs = p.predict(data_inputs)

    assert np.allclose(outputs, expected_outputs, atol=0.1)
72 |
73 |
def test_sklearn_wrapper_transform_partial_fit_classifier():
    """Repeated partial_fit epochs should keep SGD classifier predictions within the declared classes."""
    data_inputs = np.array([[0, 1], [0, 0], [3, -2], [-1, 1], [-2, 1], [2, 0], [2, -1], [4, -2], [-3, 1], [-1, 0]])
    expected_outputs = data_inputs[:, 0] + 2 * data_inputs[:, 1] + 1
    data_inputs = data_inputs / 5
    classes = np.array([0, 1, 2, 3])
    model = SKLearnWrapper(
        SGDClassifier(learning_rate='adaptive', eta0=0.05),
        use_partial_fit=True,
        # partial_fit needs the full class list up front.
        partial_fit_kwargs={'classes': classes}
    )
    p = Pipeline([TrainOnlyWrapper(DataShuffler()), model])

    for _ in range(30):
        p = p.fit(data_inputs, expected_outputs)
    outputs = p.predict(data_inputs)

    assert outputs.shape == (10,)
    assert set(outputs) <= set(classes)
92 |
93 |
def test_sklearn_wrapper_set_hyperparams():
    """set_hyperparams should forward values onto the wrapped sklearn estimator."""
    wrapper = SKLearnWrapper(PCA())

    wrapper.set_hyperparams(HyperparameterSamples({'n_components': 2}))

    assert wrapper.wrapped_sklearn_predictor.n_components == 2
101 |
102 |
def test_sklearn_wrapper_update_hyperparams():
    """update_hyperparams should override only the given keys, keeping the rest."""
    wrapper = SKLearnWrapper(PCA())
    wrapper.set_hyperparams(HyperparameterSamples({
        'n_components': 2,
        'svd_solver': 'full'
    }))

    wrapper.update_hyperparams(HyperparameterSamples({'n_components': 4}))

    # n_components is overridden; svd_solver keeps its previously-set value.
    assert wrapper.wrapped_sklearn_predictor.n_components == 4
    assert wrapper.wrapped_sklearn_predictor.svd_solver == 'full'
115 |
116 |
117 | def _create_data_source(shape):
118 | data_inputs = np.random.random(shape).astype(np.float32)
119 | expected_outputs = np.random.random(shape).astype(np.float32)
120 | return data_inputs, expected_outputs
121 |
122 |
def _test_within_auto_ml_loop(tmpdir, pipeline):
    """Smoke-test *pipeline* inside a minimal AutoML loop (2 trials, 1 epoch each,
    3-fold cross validation) on small random data; raises if any trial fails."""
    X_train = np.random.random((25, 50)).astype(np.float32)
    Y_train = np.random.random((25,)).astype(np.float32)

    validation_splitter = KFoldCrossValidationSplitter(3)
    scoring_callback = ScoringCallback(
        median_absolute_error, higher_score_is_better=False)

    auto_ml = AutoML(
        pipeline=pipeline,
        hyperparams_optimizer=RandomSearchSampler(),
        validation_splitter=validation_splitter,
        scoring_callback=scoring_callback,
        n_trials=2,
        epochs=1,
        hyperparams_repository=HyperparamsOnDiskRepository(cache_folder=tmpdir),
        refit_best_trial=True,
        # Fail fast: any error during a trial should fail the test, not be skipped.
        continue_loop_on_error=False)

    auto_ml.fit(X_train, Y_train)
143 |
144 |
def test_automl_sklearn(tmpdir):
    """A plain wrapped GradientBoostingRegressor should survive an AutoML loop."""
    _test_within_auto_ml_loop(tmpdir, SKLearnWrapper(GradientBoostingRegressor()))
148 |
149 |
def test_automl_sklearn_model_with_base_estimator(tmpdir):
    """A wrapped meta-estimator (BaggingRegressor over a GradientBoostingRegressor)
    with a hyperparameter space should survive an AutoML loop."""
    bagged_regressor = BaggingRegressor(
        GradientBoostingRegressor(), random_state=5, n_jobs=-1)
    space = HyperparameterSpace({
        "n_estimators": RandInt(2, 15),
        "max_features": Uniform(0.6, 1.0),
    })

    wrapped_bagged_regressor = SKLearnWrapper(
        bagged_regressor,
        space,
        # return_all_sklearn_default_params_on_get=True
    )
    _test_within_auto_ml_loop(tmpdir, wrapped_bagged_regressor)
163 |
--------------------------------------------------------------------------------
/testing_neuraxle/steps/test_step_cloner_for_each_data_input.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy as np
4 |
5 | from neuraxle.base import ExecutionContext as CX
6 | from neuraxle.hyperparams.distributions import Boolean
7 | from neuraxle.hyperparams.space import HyperparameterSpace, HyperparameterSamples
8 | from neuraxle.pipeline import Pipeline
9 | from neuraxle.steps.loop import StepClonerForEachDataInput
10 | from neuraxle.steps.misc import FitCallbackStep, TapeCallbackFunction
11 | from neuraxle.steps.numpy import MultiplyByN
12 |
# Shared fixtures: a one-entry boolean hyperparameter space, and a sample drawn
# from it, both keyed with the nested-step separator form "a__test".
HYPE_SPACE = HyperparameterSpace({
    "a__test": Boolean()
})

HYPE_SAMPLE = HyperparameterSamples({
    "a__test": True
})
20 |
21 |
def test_step_cloner_should_transform():
    """Transforming should clone the wrapped pipeline once per data input and apply it."""
    cloner = StepClonerForEachDataInput(
        Pipeline([FitCallbackStep(TapeCallbackFunction()), MultiplyByN(2)]))
    inputs = _create_data((2, 2))

    outputs = cloner.transform(inputs)

    for index in (0, 1):
        assert isinstance(cloner.steps_as_tuple[index][1], Pipeline)
    assert np.array_equal(outputs, inputs * 2)
37 |
38 |
def test_step_cloner_should_fit_transform():
    """fit_transform should fit one pipeline clone per data input, each on its own slice."""
    callback_tape = TapeCallbackFunction()
    cloner = StepClonerForEachDataInput(
        Pipeline([FitCallbackStep(callback_tape), MultiplyByN(2)]))
    inputs = _create_data((2, 2))
    targets = _create_data((2, 2))

    cloner, outputs = cloner.fit_transform(inputs, targets)

    for index in (0, 1):
        clone = cloner.steps_as_tuple[index][1]
        assert isinstance(clone, Pipeline)
        # Each clone's FitCallbackStep must have seen exactly its own data slice.
        recorded = clone[0].callback_function.data[0]
        assert np.array_equal(recorded[0], inputs[index])
        assert np.array_equal(recorded[1], targets[index])

    assert np.array_equal(outputs, inputs * 2)
64 |
65 |
def test_step_cloner_should_inverse_transform():
    """inverse_transform should undo the forward multiplication applied per data input."""
    cloner = StepClonerForEachDataInput(
        Pipeline([FitCallbackStep(TapeCallbackFunction()), MultiplyByN(2)]))
    inputs = _create_data((2, 2))
    targets = _create_data((2, 2))

    cloner, outputs = cloner.fit_transform(inputs, targets)
    assert np.array_equal(outputs, inputs * 2)

    recovered = cloner.inverse_transform(outputs)
    assert np.array_equal(np.array(recovered), np.array(inputs))
82 |
83 |
def test_step_cloner_should_set_train():
    """set_train(False) should propagate from the cloner to every cloned sub-pipeline."""
    cloner = StepClonerForEachDataInput(
        Pipeline([FitCallbackStep(TapeCallbackFunction()), MultiplyByN(2)]))
    inputs = _create_data((2, 2))
    targets = _create_data((2, 2))
    cloner, _ = cloner.fit_transform(inputs, targets)

    cloner.set_train(False)

    assert not cloner.is_train
    assert not cloner.steps_as_tuple[0][1].is_train
    assert not cloner.steps_as_tuple[1][1].is_train
101 |
102 |
def test_step_cloner_should_save_sub_steps(tmpdir):
    """Saving with full_dump=True should persist the cloner itself, each per-data-input
    pipeline clone (Pipeline[0], Pipeline[1]), the wrapped template pipeline, and all
    of their sub-steps as .joblib files under tmpdir.

    Fixes over the previous version: the 'Pipeline[0]/MultiplyByN' path was listed
    twice (one redundant assertion), and the loop variable shadowed the fitted
    pipeline `p`.
    """
    tape = TapeCallbackFunction()
    p = StepClonerForEachDataInput(
        Pipeline([
            FitCallbackStep(tape),
            MultiplyByN(2)
        ])
    ).with_context(CX(tmpdir))
    data_inputs = _create_data((2, 2))
    expected_outputs = _create_data((2, 2))
    p, processed_outputs = p.fit_transform(data_inputs, expected_outputs)

    p.save(CX(tmpdir), full_dump=True)

    saved_paths = [
        os.path.join(tmpdir, 'StepClonerForEachDataInput/Pipeline[0]/FitCallbackStep/FitCallbackStep.joblib'),
        os.path.join(tmpdir, 'StepClonerForEachDataInput/Pipeline[0]/MultiplyByN/MultiplyByN.joblib'),
        os.path.join(tmpdir, 'StepClonerForEachDataInput/Pipeline[0]/Pipeline[0].joblib'),
        os.path.join(tmpdir, 'StepClonerForEachDataInput/Pipeline[1]/FitCallbackStep/FitCallbackStep.joblib'),
        os.path.join(tmpdir, 'StepClonerForEachDataInput/Pipeline[1]/MultiplyByN/MultiplyByN.joblib'),
        os.path.join(tmpdir, 'StepClonerForEachDataInput/Pipeline[1]/Pipeline[1].joblib'),
        os.path.join(tmpdir, 'StepClonerForEachDataInput/Pipeline/FitCallbackStep/FitCallbackStep.joblib'),
        os.path.join(tmpdir, 'StepClonerForEachDataInput/Pipeline/MultiplyByN/MultiplyByN.joblib'),
        os.path.join(tmpdir, 'StepClonerForEachDataInput/Pipeline/Pipeline.joblib'),
        os.path.join(tmpdir, 'StepClonerForEachDataInput/StepClonerForEachDataInput.joblib')
    ]

    # Use a distinct loop variable: `p` above still names the fitted pipeline.
    for saved_path in saved_paths:
        assert os.path.exists(saved_path)
133 |
134 |
def test_step_cloner_should_load_sub_steps(tmpdir):
    """A full dump of the cloner should reload with one pipeline clone per data input."""
    cloner = StepClonerForEachDataInput(
        Pipeline([FitCallbackStep(TapeCallbackFunction()), MultiplyByN(2)])
    ).with_context(CX(tmpdir))
    inputs = _create_data((2, 2))
    targets = _create_data((2, 2))
    cloner, _ = cloner.fit_transform(inputs, targets)

    cloner.save(CX(tmpdir), full_dump=True)
    reloaded = CX(tmpdir).load('StepClonerForEachDataInput')

    assert isinstance(reloaded.wrapped, Pipeline)
    assert len(reloaded.steps_as_tuple) == len(inputs)
    assert isinstance(reloaded.steps_as_tuple[0][1], Pipeline)
    assert isinstance(reloaded.steps_as_tuple[1][1], Pipeline)
154 |
155 |
156 | def _create_data(shape):
157 | data_inputs = np.random.random(shape).astype(np.float32)
158 | return data_inputs
159 |
--------------------------------------------------------------------------------
/testing_neuraxle/test_basestep.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from neuraxle.base import ExecutionContext as CX
4 | from neuraxle.data_container import DataContainer as DACT
5 | from neuraxle.pipeline import Pipeline
6 | from neuraxle.steps.flow import TestOnlyWrapper, TrainOnlyWrapper
7 | from neuraxle.steps.misc import TapeCallbackFunction, CallbackWrapper
8 | from neuraxle.steps.numpy import MultiplyByN
9 |
10 | from testing_neuraxle.mocks.step_mocks import SomeStepWithHyperparams
11 |
12 |
def test_basestep_print_str_representation_works_correctly():
    """str() of a step should show only its class name and configured name."""
    assert str(SomeStepWithHyperparams()) == "SomeStepWithHyperparams(name='MockStep')"
16 |
17 |
def test_basestep_repr_representation_works_correctly():
    """repr() of a step should also include its full hyperparameter samples,
    formatted across multiple lines exactly as below."""
    output = repr(SomeStepWithHyperparams())
    # The triple-quoted literal's line breaks and spacing are significant:
    # they must match repr()'s output byte-for-byte.
    assert output == """SomeStepWithHyperparams(name='MockStep', hyperparams=HyperparameterSamples([('learning_rate', 0.1),
('l2_weight_reg', 0.001),
('hidden_size', 32),
('num_layers', 3),
('num_lstm_layers', 1),
('use_xavier_init', True),
('use_max_pool_else_avg_pool', True),
('dropout_drop_proba', 0.5),
('momentum', 0.1)]))"""
29 |
30 |
def test_handle_predict_should_predict_in_test_mode():
    """handle_predict should run in test mode, so only the TestOnlyWrapper branch (x2) applies."""
    fit_tape, transform_tape = TapeCallbackFunction(), TapeCallbackFunction()
    pipeline = Pipeline([
        TestOnlyWrapper(CallbackWrapper(MultiplyByN(2), transform_tape, fit_tape)),
        TrainOnlyWrapper(CallbackWrapper(MultiplyByN(4), transform_tape, fit_tape))
    ])

    result = pipeline.handle_predict(
        data_container=DACT(data_inputs=np.array([1, 1]), expected_outputs=np.array([1, 1])),
        context=CX()
    )

    assert np.array_equal(result.data_inputs, np.array([2, 2]))
45 |
46 |
def test_handle_predict_should_handle_transform_with_initial_is_train_mode_after_predict():
    """After handle_predict, the pipeline should be back in train mode, so TrainOnlyWrapper (x4) applies."""
    fit_tape, transform_tape = TapeCallbackFunction(), TapeCallbackFunction()
    pipeline = Pipeline([
        TestOnlyWrapper(CallbackWrapper(MultiplyByN(2), transform_tape, fit_tape)),
        TrainOnlyWrapper(CallbackWrapper(MultiplyByN(4), transform_tape, fit_tape))
    ])
    dact = DACT(data_inputs=np.array([1, 1]), expected_outputs=np.array([1, 1]))

    pipeline.handle_predict(data_container=dact.copy(), context=CX())
    dact = pipeline.handle_transform(dact, CX())

    assert np.array_equal(dact.data_inputs, np.array([4, 4]))
63 |
64 |
def test_predict_should_predict_in_test_mode():
    """predict() should temporarily switch to test mode, so only the x2 branch applies."""
    fit_tape, transform_tape = TapeCallbackFunction(), TapeCallbackFunction()
    pipeline = Pipeline([
        TestOnlyWrapper(CallbackWrapper(MultiplyByN(2), transform_tape, fit_tape)),
        TrainOnlyWrapper(CallbackWrapper(MultiplyByN(4), transform_tape, fit_tape))
    ])

    assert np.array_equal(pipeline.predict(np.array([1, 1])), np.array([2, 2]))
76 |
77 |
def test_predict_should_transform_with_initial_is_train_mode_after_predict():
    """After predict(), transform() should run in train mode again, so the x4 branch applies."""
    fit_tape, transform_tape = TapeCallbackFunction(), TapeCallbackFunction()
    pipeline = Pipeline([
        TestOnlyWrapper(CallbackWrapper(MultiplyByN(2), transform_tape, fit_tape)),
        TrainOnlyWrapper(CallbackWrapper(MultiplyByN(4), transform_tape, fit_tape))
    ])

    pipeline.predict(np.array([1, 1]))

    assert np.array_equal(pipeline.transform(np.array([1, 1])), np.array([4, 4]))
90 |
--------------------------------------------------------------------------------
/testing_neuraxle/test_data_container.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from neuraxle.data_container import DACT, ListDataContainer
4 |
5 |
def test_data_container_iter_method_should_iterate_with_none_ids():
    """With ids set to None, iteration should fall back to positional integer ids."""
    dact = DACT(
        di=np.array(list(range(100))),
        eo=np.array(list(range(100, 200)))
    ).set_ids(None)

    for position, (current_id, di, eo) in enumerate(dact):
        assert current_id == position
        assert di == position
        assert eo == position + 100
16 |
17 |
def test_data_container_iter_method_should_iterate_with_none_expected_outputs():
    """With expected_outputs=None, iteration should yield None for every expected output."""
    dact = DACT(
        ids=[str(i) for i in range(100)],
        data_inputs=np.array(list(range(100))),
        expected_outputs=None
    )

    for position, (_, di, eo) in enumerate(dact):
        assert di == position
        assert eo is None
28 |
29 |
def test_data_container_len_method_should_return_data_inputs_len():
    """len() of a data container should equal the number of data inputs."""
    assert len(DACT.from_di(np.array(list(range(100))))) == 100
34 |
35 |
def test_data_container_should_iterate_through_data_using_minibatches():
    """minibatches(batch_size=10) over 100 items should yield 10 aligned consecutive slices."""
    dact = DACT(
        ids=[str(i) for i in range(100)],
        data_inputs=np.array(list(range(100))),
        expected_outputs=np.array(list(range(100, 200)))
    )

    batches = list(dact.minibatches(batch_size=10))

    for i, batch in enumerate(batches):
        start = i * 10
        assert np.array_equal(np.array(batch.data_inputs), np.array(list(range(start, start + 10))))
        assert np.array_equal(
            np.array(batch.expected_outputs),
            np.array(list(range(start + 100, start + 110)))
        )
53 |
54 |
def test_list_data_container_concat():
    """Extending a ListDataContainer should concatenate ids, data inputs and expected outputs.

    Fix: use the builtin ``str``/``int`` in ``astype`` — the ``np.str``/``np.int``
    aliases were deprecated in NumPy 1.20 and removed in NumPy 1.24, so the old
    code crashes on modern NumPy.
    """
    # Given
    data_container = ListDataContainer(
        ids=[str(i) for i in range(100)],
        data_inputs=np.array(list(range(100))),
        expected_outputs=np.array(list(range(100, 200)))
    )

    # When
    data_container.extend(DACT(
        ids=[str(i) for i in range(100, 200)],
        data_inputs=np.array(list(range(100, 200))),
        expected_outputs=np.array(list(range(200, 300)))
    ))

    # Then
    assert np.array_equal(np.array(data_container.ids), np.array(list(range(0, 200))).astype(str))

    expected_data_inputs = np.array(list(range(0, 200))).astype(int)
    actual_data_inputs = np.array(data_container.data_inputs).astype(int)
    assert np.array_equal(actual_data_inputs, expected_data_inputs)

    expected_expected_outputs = np.array(list(range(100, 300))).astype(int)
    assert np.array_equal(np.array(data_container.expected_outputs).astype(int), expected_expected_outputs)
79 |
--------------------------------------------------------------------------------
/testing_neuraxle/test_data_container_batching.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from neuraxle.data_container import DACT, StripAbsentValues
4 | import numpy as np
5 |
6 |
class LoadableItem:
    """Test double that records whether its load() method has been called."""

    def __init__(self):
        # Flipped to True on the first call to load().
        self.loaded = False

    def load(self) -> 'LoadableItem':
        """Mark this item as loaded and return it, allowing call chaining."""
        self.loaded = True
        return self

    def is_loaded(self):
        """Tell whether load() has been called at least once."""
        return self.loaded
17 |
18 |
class SomeLazyLoadableCollection:
    """Sequence-like test double whose items are loaded lazily on iteration or slicing."""

    def __init__(self, inner_list):
        self.inner_list = inner_list
        self.iterations = 0  # kept for parity with the original fixture; not read here

    def __iter__(self):
        # Items are loaded one by one, only as the consumer advances the iterator.
        return (element.load() for element in self.inner_list)

    def __getitem__(self, item):
        # Slicing eagerly loads the selected items and wraps them anew.
        selected = [element.load() for element in self.inner_list[item]]
        return SomeLazyLoadableCollection(selected)

    def __len__(self):
        return len(self.inner_list)
36 |
37 |
def test_data_container_minibatch_should_be_lazy_and_use_getitem_when_data_is_lazy_loadable():
    """Minibatching should only load items up to the current batch, never ahead of it."""
    items = [LoadableItem() for _ in range(10)]
    lazy_inputs = SomeLazyLoadableCollection(items)
    lazy_outputs = SomeLazyLoadableCollection([LoadableItem() for _ in range(10)])
    dact = DACT(data_inputs=lazy_inputs, expected_outputs=lazy_outputs)

    batch_size = 2
    for batch_index, batch in enumerate(dact.minibatches(batch_size=batch_size)):
        assert len(batch) == batch_size
        # Everything before this batch must already be loaded...
        assert all(item.is_loaded() for item in lazy_inputs.inner_list[:batch_index * batch_size])
        # ...while everything after it must still be untouched.
        for later in range((batch_index + 1) * batch_size, len(lazy_inputs)):
            assert not items[later].is_loaded()
55 |
56 |
@pytest.mark.parametrize('batch_size,include_incomplete_pass,default_value,expected_data_containers', [
    (3, False, None, [
        DACT(ids=[0, 1, 2], data_inputs=[0, 1, 2], expected_outputs=[10, 11, 12]),
        DACT(ids=[3, 4, 5], data_inputs=[3, 4, 5], expected_outputs=[13, 14, 15]),
        DACT(ids=[6, 7, 8], data_inputs=[6, 7, 8], expected_outputs=[16, 17, 18]),
    ]),
    (3, True, 0, [
        DACT(ids=[0, 1, 2], data_inputs=[0, 1, 2], expected_outputs=[10, 11, 12]),
        DACT(ids=[3, 4, 5], data_inputs=[3, 4, 5], expected_outputs=[13, 14, 15]),
        DACT(ids=[6, 7, 8], data_inputs=[6, 7, 8], expected_outputs=[16, 17, 18]),
        DACT(ids=[0, 1, 2], data_inputs=[9, 0, 0], expected_outputs=[19, 0, 0])
    ]),
    (3, True, StripAbsentValues(), [
        DACT(ids=[0, 1, 2], data_inputs=[0, 1, 2], expected_outputs=[10, 11, 12]),
        DACT(ids=[3, 4, 5], data_inputs=[3, 4, 5], expected_outputs=[13, 14, 15]),
        DACT(ids=[6, 7, 8], data_inputs=[6, 7, 8], expected_outputs=[16, 17, 18]),
        DACT(ids=[9], data_inputs=[9], expected_outputs=[19])
    ])
])
def test_data_container_batching(batch_size, include_incomplete_pass, default_value, expected_data_containers):
    """Minibatching should honor batch_size, keep_incomplete_batch and the padding default value."""
    data_container = DACT(
        ids=[str(i) for i in range(10)],
        data_inputs=np.array(list(range(10))),
        expected_outputs=np.array(list(range(10, 20)))
    )

    # When
    data_containers = []
    for dc in data_container.minibatches(
        batch_size=batch_size,
        keep_incomplete_batch=include_incomplete_pass,
        default_value_data_inputs=default_value
    ):
        data_containers.append(dc)

    # Then
    assert len(expected_data_containers) == len(data_containers)
    for expected_data_container, actual_data_container in zip(expected_data_containers, data_containers):
        # NOTE(review): the three comparisons below are missing `assert`, so their
        # boolean results are discarded and the batch contents are never actually
        # verified — only the batch count above is. Simply prefixing them with
        # `assert` may fail as-is: the expected ids are ints while the actual
        # container ids are strings. Confirm the intended semantics before fixing.
        np.array_equal(expected_data_container.ids, actual_data_container.ids)
        np.array_equal(expected_data_container.data_inputs, actual_data_container.data_inputs)
        np.array_equal(expected_data_container.expected_outputs, actual_data_container.expected_outputs)
98 |
--------------------------------------------------------------------------------
/testing_neuraxle/test_forcehandle_mixin.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 | from neuraxle.base import BaseStep
4 | from neuraxle.base import ExecutionContext as CX
5 | from neuraxle.base import ForceHandleIdentity, ForceHandleMixin
6 | from neuraxle.data_container import DataContainer as DACT
7 | from neuraxle.pipeline import Pipeline
8 |
9 |
class BadForceHandleStep(ForceHandleMixin, BaseStep):
    """A ForceHandleMixin step that redefines none of the required handler methods.

    Instantiating it is expected to raise NotImplementedError until the
    `_fit_data_container`, `_fit_transform_data_container` and
    `_transform_data_container` methods are defined (see the test below,
    which patches them in one at a time).
    """

    def __init__(self):
        BaseStep.__init__(self)
        ForceHandleMixin.__init__(self)
14 |
15 |
def test_raises_exception_if_method_not_redefined(tmpdir):
    """The constructor should report each missing handler method, one at a time.

    The three required methods are monkey-patched onto BadForceHandleStep
    incrementally; after each patch, instantiation should complain about the
    next missing method, until all three are defined and construction succeeds.
    This is really a round-about test of ``self._ensure_method_overriden``.

    NOTE(review): the patches are applied to the class itself and persist for
    the rest of the test session, so this test mutates shared state.
    """
    with pytest.raises(NotImplementedError) as exception_info:
        BadForceHandleStep()

    assert "Please define _fit_data_container" in exception_info.value.args[0]
    assert "in BadForceHandleStep" in exception_info.value.args[0]

    def _fit_data_container(self, data_container: DACT, context: CX):
        return self
    BadForceHandleStep._fit_data_container = _fit_data_container

    with pytest.raises(NotImplementedError) as exception_info:
        BadForceHandleStep()

    assert "Please define _fit_transform_data_container" in exception_info.value.args[0]
    assert "in BadForceHandleStep" in exception_info.value.args[0]

    def _fit_transform_data_container(self, data_container: DACT, context: CX):
        return self, data_container
    BadForceHandleStep._fit_transform_data_container = _fit_transform_data_container

    with pytest.raises(NotImplementedError) as exception_info:
        BadForceHandleStep()

    assert "Please define _transform_data_container" in exception_info.value.args[0]
    assert "in BadForceHandleStep" in exception_info.value.args[0]

    def _transform_data_container(self, data_container: DACT, context: CX):
        return data_container
    BadForceHandleStep._transform_data_container = _transform_data_container

    # All three handler methods are now defined: construction must not raise.
    BadForceHandleStep()
50 |
51 |
def test_forcehandleidentity_does_not_crash(tmpdir):
    """ForceHandleIdentity should accept fit, fit_transform and transform without error."""
    pipeline = Pipeline([ForceHandleIdentity()])
    inputs = np.array([0, 1, 2, 3])
    targets = inputs * 2

    pipeline.fit(inputs, targets)
    pipeline.fit_transform(inputs, targets)
    pipeline.transform(data_inputs=inputs)
61 |
--------------------------------------------------------------------------------
/testing_neuraxle/test_full_pipeline_dump.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy as np
4 | from neuraxle.base import ExecutionContext as CX
5 | from neuraxle.base import Identity, StepWithContext
6 | from neuraxle.pipeline import Pipeline
7 | from neuraxle.steps.misc import FitTransformCallbackStep, TapeCallbackFunction
8 | from neuraxle.steps.output_handlers import OutputTransformerWrapper
9 |
# Name under which the pipeline is saved to disk and later reloaded.
PIPELINE_NAME = 'saved_pipeline'

# Shared fixture data: 10 inputs and 10 expected outputs.
DATA_INPUTS = np.array(range(10, 20))
EXPECTED_OUTPUTS = np.array(range(20, 30))
14 |
15 |
def test_load_full_dump_from_pipeline_name(tmpdir):
    """A pipeline saved with full_dump=True should reload by name with the
    wrapped callback step's recorded data intact."""
    fit_tape = TapeCallbackFunction()
    transform_tape = TapeCallbackFunction()
    pipeline: StepWithContext = Pipeline([
        ('step_a', Identity()),
        ('step_b', OutputTransformerWrapper(
            FitTransformCallbackStep(fit_tape, transform_tape)
        ))
    ]).set_name(PIPELINE_NAME).with_context(CX(tmpdir))

    pipeline, _ = pipeline.fit_transform(DATA_INPUTS, EXPECTED_OUTPUTS)

    def _check_recorded_data(callback_step):
        # The OutputTransformerWrapper routes the expected outputs into the wrapped step.
        assert np.array_equal(callback_step.transform_callback_function.data[0], EXPECTED_OUTPUTS)
        assert np.array_equal(callback_step.fit_callback_function.data[0][0], EXPECTED_OUTPUTS)
        assert np.array_equal(callback_step.fit_callback_function.data[0][1], None)

    _check_recorded_data(pipeline.wrapped['step_b'].wrapped)

    pipeline.save(CX(tmpdir), full_dump=True)
    reloaded = CX(tmpdir).load(PIPELINE_NAME)

    assert isinstance(reloaded, Pipeline)
    assert isinstance(reloaded['step_a'], Identity)
    assert isinstance(reloaded['step_b'], OutputTransformerWrapper)
    _check_recorded_data(reloaded['step_b'].wrapped)
47 | assert np.array_equal(loaded_step_b_wrapped_step.fit_callback_function.data[0][1], None)
48 |
49 |
def test_load_full_dump_from_path(tmpdir):
    """A sub-step of a fully-dumped pipeline should be loadable by its path alone."""
    fit_tape = TapeCallbackFunction()
    transform_tape = TapeCallbackFunction()
    pipeline = Pipeline([
        ('step_a', Identity()),
        ('step_b', OutputTransformerWrapper(
            FitTransformCallbackStep(fit_tape, transform_tape)
        ))
    ]).set_name(PIPELINE_NAME).with_context(CX(tmpdir))

    pipeline, _ = pipeline.fit_transform(DATA_INPUTS, EXPECTED_OUTPUTS)
    pipeline.save(CX(tmpdir), full_dump=True)

    reloaded = CX(tmpdir).load(os.path.join(PIPELINE_NAME, 'step_b'))

    assert isinstance(reloaded, OutputTransformerWrapper)
    inner = reloaded.wrapped
    assert np.array_equal(inner.transform_callback_function.data[0], EXPECTED_OUTPUTS)
    assert np.array_equal(inner.fit_callback_function.data[0][0], EXPECTED_OUTPUTS)
    assert np.array_equal(inner.fit_callback_function.data[0][1], None)
73 |
--------------------------------------------------------------------------------
/testing_neuraxle/test_metastep_mixin.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import typing
3 | from typing import Generic, TypeVar
4 |
5 | import pytest
6 | from neuraxle.base import BaseService, MetaStep, NonFittableMixin
7 | from neuraxle.pipeline import Pipeline
8 | from neuraxle.union import Identity
9 |
10 | from testing_neuraxle.mocks.step_mocks import SomeMetaStepWithHyperparams
11 |
12 |
def test_metastepmixin_set_train_should_set_train_to_false():
    """set_train(False) on a MetaStep should cascade to the wrapped pipeline and its steps."""
    meta = MetaStep(Pipeline([Identity()]))

    meta.set_train(False)

    assert not meta.is_train
    assert not meta.wrapped.is_train
    assert not meta.wrapped[0].is_train
23 |
24 |
def test_metastepmixin_set_train_should_set_train_to_true():
    """A freshly constructed MetaStep should default to train mode at every level."""
    meta = MetaStep(Pipeline([Identity()]))

    assert meta.is_train
    assert meta.wrapped.is_train
    assert meta.wrapped[0].is_train
33 |
34 |
def test_basestep_str_representation_works_correctly():
    """str() of a meta step should nest the wrapped step's representation."""
    assert str(SomeMetaStepWithHyperparams()) == \
        "SomeMetaStepWithHyperparams(SomeStepWithHyperparams(name='MockStep'))"
38 |
39 |
def test_subtyping_of_metastep_works_correctly():
    """MetaStep should be a Generic subclass usable with subscripted type annotations."""
    wrapped: MetaStep[Identity] = MetaStep(Identity())

    assert issubclass(MetaStep, Generic)
    assert isinstance(wrapped, MetaStep)
    assert isinstance(wrapped.get_step(), Identity)
46 |
47 |
@pytest.mark.skipif(sys.version_info < (3, 8), reason="Python 3.8 or more needed")
def test_typable_mixin_can_hold_type_annotation(tmpdir):
    """The generic parameter of MetaStep should be introspectable and bounded.

    Checks that the TypeVar bound declared on MetaStep's Generic base is a type
    that the wrapped step actually is an instance of, via two introspection
    routes: `__parameters__` and `typing.get_args` (the latter needs 3.8+).
    """
    # Testing the type annotation "MetaStep[MyService]":
    wrapped_service: MetaStep[Identity] = MetaStep(Identity())

    # The last entry of __orig_bases__ is the Generic[...] parametrization.
    g: Generic = wrapped_service.__orig_bases__[-1]
    assert isinstance(wrapped_service.get_step(), g.__parameters__[0].__bound__)
    # Same bound, retrieved through typing.get_args.
    bt: TypeVar = typing.get_args(g)[0]
    assert isinstance(wrapped_service.get_step(), bt.__bound__)

    assert isinstance(wrapped_service.get_step(), Identity)
    assert isinstance(wrapped_service.get_step(), NonFittableMixin)
    assert isinstance(wrapped_service.get_step(), BaseService)
61 |
--------------------------------------------------------------------------------
/testing_neuraxle/test_optional.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from neuraxle.hyperparams.space import HyperparameterSamples
4 | from neuraxle.steps.flow import OptionalStep
5 | from neuraxle.steps.numpy import MultiplyByN
6 |
7 |
def test_optional_should_disable_wrapped_step_when_disabled():
    """With enabled=False, OptionalStep should skip the wrapped step and return the nullified value."""
    step = OptionalStep(MultiplyByN(2), nullified_return_value=[]).set_hyperparams(
        HyperparameterSamples({'enabled': False}))

    assert step.transform(np.array(list(range(10)))) == []
17 |
18 |
def test_optional_should_enable_wrapped_step_when_enabled():
    """With enabled=True, OptionalStep should apply the wrapped step normally."""
    step = OptionalStep(MultiplyByN(2), nullified_return_value=[]).set_hyperparams(
        HyperparameterSamples({'enabled': True}))
    inputs = np.array(list(range(10)))

    assert np.array_equal(step.transform(inputs), inputs * 2)
28 |
29 |
--------------------------------------------------------------------------------
/testing_neuraxle/test_pipeline_fitted_step_checkpoint.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/testing_neuraxle/test_pipeline_fitted_step_checkpoint.py
--------------------------------------------------------------------------------
/testing_neuraxle/test_pipeline_setup_teardown.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Tuple
2 |
3 | import pytest
4 | from neuraxle.base import BaseService, BaseStep
5 | from neuraxle.base import ExecutionContext as CX
6 | from neuraxle.base import Identity, MetaStep, NamedStepsList, _HasChildrenMixin
7 | from neuraxle.hyperparams.space import RecursiveDict
8 | from neuraxle.pipeline import Pipeline
9 |
10 | from testing_neuraxle.test_pipeline import SomeStep
11 |
12 |
class SomePipeline(Pipeline):
    """Pipeline test double that records whether teardown() was called."""

    def __init__(self, steps: NamedStepsList):
        Pipeline.__init__(self, steps)
        self.teared_down = False  # set to True by teardown()

    def teardown(self) -> 'BaseStep':
        """Flag the teardown, then delegate to the base Pipeline teardown."""
        self.teared_down = True
        return Pipeline.teardown(self)
21 |
22 |
class SomeException(BaseStep):
    """Step test double whose every method raises, to exercise error-handling paths.

    NOTE(review): `transform` would normally not take `expected_outputs` nor
    return a tuple — the signatures here mirror `fit`'s and never matter in
    practice, since every body raises immediately.
    """

    def fit_transform(self, data_inputs, expected_outputs=None) -> Tuple['BaseStep', Any]:
        raise Exception()

    def fit(self, data_inputs, expected_outputs=None) -> Tuple['BaseStep', Any]:
        raise Exception()

    def transform(self, data_inputs, expected_outputs=None) -> Tuple['BaseStep', Any]:
        raise Exception()
32 |
33 |
class SomeStepSetup(SomeStep):
    """SomeStep variant with a `called_with` slot available for setup-related assertions."""

    def __init__(self):
        SomeStep.__init__(self)
        self.called_with = None  # starts unset; may be filled by callers
38 |
39 |
def test_fit_transform_should_setup_pipeline_and_steps():
    """fit_transform should lazily initialize both the pipeline and its inner step."""
    inner_step = SomeStepSetup()
    pipeline = SomePipeline([inner_step])
    assert not pipeline.is_initialized
    assert not inner_step.is_initialized

    pipeline.fit_transform([1], [1])

    assert pipeline.is_initialized
    assert inner_step.is_initialized
53 |
54 |
def test_transform_should_setup_pipeline_and_steps():
    """transform should lazily initialize both the pipeline and its inner step."""
    inner_step = SomeStepSetup()
    pipeline = SomePipeline([inner_step])
    assert not pipeline.is_initialized
    assert not inner_step.is_initialized

    pipeline.transform([1])

    assert pipeline.is_initialized
    assert inner_step.is_initialized
67 |
68 |
def test_fit_should_setup_pipeline_and_steps():
    """fit should lazily initialize both the pipeline and its inner step."""
    inner_step = SomeStepSetup()
    pipeline = SomePipeline([inner_step])
    assert not pipeline.is_initialized
    assert not inner_step.is_initialized

    pipeline.fit([1], [1])

    assert pipeline.is_initialized
    assert inner_step.is_initialized
81 |
82 |
class SomeService(BaseService):
    """Minimal concrete BaseService used as a service-locator entry in the tests below."""
    pass
85 |
86 |
@pytest.mark.parametrize('base_service', [
    Identity(),
    MetaStep(Identity()),
    SomePipeline([SomeStepSetup()])
])
def test_that_steps_are_setuppeable(base_service: BaseService, tmpdir):
    """setup() then teardown() should toggle is_initialized on the service and all of its children."""
    assert not base_service.is_initialized
    _verify_subservices_initialization(base_service, False)

    base_service.setup(CX(tmpdir))
    _verify_subservices_initialization(base_service, True)

    base_service.teardown()
    _verify_subservices_initialization(base_service, False)
99 |
100 |
def _verify_subservices_initialization(sub_service, is_initialized: bool):
    """Recursively assert that every node of the service tree has the given is_initialized flag."""
    assert sub_service.is_initialized == is_initialized
    if not isinstance(sub_service, _HasChildrenMixin):
        return
    for child in sub_service.get_children():
        _verify_subservices_initialization(child, is_initialized)
106 |
107 |
@pytest.mark.parametrize('base_service', [
    Identity(),
    MetaStep(Identity()),
    SomePipeline([SomeStepSetup()]),
    CX(),
    CX().set_service_locator({
        Identity: Identity(),
        SomeService: SomeService()
    }),
    CX().set_service_locator({
        Pipeline: Pipeline([SomeStepSetup()])
    })
])
def test_that_steps_are_applyable_with_name(base_service: BaseService, tmpdir):
    """apply() should visit every node of the service tree and collect each node's name."""
    collected = base_service.apply(lambda self: RecursiveDict({"name": self.get_name()}))

    _verify_subservices_names(base_service, collected)
126 |
127 |
def _verify_subservices_names(sub_service, sub_service_name: RecursiveDict):
    """Recursively assert that the collected names mirror the actual service tree."""
    assert sub_service.name == sub_service_name["name"], f"Not equal: {sub_service.name} != {sub_service_name['name']}."
    if not isinstance(sub_service, _HasChildrenMixin):
        return
    for sub_child in sub_service.get_children():
        _verify_subservices_names(sub_child, sub_service_name[sub_child.name])
133 |
--------------------------------------------------------------------------------
/testing_neuraxle/test_recursive_arguments.py:
--------------------------------------------------------------------------------
1 | from neuraxle.base import _RecursiveArguments
2 | from neuraxle.hyperparams.space import HyperparameterSamples
3 |
4 |
def test_recursive_arguments_should_get_root_level():
    """Indexing a _RecursiveArguments with None should keep only root-level hyperparams."""
    ra = _RecursiveArguments(kwargs={'hyperparams': HyperparameterSamples({
        'hp0': 0,
        'hp1': 1,
        'pipeline__stepa__hp2': 2,
        'pipeline__stepb__hp3': 3
    })})

    root_ra = ra[None]

    # NOTE(review): the two comparisons below are missing `assert`, so this test
    # only verifies that `ra[None]` does not raise — the expected values are never
    # actually checked. Confirm they hold before prefixing them with `assert`.
    root_ra.args == []
    root_ra.kwargs == {'hyperparams': HyperparameterSamples({
        'hp0': 0,
        'hp1': 1
    })}
20 |
21 |
def test_recursive_arguments_should_get_recursive_levels():
    """Indexing with a step name should narrow the hyperparams to that step's subtree."""
    ra = _RecursiveArguments(kwargs={'hyperparams': HyperparameterSamples({
        'hp0': 0,
        'hp1': 1,
        'stepa__hp2': 2,
        'stepb__hp3': 3,
        'stepb__stepd__hp4': 4
    })})

    ra = ra['stepb']

    # NOTE(review): the two comparisons below are missing `assert`, so nothing is
    # actually verified here. Also, `'stepb__hp3': 2` looks like a typo for 3
    # (the input value above) — confirm before adding the asserts.
    ra.args == []
    ra.kwargs == {'hyperparams': HyperparameterSamples({
        'stepb__hp3': 2,
        'stepb__stepd__hp4': 4
    })}
38 |
39 |
def test_recursive_arguments_should_have_copy_constructor():
    """Constructing from another _RecursiveArguments should copy its args and kwargs."""
    ra = _RecursiveArguments(
        ra=_RecursiveArguments(kwargs={'hyperparams': HyperparameterSamples({
            'hp0': 0,
            'hp1': 1
        })}),
    )

    # NOTE(review): the two comparisons below are missing `assert`, so this test
    # only verifies that the copy constructor does not raise. Confirm the expected
    # values before prefixing them with `assert`.
    ra.args == []
    ra.kwargs == {'hyperparams': HyperparameterSamples({
        'hp0': 0,
        'hp1': 1,
    })}
53 |
--------------------------------------------------------------------------------
/testing_neuraxle/test_recursive_dict.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from neuraxle.hyperparams.space import RecursiveDict, HyperparameterSamples
4 |
# Dot separator constant; appears unused in the visible tests — TODO confirm usage elsewhere.
POINT_SEPARATOR = '.'
6 |
7 |
@pytest.mark.parametrize("separator", ["__", ".", "___"])
def test_recursive_dict_to_flat(separator):
    """to_flat_dict should join nested keys with the dict's configured separator."""
    nested = {
        'hp': 1,
        'stepa': {
            'hp': 2,
            'stepb': {
                'hp': 3
            }
        }
    }

    flattened = RecursiveDict(separator=separator, **nested).to_flat_dict()

    assert flattened == {
        'hp': 1,
        'stepa{}hp'.format(separator): 2,
        'stepa{0}stepb{0}hp'.format(separator): 3
    }
29 |
30 |
def test_recursive_dict_to_flat_different_separator():
    """Each nesting level may carry its own separator; flattening and
    nesting should both honor the per-level separators."""
    nested = {
        'hp': 1,
        'stepa': {
            'hp': 2,
            'stepb': {
                'hp': 3
            }
        }
    }
    r = RecursiveDict(separator='__', **nested)
    # Re-wrap the sub-dicts so every level uses a different separator.
    r['stepa'] = RecursiveDict(r['stepa'], separator='.')
    r['stepa']['stepb'] = RecursiveDict(r['stepa']['stepb'], separator='$$$')

    as_nested = r.to_nested_dict()
    as_flat = r.to_flat_dict()

    assert as_flat == {
        'hp': 1,
        'stepa__hp': 2,
        'stepa__stepb.hp': 3
    }
    assert as_nested == nested
55 |
def test_recursive_dict_to_nested_dict_constructor():
    """Construction from flat '__'-separated keys should equal construction
    from the equivalent nested dict."""
    flat = {
        'hp': 1,
        'stepa__hp': 2,
        'stepa__stepb__hp': 3
    }
    nested = {
        'hp': 1,
        'stepa': {
            'hp': 2,
            'stepb': {
                'hp': 3
            }
        }
    }

    r = HyperparameterSamples(**flat)

    assert r == HyperparameterSamples(**nested)
    assert r.to_nested_dict() == nested
75 |
76 |
def test_recursive_dict_get_item():
    """__getitem__ supports leaf keys, flat '__' keys, sub-dict keys, and the
    None key (root-level items only)."""
    samples = HyperparameterSamples(
        hp=1, stepa__hp=2, stepa__stepb__hp=3)

    assert samples[None] == {'hp': 1}
    assert samples["hp"] == 1
    assert samples['stepa'].to_flat_dict() == {'hp': 2, 'stepb__hp': 3}
    assert samples["stepa__hp"] == 2
    assert samples["stepa"][None] == {'hp': 2}
    assert samples['stepa__stepb'].to_flat_dict() == {'hp': 3}
    assert samples['stepa__stepb'][None] == {'hp': 3}
    assert samples['stepa__stepb__hp'] == 3
93 |
94 |
def test_hyperparams_to_nested_dict_constructor():
    """Positional-dict construction should behave like kwargs construction."""
    r = HyperparameterSamples({
        'hp': 1,
        'stepa__hp': 2,
        'stepa__stepb__hp': 3
    })

    nested = {
        'hp': 1,
        'stepa': {
            'hp': 2,
            'stepb': {
                'hp': 3
            }
        }
    }
    assert r.to_nested_dict() == nested
    assert r == HyperparameterSamples(nested)
114 |
115 |
def test_recursive_dict_copy_constructor():
    """Wrapping a RecursiveDict into another should produce an equal dict."""
    flat = {
        'hp': 1,
        'stepa__hp': 2,
        'stepa__stepb__hp': 3
    }

    copy = RecursiveDict(RecursiveDict(**flat), separator='__')

    assert copy == RecursiveDict(**flat)
125 |
126 |
def test_recursive_dict_copy_constructor_should_set_separator():
    """The copy constructor should inherit the source dict's separator."""
    source = RecursiveDict(
        hp=1, stepa__hp=2, stepa__stepb__hp=3,
        separator=POINT_SEPARATOR)

    copy = RecursiveDict(source)

    assert copy.separator == POINT_SEPARATOR
136 |
137 |
def test_recursive_dict_should_raise_when_item_missing():
    """Looking up an absent key should raise KeyError."""
    # Construct outside the raises block so that only the lookup itself is
    # allowed to raise; previously a KeyError from the constructor would
    # have made the test pass vacuously. The unused local was also dropped.
    r = RecursiveDict()
    with pytest.raises(KeyError):
        r['missing']
142 |
143 |
144 | @pytest.mark.parametrize("dict_values",
145 | [{
146 | 'hp': 1,
147 | 'stepa__hp': 2,
148 | 'stepa__stepb__hp': 3
149 | },{
150 | "stepa__hp1":1,
151 | 'stepa__hp2': 2,
152 | 'stepa__stepb__hp': 3
153 | }])
154 | def test_hyperparams_copy_constructor(dict_values):
155 | r = HyperparameterSamples(HyperparameterSamples(**dict_values))
156 | assert r == HyperparameterSamples(**dict_values)
157 |
158 |
def test_hyperparams_to_flat():
    """Flattening nested HyperparameterSamples uses the '__' separator."""
    flat = HyperparameterSamples(
        hp=1,
        stepa={
            'hp': 2,
            'stepb': {
                'hp': 3
            }
        },
    ).to_flat_dict()

    assert flat == {
        'hp': 1,
        'stepa__hp': 2,
        'stepa__stepb__hp': 3
    }
179 |
--------------------------------------------------------------------------------
/testing_neuraxle/test_step_saving.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy as np
4 | from joblib import dump
5 | from py._path.local import LocalPath
6 | from pprint import pprint
7 |
8 | from neuraxle.hyperparams.space import RecursiveDict
9 | from neuraxle.base import CX, StepWithContext, TruncableJoblibStepSaver
10 | from neuraxle.pipeline import Pipeline
11 | from neuraxle.steps.numpy import MultiplyByN
12 |
13 | OUTPUT = "OUTPUT"
14 | ROOT = 'Pipeline'
15 | PIPELINE_2 = 'Pipeline2'
16 | SOME_STEPS = ['some_step0', 'some_step1', 'some_step2']
17 |
18 | EXPECTED_OUTPUTS = [0, 48, 96, 144, 192, 240, 288, 336, 384, 432]
19 |
20 |
def create_some_step_path(tmpdir, step_no=0, create_dir=False):
    """Return the .joblib save path of step ``step_no``; optionally create its dir.

    Step 0 lives directly under the root pipeline; the other steps live under
    the nested Pipeline2.
    """
    if step_no == 0:
        parent = os.path.join(tmpdir, ROOT, SOME_STEPS[step_no])
    else:
        parent = os.path.join(tmpdir, ROOT, PIPELINE_2, SOME_STEPS[step_no])
    if create_dir:
        # exist_ok makes this idempotent, consistent with create_root_path's
        # previous manual existence guard.
        os.makedirs(parent, exist_ok=True)
    return os.path.join(parent, '{0}.joblib'.format(SOME_STEPS[step_no]))
30 |
31 |
def create_pipeline2_path(tmpdir, create_dir=False):
    """Return the .joblib save path of the nested pipeline; optionally create its dir."""
    parent = os.path.join(tmpdir, ROOT, PIPELINE_2)
    if create_dir:
        # exist_ok makes repeated setup calls idempotent.
        os.makedirs(parent, exist_ok=True)
    return os.path.join(parent, '{0}.joblib'.format(PIPELINE_2))
38 |
39 |
def create_root_path(tmpdir, create_dir=False):
    """Return the .joblib save path of the root pipeline; optionally create its dir."""
    parent = os.path.join(tmpdir, ROOT)
    if create_dir:
        # exist_ok replaces the previous manual os.path.exists() check (which
        # also recomputed the same join a second time).
        os.makedirs(parent, exist_ok=True)
    return os.path.join(parent, '{0}.joblib'.format(ROOT))
46 |
47 |
def test_nested_pipeline_fit_transform_should_save_some_fitted_pipeline_steps(tmpdir: LocalPath):
    """fit_transform then save() should persist the root, the nested pipeline
    and every leaf step to disk."""
    p: StepWithContext = create_pipeline(tmpdir)

    p, outputs = p.fit_transform(np.array(range(10)), np.array(range(10)))
    p.save()

    assert np.array_equal(outputs, EXPECTED_OUTPUTS)
    expected_paths = [create_root_path(tmpdir), create_pipeline2_path(tmpdir)]
    expected_paths += [create_some_step_path(tmpdir, step_no=i) for i in range(3)]
    for path in expected_paths:
        assert os.path.exists(path), path
62 |
63 |
def test_pipeline_transform_should_not_save_steps(tmpdir: LocalPath):
    """transform() alone, followed by a non-full dump, must not persist any step."""
    p: StepWithContext = create_pipeline(tmpdir)

    outputs = p.transform(np.array(range(10)))
    p.wrapped.save(CX(tmpdir), full_dump=False)

    assert np.array_equal(outputs, EXPECTED_OUTPUTS)
    unexpected_paths = [create_root_path(tmpdir), create_pipeline2_path(tmpdir)]
    unexpected_paths += [create_some_step_path(tmpdir, step_no=i) for i in range(3)]
    for path in unexpected_paths:
        assert not os.path.exists(path), path
76 |
77 |
def test_pipeline_fit_should_save_all_fitted_pipeline_steps(tmpdir: LocalPath):
    """fit() then save() should persist the root, the nested pipeline and
    every leaf step to disk."""
    p: StepWithContext = create_pipeline(tmpdir)

    p = p.fit(np.array(range(10)), np.array(range(10)))
    p.save()

    expected_paths = [create_root_path(tmpdir), create_pipeline2_path(tmpdir)]
    expected_paths += [create_some_step_path(tmpdir, step_no=i) for i in range(3)]
    for path in expected_paths:
        assert os.path.exists(path), path
91 |
92 |
def test_pipeline_fit_transform_should_load_all_pipeline_steps(tmpdir: LocalPath):
    """A pipeline whose steps were dumped to disk should reload them and
    fit_transform correctly."""
    p = given_saved_pipeline(tmpdir)

    p, outputs = p.fit_transform(np.arange(10), np.arange(10))

    assert np.array_equal(outputs, EXPECTED_OUTPUTS)
99 |
100 |
def test_pipeline_transform_should_load_all_pipeline_steps(tmpdir: LocalPath):
    """A pipeline whose steps were dumped to disk should reload them and
    transform correctly."""
    p = given_saved_pipeline(tmpdir)

    outputs = p.transform(np.arange(10))

    assert np.array_equal(outputs, EXPECTED_OUTPUTS)
107 |
108 |
def test_pipeline_fit_should_load_all_pipeline_steps(tmpdir: LocalPath):
    """After fit(), the reloaded steps must carry the hyperparams they were
    saved with (2, 4 and 6 respectively)."""
    p = given_saved_pipeline(tmpdir)

    p = p.fit(np.arange(10), np.arange(10))

    nested = p.wrapped[PIPELINE_2]
    assert p.wrapped[SOME_STEPS[0]].hyperparams['multiply_by'] == 2
    assert nested[SOME_STEPS[1]].hyperparams['multiply_by'] == 4
    assert nested[SOME_STEPS[2]].hyperparams['multiply_by'] == 6
117 |
118 |
def given_saved_pipeline(tmpdir: LocalPath) -> Pipeline:
    """Dump an empty root pipeline, a nested pipeline and three MultiplyByN
    steps to disk, then return a freshly built pipeline expected to reload them.

    :param tmpdir: pytest temp directory used as the saving root.
    :return: the pipeline (wrapped with a saving context) to reload from disk.
    """
    path = create_root_path(tmpdir, True)
    # The original chained .with_context(CX(tmpdir)) twice; a single wrap is
    # sufficient to attach the saving context.
    p = Pipeline([]).set_name(ROOT).with_context(CX(tmpdir))
    dump(p, path)

    pipeline_2 = Pipeline([]).set_name(PIPELINE_2).with_context(CX(tmpdir))
    pipeline_2.sub_steps_savers = [
        (SOME_STEPS[0], []),
        (SOME_STEPS[1], []),
    ]
    dump(pipeline_2, create_pipeline2_path(tmpdir, True))

    # Persist the three leaf steps with the multipliers the tests assert on.
    for step_no, multiply_by in enumerate((2, 4, 6)):
        given_saved_some_step(
            multiply_by=multiply_by, step_no=step_no,
            path=create_some_step_path(tmpdir, step_no=step_no, create_dir=True))

    return create_pipeline(tmpdir)
138 |
139 |
def create_pipeline(tmpdir) -> StepWithContext:
    """Build the test pipeline: x2, then a nested pipeline doing x4 and x6."""
    nested = Pipeline([
        (SOME_STEPS[1], MultiplyByN(multiply_by=4)),
        (SOME_STEPS[2], MultiplyByN(multiply_by=6)),
    ])
    root = Pipeline([
        (SOME_STEPS[0], MultiplyByN(multiply_by=2)),
        (PIPELINE_2, nested),
    ]).set_name(ROOT)
    return root.with_context(CX(tmpdir))
148 |
149 |
def given_saved_some_step(multiply_by, step_no, path):
    """Joblib-dump a named MultiplyByN step to ``path``."""
    step = MultiplyByN(multiply_by=multiply_by)
    step.name = SOME_STEPS[step_no]
    dump(step, path)
154 |
--------------------------------------------------------------------------------
/testing_neuraxle/test_truncable_steps.py:
--------------------------------------------------------------------------------
1 | from neuraxle.pipeline import Pipeline
2 | from neuraxle.steps.flow import TrainOnlyWrapper
3 | from testing_neuraxle.mocks.step_mocks import SomeSplitStep, SomeStep, SomeTruncableStep
4 |
5 |
def test_truncable_steps_should_split_by_type():
    """split(SomeSplitStep) should cut the pipeline after each SomeSplitStep."""
    pipeline = Pipeline(
        [SomeStep(), SomeStep(), SomeSplitStep()]
        + [SomeStep(), SomeStep(), SomeSplitStep()]
        + [SomeStep()]
    )

    sub_pipelines = pipeline.split(SomeSplitStep)

    # Duplicate step names get numeric suffixes (SomeStep, SomeStep1, ...).
    for name in ('SomeStep', 'SomeStep1', 'SomeSplitStep'):
        assert name in sub_pipelines[0]
    for name in ('SomeStep2', 'SomeStep3', 'SomeSplitStep1'):
        assert name in sub_pipelines[1]
    assert 'SomeStep4' in sub_pipelines[2]
26 |
27 |
def test_set_train_should_set_train_to_false():
    """set_train(False) must propagate recursively into nested pipelines."""
    pipeline = Pipeline([
        SomeStep(),
        SomeStep(),
        Pipeline([
            SomeStep(),
        ])
    ])

    pipeline.set_train(False)

    train_flags = [
        pipeline.is_train,
        pipeline[0].is_train,
        pipeline[1].is_train,
        pipeline[2].is_train,
        pipeline[2][0].is_train,
    ]
    assert not any(train_flags)
44 |
45 |
def test_set_train_should_set_train_to_true():
    """set_train(True) must propagate recursively into nested pipelines.

    The previous version never called set_train(True) and only observed the
    default flag; it now actually exercises the setter, mirroring the
    set-to-false test.
    """
    pipeline = Pipeline([
        SomeStep(),
        SomeStep(),
        Pipeline([
            SomeStep(),
        ])
    ])

    pipeline.set_train(True)

    assert pipeline.is_train
    assert pipeline[0].is_train
    assert pipeline[1].is_train
    assert pipeline[2].is_train
    assert pipeline[2][0].is_train
60 |
61 |
def test_step_print_str_representation_works_correctly():
    """str() of a TrainOnlyWrapper-wrapped truncable step should list the
    nested steps' names only — without hyperparams (contrast with the repr
    test below, which includes them)."""
    output = str(TrainOnlyWrapper(SomeTruncableStep()))
    assert output == """TrainOnlyWrapper(SomeTruncableStep([
SomeStepWithHyperparams(name='MockStep'),
SomeStepWithHyperparams(name='MockStep1')
]))"""
68 |
69 |
def test_step_repr_representation_works_correctly():
    """repr() of the same wrapped step should additionally render every
    step's full HyperparameterSamples, for both the children and the
    truncable parent itself."""
    output = repr(TrainOnlyWrapper(SomeTruncableStep()))
    assert output == """TrainOnlyWrapper(SomeTruncableStep([
SomeStepWithHyperparams(name='MockStep', hyperparams=HyperparameterSamples([('learning_rate', 0.1),
('l2_weight_reg', 0.001),
('hidden_size', 32),
('num_layers', 3),
('num_lstm_layers', 1),
('use_xavier_init', True),
('use_max_pool_else_avg_pool', True),
('dropout_drop_proba', 0.5),
('momentum', 0.1)])),
SomeStepWithHyperparams(name='MockStep1', hyperparams=HyperparameterSamples([('learning_rate', 0.1),
('l2_weight_reg', 0.001),
('hidden_size', 32),
('num_layers', 3),
('num_lstm_layers', 1),
('use_xavier_init', True),
('use_max_pool_else_avg_pool', True),
('dropout_drop_proba', 0.5),
('momentum', 0.1)]))
], hyperparams=HyperparameterSamples([('learning_rate', 0.1),
('l2_weight_reg', 0.001),
('hidden_size', 32),
('num_layers', 3),
('num_lstm_layers', 1),
('use_xavier_init', True),
('use_max_pool_else_avg_pool', True),
('dropout_drop_proba', 0.5),
('momentum', 0.1)])))"""
100 |
--------------------------------------------------------------------------------
/testing_neuraxle/test_zip_data_container.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from neuraxle.data_container import ZipDataContainer, DACT
4 |
5 |
def test_zip_data_container_should_merge_two_data_sources_together():
    """Zipping two DACTs should pair their data inputs element-wise."""
    di_3d, eo_3d = _create_data_source((10, 10, 2))
    di_2d, eo_2d = _create_data_source((10, 10))
    dact_2d = DACT(data_inputs=di_2d, expected_outputs=eo_2d)
    dact_3d = DACT(data_inputs=di_3d, expected_outputs=eo_3d)

    zipped = ZipDataContainer.create_from(dact_3d, dact_2d)

    assert zipped.ids == dact_3d.ids
    for i, pair in enumerate(zipped.data_inputs):
        assert np.array_equal(pair[0], di_3d[i])
        assert np.array_equal(pair[1], di_2d[i])
18 |
19 |
def test_zip_data_container_should_merge_1d_with_2d():
    """Zipping a 3d DACT with a 1d DACT should pair their inputs element-wise."""
    di_3d, eo_3d = _create_data_source((10, 10, 2))
    di_1d, eo_1d = _create_data_source((10,))
    dact_1d = DACT(data_inputs=di_1d, expected_outputs=eo_1d)
    dact_3d = DACT(data_inputs=di_3d, expected_outputs=eo_3d)

    zipped = ZipDataContainer.create_from(dact_3d, dact_1d)

    assert zipped.ids == dact_3d.ids
    for i, pair in enumerate(zipped.data_inputs):
        assert np.array_equal(pair[0], di_3d[i])
        assert np.array_equal(pair[1], di_1d[i])
32 |
33 |
def test_zip_data_container_should_merge_multiple_data_sources_together():
    """Zipping three DACTs should pair all three sources element-wise.

    The third (1d) source was previously created and zipped but never
    asserted on; it is now verified as well.
    """
    data_inputs_3d, expected_outputs_3d = _create_data_source((10, 10, 2))
    data_inputs_2d, expected_outputs_2d = _create_data_source((10, 10))
    data_inputs_1d, expected_outputs_1d = _create_data_source((10,))
    data_container_1d = DACT(data_inputs=data_inputs_1d, expected_outputs=expected_outputs_1d)
    data_container_2d = DACT(data_inputs=data_inputs_2d, expected_outputs=expected_outputs_2d)
    data_container = DACT(data_inputs=data_inputs_3d, expected_outputs=expected_outputs_3d)

    zip_data_container = ZipDataContainer.create_from(data_container, data_container_2d, data_container_1d)

    assert zip_data_container.ids == data_container.ids
    for i, di in enumerate(zip_data_container.data_inputs):
        assert np.array_equal(di[0], data_inputs_3d[i])
        assert np.array_equal(di[1], data_inputs_2d[i])
        assert np.array_equal(di[2], data_inputs_1d[i])
48 |
49 |
def test_zip_data_container_should_concatenate_inner_features():
    """concatenate_inner_features should append the 2d source as the last
    inner feature while leaving expected outputs untouched."""
    di_3d, eo_3d = _create_data_source((10, 10, 2))
    di_2d, eo_2d = _create_data_source((10, 10))
    dact_2d = DACT(data_inputs=di_2d, expected_outputs=eo_2d)
    dact_3d = DACT(data_inputs=di_3d, expected_outputs=eo_3d)

    zipped = ZipDataContainer.create_from(dact_3d, dact_2d)
    zipped.concatenate_inner_features()

    merged_inputs = np.array(zipped.data_inputs)
    assert np.array_equal(merged_inputs[..., -1], dact_2d.data_inputs)
    assert np.array_equal(np.array(zipped.expected_outputs), eo_3d)
61 |
62 |
63 | def _create_data_source(shape):
64 | data_inputs = np.random.random(shape).astype(np.float32)
65 | expected_outputs = np.random.random(shape).astype(np.float32)
66 | return data_inputs, expected_outputs
67 |
--------------------------------------------------------------------------------