├── .clabot ├── .coveragerc ├── .github ├── CODE_OF_CONDUCT.md ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── feature_request.md │ └── question.md ├── pull_request_template.md ├── stale.yml └── workflows │ ├── license_checker_v2.py │ ├── python_license_checker.yml │ └── testpythonpackage.yml ├── .gitignore ├── .vscode └── launch.json ├── CONTRIBUTING.md ├── LICENSE ├── README.rst ├── Why Neuraxle.rst ├── assets └── images │ ├── La-Cite-LP.png │ ├── favicon.ico │ ├── kimoby.png │ ├── neuraxio.png │ ├── neuraxle_logo.png │ ├── solution_nexam_io.jpg │ └── umaneo.png ├── coverage.sh ├── examples ├── Handler Methods.ipynb ├── Hyperparams And Distributions.ipynb ├── Introduction to Automatic Hyperparameter Tuning.ipynb ├── Introduction to Random Distributions.ipynb ├── Introduction to Time Series Processing.ipynb ├── README.txt ├── Rest API Serving.ipynb ├── Step Saving And Lifecycle.ipynb ├── __init__.py ├── _images │ ├── neuraxle_handler_methods.png │ ├── neuraxle_machine_learning_lifecycle.png │ └── neuraxle_time_series_data.png ├── auto_ml │ ├── README.txt │ └── plot_automl_loop_clean_kata.py ├── caching │ └── README.txt ├── deployment │ ├── README.txt │ └── plot_easy_rest_api_serving.py ├── getting_started │ ├── README.txt │ ├── plot_force_handle_mixin.py │ ├── plot_inverse_transform.py │ ├── plot_label_encoder_across_multiple_columns.py │ ├── plot_nested_pipelines.py │ └── plot_non_fittable_mixin.py ├── hyperparams │ ├── README.txt │ └── plot_hyperparams.py ├── operations │ └── plot_apply_method.py ├── parallel │ ├── README.txt │ └── plot_streaming_pipeline.py └── sklearn │ ├── README.txt │ ├── plot_boston_housing_meta_optimization.py │ ├── plot_boston_housing_regression_with_model_stacking.py │ └── plot_cyclical_feature_engineering.py ├── flake8.sh ├── neuraxle ├── __init__.py ├── base.py ├── data_container.py ├── distributed │ ├── __init__.py │ └── streaming.py ├── hyperparams │ ├── __init__.py │ ├── distributions.py │ ├── scipy_distributions.py │ └── space.py ├── logging │ ├── __init__.py │ ├── logging.py │ └── warnings.py ├── metaopt │ ├── __init__.py │ ├── auto_ml.py │ ├── callbacks.py │ ├── context.py │ ├── data │ │ ├── __init__.py │ │ ├── aggregates.py │ │ ├── reporting.py │ │ └── vanilla.py │ ├── hyperopt │ │ ├── __init__.py │ │ └── tpe.py │ ├── optimizer.py │ ├── repositories │ │ ├── __init__.py │ │ ├── db.py │ │ ├── json.py │ │ └── repo.py │ └── validation.py ├── pipeline.py ├── rest │ ├── __init__.py │ └── flask.py ├── steps │ ├── __init__.py │ ├── column_transformer.py │ ├── data.py │ ├── features.py │ ├── flow.py │ ├── loop.py │ ├── misc.py │ ├── numpy.py │ ├── output_handlers.py │ └── sklearn.py └── union.py ├── requirements.txt ├── run_quick_tests.sh ├── run_slow_tests.sh ├── run_tests.sh ├── setup.cfg ├── setup.py └── testing_neuraxle ├── __init__.py ├── api ├── __init__.py └── test_flask.py ├── examples ├── __init__.py └── test_examples.py ├── hyperparams ├── __init__.py ├── test_distributions.py ├── test_get_set_hyperparams.py ├── test_scipy_distributions.py └── test_space.py ├── metaopt ├── __init__.py ├── test_automl.py ├── test_automl_aggregates.py ├── test_automl_dataclasses.py ├── test_automl_redesign.py ├── test_automl_reports.py ├── test_automl_repositories.py ├── test_automl_sequence_validation_splitter.py ├── test_database_repo.py ├── test_random.py ├── test_tpe.py ├── test_trial.py └── test_validation_splitter.py ├── mocks ├── __init__.py └── step_mocks.py ├── steps ├── __init__.py ├── neuraxle_test_case.py ├── test_assertion_steps.py ├── 
test_choose_one_or_many_steps_of.py ├── test_column_selector_2d.py ├── test_column_transformer.py ├── test_concatenate_data_container.py ├── test_data_shuffling.py ├── test_epochs_repeater.py ├── test_expand_dim.py ├── test_features.py ├── test_flatten_for_each.py ├── test_for_each.py ├── test_if_execution_phase_is_then_do.py ├── test_numpy_steps.py ├── test_one_hot.py ├── test_output_transformer_wrapper.py ├── test_reversible_preprocessing_wrapper.py ├── test_sklearn_wrapper.py ├── test_step_cloner_for_each_data_input.py └── test_train_only_wrapper.py ├── test_apply.py ├── test_automl_scenarios.py ├── test_basestep.py ├── test_context_logger.py ├── test_data_container.py ├── test_data_container_batching.py ├── test_forcehandle_mixin.py ├── test_full_pipeline_dump.py ├── test_metastep_mixin.py ├── test_minibatch_sequential_pipeline.py ├── test_optional.py ├── test_output_transformer_wrapper.py ├── test_pipeline.py ├── test_pipeline_fitted_step_checkpoint.py ├── test_pipeline_setup_teardown.py ├── test_recursive_arguments.py ├── test_recursive_dict.py ├── test_service_assertions.py ├── test_step_saving.py ├── test_streaming.py ├── test_truncable_steps.py ├── test_union.py └── test_zip_data_container.py /.clabot: -------------------------------------------------------------------------------- 1 | { 2 | "contributors": ["guillaume-chevalier", "alexbrillant", "Eric2Hamel", "JeromeBlanchet", "mlevesquedion", "Vaunorage", "NeuroData-ltd", "Klaimohelmi", "vincent-antaki", "Rohith295"], 3 | "message": "Thank you for contributing!

We detected that this might be your first contribution to a [Neuraxio open-source](https://github.com/Neuraxio) project. Before we can review your open-source contributions, you (or your employer or company, depending on the situation) will need to sign a Contributor License Agreement (CLA). You can sign it [here](https://docs.google.com/forms/d/e/1FAIpQLSfDP3eCQoV0tMq296OfbOpNn-QkHwfJQLkS0MVjSHiZQXPw2Q/viewform).

Once the CLA has been signed, please reply here to tell us. For example: `I signed it`, `My employer signed it`, or `My company signed it`.

Thank you for taking the time to contribute!"
4 | }
5 | 
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | parallel = True
3 | concurrency = multiprocessing, thread
4 | 
5 | [report]
6 | exclude_lines = 
7 |     raise NotImplementedError.*
8 |     if False:
9 |     if 0:
10 |     raise AssertionError.*
11 |     @(abc\.)?abstractmethod
12 |     if __name__ == __main__:
13 | 
--------------------------------------------------------------------------------
/.github/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Code of conduct
2 | 
3 | We encourage contributors and users to respect this code of conduct.
4 | 
5 | ## Respect
6 | 
7 | Please show respect, and don't blast people.
8 | 
9 | ## Politeness
10 | 
11 | Make any feedback you provide constructive rather than negative.
12 | 
13 | ## Code reviews
14 | 
15 | As a general rule in the industry, it's recommended to focus on what to improve and how to improve it, rather than trying to blame people, which can be counter-productive.
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 | 
3 | github: guillaume-chevalier # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
4 | patreon: # Replace with a single Patreon username
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: 'Bug: '
5 | labels: bug, invalid
6 | assignees: ''
7 | 
8 | ---
9 | 
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 | 
13 | **To Reproduce**
14 | Steps to reproduce the behavior.
15 | 
16 | **Expected behavior**
17 | A clear and concise description of what you expected to happen.
18 | 
19 | **Suggested Fix**
20 | Any idea that may help fix the bug the right way.
21 | 
22 | **Additional context**
23 | Add any other context about the problem here.
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: 'Feature: '
5 | labels: enhancement
6 | assignees: ''
7 | 
8 | ---
9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 | 
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/question.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Question
3 | about: Ask a question
4 | title: 'Question: '
5 | labels: question
6 | assignees: ''
7 | 
8 | ---
9 | 
10 | We suggest that you ask your question here instead:
11 | https://stackoverflow.com/questions/tagged/neuraxle
12 | 
13 | You can still open an issue here to link to your question on StackOverflow if you would like to notify us.
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | # Neuraxle Pull Request
2 | 
11 | 
12 | ## What it is
13 | 
14 | My pull request does:
15 | 
16 | 
17 | ## How it works
18 | 
19 | I coded it this way:
20 | 
21 | 
22 | ## Example usage
23 | 
24 | Here is how you can use this new code as an end user:
25 | 
26 | ```python
27 | # Note: Please make dimensions and types clear to the reader.
28 | # E.g.: in the event fictitious data is processed in this code example.
29 | # Replace the current code example with your own.
30 | # You may then use this PR code example to further document your code as a docstring!
31 | 
32 | this: Example = is_a_code_example
33 | pass
34 | ```
35 | 
36 | __________________
37 | 
38 | 
39 | ## Checklist before merging the PR
40 | 
41 | Things to check each time you contribute:
42 | 
43 | 
44 | - [x] If this is your first contribution to Neuraxle, please read the [guide to contributing to the Neuraxle framework](https://www.neuraxle.org/stable/Neuraxle/CONTRIBUTING.html).
45 | - [ ] Your local Git username is set to your GitHub username, and your local Git email is set to your GitHub email. This is important to avoid breaking the cla-bot and for your contributions to be linked to your profile. More info: https://github.com/settings/emails
46 | - [ ] Arguments' dimensions and types are specified for new steps (important), with examples in docstrings when needed.
47 | - [ ] Class names and argument / API variables are very clear: there is no possible ambiguity. They also respect the existing code style (avoid duplicating words for the same concept) and are intuitive.
48 | - [ ] Use typing like `variable: Typing = ...` as much as possible. Also use typing for function arguments and return values like `def my_func(self, my_list: Dict[int, List[str]]) -> 'OrderedDict[int, str]':`.
49 | - [ ] Classes are documented: their behavior is explained beyond just the title of the class. You may even use the description written in your pull request above to fill some docstrings accurately.
50 | - [ ] If a numpy array is used, it is important to remember that these arrays are a special type that must be documented accordingly, and that numpy arrays should not be abused. This is because Neuraxle is a library that is not only limited to transforming numpy arrays. To this effect, numpy steps should probably be located in the existing numpy python files as much as possible, and not be all over the place. The same applies to Pandas DataFrames.
51 | - [ ] Code coverage is above 90% for the added code for the unit tests.
52 | - [ ] The above description of the pull request in natural language was used to document the new code inside the code's docstrings so as to have complete documentation, with examples.
53 | - [ ] Respect the Unit Testing status check.
54 | - [ ] Respect the Codacy status check.
55 | - [ ] Respect the cla-bot status check (unless the cla-bot is truly broken - please try to debug it first).
56 | - [ ] Code files that were edited were reformatted automatically using PyCharm's `Ctrl+Alt+L` shortcut. You may have reorganized imports as well.
57 | 
--------------------------------------------------------------------------------
/.github/stale.yml:
--------------------------------------------------------------------------------
1 | # Number of days of inactivity before an issue becomes stale
2 | daysUntilStale: 550
3 | # Number of days of inactivity before a stale issue is closed
4 | daysUntilClose: 180
5 | # Issues with these labels will never be considered stale
6 | exemptLabels:
7 |   - pinned
8 |   - security
9 | # Label to use when marking an issue as stale
10 | staleLabel: wontfix
11 | # Comment to post when marking an issue as stale. Set to `false` to disable
12 | markComment: >
13 |   This issue has been automatically marked as stale because it has not had
14 |   recent activity. It will be closed if no further activity occurs in the
15 |   next 180 days. Thank you for your contributions.
16 | # Comment to post when closing a stale issue. Set to `false` to disable
17 | closeComment: false
--------------------------------------------------------------------------------
/.github/workflows/license_checker_v2.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | 
3 | import urllib3
4 | 
5 | # Here is the command I use to verify all libraries installed with pip.
6 | # python license_checker_v2.py --dependencies $(cut -d '=' -f 1 <<< $(pip freeze))
7 | # Alternatively, if you are processing a requirements.txt file directly, you can use the following command to parse it:
8 | # awk -F'[>=<]' '{print $1}' requirements.txt
9 | 
10 | parser = argparse.ArgumentParser()
11 | parser.add_argument('--dependencies', nargs='+', required=True,
12 |                     help="A list of Python library names you want to check the licenses of.")
13 | parser.add_argument('--accepted_licenses', nargs='*',
14 |                     help="A list of licenses which are considered acceptable for your project.",
15 |                     default=["Apache Software License", "Apache 2.0", "BSD", "ZLIB", "MIT", "Unlicense", "CC0", "CC-BY", "PSF", "MPL", "Mozilla Public License 2.0", "Historical Permission Notice and Disclaimer", "HPND", "LGPL"])
16 | parser.add_argument('--forbidden_licenses', nargs='*',
17 |                     help="A list of licenses which are considered problematic for your project.",
18 |                     default=["GNU", "GPL", "Commons Clause", "BY-N"])
19 | args = parser.parse_args()
20 | 
21 | python_dependencies = args.dependencies
22 | pypi_pages = {}
23 | 
24 | for library_name in python_dependencies:
25 |     url = f"https://pypi.org/project/{library_name}/"
26 |     http_pool = urllib3.connection_from_url(url)
27 |     result = http_pool.urlopen('GET', url)
28 |     html_page = result.data.decode('utf-8')
29 |     pypi_pages[library_name] = html_page
30 | 
31 | 
32 | def parse_html(html_page):
33 |     lines = html_page.split('\n')
34 | 
35 |     for i, l in enumerate(lines):
36 |         # PyPI project pages show the license on a line containing "License:".
37 |         if "License:" in l:
38 |             return l.replace("License:", "").strip()
39 | 
40 |     raise ValueError("Unable to find license in html page")
41 | 
42 | 
43 | unknown_licenses = []
44 | library_license_dict = {}
45 | accepted_libraries = []
46 | refused_libraries = []
47 | maybe_libraries = []
48 | 
49 | 
50 | def is_license_in_list(license, license_list):
51 |     for l in license_list:
52 |         if l.lower() in license.lower():
53 |             return True
54 |     return False
55 | 
56 | 
57 | for library_name in python_dependencies:
58 |     try:
59 |         library_license = parse_html(pypi_pages[library_name])
60 |         library_license_dict[library_name] = library_license
61 |         print(f"{library_name}: {library_license}")
62 |         # First check if it is in forbidden_licenses, then if it is in accepted_licenses, else add it to the maybe list.
63 |         is_forbidden = is_license_in_list(library_license, args.forbidden_licenses)
64 |         is_accepted = is_license_in_list(library_license, args.accepted_licenses)
65 |         if is_forbidden and not is_accepted:
66 |             refused_libraries.append(library_name)
67 |         elif is_accepted and not is_forbidden:
68 |             accepted_libraries.append(library_name)
69 |         else:
70 |             maybe_libraries.append(library_name)
71 | 
72 |     except Exception as e:
73 |         print(f"{library_name}: {e}")
74 |         unknown_licenses.append(library_name)
75 | 
76 | 
77 | def plural(lst, _if='s', _else=''):
78 |     return _if if len(lst) > 1 else _else
79 | 
80 | 
81 | if len(unknown_licenses) > 0:
82 |     print(f"Couldn't find the license{plural(unknown_licenses)} of the following dependencies: {unknown_licenses}")
83 | 
84 | print(f"\nThe following dependenc{plural(accepted_libraries, 'ies', 'y')} {plural(accepted_libraries, 'have', 'has')} an accepted license: {accepted_libraries}")
85 | 
86 | if len(refused_libraries) > 0:
87 |     print(f"The following dependenc{plural(refused_libraries, 'ies', 'y')} {plural(refused_libraries, 'have', 'has')} forbidden license(s):")
88 |     for library_name in refused_libraries:
89 |         print(f"    {library_name}: {library_license_dict[library_name]}")
90 | 
91 | if len(maybe_libraries) > 0:
92 |     print(f"The following dependenc{plural(maybe_libraries, 'ies', 'y')} {plural(maybe_libraries, 'have', 'has')} license(s) which need to be reviewed:")
93 |     for library_name in maybe_libraries:
94 |         print(f"    {library_name}: {library_license_dict[library_name]}")
95 | 
96 | 
97 | assert len(refused_libraries) == 0 and len(maybe_libraries) == 0 and len(unknown_licenses) == 0
--------------------------------------------------------------------------------
/.github/workflows/python_license_checker.yml:
--------------------------------------------------------------------------------
1 | on:
2 |   push:
3 |     branches:
4 |       - master
5 |       - dev
6 |   pull_request:
7 |     branches:
8 |       - master
9 |       - dev
10 | jobs:
11 |   check-license:
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |       - name: Checkout
15 |         uses: actions/checkout@v1
16 |       - name: Set up Python
17 |         uses: actions/setup-python@v2
18 |         with:
19 |           python-version: 3.8.11
20 |       - name: Install dependencies
21 |         run: python -m pip install urllib3 && python -m pip install -r requirements.txt --no-cache-dir
22 |       - name: Check license
23 |         run: python .github/workflows/license_checker_v2.py --dependencies $(cut -d '=' -f 1 <<< $(pip freeze))
--------------------------------------------------------------------------------
/.github/workflows/testpythonpackage.yml:
--------------------------------------------------------------------------------
1 | name: Test Python Package
2 | 
3 | on:
4 |   push:
5 |     branches:
6 |       - master
7 |       - dev
8 |   pull_request:
9 |     branches:
10 |       - master
11 |       - dev
12 | 
13 | jobs:
14 |   build:
15 | 
16 |     runs-on: ubuntu-latest
17 |     strategy:
18 |       max-parallel: 4
19 |       fail-fast: 
false 20 | matrix: 21 | python-version: [3.7, 3.8, 3.9] 22 | 23 | steps: 24 | - uses: actions/checkout@v1 25 | - name: Set up Python ${{ matrix.python-version }} 26 | uses: actions/setup-python@v1 27 | with: 28 | python-version: ${{ matrix.python-version }} 29 | - name: Install dependencies 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install -r requirements.txt 33 | python setup.py install 34 | - name: Lint with flake8 35 | run: | 36 | pip install flake8 37 | # stop the build if there are Python syntax errors or undefined names 38 | flake8 neuraxle testing_neuraxle --count --select=E9,F63,F7,F82 --show-source --statistics 39 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 40 | flake8 neuraxle testing_neuraxle --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 41 | - name: Test with pytest 42 | run: | 43 | python setup.py test 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | prof/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | db.sqlite3 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # SageMath parsed files 83 | *.sage.py 84 | 85 | # Environments 86 | venv 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | env.bak/ 93 | venv.bak/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | 108 | # IDEs 109 | .idea 110 | .vscode/settings.json 111 | .style.yapf 112 | *-py.js 113 | *pmap.yml 114 | tmp 115 | 116 | # Other 117 | .DS_Store 118 | ___* 119 | todo.txt 120 | **cache/** 121 | **caching/** 122 | cache/** 123 | caching/** 124 | testing_neuraxle/examples/cache/** 125 | testing_neuraxle/cache/** 126 | testing_neuraxle/cache/* 127 | cov.xml 128 | profile.sh 129 | 130 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "name": "Python: Current File with /Neuraxle workdir", 6 | "type": "python", 7 | "justMyCode": 
false, 8 | "request": "launch", 9 | "program": "${file}", 10 | "console": "integratedTerminal", 11 | "cwd": "${workspaceFolder}", 12 | "env": { 13 | "PYTHONPATH": "${cwd}" 14 | }, 15 | "redirectOutput": true, 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Neuraxle 2 | 3 | 4 | ## First steps 5 | 6 | For contributing, first, read the README. 7 | 8 | We'd love to see you comment in an issue if you want to work on it. 9 | 10 | You can as well suggest new features by creating new issues. Don't hesitate to bring new ideas. 11 | 12 | 13 | ## Before coding 14 | 15 | New contributor? Follow this checklist to get started right on track: 16 | 17 | - [ ] Your local Git username is set to your GitHub username, and your local Git email is set to your [GitHub email](https://github.com/settings/emails). This is important to avoid breaking the cla-bot and for your contributions to be linked to your profile. If at least 1 contribution is not commited properly using the good credentials, the cla-bot will break until your [re-commit it](https://stackoverflow.com/questions/20002557/how-to-remove-a-too-large-file-in-a-commit-when-my-branch-is-ahead-of-master-by/39768343#39768343). Before commiting, [change your name and email](https://stackoverflow.com/a/42167480/2476920) to the good ones. 18 | - [ ] Use the PyCharm IDE with PyTest to test your code. Reformatting your code at every file save is a good idea, using [PyCharm's `Ctrl+Alt+L` shortcut](https://www.jetbrains.com/help/pycharm/reformat-and-rearrange-code.html). You may reorganize imports automatically as well, as long as your project root is well configured. Run the tests to see if everything works, and always ensure that all tests run before opening a pull request as well. 19 | - [ ] We recommend letting PyCharm manage the virtual environment by [creating a new one just for this project](https://www.jetbrains.com/help/pycharm/creating-virtual-environment.html#existing-environment), and [using PyTest as a test runner in PyCharm](https://www.jetbrains.com/help/pycharm/pytest.html#pytest-fixtures). This is not required, but should help in getting you started. 20 | - [ ] Please [make your pull request(s) editable](https://docs.github.com/en/github/collaborating-with-pull-requests/working-with-forks/allowing-changes-to-a-pull-request-branch-created-from-a-fork), such as for us to add you to the list of contributors if you didn't add the entry, for example. 21 | - [ ] To contribute, first fork the project, then do your changes, and then [open a pull request in the main repository](https://docs.github.com/en/github/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork). 22 | - [ ] Sign the [Contributor License Agreement (CLA)](https://docs.google.com/forms/d/e/1FAIpQLSfDP3eCQoV0tMq296OfbOpNn-QkHwfJQLkS0MVjSHiZQXPw2Q/viewform) to allow Neuraxio to use and publish your contributions under the Apache 2.0 license, in order for everyone to be able to use your open-source contributions. Follow the instructions of the cla-bot upon opening the pull request. 23 | 24 | 25 | ## Pull Requests 26 | 27 | You will then be able to open pull requests. The instructions in the [pull request template](https://www.neuraxle.org/stable/Neuraxle/.github/pull_request_template.html) will be shown to you upon creating each pull request. 
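As a refresher for the Git identity items in the checklist above, here is a minimal sketch of the commands involved (the values below are placeholders to replace with your own GitHub username and email):

```bash
# Set the identity that will be recorded on commits in this repository only:
git config user.name "your-github-username"
git config user.email "your-github-email@example.com"

# Double-check what is currently configured before committing:
git config user.name && git config user.email
```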
28 | 
29 | 
30 | ## Code Reviews
31 | 
32 | We do code reviews, and we expect most of what we suggest to be fixed. This is a machine learning framework, which means it is the basis for several other projects. Therefore, the code **must** be clean, understandable (easy to read), and documented, as many people will read and use what you have coded. Please respect PEP 8 as much as possible, and try as much as possible to create clean code with a good Object-Oriented Programming (OOP) design. It is normal and expected that your Pull Requests have lots of review comments.
33 | 
34 | 
35 | ## Reviewing others' code
36 | 
37 | We love that contributors review each other's code as well.
38 | 
39 | 
40 | ## Publishing project to PyPI
41 | 
42 | **For official project maintainers only:** you may follow these instructions to upload a new version of the package on pip:
43 | - https://github.com/Neuraxio/Neuraxle/wiki/How-to-deploy-a-new-package-(or-version-of-package)-to-PyPI
--------------------------------------------------------------------------------
/assets/images/La-Cite-LP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/assets/images/La-Cite-LP.png
--------------------------------------------------------------------------------
/assets/images/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/assets/images/favicon.ico
--------------------------------------------------------------------------------
/assets/images/kimoby.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/assets/images/kimoby.png
--------------------------------------------------------------------------------
/assets/images/neuraxio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/assets/images/neuraxio.png
--------------------------------------------------------------------------------
/assets/images/neuraxle_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/assets/images/neuraxle_logo.png
--------------------------------------------------------------------------------
/assets/images/solution_nexam_io.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/assets/images/solution_nexam_io.jpg
--------------------------------------------------------------------------------
/assets/images/umaneo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/assets/images/umaneo.png
--------------------------------------------------------------------------------
/coverage.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | ./flake8.sh
3 | pytest -n 7 --cov-report html --cov-report xml:cov.xml --cov-config=.coveragerc --cov=neuraxle testing_neuraxle
4 | # pytest --cov-report html --cov=neuraxle 
testing_neuraxle; open htmlcov/index.html 5 | 6 | -------------------------------------------------------------------------------- /examples/README.txt: -------------------------------------------------------------------------------- 1 | \>\>\> Practical Examples 2 | ============================ 3 | 4 | Navigate the hands-on examples. 5 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/examples/__init__.py -------------------------------------------------------------------------------- /examples/_images/neuraxle_handler_methods.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/examples/_images/neuraxle_handler_methods.png -------------------------------------------------------------------------------- /examples/_images/neuraxle_machine_learning_lifecycle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/examples/_images/neuraxle_machine_learning_lifecycle.png -------------------------------------------------------------------------------- /examples/_images/neuraxle_time_series_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/examples/_images/neuraxle_time_series_data.png -------------------------------------------------------------------------------- /examples/auto_ml/README.txt: -------------------------------------------------------------------------------- 1 | .. _real_world_examples: 2 | 3 | AutoML 4 | ------------------------------------- 5 | 6 | This demonstrates how to use the AutoML loop, and the auto ml steps that are available in Neuraxle. 7 | -------------------------------------------------------------------------------- /examples/caching/README.txt: -------------------------------------------------------------------------------- 1 | .. _real_world_examples: 2 | 3 | Caching 4 | ------------------------------------- 5 | 6 | This demonstrates how to use checkpoints in Neuraxle. 7 | -------------------------------------------------------------------------------- /examples/deployment/README.txt: -------------------------------------------------------------------------------- 1 | .. _real_world_examples: 2 | 3 | REST API Model Serving 4 | --------------------------------------------------- 5 | 6 | This demonstrates an easy way to deploy your Neuraxle model or pipeline to a REST API. 7 | -------------------------------------------------------------------------------- /examples/deployment/plot_easy_rest_api_serving.py: -------------------------------------------------------------------------------- 1 | """ 2 | Easy REST API Model Serving with Neuraxle 3 | ================================================ 4 | 5 | This demonstrates an easy way to deploy your Neuraxle model or pipeline to a REST API. 6 | 7 | .. 8 | Copyright 2019, Neuraxio Inc. 9 | 10 | Licensed under the Apache License, Version 2.0 (the "License"); 11 | you may not use this file except in compliance with the License. 
12 | You may obtain a copy of the License at 13 | 14 | http://www.apache.org/licenses/LICENSE-2.0 15 | 16 | Unless required by applicable law or agreed to in writing, software 17 | distributed under the License is distributed on an "AS IS" BASIS, 18 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 | See the License for the specific language governing permissions and 20 | limitations under the License. 21 | 22 | .. 23 | Thanks to Umaneo Technologies Inc. for their contributions to this Machine Learning 24 | project, visit https://www.umaneo.com/ for more information on Umaneo Technologies Inc. 25 | 26 | """ 27 | 28 | import numpy as np 29 | from flask import Flask 30 | from sklearn.cluster import KMeans 31 | from sklearn.datasets import load_boston 32 | from sklearn.decomposition import PCA, FastICA 33 | from sklearn.ensemble import GradientBoostingRegressor 34 | from sklearn.metrics import r2_score 35 | from sklearn.model_selection import train_test_split 36 | from sklearn.utils import shuffle 37 | 38 | from neuraxle.rest.flask import FlaskRestApiWrapper, JSONDataBodyDecoder, JSONDataResponseEncoder 39 | from neuraxle.pipeline import Pipeline 40 | from neuraxle.steps.sklearn import RidgeModelStacking 41 | from neuraxle.union import AddFeatures 42 | 43 | 44 | def main(): 45 | boston = load_boston() 46 | X, y = shuffle(boston.data, boston.target, random_state=13) 47 | X = X.astype(np.float32) 48 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False) 49 | 50 | pipeline = Pipeline([ 51 | AddFeatures([ 52 | PCA(n_components=2), 53 | FastICA(n_components=2), 54 | ]), 55 | RidgeModelStacking([ 56 | GradientBoostingRegressor(), 57 | KMeans(), 58 | ]), 59 | ]) 60 | 61 | print("Fitting on train:") 62 | pipeline = pipeline.fit(X_train, y_train) 63 | print("") 64 | print("Transforming train and test:") 65 | y_train_predicted = pipeline.transform(X_train) 66 | y_test_predicted = pipeline.transform(X_test) 67 | print("") 68 | print("Evaluating transformed train:") 69 | score = r2_score(y_train_predicted, y_train) 70 | print('R2 regression score:', score) 71 | print("") 72 | print("Evaluating transformed test:") 73 | score = r2_score(y_test_predicted, y_test) 74 | print('R2 regression score:', score) 75 | print("Deploying the application by routing data to the transform method:") 76 | 77 | class CustomJSONDecoderFor2DArray(JSONDataBodyDecoder): 78 | """This is a custom JSON decoder class that precedes the pipeline's transformation.""" 79 | 80 | def decode(self, data_inputs): 81 | """ 82 | Transform a JSON list object into an np.array object. 83 | 84 | :param data_inputs: json object 85 | :return: np array for data inputs 86 | """ 87 | return np.array(data_inputs) 88 | 89 | class CustomJSONEncoderOfOutputs(JSONDataResponseEncoder): 90 | """This is a custom JSON response encoder class for converting the pipeline's transformation outputs.""" 91 | 92 | def encode(self, data_inputs) -> dict: 93 | """ 94 | Convert predictions to a dict for creating a JSON Response object. 
95 | 96 | :param data_inputs: 97 | :return: 98 | """ 99 | return { 100 | 'predictions': list(data_inputs) 101 | } 102 | 103 | app = FlaskRestApiWrapper( 104 | json_decoder=CustomJSONDecoderFor2DArray(), 105 | wrapped=pipeline, 106 | json_encoder=CustomJSONEncoderOfOutputs() 107 | ).get_app() 108 | 109 | print("Finally, run the app by uncommenting this next line of code:") 110 | 111 | # app.run(debug=False, port=5000) 112 | 113 | print("You can now call your pipeline over HTTP with a (JSON) REST API.") 114 | 115 | # test_predictictions = requests.post( 116 | # url='http://127.0.0.1:5000/', 117 | # json=X_test.tolist() 118 | # ) 119 | # print(test_predictictions) 120 | # print(test_predictictions.content) 121 | 122 | assert isinstance(app, Flask) 123 | 124 | return app 125 | 126 | 127 | if __name__ == "__main__": 128 | main() 129 | -------------------------------------------------------------------------------- /examples/getting_started/README.txt: -------------------------------------------------------------------------------- 1 | .. _real_world_examples: 2 | 3 | Getting started 4 | ------------------------------------- 5 | 6 | This demonstrates how to build basic pipelines with Neuraxle. 7 | -------------------------------------------------------------------------------- /examples/getting_started/plot_force_handle_mixin.py: -------------------------------------------------------------------------------- 1 | """ 2 | Create Pipeline Steps that require implementing only handler methods 3 | ======================================================================================================================== 4 | 5 | If a pipeline step only needs to implement handler methods, then you can inherit from the 6 | ForceHandleMixin as demonstrated here. Handler methods are useful when : 7 | 8 | - You need to change the shape of the data container passed to the following steps, or the wrapped steps. 9 | - You want to apply side effects based on the data container, and the execution context. 10 | - You want to change the pipeline execution flow based on the data container, and the execution context. 11 | 12 | .. 13 | Copyright 2022, Neuraxio Inc. 14 | 15 | Licensed under the Apache License, Version 2.0 (the "License"); 16 | you may not use this file except in compliance with the License. 17 | You may obtain a copy of the License at 18 | 19 | http://www.apache.org/licenses/LICENSE-2.0 20 | 21 | Unless required by applicable law or agreed to in writing, software 22 | distributed under the License is distributed on an "AS IS" BASIS, 23 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 24 | See the License for the specific language governing permissions and 25 | limitations under the License. 26 | 27 | .. 28 | Thanks to Umaneo Technologies Inc. for their contributions to this Machine Learning 29 | project, visit https://www.umaneo.com/ for more information on Umaneo Technologies Inc. 30 | 31 | """ 32 | from typing import Tuple 33 | 34 | import numpy as np 35 | from neuraxle.base import DACT, BaseStep 36 | from neuraxle.base import ExecutionContext as CX 37 | from neuraxle.base import ForceHandleMixin 38 | 39 | 40 | class ForceHandleMixinStep(ForceHandleMixin, BaseStep): 41 | """ 42 | Please make your steps inherit from ForceHandleMixin when they only implement handle_methods, but also 43 | when you want to make impossible the use of regular fit, transform, and fit_transform methods 44 | Also, make sure that BaseStep is the last step you inherit from. 
45 | """ 46 | 47 | def __init__(self): 48 | BaseStep.__init__(self) 49 | ForceHandleMixin.__init__(self) 50 | 51 | def _fit_data_container(self, data_container: DACT, context: CX) -> BaseStep: 52 | """ 53 | Change the shape of the data container. 54 | and/or 55 | Apply any side effects based on the data container 56 | And/or 57 | Change the execution flow of the pipeline 58 | """ 59 | context.logger.info("Handling the 'fit' with handler method!") 60 | return self 61 | 62 | def _transform_data_container(self, data_container: DACT, context: CX) -> DACT: 63 | """ 64 | Change the shape of the data container. 65 | and/or 66 | Apply any side effects based on the data container 67 | And/or 68 | Change the execution flow of the pipeline 69 | """ 70 | context.logger.info("Handling the 'transform' with handler method!") 71 | return data_container 72 | 73 | def _fit_transform_data_container( 74 | self, data_container: DACT, context: CX 75 | ) -> Tuple[BaseStep, DACT]: 76 | """ 77 | Change the shape of the data container. 78 | and/or 79 | Apply any side effects based on the data container 80 | And/or 81 | Change the execution flow of the pipeline 82 | """ 83 | context.logger.info("Handling the 'fit_transform' with handler method!") 84 | return self, data_container 85 | 86 | 87 | def main(): 88 | p = ForceHandleMixinStep() 89 | data_inputs = np.array([0, 1]) 90 | expected_outputs = np.array([0, 1]) 91 | 92 | p = p.fit(data_inputs, expected_outputs) 93 | outputs = p.transform(data_inputs) 94 | 95 | 96 | if __name__ == '__main__': 97 | main() 98 | -------------------------------------------------------------------------------- /examples/getting_started/plot_inverse_transform.py: -------------------------------------------------------------------------------- 1 | """ 2 | Inverse Transforms in Neuraxle: How to Reverse a Prediction 3 | ============================================================ 4 | 5 | This demonstrates how to make a prediction, and then to undo the prediction to get back the original inputs or an 6 | estimate of the original inputs. Not every pipeline steps have an inverse transform method, because not every operation 7 | is reversible. 8 | 9 | .. 10 | Copyright 2019, Neuraxio Inc. 11 | 12 | Licensed under the Apache License, Version 2.0 (the "License"); 13 | you may not use this file except in compliance with the License. 14 | You may obtain a copy of the License at 15 | 16 | http://www.apache.org/licenses/LICENSE-2.0 17 | 18 | Unless required by applicable law or agreed to in writing, software 19 | distributed under the License is distributed on an "AS IS" BASIS, 20 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 21 | See the License for the specific language governing permissions and 22 | limitations under the License. 23 | 24 | .. 25 | Thanks to Umaneo Technologies Inc. for their contributions to this Machine Learning 26 | project, visit https://www.umaneo.com/ for more information on Umaneo Technologies Inc. 
27 | 28 | """ 29 | 30 | import numpy as np 31 | 32 | from neuraxle.pipeline import Pipeline 33 | from neuraxle.steps.numpy import MultiplyByN 34 | 35 | 36 | def main(): 37 | p = Pipeline([MultiplyByN(multiply_by=2)]) 38 | 39 | data_inputs = np.array([1, 2]) 40 | generated_outputs = p.transform(data_inputs) 41 | regenerated_inputs = p.inverse_transform(generated_outputs) 42 | 43 | assert np.array_equal(regenerated_inputs, data_inputs) 44 | assert np.array_equal(generated_outputs, 2 * data_inputs) 45 | 46 | 47 | if __name__ == "__main__": 48 | main() 49 | -------------------------------------------------------------------------------- /examples/getting_started/plot_label_encoder_across_multiple_columns.py: -------------------------------------------------------------------------------- 1 | """ 2 | Create label encoder across multiple columns 3 | ================================================== 4 | 5 | You can apply label encoder to all columns using the ColumnTransformer step. 6 | 7 | This demonstrates how to use properly transform columns using neuraxle. 8 | 9 | For more info, see the `thread here `__. 10 | 11 | .. 12 | Copyright 2019, Neuraxio Inc. 13 | 14 | Licensed under the Apache License, Version 2.0 (the "License"); 15 | you may not use this file except in compliance with the License. 16 | You may obtain a copy of the License at 17 | 18 | http://www.apache.org/licenses/LICENSE-2.0 19 | 20 | Unless required by applicable law or agreed to in writing, software 21 | distributed under the License is distributed on an "AS IS" BASIS, 22 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 23 | See the License for the specific language governing permissions and 24 | limitations under the License. 25 | 26 | """ 27 | 28 | import numpy as np 29 | import pandas 30 | from sklearn.preprocessing import LabelEncoder 31 | 32 | from neuraxle.steps.column_transformer import ColumnTransformer 33 | from neuraxle.steps.loop import FlattenForEach 34 | 35 | # Discussion: 36 | # https://stackoverflow.com/questions/24458645/label-encoding-across-multiple-columns-in-scikit-learn 37 | df = pandas.DataFrame({ 38 | 'pets': ['cat', 'dog', 'cat', 'monkey', 'dog', 'dog'], 39 | 'owner': ['Champ', 'Ron', 'Brick', 'Champ', 'Veronica', 'Ron'], 40 | 'location': ['San_Diego', 'New_York', 'New_York', 'San_Diego', 'San_Diego', 'New_York'] 41 | }) 42 | 43 | 44 | def _apply_same_encoder_to_all_columns(): 45 | """ 46 | One shared LabelEncoder will be applied on all the data to encode it. 47 | """ 48 | p = FlattenForEach(LabelEncoder(), then_unflatten=True) 49 | 50 | p, predicted_output = p.fit_transform(df.values) 51 | 52 | expected_output = np.array([ 53 | [6, 7, 6, 8, 7, 7], 54 | [1, 3, 0, 1, 5, 3], 55 | [4, 2, 2, 4, 4, 2] 56 | ]).transpose() 57 | assert np.array_equal(predicted_output, expected_output) 58 | 59 | 60 | def _apply_different_encoders_to_columns(): 61 | """ 62 | One standalone LabelEncoder will be applied on the pets, 63 | and another one will be shared for the columns owner and location. 
64 | """ 65 | p = ColumnTransformer([ 66 | # A different encoder will be used for column 0 with name "pets": 67 | (0, FlattenForEach(LabelEncoder(), then_unflatten=True)), 68 | # A shared encoder will be used for column 1 and 2, "owner" and "location": 69 | ([1, 2], FlattenForEach(LabelEncoder(), then_unflatten=True)), 70 | ], n_dimension=2) 71 | 72 | p, predicted_output = p.fit_transform(df.values) 73 | 74 | expected_output = np.array([ 75 | [0, 1, 0, 2, 1, 1], 76 | [1, 3, 0, 1, 5, 3], 77 | [4, 2, 2, 4, 4, 2] 78 | ]).transpose() 79 | assert np.array_equal(predicted_output, expected_output) 80 | 81 | 82 | def main(): 83 | _apply_same_encoder_to_all_columns() 84 | _apply_different_encoders_to_columns() 85 | 86 | 87 | if __name__ == "__main__": 88 | main() 89 | -------------------------------------------------------------------------------- /examples/getting_started/plot_nested_pipelines.py: -------------------------------------------------------------------------------- 1 | """ 2 | Create Nested Pipelines in Neuraxle 3 | ================================================ 4 | 5 | You can create pipelines within pipelines using the composition design pattern. 6 | 7 | This demonstrates how to create pipelines within pipelines, and how to access the steps and their 8 | attributes in the nested pipelines. 9 | 10 | For more info, see the `thread here `__. 11 | 12 | .. 13 | Copyright 2019, Neuraxio Inc. 14 | 15 | Licensed under the Apache License, Version 2.0 (the "License"); 16 | you may not use this file except in compliance with the License. 17 | You may obtain a copy of the License at 18 | 19 | http://www.apache.org/licenses/LICENSE-2.0 20 | 21 | Unless required by applicable law or agreed to in writing, software 22 | distributed under the License is distributed on an "AS IS" BASIS, 23 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 24 | See the License for the specific language governing permissions and 25 | limitations under the License. 26 | 27 | .. 28 | Thanks to Umaneo Technologies Inc. for their contributions to this Machine Learning 29 | project, visit https://www.umaneo.com/ for more information on Umaneo Technologies Inc. 30 | 31 | """ 32 | 33 | import numpy as np 34 | from sklearn.decomposition import PCA 35 | from sklearn.preprocessing import StandardScaler 36 | 37 | from neuraxle.base import Identity 38 | from neuraxle.pipeline import Pipeline 39 | 40 | 41 | def main(): 42 | np.random.seed(42) 43 | X = np.random.randint(5, size=(100, 5)) 44 | 45 | # Create and fit the pipeline: 46 | pipeline = Pipeline([ 47 | StandardScaler(), 48 | Identity(), 49 | Pipeline([ 50 | Identity(), 51 | Identity(), # Note: an Identity step is a step that does nothing. 52 | Identity(), # We use it here for demonstration purposes. 
53 | Pipeline([ 54 | Identity(), 55 | PCA(n_components=2) 56 | ]) 57 | ]) 58 | ]) 59 | pipeline, X_t = pipeline.fit_transform(X) 60 | 61 | # Get the components: 62 | pca_components = pipeline["Pipeline"]["Pipeline"][-1].get_wrapped_sklearn_predictor().components_ 63 | assert pca_components.shape == (2, 5) 64 | 65 | # Discussion: 66 | # https://stackoverflow.com/questions/28822756/getting-model-attributes-from-scikit-learn-pipeline/58359509#58359509 67 | 68 | 69 | if __name__ == "__main__": 70 | main() 71 | -------------------------------------------------------------------------------- /examples/getting_started/plot_non_fittable_mixin.py: -------------------------------------------------------------------------------- 1 | """ 2 | Create Pipeline Steps in Neuraxle that doesn't fit or transform 3 | ================================================================ 4 | 5 | If a pipeline step doesn't need to be fitted and only transforms data (e.g.: taking the logarithm of the data), 6 | then you can inherit from the NonFittableMixin as demonstrated here, which will override the fit method properly 7 | for you. You can also use a NonTransformableMixin if your step doesn't transform anything, which is rarer. If your step 8 | simply just does nothing to the data, then you could even use the Identity class of Neuraxle, which is simply a class 9 | that inherits from both the NonFittableMixin, the NonTransformableMixin, and BaseStep. 10 | 11 | Mixins are an old Object Oriented Programming (OOP) design pattern that resurfaces when designing 12 | Machine Learning Pipelines. Those are add-ons to classes to implement some methods in some specific ways already. 13 | A mixin doesn't inherit from BaseStep itself, because we can combine many of them in one class. However, a mixin must 14 | suppose that the object that inherits from the mixin also inherits from it's base class. Here, our base class is the 15 | BaseStep class. 16 | 17 | .. 18 | Copyright 2019, Neuraxio Inc. 19 | 20 | Licensed under the Apache License, Version 2.0 (the "License"); 21 | you may not use this file except in compliance with the License. 22 | You may obtain a copy of tche License at 23 | 24 | http://www.apache.org/licenses/LICENSE-2.0 25 | 26 | Unless required by applicable law or agreed to in writing, software 27 | distributed under the License is distributed on an "AS IS" BASIS, 28 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 29 | See the License for the specific language governing permissions and 30 | limitations under the License. 31 | 32 | .. 33 | Thanks to Umaneo Technologies Inc. for their contributions to this Machine Learning 34 | project, visit https://www.umaneo.com/ for more information on Umaneo Technologies Inc. 35 | 36 | """ 37 | import numpy as np 38 | 39 | from neuraxle.base import NonTransformableMixin, Identity, BaseStep, NonFittableMixin 40 | from neuraxle.pipeline import Pipeline 41 | 42 | 43 | class NonFittableStep(NonFittableMixin, BaseStep): 44 | """ 45 | Fit method is automatically implemented as changing nothing. 46 | Please make your steps inherit from NonFittableMixin, when they don't need any fitting. 47 | Also, make sure that BaseStep is the last step you inherit from. 48 | Note that we could also define the inverse_transform method in the present object. 
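    For example, a minimal hypothetical sketch of such an optional method is shown
    below, assuming the step's transformation is reversible (this step's transform
    is a no-op, so its inverse would also be a no-op):

        def inverse_transform(self, processed_outputs):
            # Reverse the transform here; since transform returns the data
            # unchanged, the inverse also returns it unchanged.
            return processed_outputs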
49 | """ 50 | def __init__(self): 51 | BaseStep.__init__(self) 52 | NonFittableMixin.__init__(self) 53 | 54 | def transform(self, data_inputs): 55 | # insert your transform code here 56 | print("NonFittableStep: I transformed.") 57 | return data_inputs 58 | 59 | 60 | class NonTransformableStep(NonTransformableMixin, BaseStep): 61 | """ 62 | Transform method is automatically implemented as returning data inputs as it is. 63 | Please make your steps inherit from NonTransformableMixin, when they don't need any transformations. 64 | Also, make sure that BaseStep is the last step you inherit from. 65 | """ 66 | def __init__(self): 67 | BaseStep.__init__(self) 68 | NonTransformableMixin.__init__(self) 69 | 70 | def fit(self, data_inputs, expected_outputs=None) -> 'NonTransformableStep': 71 | # insert your fit code here 72 | print("NonTransformableStep: I fitted.") 73 | return self 74 | 75 | 76 | def main(): 77 | p = Pipeline([ 78 | NonFittableStep(), 79 | NonTransformableStep(), 80 | Identity() # Note: Identity does nothing: it inherits from both NonFittableMixin and NonTransformableMixin. 81 | ]) 82 | 83 | some_data = np.array([0, 1]) 84 | p = p.fit(some_data) 85 | # Out: 86 | # NonFittableStep: I transformed. 87 | # NonTransformableStep: I fitted. 88 | 89 | out = p.transform(some_data) 90 | # Out: 91 | # NonFittableStep: I transformed. 92 | 93 | assert np.array_equal(out, some_data) 94 | # Data is unchanged as we've done nothing in the only transform. 95 | 96 | 97 | if __name__ == "__main__": 98 | main() 99 | -------------------------------------------------------------------------------- /examples/hyperparams/README.txt: -------------------------------------------------------------------------------- 1 | .. _real_world_examples: 2 | 3 | Hyperparameters 4 | ------------------------------------- 5 | 6 | This demonstrates how to add hyperparameters to Neuraxle pipelines. 7 | -------------------------------------------------------------------------------- /examples/hyperparams/plot_hyperparams.py: -------------------------------------------------------------------------------- 1 | """ 2 | Manipulate Hyperparameter Spaces for Hyperparameter Tuning 3 | =========================================================== 4 | 5 | This demonstrates how to manipulate hyperparameters and hyperparameter spaces. 6 | 7 | .. 8 | Copyright 2019, Neuraxio Inc. 9 | 10 | Licensed under the Apache License, Version 2.0 (the "License"); 11 | you may not use this file except in compliance with the License. 12 | You may obtain a copy of the License at 13 | 14 | http://www.apache.org/licenses/LICENSE-2.0 15 | 16 | Unless required by applicable law or agreed to in writing, software 17 | distributed under the License is distributed on an "AS IS" BASIS, 18 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 | See the License for the specific language governing permissions and 20 | limitations under the License. 21 | 22 | .. 23 | Thanks to Umaneo Technologies Inc. for their contributions to this Machine Learning 24 | project, visit https://www.umaneo.com/ for more information on Umaneo Technologies Inc. 
25 | 26 | """ 27 | 28 | from sklearn.decomposition import PCA 29 | 30 | from neuraxle.base import Identity 31 | from neuraxle.hyperparams.distributions import RandInt 32 | from neuraxle.hyperparams.space import HyperparameterSpace 33 | from neuraxle.pipeline import Pipeline 34 | from neuraxle.steps.numpy import MultiplyByN 35 | 36 | 37 | def main(): 38 | p = Pipeline([ 39 | ('step1', MultiplyByN()), 40 | ('step2', MultiplyByN()), 41 | Pipeline([ 42 | Identity(), 43 | Identity(), 44 | PCA(n_components=4) 45 | ]) 46 | ]) 47 | 48 | p.set_hyperparams_space({ 49 | 'step1__multiply_by': RandInt(42, 50), 50 | 'step2__multiply_by': RandInt(-10, 0), 51 | 'Pipeline__PCA__n_components': RandInt(2, 3) 52 | }) 53 | 54 | samples = p.get_hyperparams_space().rvs() 55 | p.set_hyperparams(samples) 56 | 57 | samples = p.get_hyperparams() 58 | assert 42 <= samples['step1__multiply_by'] <= 50 59 | assert -10 <= samples['step2__multiply_by'] <= 0 60 | assert samples['Pipeline__PCA__n_components'] in [2, 3] 61 | assert p['Pipeline']['PCA'].get_wrapped_sklearn_predictor().n_components in [2, 3] 62 | 63 | 64 | if __name__ == "__main__": 65 | main() 66 | -------------------------------------------------------------------------------- /examples/operations/plot_apply_method.py: -------------------------------------------------------------------------------- 1 | """ 2 | Apply recursive operations to a pipeline 3 | =========================================================== 4 | 5 | This demonstrates how to apply a method to each pipeline step. 6 | 7 | .. 8 | Copyright 2019, Neuraxio Inc. 9 | 10 | Licensed under the Apache License, Version 2.0 (the "License"); 11 | you may not use this file except in compliance with the License. 12 | You may obtain a copy of the License at 13 | 14 | http://www.apache.org/licenses/LICENSE-2.0 15 | 16 | Unless required by applicable law or agreed to in writing, software 17 | distributed under the License is distributed on an "AS IS" BASIS, 18 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 | See the License for the specific language governing permissions and 20 | limitations under the License. 21 | 22 | """ 23 | import json 24 | 25 | from scipy.stats import randint 26 | 27 | from neuraxle.base import Identity 28 | from neuraxle.hyperparams.space import RecursiveDict, HyperparameterSamples, HyperparameterSpace 29 | from neuraxle.pipeline import Pipeline 30 | 31 | 32 | class IdentityWithRvs(Identity): 33 | def _rvs(self): 34 | return HyperparameterSamples(self.hyperparams_space.rvs()) 35 | 36 | 37 | def rvs(step) -> RecursiveDict: 38 | return HyperparameterSamples(step.hyperparams_space.rvs()) 39 | 40 | 41 | def main(): 42 | p = Pipeline([ 43 | IdentityWithRvs().set_hyperparams_space(HyperparameterSpace({ 44 | 'a': randint(low=2, high=5) 45 | })), 46 | IdentityWithRvs().set_hyperparams_space(HyperparameterSpace({ 47 | 'b': randint(low=100, high=400) 48 | })) 49 | ]) 50 | 51 | samples: HyperparameterSamples = p.apply(rvs) 52 | print('p.apply(rvs) ==>') 53 | print(json.dumps(samples, indent=4)) 54 | 55 | # or equivalently: 56 | 57 | samples: HyperparameterSamples = p.apply('_rvs') 58 | print('p.apply(\'_rvs\') ==>') 59 | print(json.dumps(samples, indent=4)) 60 | 61 | 62 | if __name__ == '__main__': 63 | main() 64 | -------------------------------------------------------------------------------- /examples/parallel/README.txt: -------------------------------------------------------------------------------- 1 | .. 
_real_world_examples: 2 | 3 | Parallel 4 | ------------------------------------- 5 | 6 | This demonstrates how parallel processing works in Neuraxle. -------------------------------------------------------------------------------- /examples/parallel/plot_streaming_pipeline.py: -------------------------------------------------------------------------------- 1 | """ 2 | Parallel processing in Neuraxle 3 | =================================================================== 4 | 5 | This demonstrates how to stream data in parallel in a Neuraxle pipeline. 6 | The parallelism of the pipeline's steps will be obvious here. 7 | 8 | The pipeline has two steps: 9 | 1. Preprocessing: the step that processes the data simply sleeps. 10 | 2. Model: the model simply multiplies the data by two. 11 | 12 | This can be used with scikit-learn as well, and with any other library 13 | such as TensorFlow, to transform things in parallel. 14 | 15 | Pipelines benchmarked: 16 | 1. We first use a classical pipeline and evaluate the time. 17 | 2. Then we use a minibatched pipeline and evaluate the time. 18 | 3. Then we use a parallel pipeline and evaluate the time. 19 | 20 | We expect the parallel pipeline to be faster due to having more workers 21 | in parallel, as well as starting the model's transformations at the same 22 | time that other batches are being preprocessed, using queues. 23 | 24 | 25 | .. 26 | Copyright 2022, Neuraxio Inc. 27 | 28 | Licensed under the Apache License, Version 2.0 (the "License"); 29 | you may not use this file except in compliance with the License. 30 | You may obtain a copy of the License at 31 | 32 | http://www.apache.org/licenses/LICENSE-2.0 33 | 34 | Unless required by applicable law or agreed to in writing, software 35 | distributed under the License is distributed on an "AS IS" BASIS, 36 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 37 | See the License for the specific language governing permissions and 38 | limitations under the License. 39 | 40 | 41 | """ 42 | import time 43 | import numpy as np 44 | from neuraxle.base import ExecutionContext as CX 45 | 46 | from neuraxle.distributed.streaming import SequentialQueuedPipeline 47 | from neuraxle.pipeline import BasePipeline, Pipeline, MiniBatchSequentialPipeline 48 | from neuraxle.steps.loop import ForEach 49 | from neuraxle.steps.misc import Sleep 50 | from neuraxle.steps.numpy import MultiplyByN 51 | 52 | 53 | def eval_run_time(pipeline: BasePipeline): 54 | pipeline.setup(CX()) 55 | a = time.time() 56 | output = pipeline.transform(list(range(100))) 57 | b = time.time() 58 | seconds = b - a 59 | return seconds, output 60 | 61 | 62 | def main(): 63 | """ 64 | The task is to sleep 0.02 seconds for each data input and then multiply by 2.
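A rough back-of-envelope estimate, assuming the sleeps dominate the runtime: 100 data inputs x 0.02 s = ~2 s of sleeping for the sequential pipelines, versus roughly 2 s / 4 workers = ~0.5 s (plus batching and queueing overhead) for the parallel pipeline.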
65 | """ 66 | sleep_time = 0.02 67 | preprocessing_and_model_steps = [ForEach(Sleep(sleep_time=sleep_time)), MultiplyByN(2)] 68 | 69 | # Classical pipeline - all at once with one big batch: 70 | p = Pipeline(preprocessing_and_model_steps) 71 | time_vanilla_pipeline, output_classical = eval_run_time(p) 72 | print(f"Classical 'Pipeline' execution time: {time_vanilla_pipeline} seconds.") 73 | 74 | # Classical minibatch pipeline - minibatch size 5: 75 | p = MiniBatchSequentialPipeline(preprocessing_and_model_steps, 76 | batch_size=5) 77 | time_minibatch_pipeline, output_minibatch = eval_run_time(p) 78 | print(f"Minibatched 'MiniBatchSequentialPipeline' execution time: {time_minibatch_pipeline} seconds.") 79 | 80 | # Parallel pipeline - minibatch size 5 with 4 parallel workers per step that 81 | # have a max queue size of 10 batches between preprocessing and the model: 82 | p = SequentialQueuedPipeline(preprocessing_and_model_steps, 83 | n_workers_per_step=4, max_queued_minibatches=10, batch_size=5) 84 | time_parallel_pipeline, output_parallel = eval_run_time(p) 85 | print(f"Parallel 'SequentialQueuedPipeline' execution time: {time_parallel_pipeline} seconds.") 86 | 87 | assert np.array_equal(output_classical, output_minibatch) 88 | assert np.array_equal(output_classical, output_parallel) 89 | assert time_parallel_pipeline < time_minibatch_pipeline, str((time_parallel_pipeline, time_vanilla_pipeline)) 90 | 91 | 92 | if __name__ == '__main__': 93 | main() 94 | -------------------------------------------------------------------------------- /examples/sklearn/README.txt: -------------------------------------------------------------------------------- 1 | .. _real_world_examples: 2 | 3 | Neuraxle hyperparameter examples 4 | ------------------------------------- 5 | 6 | This demonstrates how to use sklearn classes in a Neuraxle pipeline. 7 | -------------------------------------------------------------------------------- /examples/sklearn/plot_boston_housing_meta_optimization.py: -------------------------------------------------------------------------------- 1 | """ 2 | Boston Housing Regression with Meta Optimization 3 | ================================================ 4 | 5 | This is an automatic machine learning example. It is more sophisticated than the other simple regression example. 6 | Not only a pipeline is defined, but also an hyperparameter space is defined for the pipeline. Then, a random search is 7 | performed to find the best possible combination of hyperparameters by sampling randomly in the hyperparameter space. 8 | 9 | .. 10 | Copyright 2022, Neuraxio Inc. 11 | 12 | Licensed under the Apache License, Version 2.0 (the "License"); 13 | you may not use this file except in compliance with the License. 14 | You may obtain a copy of the License at 15 | 16 | http://www.apache.org/licenses/LICENSE-2.0 17 | 18 | Unless required by applicable law or agreed to in writing, software 19 | distributed under the License is distributed on an "AS IS" BASIS, 20 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 21 | See the License for the specific language governing permissions and 22 | limitations under the License. 23 | 24 | .. 25 | Thanks to Umaneo Technologies Inc. for their contributions to this Machine Learning 26 | project, visit https://www.umaneo.com/ for more information on Umaneo Technologies Inc. 
27 | 28 | """ 29 | 30 | import numpy as np 31 | from sklearn.cluster import KMeans 32 | from sklearn.datasets import load_boston 33 | from sklearn.decomposition import PCA, FastICA 34 | from sklearn.ensemble import GradientBoostingRegressor 35 | from sklearn.linear_model import Ridge 36 | from sklearn.metrics import r2_score, mean_squared_error 37 | from sklearn.model_selection import train_test_split 38 | from sklearn.utils import shuffle 39 | 40 | from neuraxle.hyperparams.distributions import RandInt, LogUniform, Boolean 41 | from neuraxle.hyperparams.space import HyperparameterSpace 42 | from neuraxle.metaopt.auto_ml import AutoML, ValidationSplitter 43 | from neuraxle.metaopt.callbacks import MetricCallback 44 | from neuraxle.pipeline import Pipeline 45 | from neuraxle.steps.numpy import NumpyTranspose 46 | from neuraxle.steps.sklearn import SKLearnWrapper 47 | from neuraxle.union import AddFeatures, ModelStacking 48 | 49 | 50 | def main(tmpdir): 51 | boston = load_boston() 52 | X, y = shuffle(boston.data, boston.target, random_state=13) 53 | X = X.astype(np.float32) 54 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False) 55 | 56 | # Note that the hyperparameter spaces are defined here during the pipeline definition, but they could already be set 57 | # within the classes at their definition if using custom classes, or they could be defined after declaring the 58 | # pipeline, using a flat dict or a nested dict. 59 | 60 | p = Pipeline([ 61 | AddFeatures([ 62 | SKLearnWrapper( 63 | PCA(n_components=2), 64 | HyperparameterSpace({"n_components": RandInt(1, 3)}) 65 | ), 66 | SKLearnWrapper( 67 | FastICA(n_components=2), 68 | HyperparameterSpace({"n_components": RandInt(1, 3)}) 69 | ), 70 | ]), 71 | ModelStacking([ 72 | SKLearnWrapper( 73 | GradientBoostingRegressor(), 74 | HyperparameterSpace({ 75 | "n_estimators": RandInt(50, 300), "max_depth": RandInt(1, 4), 76 | "learning_rate": LogUniform(0.07, 0.7) 77 | }) 78 | ), 79 | SKLearnWrapper( 80 | KMeans(), 81 | HyperparameterSpace({"n_clusters": RandInt(5, 10)}) 82 | ), 83 | ], 84 | joiner=NumpyTranspose(), 85 | judge=SKLearnWrapper( 86 | Ridge(), 87 | HyperparameterSpace({"alpha": LogUniform(0.7, 1.4), "fit_intercept": Boolean()})), 88 | ) 89 | ]) 90 | 91 | print("Meta-fitting on train:") 92 | auto_ml = AutoML( 93 | p, 94 | validation_splitter=ValidationSplitter(0.20), 95 | n_trials=10, 96 | epochs=1, # 1 epoch here due to using sklearn models that just fit once.
97 | callbacks=[MetricCallback('mse', metric_function=mean_squared_error, higher_score_is_better=False)], 98 | ) 99 | 100 | fitted_random_search = auto_ml.fit(X_train, y_train) 101 | print("") 102 | 103 | print("Transforming train and test:") 104 | y_train_predicted = fitted_random_search.predict(X_train) 105 | y_test_predicted = fitted_random_search.predict(X_test) 106 | 107 | print("") 108 | 109 | print("Evaluating transformed train:") 110 | score_transform = r2_score(y_train, y_train_predicted)  # r2_score expects y_true first, then y_pred. 111 | print('R2 regression score:', score_transform) 112 | 113 | print("") 114 | 115 | print("Evaluating transformed test:") 116 | score_test = r2_score(y_test, y_test_predicted)  # y_true first, then y_pred. 117 | print('R2 regression score:', score_test) 118 | 119 | 120 | if __name__ == "__main__": 121 | main('cache') 122 | -------------------------------------------------------------------------------- /examples/sklearn/plot_boston_housing_regression_with_model_stacking.py: -------------------------------------------------------------------------------- 1 | """ 2 | Boston Housing Regression 3 | ========================== 4 | 5 | This example solves a regression problem using a pipeline with the following steps: 6 | 7 | - Feature augmentation with PCA and Fast ICA, 8 | - A pre-regression using an ensemble of gradient boosted regressors, and a KMeans clustering step to add even more features to the stacking, 9 | - The model stacking, using a ridge regression. 10 | 11 | This example also prints the shapes of the objects between the pipeline elements. 12 | 13 | .. 14 | Copyright 2019, Neuraxio Inc. 15 | 16 | Licensed under the Apache License, Version 2.0 (the "License"); 17 | you may not use this file except in compliance with the License. 18 | You may obtain a copy of the License at 19 | 20 | http://www.apache.org/licenses/LICENSE-2.0 21 | 22 | Unless required by applicable law or agreed to in writing, software 23 | distributed under the License is distributed on an "AS IS" BASIS, 24 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 | See the License for the specific language governing permissions and 26 | limitations under the License. 27 | 28 | .. 29 | Thanks to Umaneo Technologies Inc. for their contributions to this Machine Learning 30 | project, visit https://www.umaneo.com/ for more information on Umaneo Technologies Inc.
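As a minimal sketch of the stacking idea (illustrative only, not the actual RidgeModelStacking implementation):

    base_outputs = [m.predict(X) for m in sub_models]  # each of shape (n_samples,)
    meta_features = np.stack(base_outputs).T           # shape (n_samples, n_models)
    y_pred = ridge.fit(meta_features, y).predict(meta_features)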
31 | 32 | """ 33 | 34 | import numpy as np 35 | from sklearn.cluster import KMeans 36 | from sklearn.datasets import load_boston 37 | from sklearn.decomposition import PCA, FastICA 38 | from sklearn.ensemble import GradientBoostingRegressor 39 | from sklearn.metrics import r2_score 40 | from sklearn.model_selection import train_test_split 41 | from sklearn.utils import shuffle 42 | 43 | from neuraxle.pipeline import Pipeline 44 | from neuraxle.steps.numpy import NumpyShapePrinter 45 | from neuraxle.steps.sklearn import RidgeModelStacking 46 | from neuraxle.union import AddFeatures 47 | 48 | 49 | def main(): 50 | boston = load_boston() 51 | X, y = shuffle(boston.data, boston.target, random_state=13) 52 | X = X.astype(np.float32) 53 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False) 54 | 55 | p = Pipeline([ 56 | NumpyShapePrinter(), 57 | AddFeatures([ 58 | PCA(n_components=2), 59 | FastICA(n_components=2), 60 | ]), 61 | NumpyShapePrinter(), 62 | RidgeModelStacking([ 63 | GradientBoostingRegressor(), 64 | GradientBoostingRegressor(n_estimators=500), 65 | GradientBoostingRegressor(max_depth=5), 66 | KMeans(), 67 | ]), 68 | NumpyShapePrinter(), 69 | ]) 70 | 71 | print("Fitting on train:") 72 | p = p.fit(X_train, y_train) 73 | print("") 74 | print("Transforming train and test:") 75 | y_train_predicted = p.predict(X_train) 76 | y_test_predicted = p.predict(X_test) 77 | print("") 78 | print("Evaluating transformed train:") 79 | score_train = r2_score(y_train, y_train_predicted)  # r2_score expects y_true first, then y_pred. 80 | print('R2 regression score:', score_train) 81 | print("") 82 | print("Evaluating transformed test:") 83 | score_test = r2_score(y_test, y_test_predicted)  # y_true first, then y_pred. 84 | print('R2 regression score:', score_test) 85 | 86 | assert y_train_predicted.shape == (379,) 87 | assert y_test_predicted.shape == (127,) 88 | assert isinstance(score_train, float) 89 | assert isinstance(score_test, float) 90 | 91 | return y_train_predicted, y_test_predicted, score_train, score_test 92 | 93 | 94 | if __name__ == "__main__": 95 | main() 96 | -------------------------------------------------------------------------------- /flake8.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | flake8 neuraxle testing_neuraxle --count --max-line-length=120 --select=E9,F63,F7,F82 --statistics --show-source 3 | 4 | -------------------------------------------------------------------------------- /neuraxle/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.8.1" 2 | -------------------------------------------------------------------------------- /neuraxle/distributed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/neuraxle/distributed/__init__.py -------------------------------------------------------------------------------- /neuraxle/hyperparams/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/neuraxle/hyperparams/__init__.py -------------------------------------------------------------------------------- /neuraxle/logging/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/neuraxle/logging/__init__.py -------------------------------------------------------------------------------- /neuraxle/logging/warnings.py: -------------------------------------------------------------------------------- 1 | """ 2 | Neuraxle's Deprecation Warnings 3 | ==================================== 4 | Code evolves through time. When updating Neuraxle, you may find 5 | that some old arguments or classes you were using have changed. 6 | Warnings will be printed using the methods here. 7 | 8 | .. 9 | Copyright 2019, Neuraxio Inc. 10 | 11 | Licensed under the Apache License, Version 2.0 (the "License"); 12 | you may not use this file except in compliance with the License. 13 | You may obtain a copy of the License at 14 | 15 | http://www.apache.org/licenses/LICENSE-2.0 16 | 17 | Unless required by applicable law or agreed to in writing, software 18 | distributed under the License is distributed on an "AS IS" BASIS, 19 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20 | See the License for the specific language governing permissions and 21 | limitations under the License. 22 | 23 | """ 24 | 25 | import warnings 26 | 27 | 28 | SILENCE_DEPRECATION_WARNING = False 29 | 30 | 31 | def silence_all_deprecation_warnings(): 32 | """ 33 | Turn off all of the neuraxle.logging.warnings for deprecations. 34 | """ 35 | global SILENCE_DEPRECATION_WARNING 36 | SILENCE_DEPRECATION_WARNING = True 37 | 38 | 39 | def warn_deprecated_class(self, replacement_class: type = None, as_per_version: str = None): 40 | global SILENCE_DEPRECATION_WARNING 41 | if not SILENCE_DEPRECATION_WARNING and replacement_class is not None: 42 | warnings.warn( 43 | _deprecated_class_msg(self.__class__.__name__, replacement_class, as_per_version) + _deact_msg_instructions() 44 | ) 45 | return self 46 | 47 | 48 | class RaiseDeprecatedClass: 49 | """ 50 | Use this class and call its __init__ method to raise a 51 | DeprecationWarning and point to the replacement class to use. 52 | """ 53 | 54 | def __init__(self, replacement_class: type = None, since_version: str = None) -> None: 55 | raise_deprecated_class(replacement_class, since_version, deprecated_name=self.__class__.__name__) 56 | 57 | 58 | def raise_deprecated_class(replacement_class: type = None, since_version: str = None, deprecated_name: str = None): 59 | raise DeprecationWarning(_deprecated_class_msg(deprecated_name, replacement_class, since_version)) 60 | 61 | 62 | def _deprecated_class_msg(deprecated_name: str = None, replacement_class: type = None, since_version: str = None) -> str: 63 | # Build the message in explicit steps: the previous chain of conditional expressions 64 | # over implicitly-concatenated f-strings did not group the way it was intended to. 65 | msg = f"The class `{deprecated_name}` is deprecated" if deprecated_name is not None else "This class is deprecated" 66 | if since_version is not None: 67 | msg += f" since version `neuraxle>={since_version}`" 68 | msg += "." 69 | if getattr(replacement_class, "__name__", None) is not None: 70 | msg += ( 71 | f" Please consider using the class `{replacement_class.__name__}` instead:" 72 | f" visit https://www.neuraxle.org/stable/search.html?q={replacement_class.__name__} for more information." 73 | ) 74 | else: 75 | msg += " Visit https://www.neuraxle.org/stable/api.html for more information." 76 | return msg 77 | 78 | 79 | def warn_deprecated_arg(self, arg_name, default_value, value, replacement_argument_name, replacement_class: type = None): 80 | global SILENCE_DEPRECATION_WARNING 81 | if not SILENCE_DEPRECATION_WARNING and default_value != value: 82 | if isinstance(replacement_class, type): 83 | replacement_class = replacement_class.__name__ 84 | msg = f"Argument `{arg_name}={value}` for class `{self.__class__.__name__}` is deprecated. " 85 | msg += f"Please consider using `{replacement_argument_name}` " 86 | if replacement_class is not None: 87 | msg += f"or the class `{replacement_class}` " 88 | msg += f"instead.{_deact_msg_instructions()}" 89 | warnings.warn(msg) 90 | return self 91 | 92 | 93 | def _deact_msg_instructions() -> str: 94 | return ( 95 | " If you want to disable these warnings," 96 | " call `neuraxle.logging.warnings.silence_all_deprecation_warnings()`." 97 | ) -------------------------------------------------------------------------------- /neuraxle/metaopt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/neuraxle/metaopt/__init__.py -------------------------------------------------------------------------------- /neuraxle/metaopt/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/neuraxle/metaopt/data/__init__.py -------------------------------------------------------------------------------- /neuraxle/metaopt/hyperopt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/neuraxle/metaopt/hyperopt/__init__.py -------------------------------------------------------------------------------- /neuraxle/metaopt/repositories/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/neuraxle/metaopt/repositories/__init__.py -------------------------------------------------------------------------------- /neuraxle/rest/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/neuraxle/rest/__init__.py -------------------------------------------------------------------------------- /neuraxle/rest/flask.py: -------------------------------------------------------------------------------- 1 | """ 2 | Neuraxle's Flask Wrapper classes 3 | ==================================== 4 | The Flask wrapper classes are used to easily serve pipeline predictions using a Flask REST API. 5 | 6 | .. 7 | Copyright 2019, Neuraxio Inc. 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | http://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 20 | 21 | .. 22 | Thanks to Umaneo Technologies Inc. for their contributions to this Machine Learning 23 | project, visit https://www.umaneo.com/ for more information on Umaneo Technologies Inc.
24 | 25 | """ 26 | from abc import ABC, abstractmethod 27 | 28 | import numpy as np 29 | from flask import Response 30 | 31 | from neuraxle.base import BaseStep 32 | from neuraxle.pipeline import Pipeline 33 | 34 | 35 | class JSONDataBodyDecoder(BaseStep, ABC): 36 | """ 37 | Class to be used within a FlaskRestApiWrapper to convert an input JSON body into actual data (e.g.: arrays). 38 | """ 39 | 40 | def transform(self, data_inputs): 41 | return self.decode(data_inputs) 42 | 43 | @abstractmethod 44 | def decode(self, data_inputs: dict): 45 | """ 46 | Convert the JSON-parsed data_inputs into a data structure compatible with the pipeline's data inputs. 47 | 48 | :param data_inputs: encoded data inputs (dict parsed from the JSON body) 49 | :return: decoded data_inputs (as a data structure compatible with the pipeline's data inputs) 50 | """ 51 | raise NotImplementedError("TODO: inherit from the `JSONDataBodyDecoder` class and implement this method.") 52 | 53 | 54 | class JSONDataResponseEncoder(BaseStep, ABC): 55 | """ 56 | Base class to be used within a FlaskRestApiWrapper to convert prediction outputs to a JSON response. 57 | """ 58 | 59 | def transform(self, data_inputs) -> Response: 60 | """ 61 | Transform processed data inputs into a Flask response object. 62 | 63 | :param data_inputs: the data structure outputted by the wrapped pipeline 64 | :return: a Flask response object 65 | """ 66 | from flask import jsonify 67 | return jsonify(self.encode(data_inputs)) 68 | 69 | @abstractmethod 70 | def encode(self, data_inputs) -> dict: 71 | """ 72 | Convert data_inputs to a dict or a compatible data structure for jsonification. 73 | 74 | :param data_inputs: a data structure outputted by the pipeline after a transform 75 | :return: encoded data_inputs (a jsonifiable dict) 76 | """ 77 | raise NotImplementedError("TODO: inherit from the `JSONDataResponseEncoder` class and implement this method.") 78 | 79 | 80 | class FlaskRestApiWrapper(Pipeline): 81 | """ 82 | Wrap a pipeline to easily deploy it to a REST API. Just provide a JSON encoder and a JSON decoder. 83 | 84 | Usage example: 85 | 86 | ``` 87 | class CustomJSONDecoderFor2DArray(JSONDataBodyDecoder): 88 | '''This is a custom JSON decoder class that precedes the pipeline's transformation.''' 89 | 90 | def decode(self, data_inputs: dict): 91 | values_in_json_2d_arr: List[List[int]] = data_inputs["values"] 92 | return np.array(values_in_json_2d_arr) 93 | 94 | class CustomJSONEncoderOfOutputs(JSONDataResponseEncoder): 95 | '''This is a custom JSON response encoder class for converting the pipeline's transformation outputs.''' 96 | 97 | def encode(self, data_inputs) -> dict: 98 | return { 99 | 'predictions': list(data_inputs) 100 | } 101 | 102 | app = FlaskRestApiWrapper( 103 | json_decoder=CustomJSONDecoderFor2DArray(), 104 | wrapped=Pipeline(...), 105 | json_encoder=CustomJSONEncoderOfOutputs(), 106 | ).get_app() 107 | 108 | app.run(debug=False, port=5000) 109 | ``` 110 | """ 111 | 112 | def __init__( 113 | self, 114 | json_decoder: JSONDataBodyDecoder, 115 | wrapped: BaseStep, 116 | json_encoder: JSONDataResponseEncoder, 117 | route='/'): 118 | Pipeline.__init__(self, [ 119 | json_decoder, 120 | wrapped, 121 | json_encoder 122 | ]) 123 | self.route: str = route 124 | 125 | def get_app(self): 126 | """ 127 | This method returns a REST API wrapping the pipeline. 128 | 129 | :return: a Flask app (as given by `app = Flask(__name__)` and then configured).
130 | """ 131 | from flask import Flask, request 132 | from flask_restful import Api, Resource 133 | 134 | app = Flask(__name__) 135 | api = Api(app) 136 | wrapped = self 137 | 138 | class RESTfulRes(Resource): 139 | def get(self): 140 | return wrapped.transform(request.get_json()) 141 | 142 | api.add_resource( 143 | RESTfulRes, 144 | self.route 145 | ) 146 | 147 | return app 148 | -------------------------------------------------------------------------------- /neuraxle/steps/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/neuraxle/steps/__init__.py -------------------------------------------------------------------------------- /neuraxle/steps/features.py: -------------------------------------------------------------------------------- 1 | """ 2 | Featurization Steps 3 | ========================================================== 4 | You can find here steps that featurize your data. 5 | 6 | .. 7 | Copyright 2019, Neuraxio Inc. 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | http://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | See the License for the specific language governing permissions and 19 | limitations under the License. 20 | 21 | """ 22 | from neuraxle.pipeline import Pipeline 23 | from neuraxle.steps.flow import ChooseOneOrManyStepsOf 24 | from neuraxle.steps.numpy import NumpyFFT, NumpyAbs, NumpyFlattenDatum, NumpyConcatenateInnerFeatures, NumpyMean, \ 25 | NumpyMedian, NumpyMin, NumpyMax, NumpyArgMax 26 | from neuraxle.union import FeatureUnion 27 | 28 | 29 | class FFTPeakBinWithValue(FeatureUnion): 30 | """ 31 | Compute peak fft bins (int), and their magnitudes' value (float), to concatenate them. 32 | This is intended to be used only after a NumpyFFT absolute step. 33 | 34 | .. seealso:: 35 | :class:`~neuraxle.base.BaseStep`, 36 | :class:`~neuraxle.base.NonFittableMixin`, 37 | :class:`~neuraxle.steps.numpy.NumpyFFT`, 38 | :class:`Cheap3DTo2DTransformer` 39 | """ 40 | def __init__(self): 41 | super().__init__([ 42 | NumpyArgMax(axis=-2), 43 | NumpyMax(axis=-2) 44 | ], joiner=NumpyConcatenateInnerFeatures()) 45 | 46 | 47 | class Cheap3DTo2DTransformer(ChooseOneOrManyStepsOf): 48 | """ 49 | Prebuild class to featurize 3D data into 2D data for simple classification or regression, for instance. 50 | 51 | You can enable, or disable features using hyperparams : 52 | 53 | .. code-block:: python 54 | 55 | step = Cheap3DTo2DTransformer().set_hyperparams(hyperparams={ 56 | 'FFT__enabled': True, 57 | 'NumpyMean__enabled': True, 58 | 'NumpyMedian__enabled': True, 59 | 'NumpyMin__enabled': True, 60 | 'NumpyMax__enabled': True 61 | }) 62 | 63 | .. 
seealso:: 64 | :class:`~neuraxle.steps.flow.ChooseOneOrManyStepsOf`, 65 | :class:`~neuraxle.steps.numpy.NumpyFFT`, 66 | :class:`~neuraxle.steps.numpy.NumpyAbs`, 67 | :class:`~neuraxle.steps.numpy.NumpyFlattenDatum`, 68 | :class:`FFTPeakBinWithValue`, 69 | :class:`~neuraxle.steps.numpy.NumpyConcatenateInnerFeatures`, 70 | :class:`~neuraxle.steps.numpy.NumpyMean`, 71 | :class:`~neuraxle.steps.numpy.NumpyMedian`, 72 | :class:`~neuraxle.steps.numpy.NumpyMin`, 73 | :class:`~neuraxle.steps.numpy.NumpyMax` 74 | """ 75 | 76 | def __init__(self): 77 | super().__init__([ 78 | Pipeline([ 79 | NumpyFFT(), 80 | NumpyAbs(), 81 | FeatureUnion([ 82 | NumpyFlattenDatum(), # Reshape from 3D to flat 2D: flattening data except on batch size 83 | FFTPeakBinWithValue() # Extract 2D features from the 3D FFT bins 84 | ], joiner=NumpyConcatenateInnerFeatures()) 85 | ]).set_name('FFT'), 86 | NumpyMean(), 87 | NumpyMedian(), 88 | NumpyMin(), 89 | NumpyMax() 90 | ]) 91 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | setuptools 2 | pytest>=6.0.0 3 | pytest-cov>=2.6.1 4 | numpy>=1.16.2 5 | matplotlib==3.3.4 6 | scikit-learn>=0.24.1 7 | scipy>=1.4.1 8 | pandas>=1.3.5 9 | joblib>=0.13.2 10 | flask==1.1.4 11 | flask-restful>=0.3.9 12 | SQLAlchemy==1.4.26 13 | markupsafe==2.0.1 14 | pytest-timeout>=2.1.0 15 | -------------------------------------------------------------------------------- /run_quick_tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | py.test testing_neuraxle/ -n 10 --ignore=testing_neuraxle/metaopt/test_tpe.py --ignore=testing_neuraxle/examples/test_examples.py --disable-pytest-warnings --durations=10 --timeout=100 $1 $2 $3 $4 3 | 4 | -------------------------------------------------------------------------------- /run_slow_tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | py.test -n 10 testing_neuraxle/metaopt/test_tpe.py testing_neuraxle/examples/test_examples.py --disable-pytest-warnings --durations=10 --timeout=100 3 | 4 | -------------------------------------------------------------------------------- /run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | py.test -n 10 testing_neuraxle/ --disable-pytest-warnings --durations=10 --timeout=100 3 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [aliases] 2 | test=pytest 3 | 4 | 5 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | Neuraxle setup file 3 | ==================================== 4 | Setup file specifying the python version and so forth. 5 | 6 | .. 7 | Copyright 2019, Neuraxio Inc. 8 | 9 | Licensed under the Apache License, Version 2.0 (the "License"); 10 | you may not use this file except in compliance with the License. 11 | You may obtain a copy of the License at 12 | 13 | http://www.apache.org/licenses/LICENSE-2.0 14 | 15 | Unless required by applicable law or agreed to in writing, software 16 | distributed under the License is distributed on an "AS IS" BASIS, 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
18 | See the License for the specific language governing permissions and 19 | limitations under the License. 20 | 21 | """ 22 | 23 | from setuptools import setup, find_packages 24 | 25 | from neuraxle import __version__ as _VERSION 26 | 27 | with open('README.rst') as _f: 28 | _README = _f.read() 29 | 30 | setup( 31 | name='neuraxle', 32 | version=_VERSION, 33 | description='Neuraxle is a Machine Learning (ML) library for building neat pipelines, providing the right ' 34 | 'abstractions to both ease research, development, and deployment of your ML applications.', 35 | long_description=_README, 36 | classifiers=[ 37 | "Development Status :: 4 - Beta", 38 | "Intended Audience :: Developers", 39 | "Intended Audience :: Education", 40 | "Intended Audience :: Financial and Insurance Industry", 41 | "Intended Audience :: Healthcare Industry", 42 | "Intended Audience :: Information Technology", 43 | "Intended Audience :: Manufacturing", 44 | "Intended Audience :: Science/Research", 45 | "Intended Audience :: System Administrators", 46 | "Intended Audience :: Telecommunications Industry", 47 | 'License :: OSI Approved :: Apache Software License', 48 | "Natural Language :: English", 49 | "Operating System :: OS Independent", 50 | 'Programming Language :: Python :: 3.7', 51 | 'Programming Language :: Python :: 3.8', 52 | 'Programming Language :: Python :: 3.9', 53 | "Topic :: Adaptive Technologies", 54 | "Topic :: Office/Business", 55 | "Topic :: Scientific/Engineering", 56 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 57 | "Topic :: Scientific/Engineering :: Artificial Life", 58 | "Topic :: Scientific/Engineering :: Bio-Informatics", 59 | "Topic :: Scientific/Engineering :: Image Recognition", 60 | "Topic :: Scientific/Engineering :: Information Analysis", 61 | "Topic :: Scientific/Engineering :: Interface Engine/Protocol Translator", 62 | "Topic :: Scientific/Engineering :: Mathematics", 63 | "Topic :: Scientific/Engineering :: Medical Science Apps.", 64 | "Topic :: Scientific/Engineering :: Physics", 65 | "Topic :: Software Development", 66 | "Topic :: Software Development :: Assemblers", 67 | "Topic :: Software Development :: Build Tools", 68 | "Topic :: Software Development :: Libraries", 69 | "Topic :: Software Development :: Libraries :: Python Modules", 70 | "Topic :: Software Development :: Object Brokering", 71 | "Topic :: Software Development :: Pre-processors", 72 | "Topic :: Software Development :: Quality Assurance", 73 | "Topic :: Software Development :: Testing", 74 | "Topic :: System", 75 | "Topic :: System :: Clustering", 76 | "Topic :: System :: Distributed Computing", 77 | "Topic :: System :: Networking", 78 | # Topic :: System :: Systems Administration, 79 | "Topic :: Text Processing", 80 | "Topic :: Text Processing :: Filters", 81 | "Topic :: Text Processing :: Linguistic", 82 | "Topic :: Utilities", 83 | "Typing :: Typed" 84 | ], 85 | url='https://github.com/Neuraxio/Neuraxle', 86 | download_url='https://github.com/Neuraxio/Neuraxle/tarball/{}'.format( 87 | _VERSION), 88 | author='Neuraxio Inc.', 89 | author_email='guillaume.chevalier@neuraxio.com', 90 | packages=find_packages(include=['neuraxle*']), 91 | test_suite="testing_neuraxle", 92 | setup_requires=["pytest-runner"], 93 | install_requires=[ 94 | 'numpy>=1.16.2', 95 | 'scipy>=1.4.1', 96 | 'scikit-learn>=0.24.1', 97 | 'matplotlib==3.3.4', 98 | 'joblib>=0.13.2', 99 | 'Flask>=1.1.4', 100 | 'Flask-RESTful>=0.3.9', 101 | 'markupsafe==2.0.1', 102 | 'pandas>=1.3.5', 103 | ], 104 | tests_require=[ 105 | 
"pytest", 106 | "pytest-cov", 107 | "pytest-timeout>=2.1.0", 108 | "scikit-learn>=0.24.1" 109 | ], 110 | include_package_data=True, 111 | license='Apache 2.0', 112 | keywords='pipeline pipelines data science machine learning deep learning neuraxle sklearn scikit-learn scipy numpy pandas tensorflow' 113 | ) 114 | 115 | print(""" 116 | ____________________________________________________________________ 117 | 118 | Thank you for installing 119 | _ _ __ 120 | | \ | | | | 121 | | \| | ___ _ _ _ __ ___ __ __ | | ___ 122 | | . ` |/ _ \| | | || ' _||__ \\\\ \/ / | | / _ \\ 123 | | |\ || __|| |_| | | | / _ | > < | | | __| 124 | |_| \_|\___| \__,_||___| \_,_|/_/\_\ |__|\___| 125 | 126 | 127 | Learn more: 128 | - https://www.neuraxle.org/stable/index.html 129 | 130 | Contribute: 131 | - https://gitter.im/Neuraxle/community 132 | 133 | Open issue: 134 | - https://github.com/Neuraxio/Neuraxle 135 | 136 | Ask questions: 137 | - https://stackoverflow.com/questions/tagged/neuraxle 138 | ____________________________________________________________________ 139 | """) 140 | -------------------------------------------------------------------------------- /testing_neuraxle/__init__.py: -------------------------------------------------------------------------------- 1 | from neuraxle.logging.warnings import silence_all_deprecation_warnings 2 | 3 | 4 | silence_all_deprecation_warnings() 5 | -------------------------------------------------------------------------------- /testing_neuraxle/api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/testing_neuraxle/api/__init__.py -------------------------------------------------------------------------------- /testing_neuraxle/api/test_flask.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for Flask. 3 | ============================================ 4 | 5 | .. 6 | Copyright 2019, Neuraxio Inc. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | 20 | """ 21 | 22 | import numpy as np 23 | 24 | from neuraxle.rest.flask import JSONDataResponseEncoder, JSONDataBodyDecoder, FlaskRestApiWrapper 25 | from neuraxle.base import BaseTransformer 26 | 27 | 28 | def setup_api(): 29 | class Decoder(JSONDataBodyDecoder): 30 | """This is a custom JSON decoder class that precedes the pipeline's transformation.""" 31 | 32 | def decode(self, data_inputs): 33 | """ 34 | Transform a JSON list object into an np.array object. 35 | 36 | :param data_inputs: json object 37 | :return: np array for data inputs 38 | """ 39 | return np.array(data_inputs) 40 | 41 | class Encoder(JSONDataResponseEncoder): 42 | """This is a custom JSON response encoder class for converting the pipeline's transformation outputs.""" 43 | 44 | def encode(self, data_inputs) -> dict: 45 | """ 46 | Convert predictions to a dict for creating a JSON Response object. 
47 | 48 | :param data_inputs: 49 | :return: 50 | """ 51 | return { 52 | 'predictions': np.array(data_inputs).tolist() 53 | } 54 | 55 | class Multiplier(BaseTransformer): 56 | def transform(self, data_inputs): 57 | return 2 * data_inputs 58 | 59 | app = FlaskRestApiWrapper( 60 | json_decoder=Decoder(), 61 | wrapped=Multiplier(), 62 | json_encoder=Encoder() 63 | ).get_app() 64 | 65 | app.testing = True 66 | 67 | test_client = app.test_client() 68 | 69 | return test_client 70 | 71 | 72 | def test_api_wrapper_works(): 73 | test_client = setup_api() 74 | data_inputs = [ 75 | [0, 1, 2], 76 | [3, 4, 5], 77 | ] 78 | 79 | json_response = test_client.get('/', json=data_inputs) 80 | 81 | predictions_np_arr = np.array(json_response.json["predictions"]) 82 | expected_outputs = 2 * np.array(data_inputs) 83 | assert np.array_equal(predictions_np_arr, expected_outputs) 84 | -------------------------------------------------------------------------------- /testing_neuraxle/examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/testing_neuraxle/examples/__init__.py -------------------------------------------------------------------------------- /testing_neuraxle/examples/test_examples.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from py._path.local import LocalPath 3 | 4 | 5 | def test_auto_ml_loop_clean_kata(tmpdir: LocalPath): 6 | from examples.auto_ml.plot_automl_loop_clean_kata import main 7 | main(tmpdir) 8 | 9 | 10 | def test_easy_rest_api_serving(): 11 | from examples.deployment.plot_easy_rest_api_serving import main 12 | main() 13 | 14 | 15 | def test_force_handle_mixin(): 16 | from examples.getting_started.plot_force_handle_mixin import main 17 | main() 18 | 19 | 20 | def test_inverse_transform(): 21 | from examples.getting_started.plot_inverse_transform import main 22 | main() 23 | 24 | 25 | def test_label_encoder_across_multiple_columns(): 26 | from examples.getting_started.plot_label_encoder_across_multiple_columns import main 27 | main() 28 | 29 | 30 | def test_nested_pipelines(): 31 | from examples.getting_started.plot_nested_pipelines import main 32 | main() 33 | 34 | 35 | def test_non_fittable_mixin(): 36 | from examples.getting_started.plot_non_fittable_mixin import main 37 | main() 38 | 39 | 40 | def test_hyperparams(): 41 | from examples.hyperparams.plot_hyperparams import main 42 | main() 43 | 44 | 45 | def test_apply(): 46 | from examples.operations.plot_apply_method import main 47 | main() 48 | 49 | 50 | def test_parallel_streaming(): 51 | from examples.parallel.plot_streaming_pipeline import main 52 | main() 53 | 54 | 55 | def test_boston_housing_meta_optimization(tmpdir: LocalPath): 56 | from examples.sklearn.plot_boston_housing_meta_optimization import main 57 | main(tmpdir) 58 | 59 | 60 | def test_boston_housing_regression_with_model_stacking(): 61 | from examples.sklearn.plot_boston_housing_regression_with_model_stacking import main 62 | main() 63 | 64 | 65 | def test_cyclical_feature_engineering(): 66 | from examples.sklearn.plot_cyclical_feature_engineering import predictions 67 | print(predictions) 68 | assert predictions is not None 69 | -------------------------------------------------------------------------------- /testing_neuraxle/hyperparams/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/testing_neuraxle/hyperparams/__init__.py -------------------------------------------------------------------------------- /testing_neuraxle/hyperparams/test_space.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for Hyperparameters Distribution Spaces 3 | ============================================= 4 | 5 | .. 6 | Copyright 2019, Neuraxio Inc. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | 20 | """ 21 | import copy 22 | from collections import OrderedDict 23 | 24 | import pytest 25 | import scipy 26 | from neuraxle.hyperparams.distributions import (Boolean, Choice, 27 | FixedHyperparameter, 28 | HyperparameterDistribution, 29 | LogNormal, LogUniform, Normal, 30 | PriorityChoice, Quantized, 31 | RandInt, Uniform) 32 | from neuraxle.hyperparams.scipy_distributions import Gaussian, Poisson 33 | from neuraxle.hyperparams.space import (FlatDict, HyperparameterSamples, 34 | HyperparameterSpace, RecursiveDict) 35 | 36 | HYPERPARAMS_FLAT_AND_DICT_PAIRS = [( 37 | # Pair 1: 38 | { 39 | "a__learning_rate": 7 40 | }, 41 | { 42 | "a": { 43 | "learning_rate": 7 44 | } 45 | }), 46 | # Pair 2: 47 | ({ 48 | "b__a__learning_rate": 7, 49 | "b__learning_rate": 9 50 | }, 51 | { 52 | "b": { 53 | "a": { 54 | "learning_rate": 7 55 | }, 56 | "learning_rate": 9 57 | } 58 | }), 59 | ] 60 | 61 | 62 | @pytest.mark.parametrize("class_to_test", [RecursiveDict, HyperparameterSamples]) 63 | @pytest.mark.parametrize("flat,expected_dic", HYPERPARAMS_FLAT_AND_DICT_PAIRS) 64 | def test_flat_to_dict_hyperparams(flat: dict, expected_dic: dict, class_to_test): 65 | from_flat_dic = class_to_test(flat) 66 | from_nested_dic = class_to_test(expected_dic) 67 | 68 | assert from_flat_dic == from_nested_dic 69 | assert from_flat_dic.to_flat_dict() == flat 70 | assert from_nested_dic.to_flat_dict() == flat 71 | assert from_nested_dic.to_nested_dict() == expected_dic 72 | assert from_flat_dic.to_nested_dict() == expected_dic 73 | 74 | 75 | HYPE_SPACE = HyperparameterSpace(OrderedDict({ 76 | "a__b__c": PriorityChoice([0, 1, False, "Test"]), 77 | "a__b__q__c": Quantized(Uniform(-10, 10)), 78 | "a__b__q__q": Quantized(Uniform(-10, 10)), 79 | "a__c": Choice([0, 1, False, "Test"]), 80 | "a__e__q__c": Choice([0, 1, False, "Test"]), 81 | "a__test": Boolean(), 82 | "d__param": RandInt(-10, 10), 83 | "d__u": Uniform(-10, 10), 84 | "e__alpha": Normal(0.0, 1.0), 85 | "e__f__g": LogNormal(0.0, 2.0), 86 | "e__other": LogUniform(0.001, 10), 87 | "p__could_also_be_as_fixed": FixedHyperparameter("also hey"), 88 | "scipy__gaussian": Gaussian(-1, 1), 89 | "scipy__poisson": Poisson(1.0, 2.0), 90 | "scipy__scipy__gaussian": scipy.stats.randint(0, 10) 91 | })) 92 | 93 | 94 | def test_hyperparams_space_rvs_outputs_samples(): 95 | space = copy.deepcopy(HYPE_SPACE) 96 | 97 | samples = space.rvs() 98 | 99 | assert isinstance(samples, HyperparameterSamples) 100 | assert len(samples) == len(space) 
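# Each sampled key should be a known dimension of the space, and each sampled
# value should be a concrete value rather than a distribution object: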
101 | for k, v in samples.iter_flat(): 102 | assert k in space 103 | assert not isinstance(v, HyperparameterDistribution) 104 | 105 | 106 | @pytest.mark.parametrize("hd", list(HYPE_SPACE.to_flat_dict().values())) 107 | def test_hyperparams_space_rvs_outputs_in_range(hd: HyperparameterDistribution): 108 | for _ in range(20): 109 | 110 | sample = hd.rvs() 111 | 112 | assert sample in hd 113 | 114 | 115 | def test_wildcards(): 116 | EXPECTED_WILDCARDS = [ 117 | "*b__c", 118 | "*b*c", 119 | "*q", 120 | "a__c", 121 | "*e*c", 122 | "*test", 123 | "*param", 124 | "*u", 125 | "*alpha", 126 | "*g", 127 | "*other", 128 | "*could_also_be_as_fixed", 129 | "scipy__gaussian", 130 | "*poisson", 131 | "*scipy__gaussian", 132 | ] 133 | 134 | wildcards: FlatDict = HYPE_SPACE.to_wildcards() 135 | 136 | for wc, ewc in zip(wildcards.keys(), EXPECTED_WILDCARDS): 137 | assert wc == ewc, f"{wc} != {ewc}, but should be equal as expected." 138 | for wv, ewv in zip(wildcards.values(), HYPE_SPACE.to_flat_dict().values()): 139 | assert wv == ewv, f"{str(wv)} != {str(ewv)}, but should remain the same." 140 | -------------------------------------------------------------------------------- /testing_neuraxle/metaopt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/testing_neuraxle/metaopt/__init__.py -------------------------------------------------------------------------------- /testing_neuraxle/metaopt/test_automl_redesign.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Callable, Optional 3 | 4 | import numpy as np 5 | import pytest 6 | from neuraxle.base import BaseStep 7 | from neuraxle.base import ExecutionContext as CX 8 | from neuraxle.base import Identity, NonFittableMixin 9 | from neuraxle.data_container import DataContainer as DACT 10 | from neuraxle.hyperparams.distributions import Boolean, Choice, LogUniform, RandInt 11 | from neuraxle.hyperparams.space import HyperparameterSpace 12 | from neuraxle.metaopt.auto_ml import AutoML, RandomSearchSampler 13 | from neuraxle.metaopt.callbacks import EarlyStoppingCallback, MetricCallback, ScoringCallback 14 | from neuraxle.metaopt.context import AutoMLContext 15 | from neuraxle.metaopt.data.vanilla import ScopedLocation 16 | from neuraxle.metaopt.repositories.repo import HyperparamsRepository, VanillaHyperparamsRepository 17 | from neuraxle.metaopt.validation import ValidationSplitter 18 | from neuraxle.pipeline import Pipeline 19 | from neuraxle.steps.numpy import NumpyRavel 20 | from neuraxle.steps.output_handlers import OutputTransformerWrapper 21 | from neuraxle.steps.sklearn import SKLearnWrapper 22 | from sklearn.linear_model import LogisticRegression 23 | from sklearn.metrics import accuracy_score, mean_squared_error 24 | from sklearn.preprocessing import StandardScaler 25 | from testing_neuraxle.metaopt.test_automl_repositories import CX_WITH_REPO_CTORS, TmpDir 26 | 27 | 28 | def _create_data_source(): 29 | data_inputs = np.random.random((25, 50)).astype(np.float32) 30 | expected_outputs = (np.random.random((25,)) > 0.5).astype(np.int32) 31 | return data_inputs, expected_outputs 32 | 33 | 34 | class SetNoneEO(Identity): 35 | 36 | def __init__(self): 37 | Identity.__init__(self) 38 | 39 | def _will_process(self, dact: DACT, cx: CX): 40 | dact, cx = Identity._will_process(self, dact, cx) 41 | dact = dact.with_eo(None) 42 | return dact, cx 43 | 44 | 45 | 
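# The step below fails on purpose: the test asserts that the raised error
# ends up in the trial's logs when `continue_loop_on_error=True`.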
class FailingStep(NonFittableMixin, BaseStep): 46 | 47 | def __init__(self): 48 | BaseStep.__init__(self) 49 | NonFittableMixin.__init__(self) 50 | 51 | def _will_process(self, dact: DACT, cx: CX): 52 | raise ValueError("This error should be found in the logs of the test.") 53 | # No return here: this method always raises, on purpose. 54 | 55 | 56 | def _create_pipeline(has_failing_step=False): 57 | return Pipeline([ 58 | StandardScaler(), 59 | OutputTransformerWrapper(NumpyRavel()), 60 | SKLearnWrapper( 61 | LogisticRegression(), 62 | HyperparameterSpace({ 63 | 'C': LogUniform(0.01, 10.0), 64 | 'fit_intercept': Boolean(), 65 | 'penalty': Choice(['none', 'l2']), 66 | 'max_iter': RandInt(20, 200) 67 | }) 68 | ), 69 | FailingStep() if has_failing_step else Identity(), 70 | SetNoneEO(), 71 | ]) 72 | 73 | 74 | @pytest.mark.parametrize('cx_repo_ctor', CX_WITH_REPO_CTORS) 75 | @pytest.mark.parametrize('has_failing_step', [False, True]) 76 | def test_automl_api_entry_point(tmpdir, cx_repo_ctor: Callable[[Optional[TmpDir]], AutoMLContext], has_failing_step: bool): 77 | data_inputs, expected_outputs = _create_data_source() 78 | dact = DACT(data_inputs=data_inputs, expected_outputs=expected_outputs) 79 | pipeline = _create_pipeline(has_failing_step=has_failing_step) 80 | # TODO: # HyperbandControllerLoop(), ClusteringParallelFor() ? 81 | 82 | a: AutoML = AutoML( 83 | pipeline=pipeline, 84 | validation_splitter=ValidationSplitter(0.20), 85 | hyperparams_optimizer=RandomSearchSampler(), 86 | hyperparams_repository=VanillaHyperparamsRepository(cache_folder=os.path.join(tmpdir, "hp")), 87 | scoring_callback=ScoringCallback(mean_squared_error), 88 | callbacks=[ 89 | MetricCallback('accuracy', metric_function=accuracy_score, higher_score_is_better=False), 90 | EarlyStoppingCallback(max_epochs_without_improvement=3) 91 | ], 92 | continue_loop_on_error=True, 93 | n_trials=4, 94 | epochs=5, 95 | refit_best_trial=False, 96 | ) 97 | cx: CX = cx_repo_ctor() 98 | repo: HyperparamsRepository = cx.repo 99 | 100 | a = a.handle_fit(dact, cx) 101 | 102 | if has_failing_step: 103 | assert 'ValueError("This error should be found in the logs of the test.")' in repo.get_log_from_logging_handler( 104 | cx.logger, ScopedLocation()) 105 | else: 106 | a, _out = a.to_force_refit_best_trial().handle_fit_transform(dact, cx) 107 | assert _out is not None 108 | -------------------------------------------------------------------------------- /testing_neuraxle/metaopt/test_automl_reports.py: -------------------------------------------------------------------------------- 1 | 2 | from typing import List 3 | 4 | import pytest 5 | from neuraxle.metaopt.data.reporting import (BaseReport, ClientReport, 6 | MetricResultsReport, 7 | ProjectReport, RoundReport, 8 | TrialReport, TrialSplitReport, 9 | dataclass_2_report) 10 | from neuraxle.metaopt.data.vanilla import BaseDataclass 11 | from testing_neuraxle.metaopt.test_automl_dataclasses import ( 12 | ALL_DATACLASSES, HYPERPARAMS_DIMS_WILDCARDS, SOME_CLIENT_DATACLASS, 13 | SOME_METRIC_NAME, SOME_PROJECT_DATACLASS, SOME_ROUND_DATACLASS, SOME_TRIAL_DATACLASS) 14 | 15 | 16 | def test_project_report_to_clients_with_best_scores_df(): 17 | pr = ProjectReport(SOME_PROJECT_DATACLASS) 18 | 19 | df = pr.to_clients_with_best_scores_df() 20 | 21 | assert ClientReport.CLIENT_ID_COLUMN_NAME in df.columns 22 | 23 | 24 | def test_client_report_to_rounds_with_best_scores_df(): 25 | cr = ClientReport(SOME_CLIENT_DATACLASS) 26 | 27 | df = cr.to_rounds_with_best_scores_df() 28 | 29 | assert RoundReport.ROUND_ID_COLUMN_NAME in df.columns 30 | 31 | 32
| def test_round_dc_to_scatterplot_df(): 33 | rr = RoundReport(SOME_ROUND_DATACLASS) 34 | 35 | df = rr.to_round_scatterplot_df(SOME_METRIC_NAME, HYPERPARAMS_DIMS_WILDCARDS) 36 | 37 | assert SOME_METRIC_NAME in df.columns 38 | assert TrialReport.TRIAL_ID_COLUMN_NAME in df.columns 39 | for d in HYPERPARAMS_DIMS_WILDCARDS: 40 | assert d in df.columns 41 | 42 | 43 | def test_round_dc_to_scores_over_time_df(): 44 | rr = RoundReport(SOME_ROUND_DATACLASS) 45 | 46 | df = rr.to_scores_over_time_df(SOME_METRIC_NAME, HYPERPARAMS_DIMS_WILDCARDS) 47 | 48 | assert SOME_METRIC_NAME in df.columns 49 | assert TrialReport.TRIAL_ID_COLUMN_NAME in df.columns 50 | assert MetricResultsReport.EPOCH_COLUMN_NAME in df.columns 51 | for d in HYPERPARAMS_DIMS_WILDCARDS: 52 | assert d in df.columns 53 | 54 | 55 | def test_round_metric_names(): 56 | rr = RoundReport(SOME_ROUND_DATACLASS) 57 | 58 | assert rr.get_metric_names() == [SOME_METRIC_NAME] 59 | 60 | 61 | @pytest.mark.parametrize("discard_singles,expected_hp_dims", ([False, HYPERPARAMS_DIMS_WILDCARDS], [True, []])) 62 | def test_round_hp_wildcards_scenario(discard_singles: bool, expected_hp_dims: List[str]): 63 | rr = RoundReport(SOME_ROUND_DATACLASS) 64 | 65 | hp_wildcards = rr.list_hyperparameters_wildcards(discard_singles=discard_singles) 66 | 67 | assert hp_wildcards == expected_hp_dims 68 | 69 | 70 | @pytest.mark.parametrize('dc', ALL_DATACLASSES[1:]) 71 | def test_reports_has_sufficient_dc_info(dc: BaseDataclass): 72 | r: dataclass_2_report[dc.__class__] = BaseReport.from_dc(dc) 73 | df = r.info_df() 74 | 75 | assert len(dc.to_dict()) - 3 == len(df.index), ( 76 | f"Dataclass dc={dc} should have rows for each attribute that isn't the " 77 | f"class name, id, or subdataclasses collections. Got df={df.to_string()}." 78 | ) 79 | 80 | 81 | def test_trial_report_to_scores_over_time_df(): 82 | tr = TrialReport(SOME_TRIAL_DATACLASS) 83 | 84 | df = tr.to_scores_over_time_df(SOME_METRIC_NAME) 85 | 86 | assert TrialSplitReport.TRIAL_SPLIT_ID_COLUMN_NAME in df.columns 87 | assert MetricResultsReport.EPOCH_COLUMN_NAME in df.columns 88 | assert MetricResultsReport.TRAIN_VAL_COLUMN_NAME in df.columns 89 | assert SOME_METRIC_NAME in df.columns 90 | -------------------------------------------------------------------------------- /testing_neuraxle/metaopt/test_automl_sequence_validation_splitter.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import numpy as np 4 | from neuraxle.base import ExecutionContext as CX 5 | from neuraxle.data_container import DataContainer as DACT 6 | from neuraxle.hyperparams.distributions import RandInt 7 | from neuraxle.hyperparams.space import FlatDict, HyperparameterSpace 8 | from neuraxle.metaopt.auto_ml import AutoML 9 | from neuraxle.metaopt.callbacks import MetricCallback 10 | from neuraxle.metaopt.context import AutoMLContext 11 | from neuraxle.metaopt.data.aggregates import Round 12 | from neuraxle.metaopt.data.vanilla import ScopedLocation 13 | from neuraxle.metaopt.optimizer import (GridExplorationSampler, 14 | RandomSearchSampler) 15 | from neuraxle.metaopt.validation import (KFoldCrossValidationSplitter, 16 | ValidationSplitter) 17 | from neuraxle.pipeline import Pipeline 18 | from neuraxle.steps.numpy import MultiplyByN 19 | from sklearn.metrics import mean_squared_error 20 | 21 | 22 | def test_automl_sequence_splitter(tmpdir): 23 | # Setting seed for better reproducibility 24 | np.random.seed(68) 25 | 26 | # Given 27 | data_inputs = np.array(range(100)) 28 | 
expected_outputs = np.array(range(100, 200)) 29 | 30 | hyperparameter_space = HyperparameterSpace({ 31 | 'multiplication_1__multiply_by': RandInt(1, 3), 32 | 'multiplication_2__multiply_by': RandInt(1, 3), 33 | 'multiplication_3__multiply_by': RandInt(1, 3), 34 | }) 35 | 36 | pipeline = Pipeline([ 37 | ('multiplication_1', MultiplyByN()), 38 | ('multiplication_2', MultiplyByN()), 39 | ('multiplication_3', MultiplyByN()) 40 | ]).set_hyperparams_space(hyperparameter_space) 41 | 42 | auto_ml = AutoML( 43 | pipeline=pipeline, 44 | hyperparams_optimizer=RandomSearchSampler(), 45 | validation_splitter=KFoldCrossValidationSplitter(k_fold=4), 46 | callbacks=[MetricCallback("MSE", mean_squared_error, False)], 47 | ) 48 | 49 | # When 50 | auto_ml = auto_ml.handle_fit( 51 | DACT(data_inputs=data_inputs, expected_outputs=expected_outputs), CX(tmpdir)) 52 | predicted_outputs = auto_ml.transform(data_inputs) 53 | 54 | # Then 55 | actual_mse = ((predicted_outputs - expected_outputs) ** 2).mean() 56 | assert actual_mse < 20000 57 | 58 | 59 | def test_automl_validation_splitter(tmpdir): 60 | # Setting seed for reproducibility 61 | np.random.seed(75) 62 | # Given 63 | cx = AutoMLContext.from_context() 64 | data_inputs = np.array(range(1000, 1020)) 65 | expected_outputs = np.array(range(2020, 2040)) 66 | hyperparameter_space = HyperparameterSpace({ 67 | 'multiplication_1__multiply_by': RandInt(1, 3), 68 | 'multiplication_2__multiply_by': RandInt(1, 3), 69 | }) 70 | pipeline = Pipeline([ 71 | ('multiplication_1', MultiplyByN()), 72 | ('multiplication_2', MultiplyByN()), 73 | ]).set_hyperparams_space(hyperparameter_space) 74 | 75 | hp_search = AutoML( 76 | pipeline=pipeline, 77 | validation_splitter=ValidationSplitter(validation_size=0.2), 78 | scoring_callback=MetricCallback("MSE", mean_squared_error, False), 79 | hyperparams_optimizer=GridExplorationSampler(9), 80 | n_trials=8, 81 | ).with_context(cx) 82 | 83 | # When 84 | hp_search = hp_search.fit(data_inputs, expected_outputs) 85 | predicted_outputs = hp_search.transform(data_inputs) 86 | 87 | # Then 88 | optimal_mse = mean_squared_error(expected_outputs, data_inputs * 2) 89 | actual_mse = mean_squared_error(expected_outputs, predicted_outputs) 90 | assert actual_mse == optimal_mse 91 | 92 | 93 | def test_grid_exploration_sampler_can_try_everything(): 94 | hp_space = HyperparameterSpace({ 95 | 'a': RandInt(1, 3), 96 | 'b': RandInt(1, 3), 97 | 'c': RandInt(1, 3), 98 | }) 99 | max_trials = 3 * 3 * 3 100 | ges = GridExplorationSampler(max_trials) 101 | _round: Round = Round.from_context(AutoMLContext.from_context(loc=ScopedLocation.default(0))) 102 | _round.with_optimizer(ges, hp_space) 103 | 104 | for _ in range(max_trials): 105 | with _round.new_rvs_trial(): 106 | pass 107 | 108 | trials_hps: List[FlatDict] = _round.report.get_all_hyperparams(as_flat=True) 109 | unique_trials = set([tuple(r.items()) for r in trials_hps]) 110 | assert len(unique_trials) == max_trials 111 | -------------------------------------------------------------------------------- /testing_neuraxle/metaopt/test_database_repo.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | 4 | import pytest 5 | from neuraxle.metaopt.data.vanilla import (DEFAULT_CLIENT, DEFAULT_PROJECT, 6 | ClientDataclass, ProjectDataclass, 7 | RootDataclass, ScopedLocation) 8 | from neuraxle.metaopt.repositories.db import (Base, ClientNode, DataClassNode, 9 | ProjectNode, 10 | ScopedLocationTreeNode, 11 | SQLLiteHyperparamsRepository) 12 | from sqlalchemy import 
and_, create_engine 13 | from sqlalchemy.orm import sessionmaker 14 | from testing_neuraxle.metaopt.test_automl_dataclasses import \ 15 | SOME_PROJECT_DATACLASS 16 | 17 | 18 | def get_sqlite_session_with_root(tmpdir): 19 | sqlite_filepath = os.path.join(tmpdir, "sqlite.db") 20 | engine = create_engine(f"sqlite:///{sqlite_filepath}", echo=True, future=True) 21 | Session = sessionmaker() 22 | Session.configure(bind=engine) 23 | session = Session() 24 | Base.metadata.create_all(engine) 25 | session.commit() 26 | 27 | root_dcn = DataClassNode(RootDataclass()) 28 | root = ScopedLocationTreeNode(root_dcn, None) 29 | session.add(root) 30 | 31 | session.commit() 32 | return session, root 33 | 34 | 35 | def test_sqlalchemy_sqlite_nodes_star_schema_joins(tmpdir): 36 | session, root = get_sqlite_session_with_root(tmpdir) 37 | 38 | def_proj = ProjectNode(ProjectDataclass(project_name="def_proj")) 39 | project = ScopedLocationTreeNode(def_proj, parent=root) 40 | session.add(project) 41 | session.commit() 42 | 43 | def_client = ClientNode(ClientDataclass(client_name="def_client")) 44 | client = ScopedLocationTreeNode(def_client, parent=project) 45 | session.add(client) 46 | session.commit() 47 | 48 | session.expunge_all() 49 | q = session.query( 50 | ScopedLocationTreeNode.project_name, ScopedLocationTreeNode.client_name 51 | ) 52 | assert q[0] == (None, None) 53 | assert q[1] == ("def_proj", None) 54 | assert q[2] == ("def_proj", "def_client") 55 | 56 | 57 | def test_root_db_node_can_be_queried(tmpdir): 58 | session = get_sqlite_session_with_root(tmpdir)[0] 59 | 60 | root_tree_node = session.query(ScopedLocationTreeNode).filter( 61 | and_(*[ 62 | getattr(ScopedLocationTreeNode, attr) == None 63 | for attr in ScopedLocation.__dataclass_fields__ 64 | ]) 65 | ).one() 66 | 67 | assert root_tree_node.project_name is None 68 | assert root_tree_node.client_name is None 69 | assert root_tree_node.round_number is None 70 | 71 | 72 | @pytest.mark.parametrize("deep", [True, False]) 73 | def test_can_use_sqlite_db_repo_to_save_and_load_and_overwrite_simple_project(tmpdir, deep): 74 | repo = SQLLiteHyperparamsRepository(tmpdir) 75 | project: ProjectDataclass = SOME_PROJECT_DATACLASS 76 | project_loc = ScopedLocation.default().at_dc(project) 77 | 78 | repo.save(project, project_loc, deep=deep) 79 | project_reloaded = repo.load(project_loc, deep=deep) 80 | repo.save(project_reloaded, project_loc, deep=deep) 81 | -------------------------------------------------------------------------------- /testing_neuraxle/metaopt/test_validation_splitter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from neuraxle.base import ExecutionContext as CX 3 | from neuraxle.data_container import DataContainer as DACT 4 | from neuraxle.hyperparams.space import HyperparameterSpace 5 | from neuraxle.metaopt.auto_ml import Trainer 6 | from neuraxle.metaopt.callbacks import MetricCallback 7 | from neuraxle.metaopt.data.aggregates import Round 8 | from neuraxle.metaopt.optimizer import GridExplorationSampler 9 | from neuraxle.metaopt.validation import ValidationSplitter 10 | from neuraxle.steps.misc import FitTransformCallbackStep, TapeCallbackFunction 11 | from sklearn.metrics import mean_squared_error 12 | 13 | 14 | def test_validation_splitter_handler_methods_should_split_data(tmpdir): 15 | transform_callback = TapeCallbackFunction() 16 | fit_callback = TapeCallbackFunction() 17 | pipeline = FitTransformCallbackStep( 18 | transform_callback_function=transform_callback, 19 | fit_callback_function=fit_callback, 20 | transform_function=lambda di: di * 2 21 | ) 22 | metric: MetricCallback = MetricCallback("MSE", mean_squared_error, False) 23 | trainer = Trainer( 24 | callbacks=[metric], 25 | validation_splitter=ValidationSplitter(validation_size=0.1), 26 | n_epochs=1, 27 | ) 28 | 29 | data_inputs = np.random.randint(low=1, high=100, size=(100, 5)) 30 | expected_outputs = np.random.randint(low=1, high=100, size=(100, 5)) 31 | dact = DACT(di=data_inputs, eo=expected_outputs) 32 | 33 | round_scope: Round = Round.dummy().with_metric(metric.name).save(deep=False) 34 | with round_scope.with_optimizer(GridExplorationSampler(), HyperparameterSpace()).new_rvs_trial() as trial_scope: 35 | trained_pipeline: FitTransformCallbackStep = trainer.train( 36 | pipeline, dact, trial_scope, return_trained_pipelines=True)[0] 37 | 38 | predicted_outputs = trained_pipeline.predict(data_inputs) 39 | fit_callback = trained_pipeline.fit_callback_function 40 | transform_callback = trained_pipeline.transform_callback_function 41 | 42 | assert np.array_equal(predicted_outputs, data_inputs * 2) 43 | 44 | # should fit on the train split only 45 | assert np.array_equal(fit_callback.data[0][0], data_inputs[0:90]) 46 | assert np.array_equal(fit_callback.data[0][1], expected_outputs[0:90]) 47 | 48 | # should transform on the train split first, then on the validation split 49 | assert np.array_equal(transform_callback.data[0], data_inputs[0:90]) 50 | assert np.array_equal(transform_callback.data[1], data_inputs[90:]) 51 | 52 | # should predict on all data at the end 53 | assert np.array_equal(transform_callback.data[2], data_inputs) 54 | 55 | with round_scope.last_trial() as trial_scope: 56 | assert trial_scope.get_avg_validation_score(metric.name) is not None 57 | -------------------------------------------------------------------------------- /testing_neuraxle/mocks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/testing_neuraxle/mocks/__init__.py -------------------------------------------------------------------------------- /testing_neuraxle/mocks/step_mocks.py: -------------------------------------------------------------------------------- 1 | from neuraxle.base import BaseStep, TruncableSteps, MetaStep, BaseTransformer 2 | from neuraxle.hyperparams.distributions import LogUniform, Quantized, RandInt, Boolean 3 | from neuraxle.hyperparams.space import HyperparameterSpace, HyperparameterSamples 4 | 5 | HYPERPARAMETERS_SPACE = HyperparameterSpace({ 6 | 'learning_rate': LogUniform(0.0001, 0.1), 7 | 'l2_weight_reg': LogUniform(0.0001, 0.1), 8 | 'momentum': LogUniform(0.01, 1.0), 9 | 'hidden_size': Quantized(LogUniform(16, 512)), 10 | 'num_layers': RandInt(1, 4), 11 | 'num_lstm_layers': RandInt(1, 2), 12 | 'use_xavier_init': Boolean(), 13 | 'use_max_pool_else_avg_pool': Boolean(), 14 | 'dropout_drop_proba': LogUniform(0.3, 0.7) 15 | }) 16 | 17 | HYPERPARAMETERS = HyperparameterSamples({ 18 | 'learning_rate': 0.1, 19 | 'l2_weight_reg': 0.001, 20 | 'hidden_size': 32, 21 | 'num_layers': 3, 22 | 'num_lstm_layers': 1, 23 | 'use_xavier_init': True, 24 | 'use_max_pool_else_avg_pool': True, 25 | 'dropout_drop_proba': 0.5, 26 | 'momentum': 0.1 27 | }) 28 | 29 | AN_INPUT = "I am an input" 30 | AN_EXPECTED_OUTPUT = "I am an expected output" 31 | 32 | 33 | class SomeStep(BaseTransformer): 34 | def __init__(self, hyperparams_space: HyperparameterSpace = None, output=AN_EXPECTED_OUTPUT): 
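# A simple mock transformer: regardless of the data inputs' values, it emits its constant `output` once per input item (see transform below).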
35 | super().__init__(hyperparams=None, hyperparams_space=hyperparams_space) 36 | self.output = output 37 | 38 | def transform(self, data_inputs): 39 | return [self.output] * len(data_inputs) 40 | 41 | 42 | class SomeStepWithHyperparams(BaseStep): 43 | def __init__(self): 44 | super().__init__( 45 | hyperparams=HYPERPARAMETERS, 46 | hyperparams_space=HYPERPARAMETERS_SPACE, 47 | name="MockStep" 48 | ) 49 | 50 | def transform(self, data_inputs): 51 | pass 52 | 53 | def fit(self, data_inputs, expected_outputs=None): 54 | pass 55 | 56 | 57 | class SomeMetaStepWithHyperparams(MetaStep): 58 | def __init__(self): 59 | MetaStep.__init__(self, wrapped=SomeStepWithHyperparams()) 60 | 61 | def transform(self, data_inputs): 62 | pass 63 | 64 | def fit(self, data_inputs, expected_outputs=None): 65 | pass 66 | 67 | 68 | class SomeTruncableStep(TruncableSteps): 69 | def __init__(self): 70 | TruncableSteps.__init__(self, 71 | hyperparams=HYPERPARAMETERS, 72 | hyperparams_space=HYPERPARAMETERS_SPACE, 73 | steps_as_tuple=(SomeStepWithHyperparams(), SomeStepWithHyperparams()) 74 | ) 75 | 76 | def transform(self, data_inputs): 77 | pass 78 | 79 | def fit(self, data_inputs, expected_outputs=None): 80 | pass 81 | 82 | 83 | class SomeSplitStep(BaseStep): 84 | def transform(self, data_inputs): 85 | pass 86 | -------------------------------------------------------------------------------- /testing_neuraxle/steps/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/testing_neuraxle/steps/__init__.py -------------------------------------------------------------------------------- /testing_neuraxle/steps/neuraxle_test_case.py: -------------------------------------------------------------------------------- 1 | """ 2 | Neuraxle Test Case Class 3 | ======================================== 4 | 5 | .. 6 | Copyright 2019, Neuraxio Inc. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 
19 | 20 | """ 21 | import numpy as np 22 | 23 | from neuraxle.base import ExecutionMode 24 | 25 | 26 | class NeuraxleTestCase: 27 | def __init__( 28 | self, 29 | pipeline, 30 | callbacks, 31 | expected_callbacks_data, 32 | hyperparams_space=None, 33 | hyperparams=None, 34 | expected_processed_outputs=None, 35 | execution_mode=None, 36 | more_arguments=None, 37 | data_inputs=None, 38 | expected_outputs=None 39 | ): 40 | self.expected_outputs = expected_outputs 41 | self.data_inputs = data_inputs 42 | self.execution_mode = execution_mode 43 | self.pipeline = pipeline 44 | self.callbacks = callbacks 45 | self.expected_callbacks_data = expected_callbacks_data 46 | self.hyperparams = hyperparams 47 | self.hyperparams_space = hyperparams_space 48 | self.expected_processed_outputs = expected_processed_outputs 49 | self.more_arguments = more_arguments 50 | 51 | def assert_callback_data_is_as_expected(self): 52 | for callback, expected_callback_data in zip(self.callbacks, self.expected_callbacks_data): 53 | if len(callback.data) > 0: 54 | if isinstance(callback.data[0], tuple): 55 | for (expected_di, expected_eo), (actual_di, actual_eo) in zip(expected_callback_data, callback.data): 56 | assert np.array_equal(expected_di, actual_di) 57 | assert np.array_equal(expected_eo, actual_eo) 58 | else: 59 | assert np.array_equal( 60 | np.array(callback.data), 61 | expected_callback_data 62 | ) 63 | else: 64 | assert np.array_equal( 65 | np.array([]), 66 | np.array(expected_callback_data) 67 | ) 68 | 69 | def assert_expected_processed_outputs(self, processed_outputs): 70 | if self.execution_mode != ExecutionMode.FIT: 71 | assert np.array_equal(processed_outputs, self.expected_processed_outputs) 72 | 73 | def execute(self): 74 | for c in self.callbacks: 75 | c.reset() 76 | 77 | processed_outputs = None 78 | if self.execution_mode == ExecutionMode.TRANSFORM: 79 | processed_outputs = self.pipeline.transform(self.data_inputs) 80 | if self.execution_mode == ExecutionMode.FIT_TRANSFORM: 81 | self.pipeline, processed_outputs = self.pipeline.fit_transform(self.data_inputs, self.expected_outputs) 82 | if self.execution_mode == ExecutionMode.FIT: 83 | self.pipeline = self.pipeline.fit(self.data_inputs, self.expected_outputs) 84 | 85 | return processed_outputs 86 | -------------------------------------------------------------------------------- /testing_neuraxle/steps/test_assertion_steps.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | import numpy as np 4 | import pytest 5 | from neuraxle.base import AssertExpectedOutputIsNone, BaseStep 6 | from neuraxle.base import ExecutionContext as CX 7 | from neuraxle.base import ExecutionPhase, HandleOnlyMixin, NonFittableMixin 8 | from neuraxle.data_container import DataContainer as DACT 9 | from neuraxle.pipeline import Pipeline 10 | 11 | 12 | class SomeAssertionStep(NonFittableMixin, HandleOnlyMixin, BaseStep): 13 | def __init__(self): 14 | BaseStep.__init__(self) 15 | HandleOnlyMixin.__init__(self) 16 | 17 | def _transform_data_container(self, data_container: DACT, context: CX) -> DACT: 18 | _, data_inputs, expected_outputs = data_container.tolist().unpack() 19 | if expected_outputs is not None: 20 | self._assert_equals(data_inputs, expected_outputs, "Assertion failed", context) 21 | return data_inputs 22 | 23 | 24 | class TestAssertionMethodInSteps(TestCase): 25 | 26 | def test_assertion_step_logs_and_raises_with_pipeline(self): 27 | data_inputs = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) 28 | 
expected_outputs = data_inputs * 2 29 | dact = DACT(data_inputs, None, expected_outputs) 30 | p = Pipeline([SomeAssertionStep()]) 31 | 32 | with self.assertLogs() as captured: 33 | with pytest.raises(AssertionError): 34 | p.handle_fit_transform(dact, context=CX()) 35 | 36 | self.assertIn("Assertion failed", captured.output[0]) 37 | 38 | def test_assertion_step_just_logs_with_pipeline_in_prod(self): 39 | data_inputs = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) 40 | expected_outputs = data_inputs * 2 41 | dact = DACT(data_inputs, None, expected_outputs) 42 | p = Pipeline([SomeAssertionStep()]) 43 | context = CX(execution_phase=ExecutionPhase.PROD) 44 | try: 45 | p = p.handle_fit(dact, context=context) 46 | except AssertionError: 47 | pass 48 | 49 | with self.assertLogs() as captured: 50 | p.handle_predict(dact, context=context) 51 | 52 | # assert that the log still at least contains the expected message: 53 | self.assertIn("Assertion failed", captured.output[0]) 54 | 55 | 56 | def test_expectedoutputnull_raise_exception_when_notnull(tmpdir): 57 | data_inputs = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) 58 | expected_outputs = data_inputs * 2 59 | 60 | p = Pipeline([AssertExpectedOutputIsNone()]) 61 | 62 | with pytest.raises(AssertionError) as error_info: 63 | p.fit_transform(data_inputs, expected_outputs) 64 | 65 | 66 | def test_expectedoutputnull_is_fine_when_null(tmpdir): 67 | data_inputs = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) 68 | expected_outputs = None 69 | 70 | p = Pipeline([AssertExpectedOutputIsNone()]) 71 | p.fit_transform(data_inputs, expected_outputs) 72 | -------------------------------------------------------------------------------- /testing_neuraxle/steps/test_column_selector_2d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from neuraxle.steps.column_transformer import ColumnSelector2D, ColumnsSelectorND, NumpyColumnSelector2D 5 | 6 | 7 | @pytest.mark.parametrize('column_selector_2d_class', [ColumnSelector2D, NumpyColumnSelector2D]) 8 | def test_column_selector_2d_should_select_range(column_selector_2d_class): 9 | step = column_selector_2d_class(range(0, 10)) 10 | data_inputs, expected_outputs = _create_data_source((20, 20)) 11 | 12 | outputs = step.transform(data_inputs) 13 | 14 | assert np.array_equal(outputs, data_inputs[..., :10]) 15 | 16 | 17 | @pytest.mark.parametrize('column_selector_2d_class', [ColumnSelector2D, NumpyColumnSelector2D]) 18 | def test_column_selector_2d_should_select_int(column_selector_2d_class): 19 | step = column_selector_2d_class(10) 20 | data_inputs, expected_outputs = _create_data_source((20, 20)) 21 | 22 | outputs = step.transform(data_inputs) 23 | 24 | expected_data_inputs = np.expand_dims(data_inputs[..., 10], axis=-1) 25 | assert np.array_equal(outputs, expected_data_inputs) 26 | 27 | 28 | @pytest.mark.parametrize('column_selector_2d_class', [ColumnSelector2D, NumpyColumnSelector2D]) 29 | def test_column_selector_2d_should_select_slice(column_selector_2d_class): 30 | step = column_selector_2d_class(slice(0, 10, 1)) 31 | data_inputs, expected_outputs = _create_data_source((20, 20)) 32 | 33 | outputs = step.transform(data_inputs) 34 | 35 | assert np.array_equal(outputs, data_inputs[..., :10]) 36 | 37 | 38 | @pytest.mark.parametrize('column_selector_2d_class', [ColumnSelector2D, NumpyColumnSelector2D]) 39 | def test_column_selector_2d_should_select_list_of_indexes(column_selector_2d_class): 40 | step = column_selector_2d_class([0, 1, 2]) 41 | 
data_inputs, expected_outputs = _create_data_source((20, 20)) 42 | 43 | outputs = step.transform(data_inputs) 44 | 45 | assert np.array_equal(outputs, data_inputs[..., :3]) 46 | 47 | 48 | def test_column_selector_nd_should_transform_with_column_selector_2d(): 49 | step = ColumnsSelectorND(0, n_dimension=2) 50 | data_inputs, expected_outputs = _create_data_source((20, 20)) 51 | 52 | outputs = step.transform(data_inputs) 53 | 54 | assert np.array_equal(outputs, np.expand_dims(data_inputs[..., 0], axis=-1)) 55 | 56 | 57 | def _create_data_source(shape): 58 | data_inputs = np.random.random(shape).astype(np.float32) 59 | expected_outputs = np.random.random(shape).astype(np.float32) 60 | return data_inputs, expected_outputs 61 | -------------------------------------------------------------------------------- /testing_neuraxle/steps/test_concatenate_data_container.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from neuraxle.base import ExecutionContext as CX 4 | from neuraxle.data_container import DataContainer as DACT 5 | from neuraxle.pipeline import Pipeline 6 | from neuraxle.steps.data import InnerConcatenateDataContainer, ZipBatchDataContainer 7 | 8 | TIMESTEPS = 10 9 | FEATURES = 5 10 | VALIDATION_SIZE = 0.1 11 | BATCH_SIZE = 32 12 | N_EPOCHS = 10 13 | SHAPE_3D = (BATCH_SIZE, TIMESTEPS, FEATURES) 14 | SHAPE_2D = (BATCH_SIZE, TIMESTEPS) 15 | SHAPE_1D = BATCH_SIZE 16 | 17 | 18 | def test_inner_concatenate_data_should_merge_3d_with_3d(): 19 | # Given 20 | data_inputs_3d, expected_outputs_3d = _create_data_source(SHAPE_3D) 21 | data_inputs_3d_second, expected_outputs_3d_second = _create_data_source(SHAPE_3D) 22 | data_container_3d_second = DACT(data_inputs=data_inputs_3d_second, 23 | expected_outputs=expected_outputs_3d_second) 24 | data_container = DACT(data_inputs=data_inputs_3d, expected_outputs=expected_outputs_3d) \ 25 | .add_sub_data_container('2d', data_container_3d_second) 26 | 27 | # When 28 | p = Pipeline([ 29 | InnerConcatenateDataContainer(sub_data_container_names=['2d']) 30 | ]) 31 | 32 | data_container = p.handle_transform(data_container, CX()) 33 | 34 | # Then 35 | assert data_container.data_inputs.shape == (SHAPE_3D[0], SHAPE_3D[1], SHAPE_3D[2] * 2) 36 | assert data_container.expected_outputs.shape == (SHAPE_3D[0], SHAPE_3D[1], SHAPE_3D[2] * 2) 37 | assert np.array_equal(data_container.data_inputs[..., -SHAPE_3D[2]:], data_container_3d_second.data_inputs) 38 | assert np.array_equal(data_container.expected_outputs[..., -SHAPE_3D[2]:], 39 | data_container_3d_second.expected_outputs) 40 | 41 | 42 | def test_inner_concatenate_data_should_merge_2d_with_3d(): 43 | # Given 44 | data_inputs_3d, expected_outputs_3d = _create_data_source(SHAPE_3D) 45 | data_inputs_2d, expected_outputs_2d = _create_data_source(SHAPE_2D) 46 | data_container_2d = DACT(data_inputs=data_inputs_2d, expected_outputs=expected_outputs_2d) 47 | data_container_3d = DACT(data_inputs=data_inputs_3d, expected_outputs=expected_outputs_3d) \ 48 | .add_sub_data_container('2d', data_container_2d) 49 | 50 | # When 51 | p = Pipeline([ 52 | InnerConcatenateDataContainer(sub_data_container_names=['2d']) 53 | ]) 54 | 55 | data_container_3d = p.handle_transform(data_container_3d, CX()) 56 | 57 | # Then 58 | assert data_container_3d.data_inputs.shape == (SHAPE_3D[0], SHAPE_3D[1], SHAPE_3D[2] + 1) 59 | assert data_container_3d.expected_outputs.shape == (SHAPE_3D[0], SHAPE_3D[1], SHAPE_3D[2] + 1) 60 | assert np.array_equal(data_container_3d.data_inputs[..., -1], 
data_container_2d.data_inputs) 61 | assert np.array_equal(data_container_3d.expected_outputs[..., -1], data_container_2d.expected_outputs) 62 | 63 | 64 | def test_inner_concatenate_data_should_merge_1d_with_3d(): 65 | # Given 66 | data_inputs_3d, expected_outputs_3d = _create_data_source(SHAPE_3D) 67 | data_inputs_1d, expected_outputs_1d = _create_data_source(SHAPE_1D) 68 | data_container_1d = DACT(data_inputs=data_inputs_1d, expected_outputs=expected_outputs_1d) 69 | data_container = DACT(data_inputs=data_inputs_3d, expected_outputs=expected_outputs_3d) \ 70 | .add_sub_data_container('1d', data_container_1d) 71 | 72 | # When 73 | p = Pipeline([ 74 | InnerConcatenateDataContainer(sub_data_container_names=['1d']) 75 | ]) 76 | 77 | data_container = p.handle_transform(data_container, CX()) 78 | 79 | # Then 80 | broadcasted_data_inputs_1d = np.broadcast_to(np.expand_dims(data_container_1d.data_inputs, axis=-1), 81 | shape=(SHAPE_3D[0], SHAPE_3D[1])) 82 | broadcasted_expected_outputs_1d = np.broadcast_to(np.expand_dims(data_container_1d.expected_outputs, axis=-1), 83 | shape=(SHAPE_3D[0], SHAPE_3D[1])) 84 | 85 | assert np.array_equal(data_container.data_inputs[..., -1], broadcasted_data_inputs_1d) 86 | assert np.array_equal(data_container.expected_outputs[..., -1], broadcasted_expected_outputs_1d) 87 | 88 | assert data_container.data_inputs.shape == (SHAPE_3D[0], SHAPE_3D[1], SHAPE_3D[2] + 1) 89 | assert data_container.expected_outputs.shape == (SHAPE_3D[0], SHAPE_3D[1], SHAPE_3D[2] + 1) 90 | 91 | 92 | def test_inner_concatenate_data_should_merge_1d_with_2d(): 93 | # Given 94 | data_inputs_2d, expected_outputs_2d = _create_data_source(SHAPE_2D) 95 | data_inputs_1d, expected_outputs_1d = _create_data_source(SHAPE_1D) 96 | data_container_1d = DACT(data_inputs=data_inputs_1d, expected_outputs=expected_outputs_1d) 97 | data_container = DACT(data_inputs=data_inputs_2d, expected_outputs=expected_outputs_2d) \ 98 | .add_sub_data_container('1d', data_container_1d) 99 | 100 | # When 101 | p = Pipeline([ 102 | InnerConcatenateDataContainer(sub_data_container_names=['1d']) 103 | ]) 104 | 105 | data_container = p.handle_transform(data_container, CX()) 106 | 107 | # Then 108 | assert data_container.data_inputs.shape == (SHAPE_2D[0], SHAPE_2D[1] + 1) 109 | assert data_container.expected_outputs.shape == (SHAPE_2D[0], SHAPE_2D[1] + 1) 110 | assert np.array_equal(data_container.data_inputs[..., -1], data_container_1d.data_inputs) 111 | assert np.array_equal(data_container.expected_outputs[..., -1], data_container_1d.expected_outputs) 112 | 113 | 114 | def test_outer_concatenate_data_should_merge_2d_with_3d(): 115 | # Given 116 | data_inputs_3d, expected_outputs_3d = _create_data_source(SHAPE_3D) 117 | data_inputs_2d, expected_outputs_2d = _create_data_source(SHAPE_2D) 118 | data_container_2d = DACT(data_inputs=data_inputs_2d, expected_outputs=expected_outputs_2d) 119 | data_container = DACT(data_inputs=data_inputs_3d, expected_outputs=expected_outputs_3d) \ 120 | .add_sub_data_container('2d', data_container_2d) 121 | 122 | # When 123 | p = Pipeline([ 124 | ZipBatchDataContainer(sub_data_container_names=['2d']) 125 | ]) 126 | 127 | data_container = p.handle_transform(data_container, CX()) 128 | 129 | # Then 130 | for i, (first_di, second_di) in enumerate(zip(data_inputs_3d, data_inputs_2d)): 131 | assert np.array_equal(data_container.data_inputs[i][0], first_di) 132 | assert np.array_equal(data_container.data_inputs[i][1], second_di) 133 | 134 | for i, (first_eo, second_eo) in enumerate(zip(expected_outputs_3d, 
expected_outputs_2d)): 135 | assert np.array_equal(data_container.expected_outputs[i][0], first_eo) 136 | assert np.array_equal(data_container.expected_outputs[i][1], second_eo) 137 | 138 | 139 | def _create_data_source(shape): 140 | data_inputs = np.random.random(shape).astype(np.float32) 141 | expected_outputs = np.random.random(shape).astype(np.float32) 142 | return data_inputs, expected_outputs 143 | -------------------------------------------------------------------------------- /testing_neuraxle/steps/test_data_shuffling.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from neuraxle.pipeline import Pipeline 4 | from neuraxle.steps.data import DataShuffler 5 | from neuraxle.steps.misc import TapeCallbackFunction, FitTransformCallbackStep 6 | 7 | 8 | def test_data_shuffling_should_shuffle_data_inputs_and_expected_outputs(): 9 | callback_fit = TapeCallbackFunction() 10 | callback_transform = TapeCallbackFunction() 11 | data_shuffler = Pipeline([ 12 | DataShuffler(seed=42, increment_seed_after_each_fit=True), 13 | FitTransformCallbackStep(callback_transform, callback_fit) 14 | ]) 15 | data_inputs = np.array(range(10)) 16 | expected_outputs = np.array(range(10, 20)) 17 | 18 | data_shuffler, outputs = data_shuffler.fit_transform(data_inputs, expected_outputs)  # fit_transform returns (fitted_pipeline, outputs) 19 | 20 | assert not np.array_equal(outputs, data_inputs) 21 | assert not np.array_equal(callback_fit.data[0][0], data_inputs) 22 | assert not np.array_equal(callback_fit.data[0][1], expected_outputs) 23 | assert not np.array_equal(callback_transform.data, data_inputs) 24 | -------------------------------------------------------------------------------- /testing_neuraxle/steps/test_epochs_repeater.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from neuraxle.base import ExecutionMode 5 | from neuraxle.pipeline import Pipeline 6 | from neuraxle.steps.data import EpochRepeater 7 | from neuraxle.steps.misc import TapeCallbackFunction, FitTransformCallbackStep 8 | from testing_neuraxle.steps.neuraxle_test_case import NeuraxleTestCase 9 | 10 | DATA_INPUTS = np.array(range(10)) 11 | EXPECTED_OUTPUTS = np.array(range(10, 20)) 12 | 13 | callback_fit = TapeCallbackFunction() 14 | callback_transform = TapeCallbackFunction() 15 | EPOCHS = 2 16 | 17 | 18 | @pytest.mark.parametrize("test_case", [ 19 | NeuraxleTestCase( 20 | pipeline=Pipeline([ 21 | EpochRepeater(FitTransformCallbackStep(callback_transform, callback_fit), epochs=EPOCHS) 22 | ]), 23 | callbacks=[callback_fit, callback_transform], 24 | expected_callbacks_data=[ 25 | [(DATA_INPUTS, EXPECTED_OUTPUTS), (DATA_INPUTS, EXPECTED_OUTPUTS)], 26 | [DATA_INPUTS] 27 | ], 28 | data_inputs=DATA_INPUTS, 29 | expected_outputs=EXPECTED_OUTPUTS, 30 | expected_processed_outputs=DATA_INPUTS, 31 | execution_mode=ExecutionMode.FIT_TRANSFORM 32 | ), 33 | NeuraxleTestCase( 34 | pipeline=Pipeline([ 35 | EpochRepeater(FitTransformCallbackStep(callback_transform, callback_fit), epochs=EPOCHS) 36 | ]), 37 | callbacks=[callback_fit, callback_transform], 38 | expected_callbacks_data=[ 39 | [], 40 | [DATA_INPUTS] 41 | ], 42 | data_inputs=DATA_INPUTS, 43 | expected_outputs=EXPECTED_OUTPUTS, 44 | expected_processed_outputs=DATA_INPUTS, 45 | execution_mode=ExecutionMode.TRANSFORM 46 | ), 47 | NeuraxleTestCase( 48 | pipeline=Pipeline([ 49 | EpochRepeater(FitTransformCallbackStep(callback_transform, callback_fit), epochs=EPOCHS) 50 | ]).set_train(False), 51 | callbacks=[callback_fit, 
callback_transform], 52 | expected_callbacks_data=[ 53 | [], 54 | [DATA_INPUTS] 55 | ], 56 | data_inputs=DATA_INPUTS, 57 | expected_outputs=EXPECTED_OUTPUTS, 58 | expected_processed_outputs=DATA_INPUTS, 59 | execution_mode=ExecutionMode.TRANSFORM 60 | ), 61 | NeuraxleTestCase( 62 | pipeline=Pipeline([ 63 | EpochRepeater(FitTransformCallbackStep(callback_transform, callback_fit), epochs=EPOCHS) 64 | ]), 65 | callbacks=[callback_fit, callback_transform], 66 | expected_callbacks_data=[ 67 | [(DATA_INPUTS, EXPECTED_OUTPUTS), (DATA_INPUTS, EXPECTED_OUTPUTS)], 68 | [] 69 | ], 70 | data_inputs=DATA_INPUTS, 71 | expected_outputs=EXPECTED_OUTPUTS, 72 | execution_mode=ExecutionMode.FIT 73 | ) 74 | ]) 75 | def test_epoch_repeater(test_case): 76 | processed_outputs = test_case.execute() 77 | 78 | test_case.assert_expected_processed_outputs(processed_outputs) 79 | test_case.assert_callback_data_is_as_expected() 80 | -------------------------------------------------------------------------------- /testing_neuraxle/steps/test_expand_dim.py: -------------------------------------------------------------------------------- 1 | from typing import List, Iterable 2 | 3 | import numpy as np 4 | 5 | from neuraxle.hyperparams.space import HyperparameterSamples 6 | from neuraxle.pipeline import Pipeline 7 | from neuraxle.steps.flow import ExpandDim 8 | from neuraxle.steps.misc import HandleCallbackStep, TapeCallbackFunction 9 | 10 | 11 | def test_expand_dim_transform(): 12 | di = np.array(range(10)) 13 | eo = None 14 | fit_callback, transform_callback, fit_transform_callback = ( 15 | TapeCallbackFunction(), TapeCallbackFunction(), TapeCallbackFunction()) 16 | p = Pipeline([ 17 | ExpandDim( 18 | HandleCallbackStep(fit_callback, transform_callback, fit_transform_callback) 19 | ) 20 | ]) 21 | 22 | outputs = p.transform(di) 23 | 24 | assert np.array_equal(outputs, di) 25 | assert fit_callback.data == [] 26 | assert np.array_equal( 27 | np.array(transform_callback.data[0][0].di), 28 | np.array([di]) 29 | ) 30 | assert np.array_equal( 31 | np.array(transform_callback.data[0][0].eo), 32 | np.array([eo]) 33 | ) 34 | assert fit_transform_callback.data == [] 35 | 36 | 37 | def test_expand_dim_fit(): 38 | handle_fit_callback = TapeCallbackFunction() 39 | handle_transform_callback = TapeCallbackFunction() 40 | handle_fit_transform_callback = TapeCallbackFunction() 41 | p = Pipeline([ 42 | ExpandDim( 43 | HandleCallbackStep( 44 | handle_fit_callback, 45 | handle_transform_callback, 46 | handle_fit_transform_callback 47 | ) 48 | ) 49 | ]) 50 | 51 | p = p.fit(np.array(range(10)), np.array(range(10))) 52 | 53 | assert handle_transform_callback.data == [] 54 | assert handle_fit_transform_callback.data == [] 55 | assert np.array_equal( 56 | np.array(handle_fit_callback.data[0][0].data_inputs), 57 | np.array([np.array(range(10))]) 58 | ) 59 | assert np.array_equal( 60 | np.array(handle_fit_callback.data[0][0].expected_outputs), 61 | np.array([np.array(range(10))]) 62 | ) 63 | 64 | 65 | def test_expand_dim_fit_transform(): 66 | handle_fit_callback = TapeCallbackFunction() 67 | handle_transform_callback = TapeCallbackFunction() 68 | handle_fit_transform_callback = TapeCallbackFunction() 69 | p = Pipeline([ 70 | ExpandDim( 71 | HandleCallbackStep( 72 | handle_fit_callback, 73 | handle_transform_callback, 74 | handle_fit_transform_callback 75 | ) 76 | ) 77 | ]) 78 | 79 | p, outputs = p.fit_transform(np.array(range(10)), np.array(range(10))) 80 | 81 | assert np.array_equal(outputs, np.array(range(10))) 82 | assert 
handle_transform_callback.data == [] 83 | assert handle_fit_callback.data == [] 84 | assert np.array_equal( 85 | np.array(handle_fit_transform_callback.data[0][0].data_inputs), 86 | np.array([np.array(range(10))]) 87 | ) 88 | assert np.array_equal( 89 | np.array(handle_fit_transform_callback.data[0][0].expected_outputs), 90 | np.array([np.array(range(10))]) 91 | ) 92 | -------------------------------------------------------------------------------- /testing_neuraxle/steps/test_features.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for features steps 3 | ======================================== 4 | 5 | .. 6 | Copyright 2019, Neuraxio Inc. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | 20 | """ 21 | import pytest 22 | 23 | from neuraxle.hyperparams.space import HyperparameterSamples 24 | from neuraxle.steps.features import Cheap3DTo2DTransformer, FFTPeakBinWithValue 25 | import numpy as np 26 | 27 | 28 | def test_fft_peak_bin_with_values(): 29 | data_inputs = np.random.random((4, 5, 2)) 30 | step = FFTPeakBinWithValue() 31 | 32 | outputs = step.transform(data_inputs) 33 | 34 | assert outputs.shape == (4, 4) 35 | 36 | 37 | @pytest.mark.parametrize("hyperparams, expected_feature_count", [ 38 | (HyperparameterSamples({ 39 | 'FFT__enabled': True, 40 | 'NumpyMean__enabled': True, 41 | 'NumpyMedian__enabled': True, 42 | 'NumpyMin__enabled': True, 43 | 'NumpyMax__enabled': True 44 | }), 18), 45 | (HyperparameterSamples({ 46 | 'FFT__enabled': False, 47 | 'NumpyMean__enabled': True, 48 | 'NumpyMedian__enabled': True, 49 | 'NumpyMin__enabled': True, 50 | 'NumpyMax__enabled': True 51 | }), 8), 52 | (HyperparameterSamples({ 53 | 'FFT__enabled': True, 54 | 'NumpyMean__enabled': False, 55 | 'NumpyMedian__enabled': True, 56 | 'NumpyMin__enabled': True, 57 | 'NumpyMax__enabled': True 58 | }), 16), 59 | (HyperparameterSamples({ 60 | 'FFT__enabled': True, 61 | 'NumpyMean__enabled': True, 62 | 'NumpyMedian__enabled': False, 63 | 'NumpyMin__enabled': True, 64 | 'NumpyMax__enabled': True 65 | }), 16), 66 | (HyperparameterSamples({ 67 | 'FFT__enabled': True, 68 | 'NumpyMean__enabled': True, 69 | 'NumpyMedian__enabled': True, 70 | 'NumpyMin__enabled': False, 71 | 'NumpyMax__enabled': True 72 | }), 16), 73 | (HyperparameterSamples({ 74 | 'FFT__enabled': True, 75 | 'NumpyMean__enabled': True, 76 | 'NumpyMedian__enabled': True, 77 | 'NumpyMin__enabled': True, 78 | 'NumpyMax__enabled': False 79 | }), 16) 80 | ]) 81 | def test_cheap_3D_to_2D_transformer(hyperparams: HyperparameterSamples, expected_feature_count: int): 82 | step = Cheap3DTo2DTransformer() 83 | step.set_hyperparams(hyperparams=hyperparams) 84 | data_inputs = np.random.random((7, 5, 2)) 85 | 86 | outputs = step.transform(data_inputs) 87 | 88 | assert outputs.shape == (7, expected_feature_count) 89 | -------------------------------------------------------------------------------- /testing_neuraxle/steps/test_flatten_for_each.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from neuraxle.base import ExecutionContext as CX 4 | from neuraxle.data_container import DataContainer as DACT 5 | from neuraxle.pipeline import Pipeline 6 | from neuraxle.steps.loop import FlattenForEach 7 | from neuraxle.steps.numpy import MultiplyByN 8 | from neuraxle.steps.output_handlers import OutputTransformerWrapper 9 | 10 | DATA_SHAPE = (3, 4) 11 | FLAT_DATA_SHAPE = (3 * 4, ) 12 | 13 | 14 | def test_flatten_for_each_unflatten_should_transform_data_inputs(): 15 | p = FlattenForEach(MultiplyByN(2), then_unflatten=True) 16 | data_inputs, _ = _create_random_of_shape(DATA_SHAPE) 17 | 18 | outputs = p.transform(data_inputs) 19 | 20 | assert np.array(outputs).shape == DATA_SHAPE 21 | assert np.array_equal(outputs, data_inputs * 2) 22 | 23 | 24 | def test_flatten_for_each_should_transform_data_inputs(): 25 | p = FlattenForEach(MultiplyByN(2), then_unflatten=False) 26 | data_inputs, _ = _create_random_of_shape(DATA_SHAPE) 27 | 28 | outputs = p.transform(data_inputs) 29 | 30 | assert np.array(outputs).shape == FLAT_DATA_SHAPE 31 | assert np.array_equal(outputs.flatten(), data_inputs.flatten() * 2) 32 | 33 | 34 | def test_flatten_for_each_should_transform_data_inputs_and_expected_outputs(): 35 | p = FlattenForEach(Pipeline([ 36 | MultiplyByN(2), 37 | OutputTransformerWrapper(MultiplyByN(3)) 38 | ])) 39 | # TODO: use a tape callback here to check that MultiplyByN receives the flattened 12-element array exactly once, rather than 3 separate rows of 4 items. 40 | data_inputs, expected_outputs = _create_random_of_shape(DATA_SHAPE) 41 | 42 | p, outputs = p.handle_fit_transform( 43 | DACT(data_inputs=data_inputs, expected_outputs=expected_outputs), CX()) 44 | 45 | assert np.array(outputs.data_inputs).shape == DATA_SHAPE 46 | assert np.array_equal(outputs.data_inputs, data_inputs * 2) 47 | assert np.array(outputs.expected_outputs).shape == DATA_SHAPE 48 | assert np.array_equal(outputs.expected_outputs, expected_outputs * 3) 49 | 50 | 51 | def _create_random_of_shape(shape): 52 | data_inputs = np.random.random(shape).astype(np.float32) 53 | expected_outputs = np.random.random(shape).astype(np.float32) 54 | return data_inputs, expected_outputs 55 | -------------------------------------------------------------------------------- /testing_neuraxle/steps/test_for_each.py: -------------------------------------------------------------------------------- 1 | from neuraxle.pipeline import Pipeline 2 | from neuraxle.steps.loop import ForEach 3 | from neuraxle.steps.misc import TransformCallbackStep, TapeCallbackFunction, FitCallbackStep, \ 4 | FitTransformCallbackStep 5 | 6 | 7 | def test_fit_for_each_should_fit_all_steps_for_each_data_inputs_expected_outputs(): 8 | tape = TapeCallbackFunction() 9 | p = Pipeline([ 10 | ForEach(Pipeline([ 11 | FitCallbackStep(tape.callback, ["1"]), 12 | FitCallbackStep(tape.callback, ["2"]), 13 | ])) 14 | ]) 15 | data_inputs = [[0, 1], [1, 2]] 16 | expected_outputs = [[2, 3], [4, 5]] 17 | 18 | p = p.fit(data_inputs, expected_outputs) 19 | 20 | assert isinstance(p, Pipeline) 21 | assert tape.get_name_tape() == ["1", "2", "1", "2"] 22 | assert tape.data == [([0, 1], [2, 3]), ([0, 1], [2, 3]), ([1, 2], [4, 5]), ([1, 2], [4, 5])] 23 | 24 | 25 | def test_fit_transform_should_fit_transform_all_steps_for_each_data_inputs_expected_outputs(): 26 | tape = TapeCallbackFunction() 27 | tape_fit = TapeCallbackFunction() 28 | p = Pipeline([ 29 | ForEach(Pipeline([ 30 | FitTransformCallbackStep(tape.callback, 
tape_fit, ["1"]), 31 | FitTransformCallbackStep(tape.callback, tape_fit, ["2"]), 32 | ])) 33 | ]) 34 | data_inputs = [[0, 1], [1, 2]] 35 | expected_outputs = [[2, 3], [4, 5]] 36 | 37 | p, outputs = p.fit_transform(data_inputs, expected_outputs) 38 | 39 | assert tape.get_name_tape() == ["1", "2", "1", "2"] 40 | assert tape_fit.get_name_tape() == ["1", "2", "1", "2"] 41 | assert tape_fit.data == [([0, 1], [2, 3]), ([0, 1], [2, 3]), ([1, 2], [4, 5]), ([1, 2], [4, 5])] 42 | 43 | 44 | def test_transform_should_transform_all_steps_for_each_data_inputs_expected_outputs(): 45 | tape = TapeCallbackFunction() 46 | p = Pipeline([ 47 | ForEach(Pipeline([ 48 | TransformCallbackStep(tape.callback, ["1"]), 49 | TransformCallbackStep(tape.callback, ["2"]), 50 | ])) 51 | ]) 52 | data_inputs = [[0, 1], [1, 2]] 53 | 54 | outputs = p.transform(data_inputs) 55 | 56 | assert tape.get_name_tape() == ["1", "2", "1", "2"] 57 | -------------------------------------------------------------------------------- /testing_neuraxle/steps/test_if_execution_phase_is_then_do.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from pytest import skip 4 | 5 | from neuraxle.base import CX, ExecutionPhase 6 | from neuraxle.data_container import DataContainer as DACT 7 | from neuraxle.steps.flow import IfExecutionPhaseIsThen, ExecutionPhaseSwitch 8 | from testing_neuraxle.test_forcehandle_mixin import ForceHandleIdentity 9 | 10 | 11 | class SomeStep(ForceHandleIdentity): 12 | def __init__(self): 13 | ForceHandleIdentity.__init__(self) 14 | self.did_process = False 15 | 16 | def _did_process(self, data_container: DACT, context: CX) -> DACT: 17 | self.did_process = True 18 | return data_container 19 | 20 | 21 | def test_ifexecphase_same_then_execute_step(tmpdir): 22 | _run(tmpdir, ExecutionPhase.TRAIN, True) 23 | 24 | 25 | def test_ifexecphase_different_then_skip_step(tmpdir): 26 | _run(tmpdir, ExecutionPhase.TEST, False) 27 | 28 | 29 | def _run(tmpdir, phase, expected): 30 | context = CX(root=tmpdir, execution_phase=phase) 31 | data_inputs = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) 32 | 33 | some_step = SomeStep() 34 | p = IfExecutionPhaseIsThen(ExecutionPhase.TRAIN, some_step) 35 | p = p.with_context(context) 36 | 37 | p.fit_transform(data_inputs) 38 | assert some_step.did_process is expected 39 | 40 | 41 | def test_ifexecphase_raise_exception_when_unspecified(tmpdir): 42 | context = CX(root=tmpdir) 43 | data_inputs = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) 44 | 45 | some_step = SomeStep() 46 | p = IfExecutionPhaseIsThen(ExecutionPhase.TRAIN, some_step) 47 | p = p.with_context(context) 48 | 49 | with pytest.raises(ValueError) as error_info: 50 | p.fit_transform(data_inputs) 51 | assert some_step.did_process is False 52 | 53 | 54 | def test_execswitch(tmpdir): 55 | context = CX(root=tmpdir, execution_phase=ExecutionPhase.TRAIN) 56 | data_inputs = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) 57 | 58 | phase_to_step = {p: SomeStep() for p in (ExecutionPhase.PRETRAIN, ExecutionPhase.TRAIN, ExecutionPhase.TEST)} 59 | p = ExecutionPhaseSwitch(phase_to_step) 60 | p_c = p.with_context(context) 61 | 62 | p_c.fit_transform(data_inputs) 63 | assert phase_to_step[ExecutionPhase.PRETRAIN].did_process is False 64 | assert phase_to_step[ExecutionPhase.TRAIN].did_process is True 65 | assert phase_to_step[ExecutionPhase.TEST].did_process is False 66 | 67 | p_c = p.with_context(context.set_execution_phase(ExecutionPhase.UNSPECIFIED)) 68 | with 
pytest.raises(KeyError) as error_info: 69 | p_c.fit_transform(data_inputs) 70 | -------------------------------------------------------------------------------- /testing_neuraxle/steps/test_numpy_steps.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for NumPy Steps 3 | ======================================== 4 | 5 | .. 6 | Copyright 2019, Neuraxio Inc. 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | 20 | """ 21 | 22 | import numpy as np 23 | from neuraxle.steps.numpy import (NumpyConcatenateInnerFeatures, 24 | NumpyFlattenDatum, NumpyShapePrinter, 25 | NumpyTranspose, NumpyFFT, NumpyRavel) 26 | 27 | 28 | def test_flatten_datum(): 29 | flat = NumpyFlattenDatum() 30 | data = np.random.random((10, 4, 5, 2)) # 4D array (could be ND with N>=2). 31 | expected_data = np.copy(data).reshape(10, 4 * 5 * 2) # 2D array. 32 | 33 | flat, received_data = flat.fit_transform(data) 34 | 35 | assert (received_data == expected_data).all() 36 | 37 | 38 | def test_concat_features(): 39 | concat = NumpyConcatenateInnerFeatures() 40 | # ND arrays 41 | data1 = np.random.random((10, 4, 5, 2)) 42 | data2 = np.random.random((10, 4, 5, 10)) 43 | expected_all_data = np.concatenate([data1, data2], axis=-1) 44 | 45 | concat, received_all_data = concat.fit_transform([data1, data2]) 46 | 47 | assert tuple(received_all_data.shape) == tuple(expected_all_data.shape) 48 | assert (received_all_data == expected_all_data).all() 49 | 50 | 51 | def test_numpy_transpose(): 52 | tr = NumpyTranspose() 53 | data = np.random.random((10, 7)) 54 | expected_data = np.copy(data).transpose() 55 | 56 | tr, received_data = tr.fit_transform(data) 57 | 58 | assert (received_data == expected_data).all() 59 | 60 | 61 | def test_numpy_shape_printer(): 62 | pr = NumpyShapePrinter() 63 | pr.fit_transform(np.ones((10, 11))) 64 | 65 | 66 | def test_numpy_fft(): 67 | fft = NumpyFFT() 68 | fft.fit_transform(np.ones((10, 11))) 69 | 70 | 71 | def test_numpy_ravel(): 72 | nr = NumpyRavel() 73 | nr, out = nr.fit_transform(np.ones((10, 11))) 74 | assert out.shape == (110,) 75 | -------------------------------------------------------------------------------- /testing_neuraxle/steps/test_one_hot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from neuraxle.steps.numpy import OneHotEncoder 5 | 6 | 7 | @pytest.mark.parametrize("n_dims", [1, 2, 3]) 8 | @pytest.mark.parametrize("no_columns", [10]) 9 | def test_one_hot_encode_should_encode_data_inputs(n_dims, no_columns): 10 | one_hot_encode = OneHotEncoder(nb_columns=no_columns, name='one_hot') 11 | data_shape = list(range(100, 200))[:n_dims] 12 | data_inputs = np.random.randint(low=no_columns, size=data_shape) 13 | data_inputs[0] = 0 14 | data_inputs[1] = no_columns - 1 15 | data_inputs[-2] = -1 # or nan or inf. 
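# NB: np.random.randint with only `low` given samples from [0, no_columns); the out-of-range -1 set above is expected to one-hot encode to an all-zero row, as asserted below.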
16 | 17 | outputs = one_hot_encode.transform(data_inputs) 18 | 19 | assert outputs.shape[-1] == no_columns 20 | assert ((outputs == 1) | (outputs == 0)).all() 21 | 22 | if n_dims >= 2: 23 | assert (outputs[0, ..., 0] == 1).all() 24 | assert (outputs[1, ..., -1] == 1).all() 25 | assert (outputs[-2, ...] == 0).all() 26 | -------------------------------------------------------------------------------- /testing_neuraxle/steps/test_output_transformer_wrapper.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Tuple 2 | 3 | from neuraxle.base import BaseTransformer 4 | from neuraxle.base import ExecutionContext as CX 5 | from neuraxle.data_container import DataContainer as DACT 6 | from neuraxle.hyperparams.space import HyperparameterSamples, HyperparameterSpace 7 | from neuraxle.pipeline import Pipeline 8 | from neuraxle.steps.output_handlers import IdsAndInputAndOutputTransformerMixin 9 | 10 | 11 | class MultiplyBy2OutputTransformer(IdsAndInputAndOutputTransformerMixin, BaseTransformer): 12 | def __init__( 13 | self, 14 | hyperparams: HyperparameterSamples = None, 15 | hyperparams_space: HyperparameterSpace = None, 16 | name: str = None 17 | ): 18 | BaseTransformer.__init__(self, hyperparams, hyperparams_space, name) 19 | IdsAndInputAndOutputTransformerMixin.__init__(self) 20 | 21 | def transform(self, data_inputs) -> Tuple[Any, Any]: 22 | ids, dis, eos = data_inputs 23 | 24 | new_dis = [] 25 | new_eos = [] 26 | for di, eo in zip(dis, eos): 27 | new_dis.append(di * 2) 28 | new_eos.append(eo * 2) 29 | 30 | return ids, new_dis, new_eos 31 | 32 | 33 | def test_output_transformer_should_zip_data_input_and_expected_output_in_the_transformed_output(): 34 | pipeline = Pipeline([ 35 | MultiplyBy2OutputTransformer() 36 | ]) 37 | 38 | pipeline, new_data_container = pipeline.handle_fit_transform( 39 | DACT(data_inputs=[1, 2, 3], ids=[0, 1, 2], expected_outputs=[2, 3, 4]), 40 | CX() 41 | ) 42 | 43 | assert new_data_container.data_inputs == [2, 4, 6] 44 | assert new_data_container.expected_outputs == [4, 6, 8] 45 | -------------------------------------------------------------------------------- /testing_neuraxle/steps/test_reversible_preprocessing_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from neuraxle.base import ExecutionMode 5 | from neuraxle.pipeline import Pipeline 6 | from neuraxle.steps.flow import ReversiblePreprocessingWrapper 7 | from neuraxle.steps.misc import TapeCallbackFunction, CallbackWrapper 8 | from neuraxle.steps.numpy import MultiplyByN, AddN 9 | from testing_neuraxle.steps.neuraxle_test_case import NeuraxleTestCase 10 | 11 | DATA_INPUTS = np.array(range(5)) 12 | EXPECTED_OUTPUTS = np.array(range(5, 10)) 13 | EXPECTED_PROCESSED_OUTPUTS = np.array([5.0, 6.0, 7.0, 8.0, 9.0]) 14 | 15 | tape_transform_preprocessing = TapeCallbackFunction() 16 | tape_fit_preprocessing = TapeCallbackFunction() 17 | tape_transform_postprocessing = TapeCallbackFunction() 18 | tape_fit_postprocessing = TapeCallbackFunction() 19 | tape_inverse_transform_preprocessing = TapeCallbackFunction() 20 | 21 | 22 | @pytest.mark.parametrize('test_case', [ 23 | NeuraxleTestCase( 24 | pipeline=Pipeline([ 25 | ReversiblePreprocessingWrapper( 26 | preprocessing_step=CallbackWrapper(MultiplyByN(2), tape_transform_preprocessing, tape_fit_preprocessing, tape_inverse_transform_preprocessing), 27 | postprocessing_step=CallbackWrapper(AddN(10), 
tape_transform_postprocessing, tape_fit_postprocessing) 28 | )] 29 | ), 30 | callbacks=[tape_transform_preprocessing, tape_fit_preprocessing, tape_transform_postprocessing, tape_fit_postprocessing, tape_inverse_transform_preprocessing], 31 | expected_callbacks_data=[ 32 | [DATA_INPUTS], 33 | [], 34 | [DATA_INPUTS * 2], 35 | [], 36 | [(DATA_INPUTS * 2) + 10] 37 | ], 38 | data_inputs=DATA_INPUTS, 39 | expected_processed_outputs=EXPECTED_PROCESSED_OUTPUTS, 40 | execution_mode=ExecutionMode.TRANSFORM 41 | ), 42 | NeuraxleTestCase( 43 | pipeline=Pipeline([ 44 | ReversiblePreprocessingWrapper( 45 | preprocessing_step=CallbackWrapper(MultiplyByN(2), tape_transform_preprocessing, tape_fit_preprocessing, tape_inverse_transform_preprocessing), 46 | postprocessing_step=CallbackWrapper(AddN(10), tape_transform_postprocessing, tape_fit_postprocessing) 47 | )] 48 | ), 49 | callbacks=[tape_transform_preprocessing, tape_fit_preprocessing, tape_transform_postprocessing, tape_fit_postprocessing, tape_inverse_transform_preprocessing], 50 | expected_callbacks_data=[ 51 | [DATA_INPUTS], 52 | [(DATA_INPUTS, EXPECTED_OUTPUTS)], 53 | [DATA_INPUTS * 2], 54 | [(DATA_INPUTS * 2, EXPECTED_OUTPUTS)], 55 | [(DATA_INPUTS * 2) + 10] 56 | ], 57 | data_inputs=DATA_INPUTS, 58 | expected_outputs=EXPECTED_OUTPUTS, 59 | expected_processed_outputs=EXPECTED_PROCESSED_OUTPUTS, 60 | execution_mode=ExecutionMode.FIT_TRANSFORM 61 | ), 62 | NeuraxleTestCase( 63 | pipeline=Pipeline([ 64 | ReversiblePreprocessingWrapper( 65 | preprocessing_step=CallbackWrapper(MultiplyByN(2), tape_transform_preprocessing, tape_fit_preprocessing, tape_inverse_transform_preprocessing), 66 | postprocessing_step=CallbackWrapper(AddN(10), tape_transform_postprocessing, tape_fit_postprocessing) 67 | )] 68 | ), 69 | callbacks=[tape_transform_preprocessing, tape_fit_preprocessing, tape_transform_postprocessing, tape_fit_postprocessing, tape_inverse_transform_preprocessing], 70 | expected_callbacks_data=[ 71 | [DATA_INPUTS], 72 | [(DATA_INPUTS, EXPECTED_OUTPUTS)], 73 | [], 74 | [(DATA_INPUTS * 2, EXPECTED_OUTPUTS)], 75 | [] 76 | ], 77 | data_inputs=DATA_INPUTS, 78 | expected_outputs=EXPECTED_OUTPUTS, 79 | execution_mode=ExecutionMode.FIT 80 | ) 81 | ]) 82 | def test_reversible_preprocessing_wrapper(test_case): 83 | processed_outputs = test_case.execute() 84 | 85 | test_case.assert_expected_processed_outputs(processed_outputs) 86 | test_case.assert_callback_data_is_as_expected() 87 | -------------------------------------------------------------------------------- /testing_neuraxle/steps/test_sklearn_wrapper.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import numpy as np 4 | import pytest 5 | from neuraxle.base import Identity 6 | from neuraxle.hyperparams.distributions import RandInt, Uniform 7 | from neuraxle.hyperparams.space import (HyperparameterSamples, 8 | HyperparameterSpace) 9 | from neuraxle.metaopt.auto_ml import AutoML, RandomSearchSampler 10 | from neuraxle.metaopt.callbacks import ScoringCallback 11 | from neuraxle.metaopt.repositories.json import HyperparamsOnDiskRepository 12 | from neuraxle.metaopt.validation import KFoldCrossValidationSplitter 13 | from neuraxle.pipeline import Pipeline 14 | from neuraxle.steps.data import DataShuffler 15 | from neuraxle.steps.flow import TrainOnlyWrapper 16 | from neuraxle.steps.sklearn import SKLearnWrapper 17 | from sklearn.decomposition import PCA 18 | from sklearn.ensemble import BaggingRegressor, GradientBoostingRegressor 19 | from 
sklearn.linear_model import LinearRegression, SGDClassifier, SGDRegressor 20 | from sklearn.metrics import median_absolute_error 21 | 22 | 23 | def test_sklearn_wrapper_with_an_invalid_step(): 24 | with pytest.raises(ValueError): 25 | SKLearnWrapper(Identity()) 26 | 27 | 28 | def test_sklearn_wrapper_fit_transform_with_predict(): 29 | p = SKLearnWrapper(LinearRegression()) 30 | data_inputs = np.expand_dims(np.array(list(range(10))), axis=-1) 31 | expected_outputs = np.expand_dims(np.array(list(range(10, 20))), axis=-1) 32 | 33 | p, outputs = p.fit_transform(data_inputs, expected_outputs) 34 | 35 | assert np.array_equal(outputs, expected_outputs) 36 | 37 | 38 | def test_sklearn_wrapper_transform_with_predict(): 39 | p = SKLearnWrapper(LinearRegression()) 40 | data_inputs = np.expand_dims(np.array(list(range(10))), axis=-1) 41 | expected_outputs = np.expand_dims(np.array(list(range(10, 20))), axis=-1) 42 | 43 | p = p.fit(data_inputs, expected_outputs) 44 | outputs = p.transform(data_inputs) 45 | 46 | assert np.array_equal(outputs, expected_outputs) 47 | 48 | 49 | def test_sklearn_wrapper_fit_transform_with_transform(): 50 | n_components = 2 51 | p = SKLearnWrapper(PCA(n_components=n_components)) 52 | dim1 = 10 53 | dim2 = 10 54 | data_inputs, expected_outputs = _create_data_source((dim1, dim2)) 55 | 56 | p, outputs = p.fit_transform(data_inputs, expected_outputs) 57 | 58 | assert outputs.shape == (dim1, n_components) 59 | 60 | 61 | def test_sklearn_wrapper_transform_partial_fit_with_predict(): 62 | model = SKLearnWrapper(SGDRegressor(learning_rate='adaptive', eta0=0.05), use_partial_fit=True) 63 | p = Pipeline([TrainOnlyWrapper(DataShuffler()), model]) 64 | data_inputs = np.expand_dims(np.array(list(range(10))), axis=-1) / 10 65 | expected_outputs = np.ravel(np.expand_dims(np.array(list(range(10, 20))), axis=-1)) / 10 66 | 67 | for _ in range(30): 68 | p = p.fit(data_inputs, expected_outputs) 69 | outputs = p.predict(data_inputs) 70 | 71 | assert all([np.isclose(a, b, atol=0.1) for a, b in zip(expected_outputs, outputs)]) 72 | 73 | 74 | def test_sklearn_wrapper_transform_partial_fit_classifier(): 75 | data_inputs = np.array([[0, 1], [0, 0], [3, -2], [-1, 1], [-2, 1], [2, 0], [2, -1], [4, -2], [-3, 1], [-1, 0]]) 76 | expected_outputs = np.ravel(np.expand_dims(data_inputs[:, 0] + 2 * data_inputs[:, 1] + 1, axis=-1)) 77 | data_inputs = data_inputs / (4 + 1) 78 | classes = np.array([0, 1, 2, 3]) 79 | model = SKLearnWrapper( 80 | SGDClassifier(learning_rate='adaptive', eta0=0.05), 81 | use_partial_fit=True, 82 | partial_fit_kwargs={'classes': classes} 83 | ) 84 | p = Pipeline([TrainOnlyWrapper(DataShuffler()), model]) 85 | 86 | for _ in range(30): 87 | p = p.fit(data_inputs, expected_outputs) 88 | outputs = p.predict(data_inputs) 89 | 90 | assert outputs.shape == (10,) 91 | assert len(set(outputs) - set(classes)) == 0 92 | 93 | 94 | def test_sklearn_wrapper_set_hyperparams(): 95 | p = SKLearnWrapper(PCA()) 96 | p.set_hyperparams(HyperparameterSamples({ 97 | 'n_components': 2 98 | })) 99 | 100 | assert p.wrapped_sklearn_predictor.n_components == 2 101 | 102 | 103 | def test_sklearn_wrapper_update_hyperparams(): 104 | p = SKLearnWrapper(PCA()) 105 | p.set_hyperparams(HyperparameterSamples({ 106 | 'n_components': 2, 107 | 'svd_solver': 'full' 108 | })) 109 | p.update_hyperparams(HyperparameterSamples({ 110 | 'n_components': 4 111 | })) 112 | 113 | assert p.wrapped_sklearn_predictor.n_components == 4 114 | assert p.wrapped_sklearn_predictor.svd_solver == 'full' 115 | 116 | 117 | def 
_create_data_source(shape): 118 | data_inputs = np.random.random(shape).astype(np.float32) 119 | expected_outputs = np.random.random(shape).astype(np.float32) 120 | return data_inputs, expected_outputs 121 | 122 | 123 | def _test_within_auto_ml_loop(tmpdir, pipeline): 124 | X_train = np.random.random((25, 50)).astype(np.float32) 125 | Y_train = np.random.random((25,)).astype(np.float32) 126 | 127 | validation_splitter = KFoldCrossValidationSplitter(3) 128 | scoring_callback = ScoringCallback( 129 | median_absolute_error, higher_score_is_better=False) 130 | 131 | auto_ml = AutoML( 132 | pipeline=pipeline, 133 | hyperparams_optimizer=RandomSearchSampler(), 134 | validation_splitter=validation_splitter, 135 | scoring_callback=scoring_callback, 136 | n_trials=2, 137 | epochs=1, 138 | hyperparams_repository=HyperparamsOnDiskRepository(cache_folder=tmpdir), 139 | refit_best_trial=True, 140 | continue_loop_on_error=False) 141 | 142 | auto_ml.fit(X_train, Y_train) 143 | 144 | 145 | def test_automl_sklearn(tmpdir): 146 | grad_boost = SKLearnWrapper(GradientBoostingRegressor()) 147 | _test_within_auto_ml_loop(tmpdir, grad_boost) 148 | 149 | 150 | def test_automl_sklearn_model_with_base_estimator(tmpdir): 151 | grad_boost = GradientBoostingRegressor() 152 | bagged_regressor = BaggingRegressor( 153 | grad_boost, random_state=5, n_jobs=-1) 154 | 155 | wrapped_bagged_regressor = SKLearnWrapper( 156 | bagged_regressor, 157 | HyperparameterSpace({ 158 | "n_estimators": RandInt(2, 15), 159 | "max_features": Uniform(0.6, 1.0)}), 160 | # return_all_sklearn_default_params_on_get=True 161 | ) 162 | _test_within_auto_ml_loop(tmpdir, wrapped_bagged_regressor) 163 | -------------------------------------------------------------------------------- /testing_neuraxle/steps/test_step_cloner_for_each_data_input.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | 5 | from neuraxle.base import ExecutionContext as CX 6 | from neuraxle.hyperparams.distributions import Boolean 7 | from neuraxle.hyperparams.space import HyperparameterSpace, HyperparameterSamples 8 | from neuraxle.pipeline import Pipeline 9 | from neuraxle.steps.loop import StepClonerForEachDataInput 10 | from neuraxle.steps.misc import FitCallbackStep, TapeCallbackFunction 11 | from neuraxle.steps.numpy import MultiplyByN 12 | 13 | HYPE_SPACE = HyperparameterSpace({ 14 | "a__test": Boolean() 15 | }) 16 | 17 | HYPE_SAMPLE = HyperparameterSamples({ 18 | "a__test": True 19 | }) 20 | 21 | 22 | def test_step_cloner_should_transform(): 23 | tape = TapeCallbackFunction() 24 | p = StepClonerForEachDataInput( 25 | Pipeline([ 26 | FitCallbackStep(tape), 27 | MultiplyByN(2) 28 | ]) 29 | ) 30 | data_inputs = _create_data((2, 2)) 31 | 32 | processed_outputs = p.transform(data_inputs) 33 | 34 | assert isinstance(p.steps_as_tuple[0][1], Pipeline) 35 | assert isinstance(p.steps_as_tuple[1][1], Pipeline) 36 | assert np.array_equal(processed_outputs, data_inputs * 2) 37 | 38 | 39 | def test_step_cloner_should_fit_transform(): 40 | # Given 41 | tape = TapeCallbackFunction() 42 | p = StepClonerForEachDataInput( 43 | Pipeline([ 44 | FitCallbackStep(tape), 45 | MultiplyByN(2) 46 | ]) 47 | ) 48 | data_inputs = _create_data((2, 2)) 49 | expected_outputs = _create_data((2, 2)) 50 | 51 | # When 52 | p, processed_outputs = p.fit_transform(data_inputs, expected_outputs) 53 | 54 | # Then 55 | assert isinstance(p.steps_as_tuple[0][1], Pipeline) 56 | assert 
np.array_equal(p.steps_as_tuple[0][1][0].callback_function.data[0][0], data_inputs[0]) 57 | assert np.array_equal(p.steps_as_tuple[0][1][0].callback_function.data[0][1], expected_outputs[0]) 58 | 59 | assert isinstance(p.steps_as_tuple[1][1], Pipeline) 60 | assert np.array_equal(p.steps_as_tuple[1][1][0].callback_function.data[0][0], data_inputs[1]) 61 | assert np.array_equal(p.steps_as_tuple[1][1][0].callback_function.data[0][1], expected_outputs[1]) 62 | 63 | assert np.array_equal(processed_outputs, data_inputs * 2) 64 | 65 | 66 | def test_step_cloner_should_inverse_transform(): 67 | tape = TapeCallbackFunction() 68 | p = StepClonerForEachDataInput( 69 | Pipeline([ 70 | FitCallbackStep(tape), 71 | MultiplyByN(2) 72 | ]) 73 | ) 74 | data_inputs = _create_data((2, 2)) 75 | expected_outputs = _create_data((2, 2)) 76 | 77 | p, processed_outputs = p.fit_transform(data_inputs, expected_outputs) 78 | 79 | assert np.array_equal(processed_outputs, data_inputs * 2) 80 | inverse_processed_outputs = p.inverse_transform(processed_outputs) 81 | assert np.array_equal(np.array(inverse_processed_outputs), np.array(data_inputs)) 82 | 83 | 84 | def test_step_cloner_should_set_train(): 85 | tape = TapeCallbackFunction() 86 | p = StepClonerForEachDataInput( 87 | Pipeline([ 88 | FitCallbackStep(tape), 89 | MultiplyByN(2) 90 | ]) 91 | ) 92 | data_inputs = _create_data((2, 2)) 93 | expected_outputs = _create_data((2, 2)) 94 | p, processed_outputs = p.fit_transform(data_inputs, expected_outputs) 95 | 96 | p.set_train(False) 97 | 98 | assert not p.is_train 99 | assert not p.steps_as_tuple[0][1].is_train 100 | assert not p.steps_as_tuple[1][1].is_train 101 | 102 | 103 | def test_step_cloner_should_save_sub_steps(tmpdir): 104 | tape = TapeCallbackFunction() 105 | p = StepClonerForEachDataInput( 106 | Pipeline([ 107 | FitCallbackStep(tape), 108 | MultiplyByN(2) 109 | ]) 110 | ).with_context(CX(tmpdir)) 111 | data_inputs = _create_data((2, 2)) 112 | expected_outputs = _create_data((2, 2)) 113 | p, processed_outputs = p.fit_transform(data_inputs, expected_outputs) 114 | 115 | p.save(CX(tmpdir), full_dump=True) 116 | 117 | saved_paths = [ 118 | os.path.join(tmpdir, 'StepClonerForEachDataInput/Pipeline[0]/FitCallbackStep/FitCallbackStep.joblib'), 119 | os.path.join(tmpdir, 'StepClonerForEachDataInput/Pipeline[0]/MultiplyByN/MultiplyByN.joblib'), 120 | os.path.join(tmpdir, 'StepClonerForEachDataInput/Pipeline[0]/MultiplyByN/MultiplyByN.joblib'), 121 | os.path.join(tmpdir, 'StepClonerForEachDataInput/Pipeline[0]/Pipeline[0].joblib'), 122 | os.path.join(tmpdir, 'StepClonerForEachDataInput/Pipeline[1]/FitCallbackStep/FitCallbackStep.joblib'), 123 | os.path.join(tmpdir, 'StepClonerForEachDataInput/Pipeline[1]/MultiplyByN/MultiplyByN.joblib'), 124 | os.path.join(tmpdir, 'StepClonerForEachDataInput/Pipeline[1]/Pipeline[1].joblib'), 125 | os.path.join(tmpdir, 'StepClonerForEachDataInput/Pipeline/FitCallbackStep/FitCallbackStep.joblib'), 126 | os.path.join(tmpdir, 'StepClonerForEachDataInput/Pipeline/MultiplyByN/MultiplyByN.joblib'), 127 | os.path.join(tmpdir, 'StepClonerForEachDataInput/Pipeline/Pipeline.joblib'), 128 | os.path.join(tmpdir, 'StepClonerForEachDataInput/StepClonerForEachDataInput.joblib') 129 | ] 130 | 131 | for p in saved_paths: 132 | assert os.path.exists(p) 133 | 134 | 135 | def test_step_cloner_should_load_sub_steps(tmpdir): 136 | tape = TapeCallbackFunction() 137 | p = StepClonerForEachDataInput( 138 | Pipeline([ 139 | FitCallbackStep(tape), 140 | MultiplyByN(2) 141 | ]) 142 | ).with_context(CX(tmpdir)) 143 
| data_inputs = _create_data((2, 2)) 144 | expected_outputs = _create_data((2, 2)) 145 | p, _ = p.fit_transform(data_inputs, expected_outputs) 146 | 147 | p.save(CX(tmpdir), full_dump=True) 148 | 149 | loaded_step_cloner = CX(tmpdir).load('StepClonerForEachDataInput') 150 | assert isinstance(loaded_step_cloner.wrapped, Pipeline) 151 | assert len(loaded_step_cloner.steps_as_tuple) == len(data_inputs) 152 | assert isinstance(loaded_step_cloner.steps_as_tuple[0][1], Pipeline) 153 | assert isinstance(loaded_step_cloner.steps_as_tuple[1][1], Pipeline) 154 | 155 | 156 | def _create_data(shape): 157 | data_inputs = np.random.random(shape).astype(np.float32) 158 | return data_inputs 159 | -------------------------------------------------------------------------------- /testing_neuraxle/test_basestep.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from neuraxle.base import ExecutionContext as CX 4 | from neuraxle.data_container import DataContainer as DACT 5 | from neuraxle.pipeline import Pipeline 6 | from neuraxle.steps.flow import TestOnlyWrapper, TrainOnlyWrapper 7 | from neuraxle.steps.misc import TapeCallbackFunction, CallbackWrapper 8 | from neuraxle.steps.numpy import MultiplyByN 9 | 10 | from testing_neuraxle.mocks.step_mocks import SomeStepWithHyperparams 11 | 12 | 13 | def test_basestep_print_str_representation_works_correctly(): 14 | output = str(SomeStepWithHyperparams()) 15 | assert output == "SomeStepWithHyperparams(name='MockStep')" 16 | 17 | 18 | def test_basestep_repr_representation_works_correctly(): 19 | output = repr(SomeStepWithHyperparams()) 20 | assert output == """SomeStepWithHyperparams(name='MockStep', hyperparams=HyperparameterSamples([('learning_rate', 0.1), 21 | ('l2_weight_reg', 0.001), 22 | ('hidden_size', 32), 23 | ('num_layers', 3), 24 | ('num_lstm_layers', 1), 25 | ('use_xavier_init', True), 26 | ('use_max_pool_else_avg_pool', True), 27 | ('dropout_drop_proba', 0.5), 28 | ('momentum', 0.1)]))""" 29 | 30 | 31 | def test_handle_predict_should_predict_in_test_mode(): 32 | tape_fit = TapeCallbackFunction() 33 | tape_transform = TapeCallbackFunction() 34 | p = Pipeline([ 35 | TestOnlyWrapper(CallbackWrapper(MultiplyByN(2), tape_transform, tape_fit)), 36 | TrainOnlyWrapper(CallbackWrapper(MultiplyByN(4), tape_transform, tape_fit)) 37 | ]) 38 | 39 | data_container = p.handle_predict( 40 | data_container=DACT(data_inputs=np.array([1, 1]), expected_outputs=np.array([1, 1])), 41 | context=CX() 42 | ) 43 | 44 | assert np.array_equal(data_container.data_inputs, np.array([2, 2])) 45 | 46 | 47 | def test_handle_predict_should_handle_transform_with_initial_is_train_mode_after_predict(): 48 | tape_fit = TapeCallbackFunction() 49 | tape_transform = TapeCallbackFunction() 50 | p = Pipeline([ 51 | TestOnlyWrapper(CallbackWrapper(MultiplyByN(2), tape_transform, tape_fit)), 52 | TrainOnlyWrapper(CallbackWrapper(MultiplyByN(4), tape_transform, tape_fit)) 53 | ]) 54 | data_container = DACT(data_inputs=np.array([1, 1]), expected_outputs=np.array([1, 1])) 55 | 56 | p.handle_predict( 57 | data_container=data_container.copy(), 58 | context=CX() 59 | ) 60 | data_container = p.handle_transform(data_container, CX()) 61 | 62 | assert np.array_equal(data_container.data_inputs, np.array([4, 4])) 63 | 64 | 65 | def test_predict_should_predict_in_test_mode(): 66 | tape_fit = TapeCallbackFunction() 67 | tape_transform = TapeCallbackFunction() 68 | p = Pipeline([ 69 | TestOnlyWrapper(CallbackWrapper(MultiplyByN(2), tape_transform, 
tape_fit)), 70 | TrainOnlyWrapper(CallbackWrapper(MultiplyByN(4), tape_transform, tape_fit)) 71 | ]) 72 | 73 | outputs = p.predict(np.array([1, 1])) 74 | 75 | assert np.array_equal(outputs, np.array([2, 2])) 76 | 77 | 78 | def test_predict_should_transform_with_initial_is_train_mode_after_predict(): 79 | tape_fit = TapeCallbackFunction() 80 | tape_transform = TapeCallbackFunction() 81 | p = Pipeline([ 82 | TestOnlyWrapper(CallbackWrapper(MultiplyByN(2), tape_transform, tape_fit)), 83 | TrainOnlyWrapper(CallbackWrapper(MultiplyByN(4), tape_transform, tape_fit)) 84 | ]) 85 | 86 | p.predict(np.array([1, 1])) 87 | outputs = p.transform(np.array([1, 1])) 88 | 89 | assert np.array_equal(outputs, np.array([4, 4])) 90 | -------------------------------------------------------------------------------- /testing_neuraxle/test_data_container.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from neuraxle.data_container import DACT, ListDataContainer 4 | 5 | 6 | def test_data_container_iter_method_should_iterate_with_none_ids(): 7 | data_container = DACT( 8 | di=np.array(list(range(100))), 9 | eo=np.array(list(range(100, 200))) 10 | ).set_ids(None) 11 | 12 | for i, (_id, data_input, expected_outputs) in enumerate(data_container): 13 | assert _id == i 14 | assert data_input == i 15 | assert expected_outputs == i + 100 16 | 17 | 18 | def test_data_container_iter_method_should_iterate_with_none_expected_outputs(): 19 | data_container = DACT( 20 | ids=[str(i) for i in range(100)], 21 | data_inputs=np.array(list(range(100))), 22 | expected_outputs=None 23 | ) 24 | 25 | for i, (_, data_input, expected_outputs) in enumerate(data_container): 26 | assert data_input == i 27 | assert expected_outputs is None 28 | 29 | 30 | def test_data_container_len_method_should_return_data_inputs_len(): 31 | data_container = DACT.from_di(np.array(list(range(100)))) 32 | 33 | assert len(data_container) == 100 34 | 35 | 36 | def test_data_container_should_iterate_through_data_using_minibatches(): 37 | data_container = DACT( 38 | ids=[str(i) for i in range(100)], 39 | data_inputs=np.array(list(range(100))), 40 | expected_outputs=np.array(list(range(100, 200))) 41 | ) 42 | 43 | batches = [] 44 | for b in data_container.minibatches(batch_size=10): 45 | batches.append(b) 46 | 47 | for i, batch in enumerate(batches): 48 | assert np.array_equal(np.array(batch.data_inputs), np.array(list(range(i * 10, (i * 10) + 10)))) 49 | assert np.array_equal( 50 | np.array(batch.expected_outputs), 51 | np.array(list(range((i * 10) + 100, (i * 10) + 100 + 10))) 52 | ) 53 | 54 | 55 | def test_list_data_container_concat(): 56 | # Given 57 | data_container = ListDataContainer( 58 | ids=[str(i) for i in range(100)], 59 | data_inputs=np.array(list(range(100))), 60 | expected_outputs=np.array(list(range(100, 200))) 61 | ) 62 | 63 | # When 64 | data_container.extend(DACT( 65 | ids=[str(i) for i in range(100, 200)], 66 | data_inputs=np.array(list(range(100, 200))), 67 | expected_outputs=np.array(list(range(200, 300))) 68 | )) 69 | 70 | # Then 71 | assert np.array_equal(np.array(data_container.ids), np.array(list(range(0, 200))).astype(str)) 72 | 73 | expected_data_inputs = np.array(list(range(0, 200))).astype(int) 74 | actual_data_inputs = np.array(data_container.data_inputs).astype(int) 75 | assert np.array_equal(actual_data_inputs, expected_data_inputs) 76 | 77 | expected_expected_outputs = np.array(list(range(100, 300))).astype(int) 78 | assert 
np.array_equal(np.array(data_container.expected_outputs).astype(int), expected_expected_outputs) 79 | -------------------------------------------------------------------------------- /testing_neuraxle/test_data_container_batching.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from neuraxle.data_container import DACT, StripAbsentValues 4 | import numpy as np 5 | 6 | 7 | class LoadableItem: 8 | def __init__(self): 9 | self.loaded = False 10 | 11 | def load(self) -> 'LoadableItem': 12 | self.loaded = True 13 | return self 14 | 15 | def is_loaded(self): 16 | return self.loaded 17 | 18 | 19 | class SomeLazyLoadableCollection: 20 | def __init__(self, inner_list): 21 | self.inner_list = inner_list 22 | self.iterations = 0 23 | 24 | def __iter__(self): 25 | for item in self.inner_list: 26 | yield item.load() 27 | 28 | def __getitem__(self, item): 29 | return SomeLazyLoadableCollection([ 30 | item.load() 31 | for item in self.inner_list[item] 32 | ]) 33 | 34 | def __len__(self): 35 | return len(self.inner_list) 36 | 37 | 38 | def test_data_container_minibatch_should_be_lazy_and_use_getitem_when_data_is_lazy_loadable(): 39 | items = [LoadableItem() for _ in range(10)] 40 | data_inputs = SomeLazyLoadableCollection(items) 41 | expected_outputs = SomeLazyLoadableCollection([LoadableItem() for _ in range(10)]) 42 | data_container = DACT( 43 | data_inputs=data_inputs, 44 | expected_outputs=expected_outputs 45 | ) 46 | 47 | i = 0 48 | batch_size = 2 49 | for batch in data_container.minibatches(batch_size=batch_size): 50 | assert len(batch) == batch_size 51 | assert all(item.is_loaded() for item in data_inputs.inner_list[:(i * batch_size)]) 52 | for y in range((i + 1) * batch_size, len(data_inputs)): 53 | assert not items[y].is_loaded() 54 | i += 1 55 | 56 | 57 | @pytest.mark.parametrize('batch_size,include_incomplete_pass,default_value,expected_data_containers', [ 58 | (3, False, None, [ 59 | DACT(ids=[0, 1, 2], data_inputs=[0, 1, 2], expected_outputs=[10, 11, 12]), 60 | DACT(ids=[3, 4, 5], data_inputs=[3, 4, 5], expected_outputs=[13, 14, 15]), 61 | DACT(ids=[6, 7, 8], data_inputs=[6, 7, 8], expected_outputs=[16, 17, 18]), 62 | ]), 63 | (3, True, 0, [ 64 | DACT(ids=[0, 1, 2], data_inputs=[0, 1, 2], expected_outputs=[10, 11, 12]), 65 | DACT(ids=[3, 4, 5], data_inputs=[3, 4, 5], expected_outputs=[13, 14, 15]), 66 | DACT(ids=[6, 7, 8], data_inputs=[6, 7, 8], expected_outputs=[16, 17, 18]), 67 | DACT(ids=[0, 1, 2], data_inputs=[9, 0, 0], expected_outputs=[19, 0, 0]) 68 | ]), 69 | (3, True, StripAbsentValues(), [ 70 | DACT(ids=[0, 1, 2], data_inputs=[0, 1, 2], expected_outputs=[10, 11, 12]), 71 | DACT(ids=[3, 4, 5], data_inputs=[3, 4, 5], expected_outputs=[13, 14, 15]), 72 | DACT(ids=[6, 7, 8], data_inputs=[6, 7, 8], expected_outputs=[16, 17, 18]), 73 | DACT(ids=[9], data_inputs=[9], expected_outputs=[19]) 74 | ]) 75 | ]) 76 | def test_data_container_batching(batch_size, include_incomplete_pass, default_value, expected_data_containers): 77 | data_container = DACT( 78 | ids=[str(i) for i in range(10)], 79 | data_inputs=np.array(list(range(10))), 80 | expected_outputs=np.array(list(range(10, 20))) 81 | ) 82 | 83 | # When 84 | data_containers = [] 85 | for dc in data_container.minibatches( 86 | batch_size=batch_size, 87 | keep_incomplete_batch=include_incomplete_pass, 88 | default_value_data_inputs=default_value 89 | ): 90 | data_containers.append(dc) 91 | 92 | # Then 93 | assert len(expected_data_containers) == len(data_containers) 94 | for 
expected_data_container, actual_data_container in zip(expected_data_containers, data_containers): 95 | assert np.array_equal(expected_data_container.ids, actual_data_container.ids) 96 | assert np.array_equal(expected_data_container.data_inputs, actual_data_container.data_inputs) 97 | assert np.array_equal(expected_data_container.expected_outputs, actual_data_container.expected_outputs) 98 | -------------------------------------------------------------------------------- /testing_neuraxle/test_forcehandle_mixin.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from neuraxle.base import BaseStep 4 | from neuraxle.base import ExecutionContext as CX 5 | from neuraxle.base import ForceHandleIdentity, ForceHandleMixin 6 | from neuraxle.data_container import DataContainer as DACT 7 | from neuraxle.pipeline import Pipeline 8 | 9 | 10 | class BadForceHandleStep(ForceHandleMixin, BaseStep): 11 | def __init__(self): 12 | BaseStep.__init__(self) 13 | ForceHandleMixin.__init__(self) 14 | 15 | 16 | def test_raises_exception_if_method_not_redefined(tmpdir): 17 | # This is really just a roundabout way of testing the self._ensure_method_overriden function. 18 | with pytest.raises(NotImplementedError) as exception_info: 19 | BadForceHandleStep() 20 | 21 | assert "Please define _fit_data_container" in exception_info.value.args[0] 22 | assert "in BadForceHandleStep" in exception_info.value.args[0] 23 | 24 | def _fit_data_container(self, data_container: DACT, context: CX): 25 | return self 26 | BadForceHandleStep._fit_data_container = _fit_data_container 27 | 28 | with pytest.raises(NotImplementedError) as exception_info: 29 | BadForceHandleStep() 30 | 31 | assert "Please define _fit_transform_data_container" in exception_info.value.args[0] 32 | assert "in BadForceHandleStep" in exception_info.value.args[0] 33 | 34 | def _fit_transform_data_container(self, data_container: DACT, context: CX): 35 | return self, data_container 36 | BadForceHandleStep._fit_transform_data_container = _fit_transform_data_container 37 | 38 | with pytest.raises(NotImplementedError) as exception_info: 39 | BadForceHandleStep() 40 | 41 | assert "Please define _transform_data_container" in exception_info.value.args[0] 42 | assert "in BadForceHandleStep" in exception_info.value.args[0] 43 | 44 | def _transform_data_container(self, data_container: DACT, context: CX): 45 | return data_container 46 | BadForceHandleStep._transform_data_container = _transform_data_container 47 | 48 | # Should not raise any error now. 
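# (All three handler methods have now been monkey-patched onto BadForceHandleStep, so the ForceHandleMixin override check is satisfied.)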
49 | BadForceHandleStep() 50 | 51 | 52 | def test_forcehandleidentity_does_not_crash(tmpdir): 53 | p = Pipeline([ 54 | ForceHandleIdentity() 55 | ]) 56 | data_inputs = np.array([0, 1, 2, 3]) 57 | expected_outputs = data_inputs * 2 58 | p.fit(data_inputs, expected_outputs) 59 | p.fit_transform(data_inputs, expected_outputs) 60 | p.transform(data_inputs=data_inputs) 61 | -------------------------------------------------------------------------------- /testing_neuraxle/test_full_pipeline_dump.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | from neuraxle.base import ExecutionContext as CX 5 | from neuraxle.base import Identity, StepWithContext 6 | from neuraxle.pipeline import Pipeline 7 | from neuraxle.steps.misc import FitTransformCallbackStep, TapeCallbackFunction 8 | from neuraxle.steps.output_handlers import OutputTransformerWrapper 9 | 10 | PIPELINE_NAME = 'saved_pipeline' 11 | 12 | DATA_INPUTS = np.array(range(10, 20)) 13 | EXPECTED_OUTPUTS = np.array(range(20, 30)) 14 | 15 | 16 | def test_load_full_dump_from_pipeline_name(tmpdir): 17 | # Given 18 | tape_fit_callback_function = TapeCallbackFunction() 19 | tape_transform_callback_function = TapeCallbackFunction() 20 | pipeline: StepWithContext = Pipeline([ 21 | ('step_a', Identity()), 22 | ('step_b', OutputTransformerWrapper( 23 | FitTransformCallbackStep(tape_fit_callback_function, tape_transform_callback_function) 24 | )) 25 | ]).set_name(PIPELINE_NAME).with_context(CX(tmpdir)) 26 | 27 | # When 28 | pipeline, _ = pipeline.fit_transform(DATA_INPUTS, EXPECTED_OUTPUTS) 29 | 30 | step_b_wrapped_step = pipeline.wrapped['step_b'].wrapped 31 | assert np.array_equal(step_b_wrapped_step.transform_callback_function.data[0], EXPECTED_OUTPUTS) 32 | assert np.array_equal(step_b_wrapped_step.fit_callback_function.data[0][0], EXPECTED_OUTPUTS) 33 | assert np.array_equal(step_b_wrapped_step.fit_callback_function.data[0][1], None) 34 | 35 | pipeline.save(CX(tmpdir), full_dump=True) 36 | 37 | # Then 38 | loaded_pipeline = CX(tmpdir).load(PIPELINE_NAME) 39 | 40 | assert isinstance(loaded_pipeline, Pipeline) 41 | assert isinstance(loaded_pipeline['step_a'], Identity) 42 | assert isinstance(loaded_pipeline['step_b'], OutputTransformerWrapper) 43 | 44 | loaded_step_b_wrapped_step = loaded_pipeline['step_b'].wrapped 45 | assert np.array_equal(loaded_step_b_wrapped_step.transform_callback_function.data[0], EXPECTED_OUTPUTS) 46 | assert np.array_equal(loaded_step_b_wrapped_step.fit_callback_function.data[0][0], EXPECTED_OUTPUTS) 47 | assert np.array_equal(loaded_step_b_wrapped_step.fit_callback_function.data[0][1], None) 48 | 49 | 50 | def test_load_full_dump_from_path(tmpdir): 51 | # Given 52 | tape_fit_callback_function = TapeCallbackFunction() 53 | tape_transform_callback_function = TapeCallbackFunction() 54 | pipeline = Pipeline([ 55 | ('step_a', Identity()), 56 | ('step_b', OutputTransformerWrapper( 57 | FitTransformCallbackStep(tape_fit_callback_function, tape_transform_callback_function) 58 | )) 59 | ]).set_name(PIPELINE_NAME).with_context(CX(tmpdir)) 60 | 61 | # When 62 | pipeline, _ = pipeline.fit_transform(DATA_INPUTS, EXPECTED_OUTPUTS) 63 | pipeline.save(CX(tmpdir), full_dump=True) 64 | 65 | # Then 66 | loaded_pipeline = CX(tmpdir).load(os.path.join(PIPELINE_NAME, 'step_b')) 67 | 68 | assert isinstance(loaded_pipeline, OutputTransformerWrapper) 69 | loaded_step_b_wrapped_step = loaded_pipeline.wrapped 70 | assert 
np.array_equal(loaded_step_b_wrapped_step.transform_callback_function.data[0], EXPECTED_OUTPUTS) 71 | assert np.array_equal(loaded_step_b_wrapped_step.fit_callback_function.data[0][0], EXPECTED_OUTPUTS) 72 | assert np.array_equal(loaded_step_b_wrapped_step.fit_callback_function.data[0][1], None) 73 | -------------------------------------------------------------------------------- /testing_neuraxle/test_metastep_mixin.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import typing 3 | from typing import Generic, TypeVar 4 | 5 | import pytest 6 | from neuraxle.base import BaseService, MetaStep, NonFittableMixin 7 | from neuraxle.pipeline import Pipeline 8 | from neuraxle.union import Identity 9 | 10 | from testing_neuraxle.mocks.step_mocks import SomeMetaStepWithHyperparams 11 | 12 | 13 | def test_metastepmixin_set_train_should_set_train_to_false(): 14 | p = MetaStep(Pipeline([ 15 | Identity() 16 | ])) 17 | 18 | p.set_train(False) 19 | 20 | assert not p.is_train 21 | assert not p.wrapped[0].is_train 22 | assert not p.wrapped.is_train 23 | 24 | 25 | def test_metastepmixin_set_train_should_set_train_to_true(): 26 | p = MetaStep(Pipeline([ 27 | Identity() 28 | ])) 29 | 30 | assert p.is_train 31 | assert p.wrapped[0].is_train 32 | assert p.wrapped.is_train 33 | 34 | 35 | def test_basestep_str_representation_works_correctly(): 36 | output = str(SomeMetaStepWithHyperparams()) 37 | assert output == "SomeMetaStepWithHyperparams(SomeStepWithHyperparams(name='MockStep'))" 38 | 39 | 40 | def test_subtyping_of_metastep_works_correctly(): 41 | some_step: MetaStep[Identity] = MetaStep(Identity()) 42 | 43 | assert issubclass(MetaStep, Generic) 44 | assert isinstance(some_step, MetaStep) 45 | assert isinstance(some_step.get_step(), Identity) 46 | 47 | 48 | @pytest.mark.skipif(sys.version_info < (3, 8), reason="Python 3.8 or more needed") 49 | def test_typable_mixin_can_hold_type_annotation(tmpdir): 50 | # Testing the type annotation "MetaStep[MyService]": 51 | wrapped_service: MetaStep[Identity] = MetaStep(Identity()) 52 | 53 | g: Generic = wrapped_service.__orig_bases__[-1] 54 | assert isinstance(wrapped_service.get_step(), g.__parameters__[0].__bound__) 55 | bt: TypeVar = typing.get_args(g)[0] 56 | assert isinstance(wrapped_service.get_step(), bt.__bound__) 57 | 58 | assert isinstance(wrapped_service.get_step(), Identity) 59 | assert isinstance(wrapped_service.get_step(), NonFittableMixin) 60 | assert isinstance(wrapped_service.get_step(), BaseService) 61 | -------------------------------------------------------------------------------- /testing_neuraxle/test_optional.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from neuraxle.hyperparams.space import HyperparameterSamples 4 | from neuraxle.steps.flow import OptionalStep 5 | from neuraxle.steps.numpy import MultiplyByN 6 | 7 | 8 | def test_optional_should_disable_wrapped_step_when_disabled(): 9 | p = OptionalStep(MultiplyByN(2), nullified_return_value=[]).set_hyperparams(HyperparameterSamples({ 10 | 'enabled': False 11 | })) 12 | data_inputs = np.array(list(range(10))) 13 | 14 | outputs = p.transform(data_inputs) 15 | 16 | assert outputs == [] 17 | 18 | 19 | def test_optional_should_enable_wrapped_step_when_enabled(): 20 | p = OptionalStep(MultiplyByN(2), nullified_return_value=[]).set_hyperparams(HyperparameterSamples({ 21 | 'enabled': True 22 | })) 23 | data_inputs = np.array(list(range(10))) 24 | 25 | outputs = 
p.transform(data_inputs) 26 | 27 | assert np.array_equal(outputs, data_inputs * 2) 28 | 29 | -------------------------------------------------------------------------------- /testing_neuraxle/test_pipeline_fitted_step_checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Neuraxio/Neuraxle/af917c984241178436a759be3b830e6d8b03245f/testing_neuraxle/test_pipeline_fitted_step_checkpoint.py -------------------------------------------------------------------------------- /testing_neuraxle/test_pipeline_setup_teardown.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Tuple 2 | 3 | import pytest 4 | from neuraxle.base import BaseService, BaseStep 5 | from neuraxle.base import ExecutionContext as CX 6 | from neuraxle.base import Identity, MetaStep, NamedStepsList, _HasChildrenMixin 7 | from neuraxle.hyperparams.space import RecursiveDict 8 | from neuraxle.pipeline import Pipeline 9 | 10 | from testing_neuraxle.test_pipeline import SomeStep 11 | 12 | 13 | class SomePipeline(Pipeline): 14 | def __init__(self, steps: NamedStepsList): 15 | Pipeline.__init__(self, steps) 16 | self.teared_down = False 17 | 18 | def teardown(self) -> 'BaseStep': 19 | self.teared_down = True 20 | return Pipeline.teardown(self) 21 | 22 | 23 | class SomeException(BaseStep): 24 | def fit_transform(self, data_inputs, expected_outputs=None) -> Tuple['BaseStep', Any]: 25 | raise Exception() 26 | 27 | def fit(self, data_inputs, expected_outputs=None) -> Tuple['BaseStep', Any]: 28 | raise Exception() 29 | 30 | def transform(self, data_inputs, expected_outputs=None) -> Tuple['BaseStep', Any]: 31 | raise Exception() 32 | 33 | 34 | class SomeStepSetup(SomeStep): 35 | def __init__(self): 36 | SomeStep.__init__(self) 37 | self.called_with = None 38 | 39 | 40 | def test_fit_transform_should_setup_pipeline_and_steps(): 41 | step_setup = SomeStepSetup() 42 | p = SomePipeline([ 43 | step_setup 44 | ]) 45 | 46 | assert not p.is_initialized 47 | assert not step_setup.is_initialized 48 | 49 | p.fit_transform([1], [1]) 50 | 51 | assert p.is_initialized 52 | assert step_setup.is_initialized 53 | 54 | 55 | def test_transform_should_setup_pipeline_and_steps(): 56 | step_setup = SomeStepSetup() 57 | p = SomePipeline([ 58 | step_setup 59 | ]) 60 | assert not p.is_initialized 61 | assert not step_setup.is_initialized 62 | 63 | p.transform([1]) 64 | 65 | assert p.is_initialized 66 | assert step_setup.is_initialized 67 | 68 | 69 | def test_fit_should_setup_pipeline_and_steps(): 70 | step_setup = SomeStepSetup() 71 | p = SomePipeline([ 72 | step_setup 73 | ]) 74 | assert not p.is_initialized 75 | assert not step_setup.is_initialized 76 | 77 | p.fit([1], [1]) 78 | 79 | assert p.is_initialized 80 | assert step_setup.is_initialized 81 | 82 | 83 | class SomeService(BaseService): 84 | pass 85 | 86 | 87 | @pytest.mark.parametrize('base_service', [ 88 | Identity(), 89 | MetaStep(Identity()), 90 | SomePipeline([SomeStepSetup()]) 91 | ]) 92 | def test_that_steps_are_setuppeable(base_service: BaseService, tmpdir): 93 | assert not base_service.is_initialized 94 | _verify_subservices_initialization(base_service, False) 95 | base_service.setup(CX(tmpdir)) 96 | _verify_subservices_initialization(base_service, True) 97 | base_service.teardown() 98 | _verify_subservices_initialization(base_service, False) 99 | 100 | 101 | def _verify_subservices_initialization(sub_service, is_initialized: bool): 102 | assert 
sub_service.is_initialized == is_initialized 103 | if isinstance(sub_service, _HasChildrenMixin): 104 | for child in sub_service.get_children(): 105 | _verify_subservices_initialization(child, is_initialized) 106 | 107 | 108 | @pytest.mark.parametrize('base_service', [ 109 | Identity(), 110 | MetaStep(Identity()), 111 | SomePipeline([SomeStepSetup()]), 112 | CX(), 113 | CX().set_service_locator({ 114 | Identity: Identity(), 115 | SomeService: SomeService() 116 | }), 117 | CX().set_service_locator({ 118 | Pipeline: Pipeline([SomeStepSetup()]) 119 | }) 120 | ]) 121 | def test_that_steps_are_applyable_with_name(base_service: BaseService, tmpdir): 122 | 123 | names = base_service.apply(lambda self: RecursiveDict({"name": self.get_name()})) 124 | 125 | _verify_subservices_names(base_service, names) 126 | 127 | 128 | def _verify_subservices_names(sub_service, sub_service_name: RecursiveDict): 129 | assert sub_service.name == sub_service_name["name"], f"Not equal: {sub_service.name} != {sub_service_name['name']}." 130 | if isinstance(sub_service, _HasChildrenMixin): 131 | for child in sub_service.get_children(): 132 | _verify_subservices_names(child, sub_service_name[child.name]) 133 | -------------------------------------------------------------------------------- /testing_neuraxle/test_recursive_arguments.py: -------------------------------------------------------------------------------- 1 | from neuraxle.base import _RecursiveArguments 2 | from neuraxle.hyperparams.space import HyperparameterSamples 3 | 4 | 5 | def test_recursive_arguments_should_get_root_level(): 6 | ra = _RecursiveArguments(kwargs={'hyperparams': HyperparameterSamples({ 7 | 'hp0': 0, 8 | 'hp1': 1, 9 | 'pipeline__stepa__hp2': 2, 10 | 'pipeline__stepb__hp3': 3 11 | })}) 12 | 13 | root_ra = ra[None] 14 | 15 | assert not root_ra.args 16 | assert root_ra.kwargs == {'hyperparams': HyperparameterSamples({ 17 | 'hp0': 0, 18 | 'hp1': 1 19 | })} 20 | 21 | 22 | def test_recursive_arguments_should_get_recursive_levels(): 23 | ra = _RecursiveArguments(kwargs={'hyperparams': HyperparameterSamples({ 24 | 'hp0': 0, 25 | 'hp1': 1, 26 | 'stepa__hp2': 2, 27 | 'stepb__hp3': 3, 28 | 'stepb__stepd__hp4': 4 29 | })}) 30 | 31 | ra = ra['stepb'] 32 | 33 | assert not ra.args 34 | assert ra.kwargs == {'hyperparams': HyperparameterSamples({ 35 | 'hp3': 3, 36 | 'stepd__hp4': 4 37 | })} 38 | 39 | 40 | def test_recursive_arguments_should_have_copy_constructor(): 41 | ra = _RecursiveArguments( 42 | ra=_RecursiveArguments(kwargs={'hyperparams': HyperparameterSamples({ 43 | 'hp0': 0, 44 | 'hp1': 1 45 | })}), 46 | ) 47 | 48 | assert not ra.args 49 | assert ra.kwargs == {'hyperparams': HyperparameterSamples({ 50 | 'hp0': 0, 51 | 'hp1': 1, 52 | })} 53 | -------------------------------------------------------------------------------- /testing_neuraxle/test_recursive_dict.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from neuraxle.hyperparams.space import RecursiveDict, HyperparameterSamples 4 | 5 | POINT_SEPARATOR = '.' 
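# Note: a non-default separator. For example, RecursiveDict({'a.b': 1}, separator='.') would nest 'b' under 'a', whereas the default '__' separator would treat 'a.b' as a single flat key. The copy-constructor test below checks that this separator is preserved.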
6 | 7 | 8 | @pytest.mark.parametrize("separator", ["__", ".", "___"]) 9 | def test_recursive_dict_to_flat(separator): 10 | dict_values = { 11 | 'hp': 1, 12 | 'stepa': { 13 | 'hp': 2, 14 | 'stepb': { 15 | 'hp': 3 16 | } 17 | } 18 | } 19 | r = RecursiveDict(separator=separator, **dict_values) 20 | 21 | r = r.to_flat_dict() 22 | 23 | expected_dict_values = { 24 | 'hp': 1, 25 | 'stepa{}hp'.format(separator): 2, 26 | 'stepa{0}stepb{0}hp'.format(separator): 3 27 | } 28 | assert r == expected_dict_values 29 | 30 | 31 | def test_recursive_dict_to_flat_different_separator(): 32 | dict_values = { 33 | 'hp': 1, 34 | 'stepa': { 35 | 'hp': 2, 36 | 'stepb': { 37 | 'hp': 3 38 | } 39 | } 40 | } 41 | r = RecursiveDict(separator='__', **dict_values) 42 | r['stepa'] = RecursiveDict(r['stepa'], separator='.') 43 | r['stepa']['stepb'] = RecursiveDict(r['stepa']['stepb'], separator='$$$') 44 | 45 | nested_r = r.to_nested_dict() 46 | r = r.to_flat_dict() 47 | 48 | expected_dict_values = { 49 | 'hp': 1, 50 | 'stepa__hp': 2, 51 | 'stepa__stepb.hp': 3 52 | } 53 | assert r == expected_dict_values 54 | assert nested_r == dict_values 55 | 56 | def test_recursive_dict_to_nested_dict_constructor(): 57 | dict_values = { 58 | 'hp': 1, 59 | 'stepa__hp': 2, 60 | 'stepa__stepb__hp': 3 61 | } 62 | r = HyperparameterSamples(**dict_values) 63 | 64 | expected_dict_values = { 65 | 'hp': 1, 66 | 'stepa': { 67 | 'hp': 2, 68 | 'stepb': { 69 | 'hp': 3 70 | } 71 | } 72 | } 73 | assert r == HyperparameterSamples(**expected_dict_values) 74 | assert r.to_nested_dict() == expected_dict_values 75 | 76 | 77 | def test_recursive_dict_get_item(): 78 | dict_values = { 79 | 'hp': 1, 80 | 'stepa__hp': 2, 81 | 'stepa__stepb__hp': 3 82 | } 83 | r = HyperparameterSamples(**dict_values) 84 | 85 | assert r[None] == {'hp': 1} 86 | assert r["hp"] == 1 87 | assert r['stepa'].to_flat_dict() == {'hp': 2, 'stepb__hp': 3} 88 | assert r["stepa__hp"] == 2 89 | assert r["stepa"][None] == {'hp':2} 90 | assert r['stepa__stepb'].to_flat_dict() == {'hp': 3} 91 | assert r['stepa__stepb'][None] == {'hp': 3} 92 | assert r['stepa__stepb__hp'] == 3 93 | 94 | 95 | def test_hyperparams_to_nested_dict_constructor(): 96 | dict_values = { 97 | 'hp': 1, 98 | 'stepa__hp': 2, 99 | 'stepa__stepb__hp': 3 100 | } 101 | r = HyperparameterSamples(dict_values) 102 | 103 | expected_dict_values = { 104 | 'hp': 1, 105 | 'stepa': { 106 | 'hp': 2, 107 | 'stepb': { 108 | 'hp': 3 109 | } 110 | } 111 | } 112 | assert r.to_nested_dict() == expected_dict_values 113 | assert r == HyperparameterSamples(expected_dict_values) 114 | 115 | 116 | def test_recursive_dict_copy_constructor(): 117 | dict_values = { 118 | 'hp': 1, 119 | 'stepa__hp': 2, 120 | 'stepa__stepb__hp': 3 121 | } 122 | r = RecursiveDict(RecursiveDict(**dict_values), separator='__') 123 | 124 | assert r == RecursiveDict(**dict_values) 125 | 126 | 127 | def test_recursive_dict_copy_constructor_should_set_separator(): 128 | dict_values = { 129 | 'hp': 1, 130 | 'stepa__hp': 2, 131 | 'stepa__stepb__hp': 3 132 | } 133 | r = RecursiveDict(RecursiveDict(**dict_values, separator=POINT_SEPARATOR)) 134 | 135 | assert r.separator == POINT_SEPARATOR 136 | 137 | 138 | def test_recursive_dict_should_raise_when_item_missing(): 139 | with pytest.raises(KeyError): 140 | r = RecursiveDict() 141 | missing = r['missing'] 142 | 143 | 144 | @pytest.mark.parametrize("dict_values", 145 | [{ 146 | 'hp': 1, 147 | 'stepa__hp': 2, 148 | 'stepa__stepb__hp': 3 149 | },{ 150 | "stepa__hp1":1, 151 | 'stepa__hp2': 2, 152 | 'stepa__stepb__hp': 3 153 | }]) 154 | 
def test_hyperparams_copy_constructor(dict_values): 155 | r = HyperparameterSamples(HyperparameterSamples(**dict_values)) 156 | assert r == HyperparameterSamples(**dict_values) 157 | 158 | 159 | def test_hyperparams_to_flat(): 160 | dict_values = { 161 | 'hp': 1, 162 | 'stepa': { 163 | 'hp': 2, 164 | 'stepb': { 165 | 'hp': 3 166 | } 167 | } 168 | } 169 | r = HyperparameterSamples(**dict_values) 170 | 171 | r = r.to_flat_dict() 172 | 173 | expected_dict_values = { 174 | 'hp': 1, 175 | 'stepa__hp': 2, 176 | 'stepa__stepb__hp': 3 177 | } 178 | assert r == expected_dict_values 179 | -------------------------------------------------------------------------------- /testing_neuraxle/test_step_saving.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | from joblib import dump 5 | from py._path.local import LocalPath 6 | from pprint import pprint 7 | 8 | from neuraxle.hyperparams.space import RecursiveDict 9 | from neuraxle.base import CX, StepWithContext, TruncableJoblibStepSaver 10 | from neuraxle.pipeline import Pipeline 11 | from neuraxle.steps.numpy import MultiplyByN 12 | 13 | OUTPUT = "OUTPUT" 14 | ROOT = 'Pipeline' 15 | PIPELINE_2 = 'Pipeline2' 16 | SOME_STEPS = ['some_step0', 'some_step1', 'some_step2'] 17 | 18 | EXPECTED_OUTPUTS = [0, 48, 96, 144, 192, 240, 288, 336, 384, 432] 19 | 20 | 21 | def create_some_step_path(tmpdir, step_no=0, create_dir=False): 22 | if step_no == 0: 23 | path1 = os.path.join(tmpdir, ROOT, SOME_STEPS[step_no]) 24 | else: 25 | path1 = os.path.join(tmpdir, ROOT, PIPELINE_2, SOME_STEPS[step_no]) 26 | if create_dir: 27 | os.makedirs(path1) 28 | path2 = os.path.join(path1, '{0}.joblib'.format(SOME_STEPS[step_no])) 29 | return path2 30 | 31 | 32 | def create_pipeline2_path(tmpdir, create_dir=False): 33 | path1 = os.path.join(tmpdir, ROOT, PIPELINE_2) 34 | if create_dir: 35 | os.makedirs(path1) 36 | path2 = os.path.join(path1, '{0}.joblib'.format(PIPELINE_2)) 37 | return path2 38 | 39 | 40 | def create_root_path(tmpdir, create_dir=False): 41 | path1 = os.path.join(tmpdir, ROOT) 42 | if create_dir and not os.path.exists(os.path.join(tmpdir, ROOT)): 43 | os.makedirs(path1) 44 | path2 = os.path.join(path1, '{0}.joblib'.format(ROOT)) 45 | return path2 46 | 47 | 48 | def test_nested_pipeline_fit_transform_should_save_some_fitted_pipeline_steps(tmpdir: LocalPath): 49 | p: StepWithContext = create_pipeline(tmpdir) 50 | 51 | p, outputs = p.fit_transform(np.array(range(10)), np.array(range(10))) 52 | p.save() 53 | 54 | assert np.array_equal(outputs, EXPECTED_OUTPUTS) 55 | saved_paths = [ 56 | create_root_path(tmpdir), create_pipeline2_path(tmpdir), 57 | create_some_step_path(tmpdir, step_no=0), create_some_step_path(tmpdir, step_no=1), 58 | create_some_step_path(tmpdir, step_no=2) 59 | ] 60 | for path in saved_paths: 61 | assert os.path.exists(path), path 62 | 63 | 64 | def test_pipeline_transform_should_not_save_steps(tmpdir: LocalPath): 65 | p: StepWithContext = create_pipeline(tmpdir) 66 | 67 | outputs = p.transform(np.array(range(10))) 68 | p.wrapped.save(CX(tmpdir), full_dump=False) 69 | 70 | assert np.array_equal(outputs, EXPECTED_OUTPUTS) 71 | not_saved_paths = [ 72 | create_root_path(tmpdir), create_pipeline2_path(tmpdir), create_some_step_path(tmpdir, step_no=0), 73 | create_some_step_path(tmpdir, step_no=1), create_some_step_path(tmpdir, step_no=2)] 74 | for path in not_saved_paths: 75 | assert not os.path.exists(path), path 76 | 77 | 78 | def 
test_pipeline_fit_should_save_all_fitted_pipeline_steps(tmpdir: LocalPath): 79 | p: StepWithContext = create_pipeline(tmpdir) 80 | 81 | p = p.fit(np.array(range(10)), np.array(range(10))) 82 | p.save() 83 | 84 | saved_paths = [ 85 | create_root_path(tmpdir), create_pipeline2_path(tmpdir), 86 | create_some_step_path(tmpdir, step_no=0), create_some_step_path(tmpdir, step_no=1), 87 | create_some_step_path(tmpdir, step_no=2) 88 | ] 89 | for path in saved_paths: 90 | assert os.path.exists(path), path 91 | 92 | 93 | def test_pipeline_fit_transform_should_load_all_pipeline_steps(tmpdir: LocalPath): 94 | p = given_saved_pipeline(tmpdir) 95 | 96 | p, outputs = p.fit_transform(np.array(range(10)), np.array(range(10))) 97 | 98 | assert np.array_equal(outputs, EXPECTED_OUTPUTS) 99 | 100 | 101 | def test_pipeline_transform_should_load_all_pipeline_steps(tmpdir: LocalPath): 102 | p = given_saved_pipeline(tmpdir) 103 | 104 | outputs = p.transform(np.array(range(10))) 105 | 106 | assert np.array_equal(outputs, EXPECTED_OUTPUTS) 107 | 108 | 109 | def test_pipeline_fit_should_load_all_pipeline_steps(tmpdir: LocalPath): 110 | p = given_saved_pipeline(tmpdir) 111 | 112 | p = p.fit(np.array(range(10)), np.array(range(10))) 113 | 114 | assert p.wrapped[SOME_STEPS[0]].hyperparams['multiply_by'] == 2 115 | assert p.wrapped[PIPELINE_2][SOME_STEPS[1]].hyperparams['multiply_by'] == 4 116 | assert p.wrapped[PIPELINE_2][SOME_STEPS[2]].hyperparams['multiply_by'] == 6 117 | 118 | 119 | def given_saved_pipeline(tmpdir: LocalPath) -> StepWithContext: 120 | path = create_root_path(tmpdir, True) 121 | p = Pipeline([]).set_name(ROOT).with_context(CX(tmpdir)) 122 | dump(p, path) 123 | 124 | pipeline_2 = Pipeline([]).set_name(PIPELINE_2).with_context(CX(tmpdir)) 125 | pipeline_2.sub_steps_savers = [ 126 | (SOME_STEPS[0], []), 127 | (SOME_STEPS[1], []), 128 | ] 129 | dump(pipeline_2, create_pipeline2_path(tmpdir, True)) 130 | 131 | given_saved_some_step(multiply_by=2, step_no=0, path=create_some_step_path(tmpdir, step_no=0, create_dir=True)) 132 | given_saved_some_step(multiply_by=4, step_no=1, path=create_some_step_path(tmpdir, step_no=1, create_dir=True)) 133 | given_saved_some_step(multiply_by=6, step_no=2, path=create_some_step_path(tmpdir, step_no=2, create_dir=True)) 134 | 135 | p = create_pipeline(tmpdir) 136 | 137 | return p 138 | 139 | 140 | def create_pipeline(tmpdir) -> StepWithContext: 141 | return Pipeline([ 142 | (SOME_STEPS[0], MultiplyByN(multiply_by=2)), 143 | (PIPELINE_2, Pipeline([ 144 | (SOME_STEPS[1], MultiplyByN(multiply_by=4)), 145 | (SOME_STEPS[2], MultiplyByN(multiply_by=6)) 146 | ])) 147 | ]).set_name(ROOT).with_context(CX(tmpdir)) 148 | 149 | 150 | def given_saved_some_step(multiply_by, step_no, path): 151 | some_step1 = MultiplyByN(multiply_by=multiply_by) 152 | some_step1.name = SOME_STEPS[step_no] 153 | dump(some_step1, path) 154 | -------------------------------------------------------------------------------- /testing_neuraxle/test_truncable_steps.py: -------------------------------------------------------------------------------- 1 | from neuraxle.pipeline import Pipeline 2 | from neuraxle.steps.flow import TrainOnlyWrapper 3 | from testing_neuraxle.mocks.step_mocks import SomeSplitStep, SomeStep, SomeTruncableStep 4 | 5 | 6 | def test_truncable_steps_should_split_by_type(): 7 | pipeline = Pipeline([ 8 | SomeStep(), 9 | SomeStep(), 10 | SomeSplitStep(), 11 | SomeStep(), 12 | SomeStep(), 13 | SomeSplitStep(), 14 | SomeStep(), 15 | ]) 16 | 17 | sub_pipelines = 
pipeline.split(SomeSplitStep) 18 | 19 | assert 'SomeStep' in sub_pipelines[0] 20 | assert 'SomeStep1' in sub_pipelines[0] 21 | assert 'SomeSplitStep' in sub_pipelines[0] 22 | assert 'SomeStep2' in sub_pipelines[1] 23 | assert 'SomeStep3' in sub_pipelines[1] 24 | assert 'SomeSplitStep1' in sub_pipelines[1] 25 | assert 'SomeStep4' in sub_pipelines[2] 26 | 27 | 28 | def test_set_train_should_set_train_to_false(): 29 | pipeline = Pipeline([ 30 | SomeStep(), 31 | SomeStep(), 32 | Pipeline([ 33 | SomeStep(), 34 | ]) 35 | ]) 36 | 37 | pipeline.set_train(False) 38 | 39 | assert not pipeline.is_train 40 | assert not pipeline[0].is_train 41 | assert not pipeline[1].is_train 42 | assert not pipeline[2].is_train 43 | assert not pipeline[2][0].is_train 44 | 45 | 46 | def test_set_train_should_set_train_to_true(): 47 | pipeline = Pipeline([ 48 | SomeStep(), 49 | SomeStep(), 50 | Pipeline([ 51 | SomeStep(), 52 | ]) 53 | ]) 54 | 55 | assert pipeline.is_train 56 | assert pipeline[0].is_train 57 | assert pipeline[1].is_train 58 | assert pipeline[2].is_train 59 | assert pipeline[2][0].is_train 60 | 61 | 62 | def test_step_print_str_representation_works_correctly(): 63 | output = str(TrainOnlyWrapper(SomeTruncableStep())) 64 | assert output == """TrainOnlyWrapper(SomeTruncableStep([ 65 | SomeStepWithHyperparams(name='MockStep'), 66 | SomeStepWithHyperparams(name='MockStep1') 67 | ]))""" 68 | 69 | 70 | def test_step_repr_representation_works_correctly(): 71 | output = repr(TrainOnlyWrapper(SomeTruncableStep())) 72 | assert output == """TrainOnlyWrapper(SomeTruncableStep([ 73 | SomeStepWithHyperparams(name='MockStep', hyperparams=HyperparameterSamples([('learning_rate', 0.1), 74 | ('l2_weight_reg', 0.001), 75 | ('hidden_size', 32), 76 | ('num_layers', 3), 77 | ('num_lstm_layers', 1), 78 | ('use_xavier_init', True), 79 | ('use_max_pool_else_avg_pool', True), 80 | ('dropout_drop_proba', 0.5), 81 | ('momentum', 0.1)])), 82 | SomeStepWithHyperparams(name='MockStep1', hyperparams=HyperparameterSamples([('learning_rate', 0.1), 83 | ('l2_weight_reg', 0.001), 84 | ('hidden_size', 32), 85 | ('num_layers', 3), 86 | ('num_lstm_layers', 1), 87 | ('use_xavier_init', True), 88 | ('use_max_pool_else_avg_pool', True), 89 | ('dropout_drop_proba', 0.5), 90 | ('momentum', 0.1)])) 91 | ], hyperparams=HyperparameterSamples([('learning_rate', 0.1), 92 | ('l2_weight_reg', 0.001), 93 | ('hidden_size', 32), 94 | ('num_layers', 3), 95 | ('num_lstm_layers', 1), 96 | ('use_xavier_init', True), 97 | ('use_max_pool_else_avg_pool', True), 98 | ('dropout_drop_proba', 0.5), 99 | ('momentum', 0.1)])))""" 100 | -------------------------------------------------------------------------------- /testing_neuraxle/test_zip_data_container.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from neuraxle.data_container import ZipDataContainer, DACT 4 | 5 | 6 | def test_zip_data_container_should_merge_two_data_sources_together(): 7 | data_inputs_3d, expected_outputs_3d = _create_data_source((10, 10, 2)) 8 | data_inputs_2d, expected_outputs_2d = _create_data_source((10, 10)) 9 | data_container_2d = DACT(data_inputs=data_inputs_2d, expected_outputs=expected_outputs_2d) 10 | data_container = DACT(data_inputs=data_inputs_3d, expected_outputs=expected_outputs_3d) 11 | 12 | zip_data_container = ZipDataContainer.create_from(data_container, data_container_2d) 13 | 14 | assert zip_data_container.ids == data_container.ids 15 | for i, di in enumerate(zip_data_container.data_inputs): 16 | assert 
np.array_equal(di[0], data_inputs_3d[i]) 17 | assert np.array_equal(di[1], data_inputs_2d[i]) 18 | 19 | 20 | def test_zip_data_container_should_merge_1d_with_2d(): 21 | data_inputs_3d, expected_outputs_3d = _create_data_source((10, 10, 2)) 22 | data_inputs_1d, expected_outputs_1d = _create_data_source((10,)) 23 | data_container_1d = DACT(data_inputs=data_inputs_1d, expected_outputs=expected_outputs_1d) 24 | data_container = DACT(data_inputs=data_inputs_3d, expected_outputs=expected_outputs_3d) 25 | 26 | zip_data_container = ZipDataContainer.create_from(data_container, data_container_1d) 27 | 28 | assert zip_data_container.ids == data_container.ids 29 | for i, di in enumerate(zip_data_container.data_inputs): 30 | assert np.array_equal(di[0], data_inputs_3d[i]) 31 | assert np.array_equal(di[1], data_inputs_1d[i]) 32 | 33 | 34 | def test_zip_data_container_should_merge_multiple_data_sources_together(): 35 | data_inputs_3d, expected_outputs_3d = _create_data_source((10, 10, 2)) 36 | data_inputs_2d, expected_outputs_2d = _create_data_source((10, 10)) 37 | data_inputs_1d, expected_outputs_1d = _create_data_source((10,)) 38 | data_container_1d = DACT(data_inputs=data_inputs_1d, expected_outputs=expected_outputs_1d) 39 | data_container_2d = DACT(data_inputs=data_inputs_2d, expected_outputs=expected_outputs_2d) 40 | data_container = DACT(data_inputs=data_inputs_3d, expected_outputs=expected_outputs_3d) 41 | 42 | zip_data_container = ZipDataContainer.create_from(data_container, data_container_2d, data_container_1d) 43 | 44 | assert zip_data_container.ids == data_container.ids 45 | for i, di in enumerate(zip_data_container.data_inputs): 46 | assert np.array_equal(di[0], data_inputs_3d[i]) 47 | assert np.array_equal(di[1], data_inputs_2d[i]) 48 | assert np.array_equal(di[2], data_inputs_1d[i]) 49 | 50 | def test_zip_data_container_should_concatenate_inner_features(): 51 | data_inputs_3d, expected_outputs_3d = _create_data_source((10, 10, 2)) 52 | data_inputs_2d, expected_outputs_2d = _create_data_source((10, 10)) 53 | data_container_2d = DACT(data_inputs=data_inputs_2d, expected_outputs=expected_outputs_2d) 54 | data_container = DACT(data_inputs=data_inputs_3d, expected_outputs=expected_outputs_3d) 55 | 56 | zip_data_container = ZipDataContainer.create_from(data_container, data_container_2d) 57 | zip_data_container.concatenate_inner_features() 58 | 59 | assert np.array_equal(np.array(zip_data_container.data_inputs)[..., -1], data_container_2d.data_inputs) 60 | assert np.array_equal(np.array(zip_data_container.expected_outputs), expected_outputs_3d) 61 | 62 | 63 | def _create_data_source(shape): 64 | data_inputs = np.random.random(shape).astype(np.float32) 65 | expected_outputs = np.random.random(shape).astype(np.float32) 66 | return data_inputs, expected_outputs 67 | --------------------------------------------------------------------------------