├── setup.cfg ├── hypernets ├── pipeline │ └── __init__.py ├── tests │ ├── board │ │ └── __init__.py │ ├── model │ │ ├── __init__.py │ │ └── plain_model_test.py │ ├── experiment │ │ ├── __init__.py │ │ ├── run_export_experiment_report.py │ │ └── general_experiment_test.py │ ├── hyperctl │ │ ├── __init__.py │ │ ├── minimum_batch.json │ │ ├── plain_job_script.py │ │ ├── job_template.yml │ │ ├── test_cli.py │ │ ├── local_batch.json │ │ ├── remote_batch.json │ │ └── test_batch.py │ ├── pipeline │ │ └── __init__.py │ ├── tabular │ │ ├── __init__.py │ │ ├── ensemble │ │ │ └── __init__.py │ │ ├── lifelong_learning │ │ │ └── __init__.py │ │ ├── tb_cuml │ │ │ ├── __init__.py │ │ │ ├── data_cleaner_test.py │ │ │ ├── psudo_labeling_test.py │ │ │ ├── feature_importance_test.py │ │ │ └── cache_test.py │ │ ├── tb_dask │ │ │ ├── data_cleaner_test.py │ │ │ ├── psudo_labeling_test.py │ │ │ ├── feature_importance_test.py │ │ │ ├── __init__.py │ │ │ ├── toolbox_test.py │ │ │ └── cache_test.py │ │ ├── utils_test.py │ │ ├── cache_test.py │ │ ├── psudo_labeling_test.py │ │ └── toolbox_test.py │ ├── core │ │ ├── __init__.py │ │ └── mutable_test.py │ ├── searchers │ │ ├── __init__.py │ │ ├── test_moo.py │ │ ├── playback_test.py │ │ └── test_genetic.py │ ├── trial │ │ ├── __init__.py │ │ └── trial_store_test.py │ ├── dispatchers │ │ ├── __init__.py │ │ └── process_test.py │ ├── utils │ │ ├── __init__.py │ │ ├── perf_test.py │ │ ├── estimators_test.py │ │ ├── tic_toc_test.py │ │ ├── common_test.py │ │ └── tuning_test.py │ ├── __init__.py │ └── discriminators │ │ ├── base_test.py │ │ ├── __init__.py │ │ └── percentile.py ├── dispatchers │ ├── dask │ │ └── __init__.py │ ├── predict │ │ ├── __init__.py │ │ └── grpc │ │ │ ├── __init__.py │ │ │ ├── proto │ │ │ ├── __init__.py │ │ │ ├── readme.txt │ │ │ ├── predict.proto │ │ │ └── predict_pb2_grpc.py │ │ │ ├── predict_client.py │ │ │ └── predict_service.py │ ├── cluster │ │ ├── grpc │ │ │ ├── __init__.py │ │ │ └── proto │ │ │ │ ├── __init__.py │ │ │ │ ├── readme.txt │ │ │ │ └── spec.proto │ │ └── __init__.py │ ├── process │ │ ├── grpc │ │ │ ├── __init__.py │ │ │ └── proto │ │ │ │ ├── __init__.py │ │ │ │ ├── readme.txt │ │ │ │ └── proc.proto │ │ ├── __init__.py │ │ ├── grpc_process.py │ │ └── local_process.py │ ├── run_predict_server.py │ ├── run_broker.py │ ├── run_predict.py │ ├── __init__.py │ ├── cfg.py │ └── run.py ├── tabular │ ├── datasets │ │ ├── __init__.py │ │ ├── boston.csv.gz │ │ ├── bank-uci.csv.gz │ │ ├── adult-uci.csv.gz │ │ └── dsutils.py │ ├── lifelong_learning │ │ ├── _validation.py │ │ └── __init__.py │ ├── evaluator │ │ ├── __init__.py │ │ ├── h2o.py │ │ ├── tpot.py │ │ ├── auto_sklearn.py │ │ └── hyperdt.py │ ├── cuml_ex │ │ ├── __init__.py │ │ ├── _estimator_detector.py │ │ ├── _pseudo_labeling.py │ │ ├── _drift_detection.py │ │ ├── _data_hasher.py │ │ ├── _ensemble.py │ │ ├── _persistence.py │ │ ├── _dataframe_mapper.py │ │ └── _data_cleaner.py │ ├── ensemble │ │ ├── __init__.py │ │ ├── misc.py │ │ └── stacking.py │ ├── feature_generators │ │ ├── _base.py │ │ └── __init__.py │ ├── dask_ex │ │ ├── _collinearity.py │ │ ├── _feature_generators.py │ │ ├── _drift_detection.py │ │ ├── _model_selection.py │ │ ├── __init__.py │ │ ├── _data_hasher.py │ │ ├── _dataframe_mapper.py │ │ └── _data_cleaner.py │ └── __init__.py ├── hyperctl │ ├── __init__.py │ ├── consts.py │ ├── utils.py │ └── api.py ├── core │ ├── config.py │ ├── stateful.py │ ├── dispatcher.py │ ├── random_state.py │ ├── context.py │ ├── __init__.py │ ├── pareto.py │ ├── mutables.py │ ├── 
searcher.py │ ├── objective.py │ └── meta_learner.py ├── __init__.py ├── examples │ ├── __init__.py │ └── smoke_testing.py ├── server │ └── __init__.py ├── model │ └── __init__.py ├── conf │ └── __init__.py ├── experiment │ ├── __init__.py │ ├── general.py │ └── cfg.py ├── utils │ ├── const.py │ └── __init__.py ├── searchers │ ├── random_searcher.py │ ├── playback_searcher.py │ └── grid_searcher.py └── discriminators │ └── __init__.py ├── requirements-zhcn.txt ├── docs ├── source │ ├── examples.md │ ├── faq.md │ ├── tuning.md │ ├── _static │ │ └── css │ │ │ └── my_theme.css │ ├── images │ │ ├── DAT2.1.png │ │ ├── DAT2.5.png │ │ ├── Hypernets.png │ │ ├── moead_pbi.png │ │ ├── DAT_latest.png │ │ ├── enas_arch_1.png │ │ ├── enas_arch_2.png │ │ ├── crowding_distance.png │ │ ├── enas_arch_sample.png │ │ ├── nsga2_procedure.png │ │ ├── compete_experiment.png │ │ ├── connection_space_or.png │ │ ├── r_dominance_sorting.png │ │ ├── hypernets_search_space.png │ │ ├── notebook_plot_dataset.png │ │ ├── connection_space_repeat.png │ │ ├── excel_experiment_report.png │ │ ├── connection_space_optional.png │ │ ├── connection_space_permuation.png │ │ ├── connection_space_sequential.png │ │ ├── hyper_model_search_sequence.png │ │ ├── hypernets_conceptual_model.png │ │ ├── notebook_experiment_config.png │ │ ├── abstract_illustration_of_nas.png │ │ └── connection_space_inputchoice.png │ ├── modules.rst │ ├── release_notes.rst │ ├── hypernets.conf.rst │ ├── hypernets.server.rst │ ├── hypernets.tabular.cuml_ex.rst │ ├── hypernets.tabular.dask_ex.rst │ ├── hypernets.tabular.lifelong_learning.rst │ ├── hypernets.tabular.feature_generators.rst │ ├── hypernets.rst │ ├── hypernets.discriminators.rst │ ├── hypernets.tabular.datasets.rst │ ├── hypernets.dispatchers.dask.rst │ ├── hypernets.pipeline.rst │ ├── hypernets.dispatchers.predict.rst │ ├── hypernets.examples.rst │ ├── release_note_025.rst │ ├── hypernets.model.rst │ ├── release_note_030.rst │ ├── hypernets.dispatchers.cluster.grpc.proto.rst │ ├── hypernets.dispatchers.process.grpc.proto.rst │ ├── hypernets.dispatchers.predict.grpc.proto.rst │ ├── index.rst │ ├── hypernets.tabular.ensemble.rst │ ├── hypernets.dispatchers.predict.grpc.rst │ ├── hypernets.dispatchers.cluster.grpc.rst │ ├── hypernets.dispatchers.process.grpc.rst │ ├── hypernets.dispatchers.process.rst │ ├── hypernets.dispatchers.cluster.rst │ ├── hypermodels.rst │ ├── hypernets.experiment.rst │ ├── hypernets.utils.rst │ ├── hypernets.tabular.evaluator.rst │ ├── hypernets.dispatchers.rst │ ├── conf.py │ ├── overview.md │ ├── hypernets.hyperctl.rst │ └── hypernets.searchers.rst ├── requirements.txt ├── Makefile └── make.bat ├── requirements-fg.txt ├── requirements-tests.txt ├── requirements-board.txt ├── requirements-cuml.txt ├── DAT2.5.png ├── requirements-notebook.txt ├── requirements-extra.txt ├── requirements-dask.txt ├── .github ├── ISSUE_TEMPLATE │ ├── 90-other-issues.md │ ├── 30-feature-request.md │ └── 00-bug-issue.md └── workflows │ └── dist-builder.yml ├── requirements.txt ├── .readthedocs.yml ├── CONTRIBUTING.md └── .gitignore /setup.cfg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hypernets/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hypernets/tests/board/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hypernets/tests/model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements-zhcn.txt: -------------------------------------------------------------------------------- 1 | jieba 2 | -------------------------------------------------------------------------------- /docs/source/examples.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | -------------------------------------------------------------------------------- /hypernets/dispatchers/dask/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hypernets/tabular/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hypernets/tests/experiment/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hypernets/tests/hyperctl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hypernets/tests/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/source/faq.md: -------------------------------------------------------------------------------- 1 | # FAQ 2 | 3 | ## How...
-------------------------------------------------------------------------------- /hypernets/dispatchers/predict/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hypernets/dispatchers/cluster/grpc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hypernets/dispatchers/predict/grpc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hypernets/dispatchers/process/grpc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements-fg.txt: -------------------------------------------------------------------------------- 1 | featuretools>=0.23.0 2 | -------------------------------------------------------------------------------- /requirements-tests.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pytest-cov 3 | -------------------------------------------------------------------------------- /docs/source/tuning.md: -------------------------------------------------------------------------------- 1 | # Hyper-parameter Tuning 2 | -------------------------------------------------------------------------------- /hypernets/dispatchers/cluster/grpc/proto/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hypernets/dispatchers/predict/grpc/proto/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hypernets/dispatchers/process/grpc/proto/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements-board.txt: -------------------------------------------------------------------------------- 1 | # hboard 2 | ipywidgets 3 | -------------------------------------------------------------------------------- /requirements-cuml.txt: -------------------------------------------------------------------------------- 1 | cupy 2 | cudf 3 | cuml 4 | pynvml 5 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx_rtd_theme 2 | recommonmark 3 | 4 | -------------------------------------------------------------------------------- /DAT2.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/DAT2.5.png -------------------------------------------------------------------------------- /hypernets/hyperctl/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import api 2 | from .api import get_job_params 3 | -------------------------------------------------------------------------------- /docs/source/_static/css/my_theme.css: -------------------------------------------------------------------------------- 1 | .wy-nav-content { 2 | max-width: 1080px !important; 3 | } -------------------------------------------------------------------------------- /hypernets/core/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ -------------------------------------------------------------------------------- /requirements-notebook.txt: -------------------------------------------------------------------------------- 1 | jupyterlab 2 | ipywidgets 3 | jupyterlab_widgets 4 | # hboard-widget 5 | -------------------------------------------------------------------------------- /hypernets/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | __version__ = '0.3.2' 4 | -------------------------------------------------------------------------------- /hypernets/examples/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ -------------------------------------------------------------------------------- /hypernets/server/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ -------------------------------------------------------------------------------- /hypernets/tests/core/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ -------------------------------------------------------------------------------- /hypernets/tests/searchers/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ -------------------------------------------------------------------------------- /hypernets/tests/trial/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ -------------------------------------------------------------------------------- /hypernets/tests/dispatchers/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ -------------------------------------------------------------------------------- /hypernets/tests/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | -------------------------------------------------------------------------------- /docs/source/images/DAT2.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/DAT2.1.png -------------------------------------------------------------------------------- /docs/source/images/DAT2.5.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/DAT2.5.png -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | hypernets 2 | ========= 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | hypernets 8 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/ensemble/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ -------------------------------------------------------------------------------- /requirements-extra.txt: -------------------------------------------------------------------------------- 1 | paramiko 2 | #protobuf<4.0 3 | #grpcio>=1.24.0 4 | s3fs 5 | python-geohash 6 | #pyarrow 7 | -------------------------------------------------------------------------------- /docs/source/images/Hypernets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/Hypernets.png -------------------------------------------------------------------------------- /docs/source/images/moead_pbi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/moead_pbi.png -------------------------------------------------------------------------------- /docs/source/images/DAT_latest.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/DAT_latest.png -------------------------------------------------------------------------------- /docs/source/images/enas_arch_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/enas_arch_1.png -------------------------------------------------------------------------------- /docs/source/images/enas_arch_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/enas_arch_2.png -------------------------------------------------------------------------------- /hypernets/tests/tabular/lifelong_learning/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ -------------------------------------------------------------------------------- /hypernets/tabular/lifelong_learning/_validation.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | -------------------------------------------------------------------------------- /docs/source/images/crowding_distance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/crowding_distance.png -------------------------------------------------------------------------------- /docs/source/images/enas_arch_sample.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/enas_arch_sample.png -------------------------------------------------------------------------------- /docs/source/images/nsga2_procedure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/nsga2_procedure.png -------------------------------------------------------------------------------- /hypernets/tabular/datasets/boston.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/hypernets/tabular/datasets/boston.csv.gz -------------------------------------------------------------------------------- /docs/source/images/compete_experiment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/compete_experiment.png -------------------------------------------------------------------------------- /docs/source/images/connection_space_or.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/connection_space_or.png -------------------------------------------------------------------------------- /docs/source/images/r_dominance_sorting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/r_dominance_sorting.png -------------------------------------------------------------------------------- /hypernets/tabular/datasets/bank-uci.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/hypernets/tabular/datasets/bank-uci.csv.gz -------------------------------------------------------------------------------- /docs/source/images/hypernets_search_space.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/hypernets_search_space.png -------------------------------------------------------------------------------- /docs/source/images/notebook_plot_dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/notebook_plot_dataset.png -------------------------------------------------------------------------------- /hypernets/tabular/datasets/adult-uci.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/hypernets/tabular/datasets/adult-uci.csv.gz -------------------------------------------------------------------------------- /docs/source/images/connection_space_repeat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/connection_space_repeat.png -------------------------------------------------------------------------------- /docs/source/images/excel_experiment_report.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/excel_experiment_report.png 
-------------------------------------------------------------------------------- /docs/source/images/connection_space_optional.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/connection_space_optional.png -------------------------------------------------------------------------------- /docs/source/images/connection_space_permuation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/connection_space_permuation.png -------------------------------------------------------------------------------- /docs/source/images/connection_space_sequential.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/connection_space_sequential.png -------------------------------------------------------------------------------- /docs/source/images/hyper_model_search_sequence.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/hyper_model_search_sequence.png -------------------------------------------------------------------------------- /docs/source/images/hypernets_conceptual_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/hypernets_conceptual_model.png -------------------------------------------------------------------------------- /docs/source/images/notebook_experiment_config.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/notebook_experiment_config.png -------------------------------------------------------------------------------- /docs/source/images/abstract_illustration_of_nas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/abstract_illustration_of_nas.png -------------------------------------------------------------------------------- /docs/source/images/connection_space_inputchoice.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/connection_space_inputchoice.png -------------------------------------------------------------------------------- /hypernets/tabular/evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | from ._base import BaseEstimator, Evaluator 8 | -------------------------------------------------------------------------------- /hypernets/dispatchers/cluster/__init__.py: -------------------------------------------------------------------------------- 1 | from .cluster import Cluster 2 | from .driver_dispatcher import DriverDispatcher 3 | from .executor_dispatcher import ExecutorDispatcher 4 | -------------------------------------------------------------------------------- /hypernets/tabular/lifelong_learning/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 
__author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | from ._split import PrequentialSplit, select_valid_oof -------------------------------------------------------------------------------- /hypernets/model/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | from .estimator import CrossValidationEstimator, Estimator 8 | from .hyper_model import HyperModel -------------------------------------------------------------------------------- /hypernets/tabular/cuml_ex/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | from ._toolbox import CumlToolBox 6 | from ._transformer import Localizable, copy_attrs_as_local, as_local_if_possible 7 | -------------------------------------------------------------------------------- /requirements-dask.txt: -------------------------------------------------------------------------------- 1 | dask!=2023.2.1,!=2023.3.*,!=2023.4.*,!=2023.5.*,<2024.5.0 2 | distributed!=2023.2.1,!=2023.3.*,!=2023.4.*,!=2023.5.*,<2024.5.0 3 | #dask<=2023.2.0 4 | #distributed<=2023.2.0 5 | dask-ml<2025.0.0 6 | -------------------------------------------------------------------------------- /docs/source/release_notes.rst: -------------------------------------------------------------------------------- 1 | Release Notes 2 | ============= 3 | 4 | Release history: 5 | 6 | .. toctree:: 7 | :maxdepth: 1 8 | 9 | v0.2.5 10 | v0.3.0 11 | -------------------------------------------------------------------------------- /hypernets/conf/__init__.py: -------------------------------------------------------------------------------- 1 | from traitlets import Unicode, Unicode as String, Bool, Int, Float, Enum, List, Dict, Union 2 | 3 | from ._configuration import Configurable, configure, observe, configure_and_observe, generate_config_file 4 | -------------------------------------------------------------------------------- /hypernets/tests/hyperctl/minimum_batch.json: -------------------------------------------------------------------------------- 1 | { 2 | "job_command": "pwd", 3 | "jobs": [ 4 | { 5 | "params": { 6 | "learning_rate": 0.1 7 | } 8 | 9 | } 10 | ] 11 | } -------------------------------------------------------------------------------- /docs/source/hypernets.conf.rst: -------------------------------------------------------------------------------- 1 | hypernets.conf package 2 | ====================== 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. automodule:: hypernets.conf 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | -------------------------------------------------------------------------------- /docs/source/hypernets.server.rst: -------------------------------------------------------------------------------- 1 | hypernets.server package 2 | ======================== 3 | 4 | Module contents 5 | --------------- 6 | 7 | ..
automodule:: hypernets.server 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | -------------------------------------------------------------------------------- /hypernets/tests/hyperctl/plain_job_script.py: -------------------------------------------------------------------------------- 1 | from hypernets import hyperctl 2 | 3 | 4 | def main(): 5 | params = hyperctl.get_job_params() 6 | assert params 7 | print(params) 8 | 9 | 10 | if __name__ == '__main__': 11 | main() 12 | -------------------------------------------------------------------------------- /hypernets/tabular/ensemble/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | from .base_ensemble import BaseEnsemble 8 | from .stacking import StackingEnsemble 9 | from .voting import AveragingEnsemble, GreedyEnsemble 10 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/90-other-issues.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Other Issues 3 | about: Use this template for any other non-support related issues 4 | labels: 'type:others' 5 | 6 | --- 7 | 8 | This template is for miscellaneous issues not covered by the other issue categories. 9 | 10 | -------------------------------------------------------------------------------- /docs/source/hypernets.tabular.cuml_ex.rst: -------------------------------------------------------------------------------- 1 | hypernets.tabular.cuml\_ex package 2 | ================================== 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. automodule:: hypernets.tabular.cuml_ex 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | -------------------------------------------------------------------------------- /docs/source/hypernets.tabular.dask_ex.rst: -------------------------------------------------------------------------------- 1 | hypernets.tabular.dask\_ex package 2 | ================================== 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. automodule:: hypernets.tabular.dask_ex 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | -------------------------------------------------------------------------------- /hypernets/core/stateful.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | 6 | 7 | class Stateful: 8 | def __init__(self): 9 | pass 10 | 11 | def load_state(self): 12 | pass 13 | 14 | def save_state(self): 15 | pass 16 | 17 | def get_state_path(self): 18 | pass 19 | -------------------------------------------------------------------------------- /docs/source/hypernets.tabular.lifelong_learning.rst: -------------------------------------------------------------------------------- 1 | hypernets.tabular.lifelong\_learning package 2 | ============================================ 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. automodule:: hypernets.tabular.lifelong_learning 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | -------------------------------------------------------------------------------- /docs/source/hypernets.tabular.feature_generators.rst: -------------------------------------------------------------------------------- 1 | hypernets.tabular.feature\_generators package 2 | ============================================= 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. 
automodule:: hypernets.tabular.feature_generators 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | -------------------------------------------------------------------------------- /hypernets/dispatchers/cluster/grpc/proto/readme.txt: -------------------------------------------------------------------------------- 1 | requirements: 2 | grpcio 3 | grpcio-tools [ needed to run protoc ] 4 | 5 | run the following command to re-generate protobuf stub code for python: 6 | 7 | python -m grpc_tools.protoc --python_out=. --grpc_python_out=. -I. hypernets/dispatchers/cluster/grpc/proto/spec.proto 8 | -------------------------------------------------------------------------------- /hypernets/dispatchers/process/grpc/proto/readme.txt: -------------------------------------------------------------------------------- 1 | requirements: 2 | grpcio 3 | grpcio-tools [ needed to run protoc ] 4 | 5 | run the following command to re-generate protobuf stub code for python: 6 | 7 | python -m grpc_tools.protoc --python_out=. --grpc_python_out=. -I. hypernets/dispatchers/process/grpc/proto/proc.proto 8 | -------------------------------------------------------------------------------- /hypernets/dispatchers/predict/grpc/proto/readme.txt: -------------------------------------------------------------------------------- 1 | requirements: 2 | grpcio 3 | grpcio-tools [ needed to run protoc ] 4 | 5 | run the following command to re-generate protobuf stub code for python: 6 | 7 | python -m grpc_tools.protoc --python_out=. --grpc_python_out=. -I. hypernets/dispatchers/predict/grpc/proto/predict.proto 8 | -------------------------------------------------------------------------------- /hypernets/tabular/ensemble/misc.py: -------------------------------------------------------------------------------- 1 | try: 2 | from sklearn.metrics._scorer import _PredictScorer 3 | 4 | 5 | def is_predict_scorer(s): 6 | return isinstance(s, _PredictScorer) 7 | except ImportError: 8 | # sklearn 1.4.0 + 9 | def is_predict_scorer(s): 10 | return getattr(s, '_response_method', '') == 'predict' 11 | -------------------------------------------------------------------------------- /hypernets/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | import os 7 | import tempfile 8 | import time 9 | 10 | test_output_dir = tempfile.mkdtemp(prefix=time.strftime("hyn_test_%m%d%H%M_")) 11 | 12 | os.environ['DEEPTABLES_HOME'] = test_output_dir 13 | os.environ['HYPERNETS_HOME'] = test_output_dir 14 | -------------------------------------------------------------------------------- /hypernets/tests/utils/perf_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | 4 | import psutil 5 | 6 | from hypernets.utils import get_perf 7 | 8 | 9 | def test_get_perf(): 10 | proc = psutil.Process(os.getpid()) 11 | perf = get_perf(proc) 12 | assert isinstance(perf, OrderedDict) 13 | assert 'cpu_total' in perf.keys() 14 | -------------------------------------------------------------------------------- /docs/source/hypernets.rst: -------------------------------------------------------------------------------- 1 | hypernets package 2 | ================= 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | hypernets.experiment 11 | hypernets.searchers 12 | 13 | 14 | Module contents 15 | --------------- 16 | 17 | ..
automodule:: hypernets 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.16.5,<2.0.0 2 | pandas>=0.25.3 3 | scikit-learn>=0.22.1,<1.6.0 4 | scipy 5 | lightgbm>=2.2.0 6 | fsspec>=0.8.0 7 | ipython 8 | traitlets 9 | XlsxWriter>=3.0.2 10 | psutil 11 | joblib; python_version >= '3.8' or platform_system != 'Windows' 12 | joblib<1.3.0; python_version < '3.8' and platform_system == 'Windows' 13 | pyyaml 14 | paramiko 15 | requests 16 | tornado 17 | prettytable 18 | tqdm 19 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/tb_cuml/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | import pytest 6 | 7 | from hypernets.tabular import is_cuml_installed 8 | 9 | if is_cuml_installed: 10 | import cupy 11 | 12 | if_cuml_ready = pytest.mark.skipif(not cupy.cuda.is_available(), reason='Cuda is not available') 13 | else: 14 | if_cuml_ready = pytest.mark.skipif(not is_cuml_installed, reason='Cuml is not installed') 15 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/tb_cuml/data_cleaner_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | from . import if_cuml_ready, is_cuml_installed 6 | from ..data_cleaner_test import TestDataCleaner as _TestDataCleaner 7 | 8 | if is_cuml_installed: 9 | import cudf 10 | 11 | 12 | @if_cuml_ready 13 | class TestCumlDataCleaner(_TestDataCleaner): 14 | @staticmethod 15 | def load_data(): 16 | return cudf.from_pandas(_TestDataCleaner.load_data()) 17 | -------------------------------------------------------------------------------- /hypernets/core/dispatcher.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | 6 | 7 | class Dispatcher(object): 8 | def __init__(self): 9 | super(Dispatcher, self).__init__() 10 | 11 | def dispatch(self, hyper_model, X, y, X_val, y_val, X_test, cv, num_folds, max_trials, dataset_id, trial_store, 12 | **fit_kwargs): 13 | raise NotImplementedError() 14 | 15 | # def run_trial(self, space_sample, trial_no, X, y, X_val, y_val, **fit_kwargs): 16 | # pass 17 | -------------------------------------------------------------------------------- /hypernets/dispatchers/predict/grpc/proto/predict.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | 4 | package hypernets.dispatchers.predict.grpc.proto; 5 | 6 | 7 | service PredictService { 8 | rpc predict(PredictRequest) returns (PredictResponse) {} 9 | } 10 | 11 | message PredictRequest { 12 | string data_file = 1; 13 | string result_file = 2; 14 | } 15 | 16 | message PredictResponse { 17 | string data_file = 1; 18 | string result_file = 2; 19 | int32 code = 3; 20 | string message = 4; 21 | } 22 | -------------------------------------------------------------------------------- /hypernets/dispatchers/process/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from .local_process import LocalProcess 4 | 5 | try: 6 | from .grpc_process import GrpcProcess 7 | except ImportError: 8 | pass 9 | except: 10 |
from hypernets.utils import logging 11 | import sys 12 | 13 | logger = logging.get_logger(__name__) 14 | logger.warning('Failed to load GrpcProcess', exc_info=sys.exc_info()) 15 | 16 | try: 17 | from .ssh_process import SshProcess 18 | except ImportError: 19 | pass 20 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/tb_dask/data_cleaner_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | from . import if_dask_ready, is_dask_installed 6 | from ..data_cleaner_test import TestDataCleaner as _TestDataCleaner 7 | 8 | if is_dask_installed: 9 | import dask.dataframe as dd 10 | 11 | 12 | @if_dask_ready 13 | class TestDaskDataCleaner(_TestDataCleaner): 14 | @staticmethod 15 | def load_data(): 16 | df = _TestDataCleaner.load_data() 17 | return dd.from_pandas(df, npartitions=2) 18 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | # Build documentation in the docs/ directory with Sphinx 4 | sphinx: 5 | configuration: docs/source/conf.py 6 | 7 | # Build documentation with MkDocs 8 | #mkdocs: 9 | # configuration: mkdocs.yml 10 | 11 | # Optionally build your docs in additional formats such as PDF and ePub 12 | formats: all 13 | 14 | # Optionally set the version of Python and requirements required to build your docs 15 | python: 16 | version: 3.6 17 | install: 18 | - requirements: requirements.txt 19 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/tb_dask/psudo_labeling_test.py: -------------------------------------------------------------------------------- 1 | from . import if_dask_ready, is_dask_installed, setup_dask 2 | from ..psudo_labeling_test import TestPseudoLabeling as _TestPseudoLabeling 3 | 4 | if is_dask_installed: 5 | import dask.dataframe as dd 6 | 7 | 8 | @if_dask_ready 9 | class TestDaskPseudoLabeling(_TestPseudoLabeling): 10 | @staticmethod 11 | def load_data(): 12 | setup_dask(None) 13 | df = _TestPseudoLabeling.load_data() 14 | return dd.from_pandas(df, npartitions=2) 15 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/30-feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Request 3 | about: Use this template for raising a feature request 4 | labels: 'type:feature' 5 | 6 | --- 7 | 8 | Please make sure that this is a feature request. 9 | 10 | **System information** 11 | - Hypernets version (you are using): 12 | - Are you willing to contribute it (Yes/No): 13 | 14 | 15 | **Describe the feature and the current behavior/state.** 16 | 17 | 18 | **Will this change the current api? How?** 19 | 20 | 21 | **Any Other info.** 22 | -------------------------------------------------------------------------------- /docs/source/hypernets.discriminators.rst: -------------------------------------------------------------------------------- 1 | hypernets.discriminators package 2 | ================================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.discriminators.percentile module 8 | ------------------------------------------ 9 | 10 | .. 
automodule:: hypernets.discriminators.percentile 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: hypernets.discriminators 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /docs/source/hypernets.tabular.datasets.rst: -------------------------------------------------------------------------------- 1 | hypernets.tabular.datasets package 2 | ================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.tabular.datasets.dsutils module 8 | ----------------------------------------- 9 | 10 | .. automodule:: hypernets.tabular.datasets.dsutils 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: hypernets.tabular.datasets 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/tb_cuml/psudo_labeling_test.py: -------------------------------------------------------------------------------- 1 | from . import if_cuml_ready, is_cuml_installed 2 | from ..psudo_labeling_test import TestPseudoLabeling as _TestPseudoLabeling 3 | 4 | if is_cuml_installed: 5 | import cudf 6 | 7 | 8 | @if_cuml_ready 9 | class TestCumlPseudoLabeling(_TestPseudoLabeling): 10 | 11 | @staticmethod 12 | def load_data(): 13 | df = _TestPseudoLabeling.load_data() 14 | return cudf.from_pandas(df) 15 | 16 | @staticmethod 17 | def is_quantile_exact(): 18 | return False 19 | -------------------------------------------------------------------------------- /docs/source/hypernets.dispatchers.dask.rst: -------------------------------------------------------------------------------- 1 | hypernets.dispatchers.dask package 2 | ================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.dispatchers.dask.dask\_dispatcher module 8 | -------------------------------------------------- 9 | 10 | .. automodule:: hypernets.dispatchers.dask.dask_dispatcher 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: hypernets.dispatchers.dask 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /hypernets/tabular/feature_generators/_base.py: -------------------------------------------------------------------------------- 1 | import featuretools as ft 2 | from hypernets.utils import Version 3 | 4 | FT_V0 = Version(ft.__version__) < Version('1.0') 5 | 6 | if FT_V0: 7 | from featuretools.variable_types import Categorical, LatLong, NaturalLanguage, Datetime, Numeric, Unknown 8 | 9 | 10 | def ColumnSchema(*, logical_type, semantic_tags=None): 11 | return logical_type 12 | 13 | else: 14 | from woodwork.logical_types import Categorical, LatLong, NaturalLanguage, Datetime, Double as Numeric, Unknown 15 | from woodwork.column_schema import ColumnSchema 16 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/tb_cuml/feature_importance_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | from ..feature_importance_test import TestPermutationImportance as _TestPermutationImportance 7 | from . 
import if_cuml_ready, is_cuml_installed 8 | 9 | if is_cuml_installed: 10 | import cudf 11 | 12 | 13 | @if_cuml_ready 14 | class TestCumlPermutationImportance(_TestPermutationImportance): 15 | @staticmethod 16 | def load_data(): 17 | df = _TestPermutationImportance.load_data() 18 | df = cudf.from_pandas(df) 19 | return df 20 | -------------------------------------------------------------------------------- /hypernets/tabular/dask_ex/_collinearity.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | 6 | import dask 7 | 8 | from ._transformers import SafeOrdinalEncoder 9 | from ..collinearity import MultiCollinearityDetector 10 | 11 | 12 | class DaskMultiCollinearityDetector(MultiCollinearityDetector): 13 | def _value_counts(self, X): 14 | n_values = super()._value_counts(X) 15 | return dask.compute(*n_values) 16 | 17 | def _corr(self, X, method=None): 18 | Xt = SafeOrdinalEncoder().fit_transform(X) 19 | corr = Xt.corr(method='pearson' if method is None else method).compute().values 20 | return corr 21 | -------------------------------------------------------------------------------- /hypernets/tests/searchers/test_moo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from hypernets.core.pareto import pareto_dominate 4 | from hypernets.searchers.genetic import Individual 5 | 6 | 7 | def test_dominate(): 8 | s1 = np.array([0.5, 0.6]) 9 | s2 = np.array([0.4, 0.6]) 10 | assert pareto_dominate(s2, s1) 11 | 12 | s3 = np.array([0.3, 0.7]) 13 | assert not pareto_dominate(s2, s3) 14 | 15 | s4 = np.array([0.2, 0.5]) 16 | assert not pareto_dominate(s3, s4) 17 | 18 | # different direction 19 | s5 = np.array([0.8, 100]) 20 | s6 = np.array([0.7, 101]) 21 | assert pareto_dominate(s5, s6, directions=('max', 'min')) 22 | -------------------------------------------------------------------------------- /hypernets/tabular/cuml_ex/_estimator_detector.py: -------------------------------------------------------------------------------- 1 | from ..estimator_detector import EstimatorDetector 2 | 3 | 4 | class CumlEstimatorDetector(EstimatorDetector): 5 | def __call__(self, *args, **kwargs): 6 | from .. 
import CumlToolBox 7 | result = super(CumlEstimatorDetector, self).__call__(*args, **kwargs) 8 | 9 | estimator = self.create_estimator(self.get_estimator_cls()) 10 | X, y = self.prepare_data() 11 | X, y = CumlToolBox.from_local(X, y) 12 | 13 | try: 14 | self.fit_estimator(estimator, X, y) 15 | result.add('fitted_with_cuml') 16 | except: 17 | pass 18 | 19 | return result 20 | -------------------------------------------------------------------------------- /hypernets/tests/hyperctl/job_template.yml: -------------------------------------------------------------------------------- 1 | params: 2 | learning_rate: [0.1,0.5] 3 | dataset: ['path/d1.csv','path/d2.csv'] 4 | 5 | command: python3 cli.py 6 | working_dir: /tmp/code 7 | 8 | resource: 9 | cpu: 2 10 | ram: 1024 11 | gpu: 1 12 | 13 | server: 14 | port: 8060 15 | 16 | scheduler: 17 | interval: 1 18 | exit_on_finish: True 19 | 20 | backend: 21 | type: remote 22 | conf: 23 | machines: 24 | - connection: 25 | hostname: host1 26 | username: hyperctl 27 | ssh_rsa_file: ~/.ssh/id_rsa 28 | - connection: 29 | hostname: host2 30 | username: hyperctl 31 | password: hyperctl 32 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/tb_dask/feature_importance_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | import pytest 7 | 8 | from . import if_dask_ready, is_dask_installed 9 | from ..feature_importance_test import TestPermutationImportance as _TestPermutationImportance 10 | 11 | if is_dask_installed: 12 | import dask.dataframe as dd 13 | 14 | 15 | @if_dask_ready 16 | @pytest.mark.xfail(reason='to be fixed') 17 | class TestDaskPermutationImportance(_TestPermutationImportance): 18 | @staticmethod 19 | def load_data(): 20 | df = _TestPermutationImportance.load_data() 21 | df = dd.from_pandas(df, npartitions=2) 22 | return df 23 | -------------------------------------------------------------------------------- /docs/source/hypernets.pipeline.rst: -------------------------------------------------------------------------------- 1 | hypernets.pipeline package 2 | ========================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.pipeline.base module 8 | ------------------------------ 9 | 10 | .. automodule:: hypernets.pipeline.base 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | hypernets.pipeline.transformers module 16 | -------------------------------------- 17 | 18 | .. automodule:: hypernets.pipeline.transformers 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | ..
automodule:: hypernets.pipeline 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /hypernets/core/random_state.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | import numpy as np 7 | 8 | _hypernets_random_state = None 9 | 10 | 11 | def set_random_state(seed): 12 | global _hypernets_random_state 13 | if seed is None: 14 | _hypernets_random_state = None 15 | else: 16 | _hypernets_random_state = np.random.RandomState(seed=seed) 17 | 18 | 19 | def get_random_state(): 20 | global _hypernets_random_state 21 | if _hypernets_random_state is None: 22 | return np.random.RandomState() 23 | else: 24 | return _hypernets_random_state 25 | 26 | 27 | def randint(): 28 | return get_random_state().randint(0, 65535) 29 | -------------------------------------------------------------------------------- /hypernets/tabular/dask_ex/_feature_generators.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | 6 | from ..feature_generators import FeatureGenerationTransformer 7 | from ..feature_generators import is_feature_generator_ready as _is_feature_generator_ready 8 | 9 | is_feature_generator_ready = _is_feature_generator_ready 10 | 11 | 12 | class DaskFeatureGenerationTransformer(FeatureGenerationTransformer): 13 | def _fix_input(self, X, y, for_fit=True): 14 | from ._toolbox import DaskToolBox 15 | 16 | X, y = super()._fix_input(X, y, for_fit=for_fit) 17 | X, y = [DaskToolBox.make_divisions_known(t) if DaskToolBox.is_dask_object(t) else t for t in (X, y)] 18 | 19 | return X, y 20 | -------------------------------------------------------------------------------- /docs/source/hypernets.dispatchers.predict.rst: -------------------------------------------------------------------------------- 1 | hypernets.dispatchers.predict package 2 | ===================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | hypernets.dispatchers.predict.grpc 11 | 12 | Submodules 13 | ---------- 14 | 15 | hypernets.dispatchers.predict.predict\_helper module 16 | ---------------------------------------------------- 17 | 18 | .. automodule:: hypernets.dispatchers.predict.predict_helper 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: hypernets.dispatchers.predict 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/source/hypernets.examples.rst: -------------------------------------------------------------------------------- 1 | hypernets.examples package 2 | ========================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.examples.plain\_model module 8 | -------------------------------------- 9 | 10 | .. automodule:: hypernets.examples.plain_model 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | hypernets.examples.smoke\_testing module 16 | ---------------------------------------- 17 | 18 | .. automodule:: hypernets.examples.smoke_testing 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. 
automodule:: hypernets.examples 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /hypernets/dispatchers/run_predict_server.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import argparse 3 | 4 | from hypernets.dispatchers.predict.grpc.predict_service import serve 5 | 6 | 7 | def main(): 8 | parser = argparse.ArgumentParser('start predict server.') 9 | parser.add_argument('--port', '-port', 10 | type=int, default=8030, 11 | help='tcp port of the predict server') 12 | args, argv = parser.parse_known_args() 13 | 14 | server, _ = serve(f'0.0.0.0:{args.port}', ' '.join(argv)) 15 | server.wait_for_termination() 16 | 17 | 18 | if __name__ == '__main__': 19 | try: 20 | main() 21 | print('done') 22 | except KeyboardInterrupt as e: 23 | print(e) 24 | -------------------------------------------------------------------------------- /hypernets/tabular/dask_ex/_drift_detection.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | 6 | from hypernets.utils import logging 7 | from ..drift_detection import FeatureSelectorWithDriftDetection, DriftDetector 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | class DaskFeatureSelectionWithDriftDetector(FeatureSelectorWithDriftDetection): 13 | parallelizable = False 14 | 15 | @staticmethod 16 | def get_detector(preprocessor=None, estimator=None, random_state=9527): 17 | return DaskDriftDetector(preprocessor=preprocessor, estimator=estimator, random_state=random_state) 18 | 19 | 20 | class DaskDriftDetector(DriftDetector): 21 | @staticmethod 22 | def _copy_data(X): 23 | return X.copy() 24 | -------------------------------------------------------------------------------- /hypernets/core/context.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | 4 | class Context(metaclass=abc.ABCMeta): 5 | 6 | def get(self, key): 7 | raise NotImplementedError 8 | 9 | def put(self, key, value): 10 | raise NotImplementedError 11 | 12 | 13 | class DefaultContext(Context): 14 | 15 | def __init__(self): 16 | super(DefaultContext, self).__init__() 17 | self._map = {} 18 | 19 | def put(self, key, value): 20 | self._map[key] = value 21 | 22 | def get(self, key): 23 | return self._map.get(key) 24 | 25 | # def __getstate__(self): 26 | # states = dict(self.__dict__) 27 | # if '_map' in states: # mark _map as transient 28 | # states['_map'] = {} 29 | # return states 30 | 
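31 | # Usage sketch (illustrative comments only; the key names below are hypothetical): 32 | # ctx = DefaultContext() 33 | # ctx.put('n_trials', 10) 34 | # assert ctx.get('n_trials') == 10 35 | # assert ctx.get('missing_key') is None # backed by dict.get(), so absent keys return None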
-------------------------------------------------------------------------------- /hypernets/experiment/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | from ._experiment import Experiment, ExperimentCallback 8 | from .general import GeneralExperiment 9 | from .compete import CompeteExperiment, SteppedExperiment, StepNames 10 | from ._extractor import ExperimentExtractor, ExperimentMeta, DatasetMeta, StepMeta, \ 11 | StepType, EarlyStoppingStatusMeta, EarlyStoppingConfigMeta, ConfusionMatrixMeta 12 | from ._callback import ConsoleCallback, SimpleNotebookCallback, MLReportCallback, \ 13 | MLEvaluateCallback, ResourceUsageMonitor, ABSExpVisExperimentCallback, ABSExpVisHyperModelCallback, ActionType 14 | from ._maker import make_experiment, default_experiment_callbacks, default_search_callbacks 15 | -------------------------------------------------------------------------------- /docs/source/release_note_025.rst: -------------------------------------------------------------------------------- 1 | Version 0.2.5 2 | ------------- 3 | 4 | We added a few new features in this version: 5 | 6 | * Toolbox: A general computing layer for tabular data 7 | - Provides implementations for pandas, dask and cudf data types 8 | - DefaultToolbox (Numpy + Pandas + Sklearn) 9 | - DaskToolbox (DaskCore + DaskML) 10 | - CumlToolBox (Cupy + Cudf + Cuml) 11 | 12 | 13 | * HyperCtl: A tool package for multi-job management 14 | - Supports sequential jobs with multi-parameter settings 15 | - Supports parallel jobs across multiple remote machines 16 | 17 | 18 | * Export experiment report (.xlsx) 19 | - Includes information on engineered features, ensembled models, evaluation scores, resource usage, etc.
20 | - Generate plots automatically 21 | -------------------------------------------------------------------------------- /hypernets/utils/const.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | TASK_AUTO = 'auto' 8 | TASK_BINARY = 'binary' 9 | TASK_MULTICLASS = 'multiclass' 10 | TASK_REGRESSION = 'regression' 11 | TASK_MULTILABEL = 'multilabel' 12 | 13 | 14 | COLUMNNAME_POSTFIX_DISCRETE = '_discrete' 15 | COLUMNNAME_POSTFIX_CATEGORIZE = '_cat' 16 | 17 | # DATATYPE_TENSOR_FLOAT = 'float32' 18 | # DATATYPE_PREDICT_CLASS = 'int32' 19 | DATATYPE_LABEL = 'int16' 20 | 21 | 22 | SEARCHER_SOO = "soo" 23 | SEARCHER_MOO = "moo" 24 | 25 | COMBINATION_SHUFFLE = "shuffle" 26 | COMBINATION_UNIFORM = "uniform" 27 | COMBINATION_SINGLE_POINT = "single_point" 28 | 29 | DECOMPOSITION_TCHE = "tchebicheff" 30 | DECOMPOSITION_WS = "weighted_sum" 31 | DECOMPOSITION_PBI = "pbi" 32 | -------------------------------------------------------------------------------- /hypernets/tests/hyperctl/test_cli.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | from pathlib import Path 4 | 5 | from hypernets.hyperctl import cli, utils 6 | 7 | SRC_DIR = Path(__file__).parent 8 | 9 | 10 | def test_run_generate_job_specs(): 11 | batch_config_path = (SRC_DIR / "job_template.yml").as_posix() 12 | fd, fp = tempfile.mkstemp(prefix="jobs_spec_", suffix=".json") 13 | os.close(fd) 14 | os.remove(fp) 15 | 16 | cli.run_generate_job_specs(batch_config_path, fp) 17 | fp_ = Path(fp) 18 | 19 | assert fp_.exists() 20 | jobs_spec = utils.load_json(fp) 21 | assert len(jobs_spec['jobs']) == 4 22 | assert 'server' in jobs_spec 23 | assert 'name' in jobs_spec 24 | assert len(jobs_spec['backend']['conf']['machines']) == 2 25 | os.remove(fp_) 26 | -------------------------------------------------------------------------------- /hypernets/core/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | from .search_space import HyperNode, HyperSpace, ParameterSpace, ModuleSpace, \ 6 | Int, Real, Bool, Constant, Choice, MultipleChoice, Dynamic, Cascade, get_default_space 7 | from .ops import HyperInput, Identity, ConnectionSpace, Optional, ModuleChoice, Sequential, Permutation, \ 8 | Repeat, InputChoice, ConnectLooseEnd, Reduction 9 | from .searcher import OptimizeDirection, Searcher 10 | from .callbacks import Callback, FileStorageLoggingCallback, SummaryCallback, \ 11 | EarlyStoppingCallback, EarlyStoppingError, NotebookCallback, ProgressiveCallback 12 | from .trial import Trial, TrialStore, TrialHistory, DiskTrialStore 13 | from .dispatcher import Dispatcher 14 | from .random_state import set_random_state, get_random_state, randint 15 | -------------------------------------------------------------------------------- /hypernets/tabular/cuml_ex/_pseudo_labeling.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | 6 | from hypernets.utils import logging 7 | from ..pseudo_labeling import PseudoLabeling 8 | 9 | logger = logging.get_logger(__name__) 10 | 11 | 12 | class CumlPseudoLabeling(PseudoLabeling): 13 | import cupy as np 14 | 15 | def _filter_by_quantile(self, proba): 16 | """ 17 | cupy does not support *nanquantile* 18 | """ 19 | np = self.np 20 | 21 | q = [] 22 | for i in range(proba.shape[1]): 
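# per class column: keep only the positive probabilities before taking the quantile,
# since cupy lacks *nanquantile*; a column with no positive entries falls back to a
# threshold of 1.0, so effectively no rows are selected from it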
23 | p = proba[:, i] 24 | p = p[p > 0.] 25 | if len(p) > 0: 26 | q.append(np.quantile(p, self.quantile)) 27 | else: 28 | q.append(1.) 29 | selected = (proba >= np.array(q)) 30 | return selected 31 | -------------------------------------------------------------------------------- /hypernets/tests/utils/estimators_test.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from sklearn.datasets import load_iris 4 | from sklearn.tree import DecisionTreeClassifier 5 | 6 | from hypernets.utils import get_tree_importances 7 | 8 | 9 | def test_get_tree_importances(): 10 | X, y = load_iris(return_X_y=True) 11 | rfc = DecisionTreeClassifier().fit(X, y) 12 | print(rfc) 13 | imps_dict = get_tree_importances(rfc) 14 | assert len(imps_dict.keys()) == 4 15 | for c in ['col_1', 'col_2', 'col_3', 'col_0']: 16 | assert c in imps_dict.keys() 17 | 18 | values_type = list(set(map(lambda v: type(v), imps_dict.values()))) 19 | 20 | assert len(values_type) == 1 21 | assert values_type[0] == int or values_type[0] == float # not numpy type 22 | assert json.dumps(imps_dict) # has only python base type 23 | -------------------------------------------------------------------------------- /hypernets/tabular/feature_generators/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | # from ._primitives import CrossCategorical, GeoHashPrimitive, DaskCompatibleHaversine, TfidfPrimitive 6 | # from ._transformers import FeatureGenerationTransformer, is_geohash_installed 7 | 8 | try: 9 | from ._transformers import FeatureGenerationTransformer, is_geohash_installed 10 | 11 | is_feature_generator_ready = True 12 | except ImportError as e: 13 | _msg = f'{e}, install featuretools and try again' 14 | 15 | is_geohash_installed = False 16 | is_feature_generator_ready = False 17 | 18 | from sklearn.base import BaseEstimator as _BaseEstimator 19 | 20 | 21 | class FeatureGenerationTransformer(_BaseEstimator): 22 | def __init__(self, *args, **kwargs): 23 | raise ImportError(_msg) 24 | -------------------------------------------------------------------------------- /hypernets/tests/hyperctl/local_batch.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "local-batch-example", 3 | "job_command": "sleep 100;echo \"finished\"", 4 | "jobs": [ 5 | { 6 | "name": "job1", 7 | "params": { 8 | "learning_rate": 0.1 9 | }, 10 | 11 | "assets":[ 12 | "/tmp/file-a", 13 | "/tmp/dir-a" 14 | ] 15 | },{ 16 | "name": "job2", 17 | "params": { 18 | "learning_rate": 0.2 19 | } 20 | } 21 | ], 22 | "backend": { 23 | "type": "local", 24 | "conf": {} 25 | }, 26 | "scheduler": { 27 | "interval": 5000, 28 | "exit_on_finish": true 29 | }, 30 | "server": { 31 | "host": "localhost", 32 | "port": 8060 33 | } 34 | } -------------------------------------------------------------------------------- /hypernets/dispatchers/process/grpc/proto/proc.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | 4 | package hypernets.dispatchers.process.grpc.proto; 5 | 6 | 7 | service ProcessBroker { 8 | rpc run(stream ProcessRequest) returns (stream DataChunk) {} 9 | rpc download(DownloadRequest) returns (stream DataChunk) {} 10 | } 11 | 12 | message ProcessRequest { 13 | string program = 1; 14 | repeated string args = 2; 15 | string cwd = 3; 16 | int32 buffer_size = 4; 17 | string encoding = 5; 18 | } 19 | 20 | message 
DownloadRequest { 21 | string peer = 1; 22 | string path = 2; 23 | int32 buffer_size = 3; 24 | string encoding = 4; 25 | } 26 | 27 | message DataChunk { 28 | enum DataKind { 29 | IN = 0; 30 | OUT = 1; 31 | ERR = 2; 32 | DATA = 10; 33 | END = 99; 34 | EXCEPTION = 400; 35 | } 36 | DataKind kind = 1; 37 | bytes data = 2; 38 | } 39 | -------------------------------------------------------------------------------- /hypernets/tabular/cuml_ex/_drift_detection.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | from ..drift_detection import FeatureSelectorWithDriftDetection, DriftDetector 6 | 7 | 8 | class CumlFeatureSelectorWithDriftDetection(FeatureSelectorWithDriftDetection): 9 | # parallelizable = False 10 | def _score_features(self, X_merged, y, scorer, cv): 11 | from . import CumlToolBox 12 | X_merged, y = CumlToolBox.to_local(X_merged, y) 13 | return super()._score_features(X_merged, y, scorer, cv) 14 | 15 | @staticmethod 16 | def get_detector(preprocessor=None, estimator=None, random_state=None): 17 | return CumlDriftDetector(preprocessor=preprocessor, estimator=estimator, random_state=random_state) 18 | 19 | 20 | class CumlDriftDetector(DriftDetector): 21 | @staticmethod 22 | def _copy_data(X): 23 | return X.copy() 24 | -------------------------------------------------------------------------------- /docs/source/hypernets.model.rst: -------------------------------------------------------------------------------- 1 | hypernets.model package 2 | ======================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.model.estimator module 8 | -------------------------------- 9 | 10 | .. automodule:: hypernets.model.estimator 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | hypernets.model.hyper\_model module 16 | ----------------------------------- 17 | 18 | .. automodule:: hypernets.model.hyper_model 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | hypernets.model.objectives module 24 | --------------------------------- 25 | 26 | .. automodule:: hypernets.model.objectives 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. 
automodule:: hypernets.model 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /hypernets/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | import sys as sys_ 7 | try: 8 | from packaging.version import Version 9 | except ModuleNotFoundError: 10 | from distutils.version import LooseVersion as Version 11 | 12 | is_os_windows = sys_.platform.find('win') == 0 13 | is_os_darwin = sys_.platform.find('darwin') == 0 14 | is_os_linux = sys_.platform.find('linux') == 0 15 | 16 | from ._doc_lens import DocLens 17 | from ._fsutils import filesystem as fs 18 | from ._tic_tok import tic_toc, report as tic_toc_report, report_as_dataframe as tic_toc_report_as_dataframe 19 | from .common import generate_id, combinations, isnotebook, Counter, to_repr, get_params, context, profile 20 | from .common import load_module 21 | from ._estimators import load_estimator, save_estimator, get_tree_importances 22 | from ._perf import get_perf, dump_perf, load_perf 23 | -------------------------------------------------------------------------------- /docs/source/release_note_030.rst: -------------------------------------------------------------------------------- 1 | Version 0.3.0 2 | ------------- 3 | 4 | We added a few new features in this version: 5 | 6 | * Multi-objective optimization 7 | 8 | * optimization algorithms 9 | - add MOEA/D (Multi-objective Evolutionary Algorithm Based on Decomposition) 10 | - add Tchebycheff, Weighted Sum, and Penalty-based Boundary Intersection (PBI) decomposition approaches 11 | - add shuffle crossover, uniform crossover, and single-point crossover strategies for GA-based algorithms 12 | - automatically normalize objectives of different dimensions 13 | - automatically convert maximization problems to minimization problems 14 | - add NSGA-II (Non-dominated Sorting Genetic Algorithm II) 15 | - add R-NSGA-II (NSGA-II with the r-dominance relation for multi-criteria decision making) 16 | 17 | * built-in objectives 18 | - number of features 19 | - prediction performance 20 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /hypernets/dispatchers/cluster/grpc/proto/spec.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package hypernets.dispatchers.cluster.grpc.proto; 4 | 5 | 6 | service SearchDriver { 7 | rpc ping(PingMessage) returns (PingMessage){} 8 | rpc search(stream SearchRequest) returns (stream SearchResponse){} 9 | } 10 | 11 | message PingMessage{ 12 | string message = 1; 13 | } 14 | 15 | message SearchRequest { 16 | string search_id = 1; 17 | string trial_no = 2; 18 | string space_id = 3; 19 | bool success = 4; 20 | float reward = 5; 21 | string message = 6; 22 | } 23 | 24 | 25 | message SearchResponse { 26 | enum SearchResponseCode{ 27 | OK = 0; 28 | WAITING = 11; 29 | FINISHED = 12; 30 | FAILED = 99; 31 | } 32 | 33 | SearchResponseCode code = 1; 34 | string search_id = 2; 35 | string trial_no = 3; 36 | string space_id = 4; 37 | string space_file = 5; 38 | string model_file = 6; 39 | } 40 | 41 | -------------------------------------------------------------------------------- /hypernets/tabular/dask_ex/_model_selection.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | import dask.array as da 6 | import dask.dataframe as dd 7 | from sklearn import model_selection as sk_sel 8 | 9 | 10 | def _fake_X_y(X, y): 11 | if isinstance(X, dd.DataFrame): 12 | X = X.index.to_frame() 13 | X = X.set_index(0)  # set_index is not in-place; assign the returned frame 14 | X = X.compute() 15 | 16 | if isinstance(y, (dd.Series, dd.DataFrame, da.Array)): 17 | y = y.compute() 18 | 19 | return X, y 20 | 21 | 22 | class FakeDaskKFold(sk_sel.KFold): 23 | def split(self, X, y=None, groups=None): 24 | X, y = _fake_X_y(X, y) 25 | yield from super().split(X, y, groups=groups) 26 | 27 | 28 | class FakeDaskStratifiedKFold(sk_sel.StratifiedKFold): 29 | def split(self, X, y, groups=None): 30 | assert y is not None 31 | 32 | X, y = _fake_X_y(X, y) 33 | yield from super().split(X, y, groups=groups) 34 | -------------------------------------------------------------------------------- /docs/source/hypernets.dispatchers.cluster.grpc.proto.rst: -------------------------------------------------------------------------------- 1 | hypernets.dispatchers.cluster.grpc.proto package 2 | ================================================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.dispatchers.cluster.grpc.proto.spec\_pb2 module 8 | --------------------------------------------------------- 9 | 10 | .. automodule:: hypernets.dispatchers.cluster.grpc.proto.spec_pb2 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | hypernets.dispatchers.cluster.grpc.proto.spec\_pb2\_grpc module 16 | --------------------------------------------------------------- 17 | 18 | .. automodule:: hypernets.dispatchers.cluster.grpc.proto.spec_pb2_grpc 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | ..
automodule:: hypernets.dispatchers.cluster.grpc.proto 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/source/hypernets.dispatchers.process.grpc.proto.rst: -------------------------------------------------------------------------------- 1 | hypernets.dispatchers.process.grpc.proto package 2 | ================================================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.dispatchers.process.grpc.proto.proc\_pb2 module 8 | --------------------------------------------------------- 9 | 10 | .. automodule:: hypernets.dispatchers.process.grpc.proto.proc_pb2 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | hypernets.dispatchers.process.grpc.proto.proc\_pb2\_grpc module 16 | --------------------------------------------------------------- 17 | 18 | .. automodule:: hypernets.dispatchers.process.grpc.proto.proc_pb2_grpc 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: hypernets.dispatchers.process.grpc.proto 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /hypernets/searchers/random_searcher.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | from ..core.searcher import Searcher, OptimizeDirection 6 | 7 | 8 | class RandomSearcher(Searcher): 9 | def __init__(self, space_fn, optimize_direction=OptimizeDirection.Minimize, space_sample_validation_fn=None): 10 | Searcher.__init__(self, space_fn, optimize_direction, space_sample_validation_fn=space_sample_validation_fn) 11 | 12 | @property 13 | def parallelizable(self): 14 | return True 15 | 16 | def sample(self, space_options=None): 17 | sample = self._sample_and_check(self._random_sample) 18 | return sample 19 | 20 | def get_best(self): 21 | raise NotImplementedError 22 | 23 | def update_result(self, space, result): 24 | pass 25 | 26 | def reset(self): 27 | raise NotImplementedError 28 | 29 | def export(self): 30 | raise NotImplementedError 31 | -------------------------------------------------------------------------------- /docs/source/hypernets.dispatchers.predict.grpc.proto.rst: -------------------------------------------------------------------------------- 1 | hypernets.dispatchers.predict.grpc.proto package 2 | ================================================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.dispatchers.predict.grpc.proto.predict\_pb2 module 8 | ------------------------------------------------------------ 9 | 10 | .. automodule:: hypernets.dispatchers.predict.grpc.proto.predict_pb2 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | hypernets.dispatchers.predict.grpc.proto.predict\_pb2\_grpc module 16 | ------------------------------------------------------------------ 17 | 18 | .. automodule:: hypernets.dispatchers.predict.grpc.proto.predict_pb2_grpc 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. 
automodule:: hypernets.dispatchers.predict.grpc.proto 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /hypernets/tabular/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | from ._base import get_tool_box, register_toolbox, register_transformer, tb_transformer 6 | from .toolbox import ToolBox 7 | 8 | register_toolbox(ToolBox, aliases=('default', 'pandas')) 9 | 10 | try: 11 | import dask.dataframe as dd 12 | 13 | import dask_ml 14 | from .dask_ex import DaskToolBox 15 | 16 | register_toolbox(DaskToolBox, pos=0, aliases=('dask',)) 17 | is_dask_installed = True 18 | except ImportError: 19 | # import traceback 20 | # traceback.print_exc() 21 | is_dask_installed = False 22 | 23 | try: 24 | import cupy 25 | import cudf 26 | import cuml 27 | from .cuml_ex import CumlToolBox 28 | 29 | register_toolbox(CumlToolBox, pos=0, aliases=('cuml', 'rapids')) 30 | is_cuml_installed = True 31 | except ImportError: 32 | # import traceback 33 | # 34 | # traceback.print_exc() 35 | is_cuml_installed = False 36 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/00-bug-issue.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Issue 3 | about: Use this template for reporting a bug 4 | labels: 'type:bug' 5 | 6 | --- 7 | 8 | Please make sure that this is a bug. 9 | 10 | **System information** 11 | - OS Platform and Distribution (e.g., CentOS 7.6): 12 | - Python version: 13 | - Hypernets version: 14 | - Other Python packages (run `pip list`): 15 | 16 | 17 | **Describe the current behavior** 18 | 19 | 20 | **Describe the expected behavior** 21 | 22 | 23 | **Standalone code to reproduce the issue** 24 | Provide a reproducible test case that is the bare minimum necessary to generate 25 | the problem. If possible, please share a link to a Jupyter notebook. 26 | 27 | 28 | **Are you willing to submit a PR? (Yes/No)** 29 | 30 | 31 | **Other info / logs** 32 | Include any logs or source code that would be helpful to diagnose the problem. 33 | If including tracebacks, please include the full traceback. Large logs and files 34 | should be attached. 35 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to Hypernets 2 | ===================== 3 | 4 | Hypernets: A General Automated Machine Learning Framework 5 | ######################################################### 6 | 7 | Hypernets is a general AutoML framework that can address various needs such as feature engineering, hyperparameter optimization, and neural architecture search, helping users build end-to-end automated machine learning pipelines. 8 | 9 | ..
toctree:: 10 | :maxdepth: 2 11 | :caption: Home: 12 | 13 | Overview 14 | Quick Start 15 | Search Space 16 | Searchers 17 | HyperModels 18 | Neural Architecture Search 19 | Experiment 20 | Hyperctl 21 | API 22 | Release Notes 23 | FAQ 24 | 25 | Indices and tables 26 | ================== 27 | 28 | * :ref:`genindex` 29 | * :ref:`modindex` 30 | * :ref:`search` 31 | -------------------------------------------------------------------------------- /docs/source/hypernets.tabular.ensemble.rst: -------------------------------------------------------------------------------- 1 | hypernets.tabular.ensemble package 2 | ================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.tabular.ensemble.base\_ensemble module 8 | ------------------------------------------------ 9 | 10 | .. automodule:: hypernets.tabular.ensemble.base_ensemble 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | hypernets.tabular.ensemble.stacking module 16 | ------------------------------------------ 17 | 18 | .. automodule:: hypernets.tabular.ensemble.stacking 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | hypernets.tabular.ensemble.voting module 24 | ---------------------------------------- 25 | 26 | .. automodule:: hypernets.tabular.ensemble.voting 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: hypernets.tabular.ensemble 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /docs/source/hypernets.dispatchers.predict.grpc.rst: -------------------------------------------------------------------------------- 1 | hypernets.dispatchers.predict.grpc package 2 | ========================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | hypernets.dispatchers.predict.grpc.proto 11 | 12 | Submodules 13 | ---------- 14 | 15 | hypernets.dispatchers.predict.grpc.predict\_client module 16 | --------------------------------------------------------- 17 | 18 | .. automodule:: hypernets.dispatchers.predict.grpc.predict_client 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | hypernets.dispatchers.predict.grpc.predict\_service module 24 | ---------------------------------------------------------- 25 | 26 | .. automodule:: hypernets.dispatchers.predict.grpc.predict_service 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: hypernets.dispatchers.predict.grpc 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /docs/source/hypernets.dispatchers.cluster.grpc.rst: -------------------------------------------------------------------------------- 1 | hypernets.dispatchers.cluster.grpc package 2 | ========================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | hypernets.dispatchers.cluster.grpc.proto 11 | 12 | Submodules 13 | ---------- 14 | 15 | hypernets.dispatchers.cluster.grpc.search\_driver\_client module 16 | ---------------------------------------------------------------- 17 | 18 | .. 
automodule:: hypernets.dispatchers.cluster.grpc.search_driver_client 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | hypernets.dispatchers.cluster.grpc.search\_driver\_service module 24 | ----------------------------------------------------------------- 25 | 26 | .. automodule:: hypernets.dispatchers.cluster.grpc.search_driver_service 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: hypernets.dispatchers.cluster.grpc 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /hypernets/tabular/dask_ex/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | from ._toolbox import DaskToolBox 6 | 7 | try: 8 | import dask_ml.preprocessing as dm_pre 9 | import dask_ml.model_selection as dm_sel 10 | 11 | from dask_ml.impute import SimpleImputer 12 | from dask_ml.compose import ColumnTransformer 13 | from dask_ml.preprocessing import \ 14 | LabelEncoder, OneHotEncoder, OrdinalEncoder, \ 15 | StandardScaler, MinMaxScaler, RobustScaler 16 | 17 | from ._transformers import \ 18 | SafeOneHotEncoder, TruncatedSVD, \ 19 | MaxAbsScaler, SafeOrdinalEncoder, DataInterceptEncoder, \ 20 | CallableAdapterEncoder, DataCacher, CacheCleaner, \ 21 | LgbmLeavesEncoder, CategorizeEncoder, MultiKBinsDiscretizer, \ 22 | LocalizedTfidfVectorizer, \ 23 | MultiVarLenFeatureEncoder, DataFrameWrapper 24 | 25 | from ..sklearn_ex import PassThroughEstimator 26 | 27 | dask_ml_available = True 28 | except ImportError: 29 | # Not found dask_ml 30 | dask_ml_available = False 31 | -------------------------------------------------------------------------------- /docs/source/hypernets.dispatchers.process.grpc.rst: -------------------------------------------------------------------------------- 1 | hypernets.dispatchers.process.grpc package 2 | ========================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | hypernets.dispatchers.process.grpc.proto 11 | 12 | Submodules 13 | ---------- 14 | 15 | hypernets.dispatchers.process.grpc.process\_broker\_client module 16 | ----------------------------------------------------------------- 17 | 18 | .. automodule:: hypernets.dispatchers.process.grpc.process_broker_client 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | hypernets.dispatchers.process.grpc.process\_broker\_service module 24 | ------------------------------------------------------------------ 25 | 26 | .. automodule:: hypernets.dispatchers.process.grpc.process_broker_service 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. 
automodule:: hypernets.dispatchers.process.grpc 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /hypernets/tests/experiment/run_export_experiment_report.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from sklearn.model_selection import train_test_split 4 | 5 | from hypernets.examples.plain_model import PlainModel, PlainSearchSpace 6 | from hypernets.experiment import make_experiment 7 | from hypernets.tabular.datasets import dsutils 8 | 9 | 10 | def main(): 11 | df = dsutils.load_boston() 12 | 13 | df_train, df_eval = train_test_split(df, test_size=0.2) 14 | search_space = PlainSearchSpace(enable_lr=False, enable_nn=False, enable_dt=False, enable_dtr=True) 15 | 16 | experiment = make_experiment(PlainModel, df_train, 17 | target='target', 18 | search_space=search_space, 19 | log_level='info', 20 | random_state=8086, 21 | report_render='excel') 22 | estimator = experiment.run(max_trials=10) 23 | print(estimator) 24 | 25 | 26 | if __name__ == '__main__': 27 | t = time.time() 28 | main() 29 | print(time.time() - t) 30 | -------------------------------------------------------------------------------- /hypernets/dispatchers/run_broker.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import argparse 3 | 4 | from hypernets.dispatchers.process.grpc.process_broker_service import serve 5 | 6 | 7 | def main(): 8 | parser = argparse.ArgumentParser('run HyperNets process broker.') 9 | parser.add_argument('--host', '-host', 10 | default='0.0.0.0', 11 | help='broker hostname or ip address' 12 | + ', default "0.0.0.0"') 13 | parser.add_argument('--port', '-port', 14 | type=int, default=8010, 15 | help='broker tcp port, default 8010') 16 | parser.add_argument('--workers', '-workers', 17 | type=int, default=10, 18 | help='max worker count, default 10') 19 | 20 | args = parser.parse_args() 21 | 22 | server, _ = serve(f'{args.host}:{args.port}', args.workers) 23 | server.wait_for_termination() 24 | 25 | 26 | if __name__ == '__main__': 27 | try: 28 | main() 29 | print('done') 30 | except KeyboardInterrupt as e: 31 | print(e) 32 | -------------------------------------------------------------------------------- /hypernets/tabular/cuml_ex/_data_hasher.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | import cupy 6 | import cudf 7 | 8 | from ..data_hasher import DataHasher 9 | 10 | 11 | class CumlDataHasher(DataHasher): 12 | 13 | def _iter_data(self, data): 14 | if isinstance(data, cudf.DataFrame): 15 | yield from self._iter_cudf_dataframe(data) 16 | elif isinstance(data, cudf.Series): 17 | yield from self._iter_cudf_dataframe(data.to_frame()) 18 | elif isinstance(data, cupy.ndarray): 19 | yield from self._iter_cudf_dataframe(cudf.DataFrame(data), yield_columns=False) 20 | else: 21 | yield from super()._iter_data(data) 22 | 23 | @staticmethod 24 | def _iter_cudf_dataframe(df, yield_columns=True): 25 | if yield_columns: 26 | yield ','.join(map(str, df.columns.tolist())).encode('utf-8') 27 | 28 | if hasattr(df, 'hash_columns'): 29 | hashed = df.hash_columns() 30 | else: 31 | hashed = df.hash_values().values 32 | # hashed = cudf.DataFrame(hashed).T.hash_columns() 33 | yield cupy.asnumpy(hashed) 34 | -------------------------------------------------------------------------------- 
/.github/workflows/dist-builder.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. 2 | # They are provided by a third-party and are governed by 3 | # separate terms of service, privacy policy, and support 4 | # documentation. 5 | 6 | name: Build Python distribution 7 | 8 | on: workflow_dispatch 9 | 10 | permissions: 11 | contents: read 12 | 13 | jobs: 14 | build_dist: 15 | runs-on: ubuntu-latest 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | python-version: ["3.8", ] 20 | 21 | steps: 22 | - uses: actions/checkout@v3 23 | - name: Set up Python 24 | uses: actions/setup-python@v3 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | 28 | - name: Install dependencies 29 | run: | 30 | python -m pip install --upgrade pip 31 | pip install "setuptools>57.0" wheel 32 | pip list 33 | 34 | - name: Build package 35 | run: | 36 | python setup.py sdist bdist_wheel 37 | 38 | - uses: actions/upload-artifact@v3 39 | with: 40 | name: packages 41 | path: dist/* 42 | -------------------------------------------------------------------------------- /hypernets/dispatchers/run_predict.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import argparse 3 | 4 | from hypernets.dispatchers.predict.predict_helper import PredictHelper 5 | 6 | 7 | def main(): 8 | parser = argparse.ArgumentParser('run predict.') 9 | parser.add_argument('--server', '-server', 10 | default='127.0.0.1:8030', 11 | help='predict server address, separated by comma') 12 | parser.add_argument('--chunk-size', '-chunk-size', 13 | type=int, default=1000, 14 | help='chunk line number') 15 | parser.add_argument('data_file', 16 | help='data file path') 17 | parser.add_argument('result_file', 18 | help='result file path') 19 | args = parser.parse_args() 20 | 21 | servers = list(filter(lambda s: len(s) > 0, args.server.split(','))) 22 | ph = PredictHelper(servers) 23 | ph.predict(args.data_file, args.result_file, args.chunk_size) 24 | 25 | 26 | if __name__ == '__main__': 27 | try: 28 | main() 29 | print('done') 30 | except KeyboardInterrupt as e: 31 | print(e) 32 | -------------------------------------------------------------------------------- /docs/source/hypernets.dispatchers.process.rst: -------------------------------------------------------------------------------- 1 | hypernets.dispatchers.process package 2 | ===================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | hypernets.dispatchers.process.grpc 11 | 12 | Submodules 13 | ---------- 14 | 15 | hypernets.dispatchers.process.grpc\_process module 16 | -------------------------------------------------- 17 | 18 | .. automodule:: hypernets.dispatchers.process.grpc_process 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | hypernets.dispatchers.process.local\_process module 24 | --------------------------------------------------- 25 | 26 | .. automodule:: hypernets.dispatchers.process.local_process 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | hypernets.dispatchers.process.ssh\_process module 32 | ------------------------------------------------- 33 | 34 | .. automodule:: hypernets.dispatchers.process.ssh_process 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | Module contents 40 | --------------- 41 | 42 | .. 
automodule:: hypernets.dispatchers.process 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | -------------------------------------------------------------------------------- /hypernets/tabular/evaluator/h2o.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | from . import BaseEstimator 8 | import h2o 9 | from h2o.automl import H2OAutoML 10 | 11 | 12 | class H2OEstimator(BaseEstimator): 13 | def __init__(self, task, **kwargs): 14 | super(H2OEstimator, self).__init__(task) 15 | self.name = 'H2O AutoML' 16 | self.kwargs = kwargs 17 | self.estimator = None 18 | 19 | def train(self, X, y, X_test): 20 | h2o.init() 21 | target = '__tabular_toolbox_target__' 22 | X.insert(0, target, y) 23 | train = h2o.H2OFrame(X) 24 | x_cols = train.columns 25 | x_cols.remove(target) 26 | train[target] = train[target].asfactor() 27 | self.estimator = H2OAutoML(max_models=20, seed=1) 28 | self.estimator.train(x=x_cols, y=target, training_frame=train) 29 | 30 | def predict_proba(self, X): 31 | x = h2o.H2OFrame(X) 32 | preds = self.estimator.predict(x) 33 | preds = preds[1:].as_data_frame().values  # drop the leading 'predict' label column, keep class probabilities 34 | return preds 35 | 36 | def predict(self, X): 37 | proba = self.predict_proba(X) 38 | return self.proba2predict(proba) 39 | -------------------------------------------------------------------------------- /docs/source/hypernets.dispatchers.cluster.rst: -------------------------------------------------------------------------------- 1 | hypernets.dispatchers.cluster package 2 | ===================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | hypernets.dispatchers.cluster.grpc 11 | 12 | Submodules 13 | ---------- 14 | 15 | hypernets.dispatchers.cluster.cluster module 16 | -------------------------------------------- 17 | 18 | .. automodule:: hypernets.dispatchers.cluster.cluster 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | hypernets.dispatchers.cluster.driver\_dispatcher module 24 | ------------------------------------------------------- 25 | 26 | .. automodule:: hypernets.dispatchers.cluster.driver_dispatcher 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | hypernets.dispatchers.cluster.executor\_dispatcher module 32 | --------------------------------------------------------- 33 | 34 | .. automodule:: hypernets.dispatchers.cluster.executor_dispatcher 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | Module contents 40 | --------------- 41 | 42 | ..
automodule:: hypernets.dispatchers.cluster 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | -------------------------------------------------------------------------------- /hypernets/tests/utils/tic_toc_test.py: -------------------------------------------------------------------------------- 1 | from hypernets.utils import tic_toc, tic_toc_report_as_dataframe 2 | from hypernets.tabular.datasets import dsutils 3 | 4 | 5 | @tic_toc(details=True) 6 | def fn_foo(a1, a2, k1=None, k2='foo'): 7 | pass 8 | 9 | 10 | class ClsBar: 11 | @tic_toc(details=False) 12 | def no_args(self): 13 | pass 14 | 15 | @tic_toc(details=True) 16 | def method_bar(self, a1, a2, k1=None, k2='foo'): 17 | pass 18 | 19 | 20 | def foo(): 21 | fn_foo(1, 2, k1='lalala') 22 | fn_foo('dict', {'a': 'aaa', 'b': 345}) 23 | fn_foo('list', list(range(5))) 24 | fn_foo('big-list', list(range(100))) 25 | fn_foo('big-range', range(100)) 26 | fn_foo('df', dsutils.load_blood()) 27 | fn_foo('ndarray', dsutils.load_blood().values) 28 | fn_foo('fn', foo) 29 | fn_foo('lambda', lambda: print('lambda')) 30 | fn_foo(['aaa', 3, 4, ['aaa', 'bbb']], 2, k2='lalala') 31 | 32 | 33 | def cls_foo(): 34 | x = ClsBar() 35 | x.method_bar(1, 2, k1='foo') 36 | x.method_bar('dict', {'a': 'aaa', 'b': 345}) 37 | x.no_args() 38 | 39 | 40 | def test_tic_toc(): 41 | foo() 42 | cls_foo() 43 | 44 | df = tic_toc_report_as_dataframe() 45 | print(df) 46 | -------------------------------------------------------------------------------- /docs/source/hypermodels.rst: -------------------------------------------------------------------------------- 1 | HyperModel 2 | ============= 3 | 4 | HyperModel is an abstract class; a dedicated HyperModel needs to be implemented for each framework or domain. A HyperModel explores hyper-parameter samples from the Searcher, fits and evaluates Estimators, and then rewards the metric score back to the Searcher for optimization. The figure below shows the HyperModel search sequence. 5 | 6 | .. image:: images/hyper_model_search_sequence.png 7 | :width: 600 8 | :align: center 9 | :alt: search sequence 10 | 11 | 12 | Customize HyperModel 13 | ------------------------- 14 | 15 | To customize HyperModel, two components are required: 16 | 17 | * HyperModel: subclass of *hypernets.model.HyperModel*, creates a new Estimator instance from a searched space sample, and loads trained estimators from storage. 18 | 19 | * Estimator: subclass of *hypernets.model.Estimator*, the core component for model fitting/evaluation/prediction/persistence. 20 | 21 | You can reference *hypernets.examples.plain_model.PlainModel* and *hypernets.examples.plain_model.PlainEstimator* as a starting point. See `DeepTables `_, `HyperGBM `_, `HyperKeras `_ for more details. 22 | -------------------------------------------------------------------------------- /docs/source/hypernets.experiment.rst: -------------------------------------------------------------------------------- 1 | hypernets.experiment package 2 | ============================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.experiment.cfg module 8 | ------------------------------- 9 | 10 | .. automodule:: hypernets.experiment.cfg 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | hypernets.experiment.compete module 16 | ----------------------------------- 17 | 18 | .. automodule:: hypernets.experiment.compete 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | hypernets.experiment.general module 24 | ----------------------------------- 25 | 26 | ..
automodule:: hypernets.experiment.general 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | hypernets.experiment.job module 32 | ------------------------------- 33 | 34 | .. automodule:: hypernets.experiment.job 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | hypernets.experiment.report module 40 | ---------------------------------- 41 | 42 | .. automodule:: hypernets.experiment.report 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | Module contents 48 | --------------- 49 | 50 | .. automodule:: hypernets.experiment 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | -------------------------------------------------------------------------------- /hypernets/tests/core/mutable_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | 6 | from hypernets.core.ops import Identity 7 | from hypernets.core.search_space import * 8 | 9 | 10 | class Test_Mutable: 11 | def test_scope(self): 12 | with HyperSpace().as_default(): 13 | id1 = Identity() 14 | id2 = Identity(name='named_id') 15 | id3 = Identity() 16 | id4 = Identity(name='named_id_2') 17 | 18 | assert id1.name == 'Module_Identity_1' 19 | assert id1.id == 'Module_Identity_1' 20 | 21 | assert id2.name == 'named_id' 22 | assert id2.id == 'ID_named_id' 23 | 24 | assert id3.name == 'Module_Identity_2' 25 | assert id3.id == 'Module_Identity_2' 26 | 27 | assert id4.name == 'named_id_2' 28 | assert id4.id == 'ID_named_id_2' 29 | 30 | hp1 = Int(0, 100) 31 | hp2 = Real(0, 10.0) 32 | hp3 = Choice([1, 2, 3, 4]) 33 | 34 | assert hp1.name == 'Param_Int_1' 35 | assert hp1.id == 'Param_Int_1' 36 | 37 | assert hp2.name == 'Param_Real_1' 38 | assert hp2.id == 'Param_Real_1' 39 | 40 | assert hp3.name == 'Param_Choice_1' 41 | assert hp3.id == 'Param_Choice_1' 42 | -------------------------------------------------------------------------------- /hypernets/tests/trial/trial_store_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | from hypernets.core.trial import * 6 | from hypernets.core.search_space import * 7 | from hypernets.core.ops import * 8 | from hypernets.tests import test_output_dir 9 | 10 | 11 | class Test_TrialStore(): 12 | def get_space(self): 13 | space = HyperSpace() 14 | with space.as_default(): 15 | id1 = Identity(p1=Choice([1, 2]), p2=Int(1, 100)) 16 | id2 = Identity(p3=Real(0, 1, step=0.2))(id1) 17 | id3 = Identity(p4=Dynamic(lambda p5: p5 * 3, p5=Choice([2, 4, 8])))(id2) 18 | return space 19 | 20 | def test_basic(self): 21 | store = DiskTrialStore(f'{test_output_dir}/trial_store') 22 | dataset_id = 'test_dataset' 23 | sample = self.get_space() 24 | sample.random_sample() 25 | 26 | trial = Trial(sample, 1, 0.99, 100) 27 | store.put(dataset_id, trial) 28 | store.reset() 29 | 30 | trial_get = store.get(dataset_id, sample) 31 | assert trial.trial_no == 1 32 | assert trial.reward == 0.99 33 | assert trial.elapsed == 100 34 | assert trial.space_sample.vectors == trial_get.space_sample.vectors 35 | 36 | trials = store.get_all(dataset_id, sample.signature) 37 | assert trials 38 | -------------------------------------------------------------------------------- /hypernets/tests/hyperctl/remote_batch.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "remote-batch-example", 3 | "job_command": "sleep 3;echo \"finished\"", 4 | "jobs": [ 5 | { 6 | "name": 
"job1", 7 | "params": { 8 | "learning_rate": 0.1 9 | } 10 | },{ 11 | "name": "job2", 12 | "params": { 13 | "learning_rate": 0.2 14 | } 15 | } 16 | ], 17 | "backend": { 18 | "type": "remote", 19 | "machines": [ 20 | { 21 | "connection": { 22 | "hostname": "host1", 23 | "username": "hyperctl", 24 | "password": "hyperctl" 25 | }, 26 | "environments": { 27 | "JAVA_HOME": "/usr/local/jdk" 28 | } 29 | }, 30 | { 31 | "connection":{ 32 | "hostname": "host2", 33 | "username": "hyperctl", 34 | "password": "hyperctl" 35 | } 36 | } 37 | ] 38 | }, 39 | "scheduler": { 40 | "interval": 5000, 41 | "exit_on_finish": false 42 | }, 43 | "server": { 44 | "host": "localhost", 45 | "port": 8061 46 | } 47 | } -------------------------------------------------------------------------------- /hypernets/tabular/cuml_ex/_ensemble.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | import cudf 6 | import cupy 7 | 8 | from hypernets.tabular.ensemble import GreedyEnsemble 9 | from ._transformer import Localizable, as_local_if_possible, copy_attrs_as_local 10 | 11 | 12 | class CumlGreedyEnsemble(GreedyEnsemble, Localizable): 13 | np = cupy 14 | 15 | @staticmethod 16 | def _to_local(y): 17 | if isinstance(y, cupy.ndarray): 18 | y = cupy.asnumpy(y) 19 | elif isinstance(y, cudf.Series): 20 | y = y.to_pandas() 21 | 22 | return y 23 | 24 | def _score(self, y_true, y_preds): 25 | y_true = self._to_local(y_true) 26 | y_preds = list(map(self._to_local, y_preds)) 27 | 28 | r = super()._score(y_true, y_preds) 29 | return r 30 | 31 | def as_local(self): 32 | estimators = list(map(as_local_if_possible, self.estimators)) 33 | target = GreedyEnsemble(estimators=estimators, task=self.task, need_fit=self.need_fit, 34 | n_folds=self.n_folds, method=self.method, random_state=self.random_state, 35 | scoring=self.scoring, ensemble_size=self.ensemble_size) 36 | copy_attrs_as_local(self, target, 'weights_', 'scores_', 'hits_', 'best_stack_') 37 | return target 38 | -------------------------------------------------------------------------------- /hypernets/dispatchers/predict/grpc/predict_client.py: -------------------------------------------------------------------------------- 1 | import grpc 2 | 3 | from hypernets.dispatchers.predict.grpc.proto import predict_pb2_grpc 4 | from hypernets.dispatchers.predict.grpc.proto.predict_pb2 import PredictRequest 5 | from hypernets.utils import logging 6 | 7 | logger = logging.get_logger(__name__) 8 | 9 | 10 | class PredictClient(object): 11 | 12 | def __init__(self, server): 13 | super(PredictClient, self).__init__() 14 | self.channel = grpc.insecure_channel(server) 15 | self.stub = predict_pb2_grpc.PredictServiceStub(self.channel) 16 | 17 | self.server = server 18 | self._closed = False 19 | 20 | def __del__(self): 21 | self.close() 22 | 23 | def close(self): 24 | if not self._closed: 25 | self.channel.close() 26 | 27 | def predict(self, data_file, result_file): 28 | try: 29 | request = PredictRequest(data_file=data_file, result_file=result_file) 30 | response = self.stub.predict(request) 31 | code = response.code 32 | return code 33 | except Exception as e: 34 | import traceback 35 | msg = f'[Predict {self.server}] {e.__class__.__name__}:\n' 36 | logger.error(msg + traceback.format_exc()) 37 | 38 | return 98 if isinstance(e, grpc.RpcError) else 99 39 | -------------------------------------------------------------------------------- /hypernets/tests/discriminators/base_test.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | from hypernets.discriminators import get_previous_trials_scores, get_percentile_score 7 | 8 | from . import history, group_id, group_id2 9 | 10 | 11 | def test_base(): 12 | ts = get_previous_trials_scores(history, 0, 9, group_id) 13 | assert ts.shape == (5, 10) 14 | ts = get_previous_trials_scores(history, 0, 8, group_id) 15 | assert ts.shape == (6, 9) 16 | ts2 = get_previous_trials_scores(history, 0, 9, group_id2) 17 | assert ts2.shape == (1, 10) 18 | 19 | def get_0_100_50_percentile_score(n_step, sign=-1): 20 | s1 = get_percentile_score(history, n_step, group_id, 0, sign) 21 | s2 = get_percentile_score(history, n_step, group_id, 100, sign) 22 | s3 = get_percentile_score(history, n_step, group_id, 50, sign) 23 | return s1, s2, s3 24 | 25 | p1 = get_0_100_50_percentile_score(0) 26 | assert p1 == (0.9, 0.9, 0.9) 27 | 28 | p2 = get_0_100_50_percentile_score(1) 29 | assert p2 == (0.85, 0.8, 0.85) 30 | 31 | p3 = get_0_100_50_percentile_score(5) 32 | assert p3 == (0.45, 0.4, 0.425) 33 | 34 | p4 = get_0_100_50_percentile_score(9) 35 | assert p4 == (0.25, 0.21, 0.23) 36 | 37 | p5 = get_0_100_50_percentile_score(9, 1) 38 | assert p5 == (0.21, 0.25, 0.23) 39 | -------------------------------------------------------------------------------- /hypernets/searchers/playback_searcher.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | from ..core import TrialHistory 6 | from ..core.callbacks import EarlyStoppingError 7 | from ..core.searcher import Searcher, OptimizeDirection 8 | 9 | 10 | class PlaybackSearcher(Searcher): 11 | def __init__(self, history: TrialHistory, top_n=None, reverse=False, 12 | optimize_direction=OptimizeDirection.Minimize): 13 | assert history is not None 14 | assert len(history.trials) > 0 15 | 16 | self.history = history 17 | self.top_n = top_n if top_n is not None else len(history.trials) 18 | self.samples = [t.space_sample for t in self.history.get_top(self.top_n)] 19 | self.index = 0 20 | self.reverse = reverse 21 | 22 | if reverse: 23 | self.samples.reverse() 24 | 25 | super(PlaybackSearcher, self).__init__(None, use_meta_learner=False, optimize_direction=optimize_direction) 26 | 27 | @property 28 | def parallelizable(self): 29 | return True 30 | 31 | def sample(self, space_options=None): 32 | if self.index >= len(self.samples): 33 | raise EarlyStoppingError('no more samples.') 34 | sample = self.samples[self.index] 35 | self.index += 1 36 | return sample 37 | 38 | def update_result(self, space, result): 39 | pass 40 | -------------------------------------------------------------------------------- /hypernets/tabular/evaluator/tpot.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | from tpot import TPOTClassifier, TPOTRegressor 8 | from . 
import BaseEstimator 9 | from ..column_selector import column_object_category_bool 10 | from ..sklearn_ex import SafeOrdinalEncoder 11 | 12 | 13 | class TpotEstimator(BaseEstimator): 14 | def __init__(self, task, **kwargs): 15 | super(TpotEstimator, self).__init__(task) 16 | if task == 'regression': 17 | self.tpot = TPOTRegressor(**kwargs) 18 | else: 19 | self.tpot = TPOTClassifier(**kwargs) 20 | self.name = 'tpot' 21 | self.label_encoder = None 22 | self.obj_cols = None 23 | 24 | def train(self, X, y, X_test): 25 | self.obj_cols = column_object_category_bool(X) 26 | self.label_encoder = SafeOrdinalEncoder() 27 | X[self.obj_cols] = self.label_encoder.fit_transform(X[self.obj_cols]) 28 | self.tpot.fit(X, y) 29 | 30 | def predict_proba(self, X): 31 | X[self.obj_cols] = self.label_encoder.transform(X[self.obj_cols]) 32 | proba = self.tpot.predict_proba(X) 33 | print(f'proba.shape:{proba.shape}') 34 | return proba 35 | 36 | def predict(self, X): 37 | X[self.obj_cols] = self.label_encoder.transform(X[self.obj_cols]) 38 | return self.tpot.predict(X) 39 | -------------------------------------------------------------------------------- /hypernets/tabular/dask_ex/_data_hasher.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | import dask.array as da 6 | import dask.dataframe as dd 7 | 8 | from ..data_hasher import DataHasher 9 | 10 | 11 | class DaskDataHasher(DataHasher): 12 | 13 | def _iter_data(self, data): 14 | if isinstance(data, dd.DataFrame): 15 | yield from self._iter_dask_dataframe(data) 16 | elif isinstance(data, dd.Series): 17 | yield from self._iter_dask_dataframe(data.to_frame()) 18 | elif isinstance(data, da.Array): 19 | yield from self._iter_dask_array(data) 20 | else: 21 | yield from super()._iter_data(data) 22 | 23 | @staticmethod 24 | def _iter_dask_dataframe(df): 25 | yield ','.join(map(str, df.columns.tolist())).encode('utf-8') 26 | 27 | # x = df.map_partitions(DataHasher._hash_pd_dataframe, meta=(None, 'u8')).compute() 28 | name = 'hashed' 29 | x = df.map_partitions(lambda part: DataHasher._hash_pd_dataframe(part).to_frame(name), 30 | meta={name: 'u8'}).compute() 31 | yield x.values 32 | 33 | @staticmethod 34 | def _iter_dask_array(arr): 35 | if len(arr.shape) == 1: 36 | arr = arr.compute_chunk_sizes().reshape(-1, 1) 37 | x = arr.map_blocks(DataHasher._hash_ndarray, dtype='u8').compute() 38 | yield x 39 | -------------------------------------------------------------------------------- /hypernets/tabular/cuml_ex/_persistence.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | 6 | import cudf 7 | import cupy 8 | 9 | from ..persistence import ParquetPersistence 10 | 11 | _my_cached_types = (cudf.DataFrame, cudf.Series, cupy.ndarray) 12 | 13 | _META_CUML_KEY = b'cuml_type' 14 | 15 | 16 | class CumlParquetPersistence(ParquetPersistence): 17 | acceptable_types = ParquetPersistence.acceptable_types + _my_cached_types 18 | 19 | def store(self, data, path, *, filesystem=None, **kwargs): 20 | assert isinstance(data, self.acceptable_types) 21 | 22 | metadata = {} 23 | if isinstance(data, _my_cached_types): 24 | from . 
import CumlToolBox 25 | data, = CumlToolBox.to_local(data) 26 | metadata[_META_CUML_KEY] = type(data).__name__.encode() 27 | 28 | return super().store(data, path, filesystem=filesystem, metadata=metadata, **kwargs) 29 | 30 | def load(self, path, *, filesystem=None, return_metadata=False, **kwargs): 31 | data, metadata = super().load(path, filesystem=filesystem, return_metadata=True, **kwargs) 32 | 33 | if metadata is not None and metadata.get(_META_CUML_KEY, None) is not None: 34 | from . import CumlToolBox 35 | data, = CumlToolBox.from_local(data) 36 | 37 | if return_metadata: 38 | return data, metadata 39 | else: 40 | return data 41 | -------------------------------------------------------------------------------- /docs/source/hypernets.utils.rst: -------------------------------------------------------------------------------- 1 | hypernets.utils package 2 | ======================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.utils.common module 8 | ----------------------------- 9 | 10 | .. automodule:: hypernets.utils.common 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | hypernets.utils.const module 16 | ---------------------------- 17 | 18 | .. automodule:: hypernets.utils.const 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | hypernets.utils.df\_utils module 24 | -------------------------------- 25 | 26 | .. automodule:: hypernets.utils.df_utils 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | hypernets.utils.logging module 32 | ------------------------------ 33 | 34 | .. automodule:: hypernets.utils.logging 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | hypernets.utils.param\_tuning module 40 | ------------------------------------ 41 | 42 | .. automodule:: hypernets.utils.param_tuning 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | hypernets.utils.ssh\_utils module 48 | --------------------------------- 49 | 50 | .. automodule:: hypernets.utils.ssh_utils 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | Module contents 56 | --------------- 57 | 58 | .. automodule:: hypernets.utils 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | -------------------------------------------------------------------------------- /hypernets/tabular/evaluator/auto_sklearn.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | import autosklearn.classification 7 | import autosklearn.regression 8 | from . 
import BaseEstimator 9 | from ..column_selector import column_object 10 | 11 | 12 | class AutoSklearnEstimator(BaseEstimator): 13 | def __init__(self, task, **kwargs): 14 | super(AutoSklearnEstimator, self).__init__(task) 15 | if task == 'regression': 16 | self.automl = autosklearn.regression.AutoSklearnRegressor(**kwargs) 17 | else: 18 | self.automl = autosklearn.classification.AutoSklearnClassifier(**kwargs) 19 | self.name = 'auto-sklearn' 20 | 21 | def train(self, X, y, X_test): 22 | target = '__tabular_toolbox_target__' 23 | X.insert(0, target, y) 24 | obj_cols = column_object(X) 25 | if len(obj_cols) > 0: 26 | X[obj_cols] = X[obj_cols].astype('category') 27 | y = X.pop(target) 28 | self.automl.fit(X, y) 29 | 30 | def predict_proba(self, X): 31 | obj_cols = column_object(X) 32 | if len(obj_cols) > 0: 33 | X[obj_cols] = X[obj_cols].astype('category') 34 | return self.automl.predict_proba(X) 35 | 36 | def predict(self, X): 37 | obj_cols = column_object(X) 38 | if len(obj_cols) > 0: 39 | X[obj_cols] = X[obj_cols].astype('category') 40 | return self.automl.predict(X) 41 | -------------------------------------------------------------------------------- /hypernets/tests/discriminators/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | from hypernets.discriminators import PercentileDiscriminator, get_previous_trials_scores, get_percentile_score 7 | from hypernets.core import TrialHistory, Trial 8 | 9 | history = TrialHistory(optimize_direction='min') 10 | group_id = 'lightgbm_cv_1' 11 | group_id2 = 'lightgbm_cv_2' 12 | t1 = Trial(None, 1, 0.9, 0, succeeded=True) 13 | t1.iteration_scores[group_id] = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.3, 0.3] 14 | t2 = Trial(None, 1, 0.8, 0, succeeded=True) 15 | t2.iteration_scores[group_id] = [0.9, 0.85, 0.75, 0.65, 0.55, 0.45, 0.35, 0.35, 0.35, 0.25] 16 | t3 = Trial(None, 1, 0.8, 0, succeeded=True) 17 | t3.iteration_scores[group_id] = [0.9, 0.85, 0.75, 0.65, 0.54, 0.44, 0.34, 0.34, 0.34, 0.24] 18 | t4 = Trial(None, 1, 0.8, 0, succeeded=True) 19 | t4.iteration_scores[group_id] = [0.9, 0.85, 0.75, 0.65, 0.53, 0.43, 0.33, 0.33, 0.33, 0.23] 20 | t5 = Trial(None, 1, 0.8, 0, succeeded=True) 21 | t5.iteration_scores[group_id] = [0.9, 0.85, 0.75, 0.65, 0.52, 0.42, 0.32, 0.32, 0.32, 0.22] 22 | t6 = Trial(None, 1, 0.8, 0, succeeded=True) 23 | t6.iteration_scores[group_id] = [0.9, 0.85, 0.75, 0.65, 0.51, 0.41, 0.31, 0.31, 0.31, 0.21] 24 | t6.iteration_scores[group_id2] = [0.9, 0.85, 0.75, 0.65, 0.51, 0.41, 0.31, 0.31, 0.31, 0.21] 25 | history.append(t1) 26 | history.append(t2) 27 | history.append(t3) 28 | history.append(t4) 29 | history.append(t5) 30 | history.append(t6) -------------------------------------------------------------------------------- /hypernets/tests/searchers/playback_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | import pytest 8 | 9 | from hypernets.core.ops import * 10 | from hypernets.core.search_space import * 11 | from hypernets.searchers import PlaybackSearcher 12 | from hypernets.core import TrialHistory, Trial 13 | from hypernets.core import EarlyStoppingError 14 | 15 | def get_space(): 16 | space = HyperSpace() 17 | with space.as_default(): 18 | id1 = Identity(p1=Choice(['a', 'b']), p2=Int(1, 100), p3=Real(0, 1.0)) 19 | return space 20 | 21 | 22 | th = TrialHistory('min') 23 | sample = get_space() 
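# note: assign_by_vectors() pins each hyperparameter of a freshly built space to a
# concrete value, one vector entry per parameter in definition order (p1, p2, p3 here),
# so the recorded trials can later be replayed by PlaybackSearcher in reward order.
# A further history entry could be built the same way (the values below are illustrative,
# not part of the original test):
#   s = get_space()
#   s.assign_by_vectors([1, 4, 0.4])
#   th.append(Trial(s, 4, 0.95, 120))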
24 | sample.assign_by_vectors([0, 1, 0.1]) 25 | trial = Trial(sample, 1, 0.99, 100) 26 | th.append(trial) 27 | 28 | sample = get_space() 29 | sample.assign_by_vectors([1, 2, 0.2]) 30 | trial = Trial(sample, 2, 0.9, 50) 31 | th.append(trial) 32 | 33 | sample = get_space() 34 | sample.assign_by_vectors([0, 3, 0.3]) 35 | trial = Trial(sample, 3, 0.7, 200) 36 | th.append(trial) 37 | 38 | 39 | class Test_PlaybackSearcher(): 40 | def test_playback_searcher(self): 41 | searcher = PlaybackSearcher(th, top_n=2) 42 | sample1 = searcher.sample() 43 | assert sample1.vectors == [0, 3, 0.3] 44 | sample2 = searcher.sample() 45 | assert sample2.vectors == [1, 2, 0.2] 46 | with pytest.raises(EarlyStoppingError) as ese: 47 | searcher.sample() 48 | assert ese.value.args[0] == 'no more samples.' 49 | -------------------------------------------------------------------------------- /hypernets/dispatchers/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | import time 4 | 5 | from .cfg import DispatchCfg as c 6 | 7 | 8 | def get_dispatcher(hyper_model, **kwargs): 9 | timestamp = time.strftime('%Y%m%d%H%M%S') 10 | experiment = c.experiment if len(c.experiment) > 0 else f'experiment_{timestamp}' 11 | work_dir = c.work_dir if len(c.work_dir) > 0 else f'{experiment}' 12 | 13 | if hyper_model.searcher.parallelizable: 14 | if c.backend == 'dask': 15 | from .dask.dask_dispatcher import DaskDispatcher 16 | return DaskDispatcher(work_dir) 17 | elif c.backend == 'cluster': 18 | driver_address = c.cluster_driver 19 | if c.cluster_role == 'driver': 20 | from hypernets.dispatchers.cluster import DriverDispatcher 21 | return DriverDispatcher(driver_address, work_dir) 22 | elif c.cluster_role == 'executor': 23 | if driver_address is None: 24 | raise Exception('Setting "driver" is required for the executor role.') 25 | from hypernets.dispatchers.cluster import ExecutorDispatcher 26 | return ExecutorDispatcher(driver_address) 27 | 28 | return default_dispatcher(work_dir) 29 | 30 | 31 | def default_dispatcher(work_dir=None): 32 | from .in_process_dispatcher import InProcessDispatcher 33 | 34 | models_dir = f'{work_dir}/models' if work_dir else '' 35 | return InProcessDispatcher(models_dir) 36 | -------------------------------------------------------------------------------- /hypernets/examples/smoke_testing.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | import numpy as np 6 | 7 | from hypernets.core.ops import Choice, Bool, Identity 8 | from hypernets.core.search_space import HyperSpace, Int, Real 9 | from hypernets.searchers.evolution_searcher import EvolutionSearcher 10 | from hypernets.searchers.mcts_searcher import MCTSSearcher 11 | from hypernets.searchers.random_searcher import RandomSearcher 12 | 13 | 14 | def get_space(): 15 | space = HyperSpace() 16 | with space.as_default(): 17 | p1 = Int(1, 100) 18 | p2 = Choice(['a', 'b', 'c']) 19 | p3 = Bool() 20 | p4 = Real(0.0, 1.0) 21 | id1 = Identity(p1=p1) 22 | id2 = Identity(p2=p2)(id1) 23 | id3 = Identity(p3=p3)(id2) 24 | id4 = Identity(p4=p4)(id3) 25 | return space 26 | 27 | 28 | def run_search(): 29 | searchers = ( 30 | RandomSearcher(get_space, space_sample_validation_fn=lambda s: True), 31 | MCTSSearcher(get_space, max_node_space=10), 32 | EvolutionSearcher(get_space, 5, 3, regularized=False) 33 | ) 34 | 35 | for searcher in searchers: 36 | for i in range(100): 37 | space_sample = searcher.sample() 38 | assert 
space_sample.all_assigned == True 39 | print(searcher.__class__.__name__, i, space_sample.params_summary()) 40 | searcher.update_result(space_sample, [np.random.uniform(0.1, 0.9)]) 41 | 42 | 43 | if __name__ == '__main__': 44 | run_search() 45 | -------------------------------------------------------------------------------- /hypernets/tests/model/plain_model_test.py: -------------------------------------------------------------------------------- 1 | from hypernets.core.callbacks import SummaryCallback 2 | from hypernets.examples.plain_model import PlainModel, PlainSearchSpace 3 | from hypernets.examples.plain_model import train_heart_disease 4 | from hypernets.searchers import make_searcher 5 | from hypernets.tabular.sklearn_ex import MultiLabelEncoder 6 | 7 | 8 | class DaskPlainModel(PlainModel): 9 | def _get_estimator(self, space_sample): 10 | from hypernets.tabular import get_tool_box 11 | import dask.dataframe as dd 12 | 13 | estimator = super()._get_estimator(space_sample) 14 | 15 | return get_tool_box(dd.DataFrame).wrap_local_estimator(estimator) 16 | 17 | 18 | def create_plain_model(reward_metric='auc', optimize_direction='max', 19 | with_encoder=False, with_dask=False): 20 | search_space = PlainSearchSpace(enable_dt=True, enable_lr=True, enable_nn=False) 21 | searcher = make_searcher('random', search_space_fn=search_space, optimize_direction=optimize_direction) 22 | 23 | encoder = MultiLabelEncoder if with_encoder else None 24 | cls = DaskPlainModel if with_dask else PlainModel 25 | hyper_model = cls(searcher=searcher, reward_metric=reward_metric, callbacks=[SummaryCallback()], 26 | transformer=encoder) 27 | 28 | return hyper_model 29 | 30 | 31 | def test_train_heart_disease(): 32 | train_heart_disease(cv=False, max_trials=5) 33 | 34 | 35 | def test_train_heart_disease_with_cv(): 36 | train_heart_disease(cv=True, max_trials=5) 37 | -------------------------------------------------------------------------------- /hypernets/hyperctl/consts.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | KEY_ENV_BATCHES_DATA_DIR = 'HYPERCTL_BATCHES_DATA_DIR' 5 | KEY_ENV_JOB_NAME = 'HYPERCTL_JOB_NAME' 6 | KEY_ENV_JOB_DATA_DIR = 'HYPERCTL_JOB_DATA_DIR' 7 | KEY_ENV_JOB_WORKING_DIR = 'HYPERCTL_JOB_WORKING_DIR' 8 | KEY_ENV_SERVER_PORTAL = 'HYPERCTL_SERVER_PORTAL' 9 | KEY_ENV_TMP = 'TMP' 10 | 11 | # placeholder 12 | P_HOST_ENV = 'P_HOST_ENV' 13 | P_TMP_ENV = 'P_TMP_ENV' 14 | 15 | KEY_TEMPLATE_COMMAND = "COMMAND" 16 | 17 | RUN_SH_TEMPLATE = f"""#!/bin/sh 18 | export {KEY_ENV_JOB_NAME}="#{KEY_ENV_JOB_NAME}#" 19 | export {KEY_ENV_JOB_DATA_DIR}="#{KEY_ENV_JOB_DATA_DIR}#" 20 | export {KEY_ENV_SERVER_PORTAL}="#{KEY_ENV_SERVER_PORTAL}#" 21 | export {KEY_ENV_JOB_WORKING_DIR}="#{KEY_ENV_JOB_WORKING_DIR}#" 22 | 23 | #{P_TMP_ENV} 24 | #{P_HOST_ENV} 25 | 26 | if [ -n "$TMP" ]; then 27 | if [ ! 
-d "$TMP" ]; then 28 | mkdir -p $TMP 29 | fi 30 | fi 31 | 32 | cd ${KEY_ENV_JOB_WORKING_DIR} 33 | #{KEY_TEMPLATE_COMMAND}# >"${KEY_ENV_JOB_DATA_DIR}/stdout" 2>"${KEY_ENV_JOB_DATA_DIR}/stderr" 34 | """ 35 | 36 | HOST_LOCALHOST = "localhost" 37 | 38 | BATCH_TEMP = "hynctl_batch_temp" 39 | 40 | JOB_DATA_DIR_PREFIX = "hynctl_job_" 41 | 42 | 43 | def default_batches_data_dir(batches_data_dir): 44 | if batches_data_dir is None: 45 | bdd_env = os.environ.get(KEY_ENV_BATCHES_DATA_DIR) 46 | if bdd_env is None: 47 | bdd_default = Path("~/hyperctl-batches-data-dir").expanduser().as_posix() 48 | return bdd_default 49 | else: 50 | return bdd_env 51 | else: 52 | return batches_data_dir 53 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/utils_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | import copy 7 | import io 8 | 9 | import pandas as pd 10 | 11 | from hypernets.tabular import get_tool_box 12 | 13 | csv_str = '''x1_int_nanchar,x2_all_nan,x3_const_str,x4_const_int,x5_dup_1,x6_dup_2,x7_dup_f1,x8_dup_f2,x9_f,x10,y 14 | 1.0,,const,5,dup,dup,0.1,0.1,1.23,\\N,1 15 | 2.2,,const,5,dupa,dupa,0.111,0.111,4.4,\\N,1 16 | \\N,,const,5,dupb,dupb,0.12323,0.12323,1.233,\\N,1 17 | 4.,,const,5,dupc,dupc,0.14334,0.14334,4534434.2,\\N,0 18 | 5,,const,5,dupd,dupd,0.144,0.144,2302.2,\\N,0 19 | 6,,const,5,dupe,dupe,0.155,0.155,34334.1,\\N,\\N 20 | ''' 21 | 22 | 23 | class Test_DataCleaner(): 24 | def test_basic(self): 25 | hasher = get_tool_box(pd.DataFrame).data_hasher() 26 | df1 = pd.read_csv(io.StringIO(csv_str)) 27 | hash1 = hasher(df1) 28 | 29 | df2 = pd.read_csv(io.StringIO(csv_str)) 30 | hash2 = hasher(df2) 31 | assert hash1 == hash2 32 | 33 | df3 = df1.head(5) 34 | hash3 = hasher(df3) 35 | assert hash1 != hash3 36 | 37 | df4 = pd.concat([df1, df1.head(1)], axis=0) 38 | hash4 = hasher(df4) 39 | assert hash1 != hash4 40 | 41 | df5 = copy.deepcopy(df1) 42 | df5['x1_int_nanchar'] = ['1.0', '2.2', '\\N', '4.', '5', '6'] 43 | hash5 = hasher(df5) 44 | assert hash1 == hash5 45 | 46 | df6 = copy.deepcopy(df1) 47 | df6['x1_int_nanchar'] = ['2.0', '2.2', '\\N', '4.', '5', '6'] 48 | hash6 = hasher(df6) 49 | assert hash1 != hash6 50 | 51 | # TODO @lxf add unit tests for Dask.DataFrame 52 | -------------------------------------------------------------------------------- /docs/source/hypernets.tabular.evaluator.rst: -------------------------------------------------------------------------------- 1 | hypernets.tabular.evaluator package 2 | =================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.tabular.evaluator.auto\_sklearn module 8 | ------------------------------------------------ 9 | 10 | .. automodule:: hypernets.tabular.evaluator.auto_sklearn 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | hypernets.tabular.evaluator.h2o module 16 | -------------------------------------- 17 | 18 | .. automodule:: hypernets.tabular.evaluator.h2o 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | hypernets.tabular.evaluator.hyperdt module 24 | ------------------------------------------ 25 | 26 | .. automodule:: hypernets.tabular.evaluator.hyperdt 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | hypernets.tabular.evaluator.hypergbm module 32 | ------------------------------------------- 33 | 34 | .. 
automodule:: hypernets.tabular.evaluator.hypergbm 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | hypernets.tabular.evaluator.tests module 40 | ---------------------------------------- 41 | 42 | .. automodule:: hypernets.tabular.evaluator.tests 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | hypernets.tabular.evaluator.tpot module 48 | --------------------------------------- 49 | 50 | .. automodule:: hypernets.tabular.evaluator.tpot 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | Module contents 56 | --------------- 57 | 58 | .. automodule:: hypernets.tabular.evaluator 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | -------------------------------------------------------------------------------- /hypernets/discriminators/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | from ._base import get_previous_trials_scores, get_percentile_score, UnPromisingTrial, BaseDiscriminator 8 | from .percentile import PercentileDiscriminator, ProgressivePercentileDiscriminator, OncePercentileDiscriminator 9 | 10 | _discriminators = { 11 | 'percentile': PercentileDiscriminator, 12 | 'once_percentile': OncePercentileDiscriminator, 13 | 'percentile_discriminator': PercentileDiscriminator, 14 | 'progressive': ProgressivePercentileDiscriminator, 15 | 'progressive_percentile': ProgressivePercentileDiscriminator, 16 | 'progressive_percentile_discriminator': ProgressivePercentileDiscriminator, 17 | } 18 | 19 | 20 | def _get_discriminator_cls(identifier): 21 | if isinstance(identifier, str): 22 | cls = _discriminators.get(identifier.lower(), None) 23 | if cls is not None: 24 | return cls 25 | elif isinstance(identifier, type) and issubclass(identifier, BaseDiscriminator): 26 | return identifier 27 | 28 | raise ValueError(f'Illegal discriminator:{identifier}') 29 | 30 | 31 | def make_discriminator(cls, optimize_direction='min', **kwargs): 32 | cls = _get_discriminator_cls(cls) 33 | 34 | if cls == PercentileDiscriminator: 35 | default_kwargs = dict(percentile=0) 36 | elif cls == ProgressivePercentileDiscriminator: 37 | default_kwargs = dict(percentile_list=[0]) 38 | else: 39 | default_kwargs = {} 40 | 41 | kwargs = {**default_kwargs, **kwargs} 42 | discriminator = cls(optimize_direction=optimize_direction, **kwargs) 43 | return discriminator 44 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/tb_dask/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | import math 6 | import os 7 | 8 | import psutil 9 | import pytest 10 | 11 | from hypernets.tabular import is_dask_installed 12 | 13 | if_dask_ready = pytest.mark.skipif(not is_dask_installed, reason='dask or dask_ml are not installed') 14 | 15 | 16 | def _startup_dask(overload): 17 | from dask.distributed import LocalCluster, Client 18 | 19 | if os.environ.get('DASK_SCHEDULER_ADDRESS') is not None: 20 | # use dask default settings 21 | client = Client() 22 | else: 23 | # start local cluster 24 | cores = psutil.cpu_count() 25 | workers = math.ceil(cores / 3) 26 | workers = max(2, workers) 27 | if workers > 1: 28 | if overload <= 0: 29 | overload = 1.0 30 | mem_total = psutil.virtual_memory().available / (1024 ** 3) # GB 31 | mem_per_worker = math.ceil(mem_total / workers * overload) 32 | if mem_per_worker > 
mem_total: 33 | mem_per_worker = mem_total 34 | cluster = LocalCluster(processes=True, n_workers=workers, threads_per_worker=4, 35 | memory_limit=f'{mem_per_worker}GB') 36 | else: 37 | cluster = LocalCluster(processes=False) 38 | 39 | client = Client(cluster) 40 | return client 41 | 42 | 43 | def setup_dask(cls): 44 | try: 45 | from dask.distributed import default_client 46 | client = default_client() 47 | except Exception: 48 | client = _startup_dask(2.0) 49 | print('Dask Client:', client) 50 | 51 | if cls is not None: 52 | setattr(cls, 'dask_client_', client) 53 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | Hypernets has been developed and used by many active community members. Everyone is more than welcome to make the project better and more accessible to more users. Whether it's a bug report, new feature, correction, or additional documentation, we greatly value feedback and contributions from our community. 3 | 4 | We are proud of this project and have been working to make it great since day one. We believe you will love it, and every idea about Hypernets you have in mind helps push this project forward. 5 | 6 | Join Us! 7 | 8 | # Bug Reports and Feature Requests 9 | The single most important contribution that you can make is to report bugs and make feature requests. The development work on Hypernets is largely driven by these, so please make your voice heard! 10 | 11 | Here are the issue templates we recommend when you report bugs or suggest features. 12 | - Bug Issue 13 | - Feature Request 14 | - Other Issues 15 | 16 | Ideally, you can attach some code in your issue to reproduce the bug. 17 | 18 | 19 | # Contributing via Pull Requests 20 | Code contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 21 | 22 | 1. You are working against the latest source on the master branch. 23 | 2. You check existing open, and recently merged, pull requests to make 24 | sure someone else hasn't addressed the problem already. 25 | 3. You open an issue to discuss any significant work - we would hate for 26 | your time to be wasted. 27 | 28 | To send us a pull request, please: 29 | 30 | 1. Fork the repository. 31 | 2. Modify the source. 32 | 3. Ensure local tests pass. 33 | 4. Commit to your fork using clear commit messages. 34 | 5. 
Send us a pull request. 35 | -------------------------------------------------------------------------------- /hypernets/hyperctl/utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Optional 3 | 4 | import yaml 5 | import json 6 | import requests 7 | 8 | 9 | def load_yaml(file_path): 10 | 11 | if not Path(file_path).exists(): 12 | raise FileNotFoundError(file_path) 13 | 14 | with open(file_path, 'r') as f: 15 | content = f.read() 16 | return yaml.load(content, Loader=yaml.CLoader) 17 | 18 | 19 | def load_json(file_path): 20 | if not Path(file_path).exists(): 21 | raise FileNotFoundError(file_path) 22 | 23 | with open(file_path, 'r') as f: 24 | content = f.read() 25 | return json.loads(content) 26 | 27 | 28 | def copy_item(src, dest, key): 29 | v = src.get(key) 30 | if v is not None: 31 | dest[key] = v 32 | 33 | 34 | def http_portal(host, port): 35 | return f"http://{host}:{port}" 36 | 37 | 38 | def get_request(url): 39 | def f(url_, request_data_: str): 40 | return requests.get(url_) 41 | 42 | return _request(url, f, None) 43 | 44 | 45 | def post_request(url, request_data: Optional[str]): 46 | def f(url_, request_data_: str): 47 | return requests.post(url_, data=request_data_) 48 | 49 | return _request(url, f, request_data) 50 | 51 | 52 | def _request(url, req_func, request_data=None): 53 | from hypernets.utils import logging as hyn_logging 54 | logger = hyn_logging.getLogger(__name__) 55 | 56 | logger.debug(f"request data :\n{request_data}\nto {url}") 57 | resp = req_func(url, request_data) 58 | txt_resp = resp.text 59 | logger.debug(f"response text: \n{txt_resp}") 60 | json_resp = json.loads(txt_resp) 61 | code = json_resp['code'] 62 | if code == 0: 63 | return json_resp['data'] 64 | else: 65 | raise RuntimeError(txt_resp) 66 | -------------------------------------------------------------------------------- /hypernets/tests/dispatchers/process_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import pytest 5 | 6 | 7 | def start_broker(host, port): 8 | from hypernets.dispatchers import run_broker 9 | from hypernets.dispatchers.process import LocalProcess 10 | 11 | broker_cmd = f'python -m {run_broker.__name__} --host={host} --port={port}' 12 | broker = LocalProcess(broker_cmd, None, None, None) 13 | broker.start() 14 | 15 | return broker 16 | 17 | 18 | @pytest.mark.xfail(reason='Ignore') 19 | def test_grpc_broker_run(): 20 | try: 21 | from paramiko import SSHClient, AutoAddPolicy 22 | import grpc 23 | package_exists = True 24 | except Exception: 25 | package_exists = False 26 | if not package_exists: 27 | return 28 | 29 | import tempfile 30 | from hypernets.dispatchers.process import GrpcProcess 31 | from hypernets.utils.common import generate_id 32 | 33 | broker_host = '127.0.0.1' 34 | broker_port = 43218 35 | broker = start_broker(broker_host, broker_port) 36 | time.sleep(2) 37 | 38 | # run process 39 | cmd = 'echo 123' 40 | temp_dir = tempfile.gettempdir() 41 | test_id = generate_id() 42 | out_file, err_file = f'{temp_dir}/test_out_{test_id}.out', f'{temp_dir}/test_out_{test_id}.err' 43 | proc = GrpcProcess(f'{broker_host}:{broker_port}', cmd, None, out_file, err_file) 44 | proc.run() 45 | code = proc.exitcode 46 | 47 | with open(out_file, 'r') as f: 48 | out = f.read() 49 | with open(err_file, 'r') as f: 50 | err = f.read() 51 | 52 | # clean up 53 | os.remove(out_file), os.remove(err_file) 54 | broker.terminate() # todo: fix 
LocalProcess 55 | 56 | # assert 57 | assert code == 0 58 | assert out == '123\n' 59 | assert err.startswith('pid:') 60 | -------------------------------------------------------------------------------- /hypernets/dispatchers/predict/grpc/predict_service.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from hypernets.dispatchers.predict.grpc.proto import predict_pb2_grpc 4 | from hypernets.dispatchers.predict.grpc.proto.predict_pb2 import PredictResponse 5 | from hypernets.dispatchers.process import LocalProcess 6 | from hypernets.utils import logging 7 | 8 | logger = logging.get_logger(__name__) 9 | 10 | 11 | class PredictService(predict_pb2_grpc.PredictServiceServicer): 12 | def __init__(self, cmd): 13 | super(PredictService, self).__init__() 14 | assert cmd 15 | 16 | self.cmd = cmd 17 | 18 | def predict(self, request, context): 19 | data_file = request.data_file 20 | result_file = request.result_file 21 | 22 | start_at = time.time() 23 | 24 | if logger.is_info_enabled(): 25 | print(f'predict {data_file} --> {result_file}', end='') 26 | 27 | cmd = f'{self.cmd} {data_file} {result_file}' 28 | p = LocalProcess(cmd, None, None, None) 29 | p.start() 30 | p.join() 31 | code = p.exitcode 32 | 33 | res = PredictResponse(data_file=data_file, result_file=result_file, code=code) 34 | 35 | done_at = time.time() 36 | if logger.is_info_enabled(): 37 | print(' done, elapsed %.3f seconds.' % (done_at - start_at)) 38 | return res 39 | 40 | 41 | def serve(addr, cmd): 42 | import grpc 43 | from concurrent import futures 44 | 45 | if logger.is_info_enabled(): 46 | logger.info(f'start predict service at {addr}') 47 | service = PredictService(cmd) 48 | server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) 49 | predict_pb2_grpc.add_PredictServiceServicer_to_server(service, server) 50 | 51 | server.add_insecure_port(addr) 52 | server.start() 53 | 54 | return server, service 55 | -------------------------------------------------------------------------------- /docs/source/hypernets.dispatchers.rst: -------------------------------------------------------------------------------- 1 | hypernets.dispatchers package 2 | ============================= 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | hypernets.dispatchers.cluster 11 | hypernets.dispatchers.dask 12 | hypernets.dispatchers.predict 13 | hypernets.dispatchers.process 14 | 15 | Submodules 16 | ---------- 17 | 18 | hypernets.dispatchers.cfg module 19 | -------------------------------- 20 | 21 | .. automodule:: hypernets.dispatchers.cfg 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | 26 | hypernets.dispatchers.in\_process\_dispatcher module 27 | ---------------------------------------------------- 28 | 29 | .. automodule:: hypernets.dispatchers.in_process_dispatcher 30 | :members: 31 | :undoc-members: 32 | :show-inheritance: 33 | 34 | hypernets.dispatchers.run module 35 | -------------------------------- 36 | 37 | .. automodule:: hypernets.dispatchers.run 38 | :members: 39 | :undoc-members: 40 | :show-inheritance: 41 | 42 | hypernets.dispatchers.run\_broker module 43 | ---------------------------------------- 44 | 45 | .. automodule:: hypernets.dispatchers.run_broker 46 | :members: 47 | :undoc-members: 48 | :show-inheritance: 49 | 50 | hypernets.dispatchers.run\_predict module 51 | ----------------------------------------- 52 | 53 | .. 
automodule:: hypernets.dispatchers.run_predict 54 | :members: 55 | :undoc-members: 56 | :show-inheritance: 57 | 58 | hypernets.dispatchers.run\_predict\_server module 59 | ------------------------------------------------- 60 | 61 | .. automodule:: hypernets.dispatchers.run_predict_server 62 | :members: 63 | :undoc-members: 64 | :show-inheritance: 65 | 66 | Module contents 67 | --------------- 68 | 69 | .. automodule:: hypernets.dispatchers 70 | :members: 71 | :undoc-members: 72 | :show-inheritance: 73 | -------------------------------------------------------------------------------- /hypernets/core/pareto.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def pareto_dominate(x1, x2, directions=None): 5 | """Pareto dominance: return True if x1 dominates x2. 6 | """ 7 | if not isinstance(x1, np.ndarray): 8 | x1 = np.array(x1) 9 | 10 | if not isinstance(x2, np.ndarray): 11 | x2 = np.array(x2) 12 | 13 | if directions is None: 14 | directions = ['min'] * x1.shape[0] 15 | 16 | ret = [] 17 | for i in range(x1.shape[0]): 18 | if directions[i] == 'min': 19 | if x1[i] < x2[i]: 20 | ret.append(1) 21 | elif x1[i] == x2[i]: 22 | ret.append(0) 23 | else: 24 | return False 25 | else: 26 | if x1[i] > x2[i]: 27 | ret.append(1) 28 | elif x1[i] == x2[i]: 29 | ret.append(0) 30 | else: 31 | return False 32 | 33 | return np.sum(np.array(ret)) >= 1 34 | 35 | 36 | def calc_nondominated_set(solutions: np.ndarray, dominate_func=None, directions=None): 37 | 38 | assert solutions.ndim == 2 39 | 40 | if directions is None: 41 | directions = ['min'] * solutions.shape[1] 42 | 43 | if dominate_func is None: 44 | dominate_func = pareto_dominate 45 | 46 | def is_pareto_optimal(scores_i): 47 | if (scores_i == None).any(): # an individual with any None score is invalid and never Pareto-optimal 48 | return False 49 | for scores_j in solutions: 50 | if (scores_i == scores_j).all(): 51 | continue 52 | if dominate_func(x1=scores_j, x2=scores_i, directions=directions): 53 | return False 54 | return True 55 | 56 | optimal = [] 57 | for i, solution in enumerate(solutions): 58 | if is_pareto_optimal(solution): 59 | optimal.append(i) 60 | return optimal 61 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/tb_dask/toolbox_test.py: -------------------------------------------------------------------------------- 1 | import os.path as path 2 | 3 | import pandas as pd 4 | 5 | from hypernets.tabular import get_tool_box 6 | from hypernets.tabular.datasets import dsutils 7 | from . 
import if_dask_ready, is_dask_installed 8 | 9 | if is_dask_installed: 10 | import dask.dataframe as dd 11 | from hypernets.tabular.dask_ex import DaskToolBox 12 | 13 | 14 | @if_dask_ready 15 | class TestDaskToolBox: 16 | def test_get_tool_box(self): 17 | tb = get_tool_box(dd.DataFrame) 18 | assert tb is DaskToolBox 19 | 20 | ddf = dd.from_pandas(pd.DataFrame(dict( 21 | x1=['a', 'b', 'c'], 22 | x2=[1, 2, 3] 23 | )), npartitions=1) 24 | tb = get_tool_box(ddf) 25 | assert tb is DaskToolBox 26 | 27 | def test_concat_df(self): 28 | df = pd.DataFrame(dict( 29 | x1=['a', 'b', 'c'], 30 | x2=[1, 2, 3] 31 | )) 32 | ddf = dd.from_pandas(df, npartitions=2) 33 | tb = get_tool_box(ddf) 34 | 35 | # DataFrame + DataFrame 36 | df1 = tb.concat_df([ddf, ddf], axis=0) 37 | assert isinstance(df1, dd.DataFrame) 38 | 39 | df1 = df1.compute() 40 | df2 = pd.concat([df, df], axis=0).reset_index(drop=True) 41 | assert (df1 == df2).all().all() 42 | 43 | # DataFrame + array 44 | df1 = tb.concat_df([ddf, ddf.to_dask_array(lengths=True)], axis=0) 45 | assert isinstance(df1, dd.DataFrame) 46 | 47 | df1 = df1.compute() 48 | df2 = pd.concat([df, df], axis=0).reset_index(drop=True) 49 | assert (df1 == df2).all().all() 50 | 51 | def test_load_data(self, ): 52 | data_dir = path.split(dsutils.__file__)[0] 53 | data_file = f'{data_dir}/blood.csv' 54 | 55 | df = DaskToolBox.load_data(data_file, reset_index=True) 56 | assert isinstance(df, dd.DataFrame) 57 | -------------------------------------------------------------------------------- /hypernets/dispatchers/cfg.py: -------------------------------------------------------------------------------- 1 | from hypernets.conf import configure, Configurable, String, Int, Float, Enum 2 | 3 | 4 | @configure() 5 | class DispatchCfg(Configurable): 6 | experiment = String(help='experiment id', 7 | ).tag(config=True) 8 | work_dir = String(help='storage directory path to store running data.' 9 | ).tag(config=True) 10 | backend = Enum(['standalone', 'dask', 'cluster', None], 11 | default_value=None, 12 | help='dispatcher backend' 13 | ).tag(config=True) 14 | trial_retry_limit = Int(1000, min=1, 15 | help='maximum retry number to run trial.' 
16 | ).tag(config=True) 17 | 18 | cluster_driver = String(help='driver address, used if backend="cluster"' 19 | ).tag(config=True) 20 | cluster_role = Enum(['driver', 'executor'], 21 | help='node role, used if backend="cluster"' 22 | ).tag(config=True) 23 | cluster_search_queue = Int(1, min=1, 24 | help='search queue size, used if backend="cluster"' 25 | ).tag(config=True) 26 | cluster_summary_interval = Float(60.0, 27 | help='summary interval seconds', 28 | ).tag(config=True) 29 | 30 | dask_search_queue = Int(1, min=1, 31 | help='search queue size, used if backend="dask"' 32 | ).tag(config=True) 33 | dask_search_executors = Int(3, min=1, 34 | help='search executor number, used if backend="dask"' 35 | ).tag(config=True) 36 | 37 | grpc_worker_count = Int(10, min=1, 38 | help='grpc worker count' 39 | ).tag(config=True) 40 | -------------------------------------------------------------------------------- /hypernets/tabular/dask_ex/_dataframe_mapper.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import numpy as np 3 | from dask import array as da 4 | from dask import dataframe as dd 5 | from scipy import sparse as _sparse 6 | 7 | from hypernets.tabular.dataframe_mapper import DataFrameMapper 8 | from hypernets.utils import logging 9 | 10 | logger = logging.get_logger(__name__) 11 | 12 | 13 | class DaskDataFrameMapper(DataFrameMapper): 14 | @staticmethod 15 | def _fix_feature(fea): 16 | from ._toolbox import DaskToolBox 17 | 18 | if DaskToolBox.is_dask_object(fea): 19 | pass 20 | elif _sparse.issparse(fea): 21 | fea = fea.toarray() 22 | 23 | if len(fea.shape) == 1: 24 | """ 25 | Convert 1-dimensional arrays to 2-dimensional column vectors. 26 | """ 27 | if isinstance(fea, da.Array): 28 | fea = da.stack([fea], axis=-1) 29 | else: 30 | fea = np.array([fea]).T 31 | 32 | return fea 33 | 34 | @staticmethod 35 | def _hstack_array(extracted): 36 | from ._toolbox import DaskToolBox 37 | 38 | if DaskToolBox.exist_dask_object(*extracted): 39 | extracted = [a.values if isinstance(a, dd.DataFrame) else a for a in extracted] 40 | stacked = DaskToolBox.hstack_array(extracted) 41 | else: 42 | stacked = np.hstack(extracted) 43 | return stacked 44 | 45 | def _to_df(self, X, extracted, columns): 46 | if isinstance(X, dd.DataFrame): 47 | from ._toolbox import DaskToolBox 48 | 49 | dfs = [dd.from_dask_array(arr, index=None) if isinstance(arr, da.Array) else arr for arr in extracted] 50 | df = DaskToolBox.concat_df(dfs, axis=1) if len(dfs) > 1 else dfs[0] 51 | df.columns = columns 52 | else: 53 | df = super()._to_df(X, extracted, columns) 54 | 55 | return df 56 | -------------------------------------------------------------------------------- /hypernets/tests/utils/common_test.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import os 3 | 4 | from hypernets.utils import common as common_util 5 | 6 | 7 | def test_camel_keys_to_snake(): 8 | input_dict = { 9 | 'datasetConf': { 10 | 'trainData': './train.csv' 11 | }, 12 | 'name': 'with-feature-selection', 13 | 'jobs': [ 14 | { 15 | 'featureSelection': { 16 | 'leastFeatures': 10 17 | }, 18 | 'callbackSetting': [{ 19 | 'className': 'hypernets.core.ConsoleCallback' 20 | }] 21 | } 22 | ] 23 | } 24 | 25 | ret_dict = common_util.camel_keys_to_snake(input_dict) 26 | assert ret_dict['dataset_conf']['train_data'] == input_dict['datasetConf']['trainData'] 27 | assert ret_dict['name'] == input_dict['name'] 28 | 29 | input_job_conf_dict = 
input_dict['jobs'][0] 30 | ret_job_conf_dict = ret_dict['jobs'][0] 31 | 32 | assert ret_job_conf_dict['feature_selection']['least_features'] == \ 33 | input_job_conf_dict['featureSelection']['leastFeatures'] 34 | 35 | assert ret_job_conf_dict['callback_setting'][0]['class_name'] == \ 36 | input_job_conf_dict['callbackSetting'][0]['className'] 37 | 38 | 39 | def test_make_tempfile(): 40 | 41 | temp_file_path: str = common_util.get_temp_file_path(prefix='prefix', suffix='.txt') 42 | assert not os.path.exists(temp_file_path) 43 | 44 | assert os.path.basename(temp_file_path).startswith('prefix') 45 | assert os.path.basename(temp_file_path).endswith('.txt') 46 | 47 | temp_file_dir_created = common_util.get_temp_dir_path(prefix='prefix', suffix='prefix', create=True) 48 | assert os.path.exists(temp_file_dir_created) 49 | 50 | temp_file_dir_not_created = common_util.get_temp_dir_path(prefix='prefix', suffix='prefix', create=False) 51 | assert not os.path.exists(temp_file_dir_not_created) 52 | -------------------------------------------------------------------------------- /hypernets/dispatchers/process/grpc_process.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | import sys 4 | from multiprocessing import Process, Value as PValue 5 | 6 | from hypernets.dispatchers.process.grpc.process_broker_client import ProcessBrokerClient 7 | from hypernets.utils import logging 8 | 9 | logger = logging.get_logger(__name__) 10 | 11 | 12 | class GrpcProcess(Process): 13 | def __init__(self, grpc_broker, cmd, in_file, out_file, err_file, environment=None): 14 | super(GrpcProcess, self).__init__() 15 | 16 | self.grpc_broker = grpc_broker 17 | self.cmd = cmd 18 | self.in_file = in_file 19 | self.out_file = out_file 20 | self.err_file = err_file 21 | self.environment = environment 22 | self._exit_code = PValue('i', -1) 23 | 24 | def run(self, verbose=False): 25 | if verbose and logger.is_info_enabled(): 26 | msg = f'[{self.name}] [GRPC {self.grpc_broker}] {self.cmd}, out={self.out_file}, err={self.err_file}' 27 | logger.info(msg) 28 | 29 | try: 30 | client = ProcessBrokerClient(self.grpc_broker) 31 | buffer_size = 16 32 | if self.out_file and self.err_file: 33 | with open(self.out_file, 'wb', buffering=0)as o, open(self.err_file, 'wb', buffering=0) as e: 34 | code = client.run(self.cmd.split(' '), stdout=o, stderr=e, buffer_size=buffer_size) 35 | else: 36 | code = client.run(self.cmd.split(' '), stdout=sys.stdout, stderr=sys.stderr, buffer_size=buffer_size) 37 | except KeyboardInterrupt: 38 | code = 137 39 | 40 | if verbose and logger.is_info_enabled(): 41 | logger.info(f'[{self.name}] [GRPC {self.grpc_broker}] {self.cmd} done with {code}') 42 | self._exit_code.value = code 43 | 44 | @property 45 | def exitcode(self): 46 | code = self._exit_code.value 47 | return code if code >= 0 else None 48 | -------------------------------------------------------------------------------- /hypernets/tabular/datasets/dsutils.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import os 3 | 4 | basedir = os.path.dirname(__file__) 5 | 6 | 7 | def load_boston(): 8 | import pandas as pd 9 | from sklearn import datasets 10 | # boston_dataset = datasets.load_boston() 11 | # data = pd.DataFrame(boston_dataset.data) 12 | # data.columns = boston_dataset.feature_names 13 | # data.insert(0, 'target', boston_dataset.target) 14 | data = pd.read_csv(f'{basedir}/boston.csv.gz', compression='gzip') 15 | return 
data 16 | 17 | 18 | def load_heart_disease_uci(): 19 | import pandas as pd 20 | data = pd.read_csv(f'{basedir}/heart-disease-uci.csv') 21 | return data 22 | 23 | 24 | def load_bank(): 25 | import pandas as pd 26 | data = pd.read_csv(f'{basedir}/bank-uci.csv.gz') 27 | return data 28 | 29 | 30 | def load_bank_by_dask(): 31 | from dask import dataframe as dd 32 | data = dd.read_csv(f'{basedir}/bank-uci.csv.gz', compression='gzip', blocksize=None) 33 | return data 34 | 35 | 36 | def load_adult(): 37 | import pandas as pd 38 | # print(f'Base dir:{basedir}') 39 | data = pd.read_csv(f'{basedir}/adult-uci.csv.gz', compression='gzip', header=None) 40 | return data 41 | 42 | 43 | def load_glass_uci(): 44 | import pandas as pd 45 | # print(f'Base dir:{basedir}') 46 | data = pd.read_csv(f'{basedir}/glass_uci.csv', header=None) 47 | return data 48 | 49 | 50 | def load_blood(): 51 | import pandas as pd 52 | data = pd.read_csv(f'{basedir}/blood.csv') 53 | return data 54 | 55 | 56 | def load_telescope(): 57 | import pandas as pd 58 | data = pd.read_csv(f'{basedir}/telescope.csv') 59 | return data 60 | 61 | 62 | def load_Bike_Sharing(): 63 | import pandas as pd 64 | data = pd.read_csv(f'{basedir}/Bike_Sharing.csv') 65 | return data 66 | 67 | 68 | def load_movielens(): 69 | import pandas as pd 70 | data = pd.read_csv(f'{basedir}/movielens_sample.txt') 71 | return data 72 | -------------------------------------------------------------------------------- /hypernets/dispatchers/predict/grpc/proto/predict_pb2_grpc.py: -------------------------------------------------------------------------------- 1 | # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! 2 | import grpc 3 | 4 | from hypernets.dispatchers.predict.grpc.proto import predict_pb2 as hypernets_dot_dispatchers_dot_predict_dot_grpc_dot_proto_dot_predict__pb2 5 | 6 | 7 | class PredictServiceStub(object): 8 | # missing associated documentation comment in .proto file 9 | pass 10 | 11 | def __init__(self, channel): 12 | """Constructor. 13 | 14 | Args: 15 | channel: A grpc.Channel. 
16 | """ 17 | self.predict = channel.unary_unary( 18 | '/hypernets.dispatchers.predict.grpc.proto.PredictService/predict', 19 | request_serializer=hypernets_dot_dispatchers_dot_predict_dot_grpc_dot_proto_dot_predict__pb2.PredictRequest.SerializeToString, 20 | response_deserializer=hypernets_dot_dispatchers_dot_predict_dot_grpc_dot_proto_dot_predict__pb2.PredictResponse.FromString, 21 | ) 22 | 23 | 24 | class PredictServiceServicer(object): 25 | # missing associated documentation comment in .proto file 26 | pass 27 | 28 | def predict(self, request, context): 29 | # missing associated documentation comment in .proto file 30 | pass 31 | context.set_code(grpc.StatusCode.UNIMPLEMENTED) 32 | context.set_details('Method not implemented!') 33 | raise NotImplementedError('Method not implemented!') 34 | 35 | 36 | def add_PredictServiceServicer_to_server(servicer, server): 37 | rpc_method_handlers = { 38 | 'predict': grpc.unary_unary_rpc_method_handler( 39 | servicer.predict, 40 | request_deserializer=hypernets_dot_dispatchers_dot_predict_dot_grpc_dot_proto_dot_predict__pb2.PredictRequest.FromString, 41 | response_serializer=hypernets_dot_dispatchers_dot_predict_dot_grpc_dot_proto_dot_predict__pb2.PredictResponse.SerializeToString, 42 | ), 43 | } 44 | generic_handler = grpc.method_handlers_generic_handler( 45 | 'hypernets.dispatchers.predict.grpc.proto.PredictService', rpc_method_handlers) 46 | server.add_generic_rpc_handlers((generic_handler,)) 47 | -------------------------------------------------------------------------------- /hypernets/dispatchers/process/local_process.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | import subprocess 4 | import sys 5 | from multiprocessing import Process, Value as PValue 6 | 7 | from hypernets.utils import logging 8 | 9 | logger = logging.get_logger(__name__) 10 | 11 | 12 | class LocalProcess(Process): 13 | def __init__(self, cmd, in_file, out_file, err_file, environment=None): 14 | super(LocalProcess, self).__init__() 15 | self.cmd = cmd 16 | self.in_file = in_file 17 | self.out_file = out_file 18 | self.err_file = err_file 19 | self.environment = environment 20 | self._exit_code = PValue('i', -1) 21 | 22 | def run(self, verbose=False): 23 | if verbose and logger.is_info_enabled(): 24 | logger.info(f'[{self.name}] [CMD] {self.cmd}, out={self.out_file}, err={self.err_file}') 25 | 26 | try: 27 | if self.out_file and self.err_file: 28 | with open(self.out_file, 'wb', buffering=0)as o, open(self.err_file, 'wb', buffering=0) as e: 29 | p = subprocess.run(self.cmd.split(' '), 30 | shell=False, 31 | stdin=subprocess.DEVNULL, 32 | stdout=o, 33 | stderr=e) 34 | code = p.returncode 35 | else: 36 | p = subprocess.run(self.cmd.split(' '), 37 | shell=False, 38 | stdin=subprocess.DEVNULL, 39 | stdout=sys.stdout, 40 | stderr=sys.stderr) 41 | code = p.returncode 42 | except KeyboardInterrupt: 43 | code = 137 44 | 45 | if verbose and logger.is_info_enabled(): 46 | logger.info(f'[{self.name}] [CMD] {self.cmd} done with {code}') 47 | 48 | self._exit_code.value = code 49 | 50 | @property 51 | def exitcode(self): 52 | code = self._exit_code.value 53 | return code if code >= 0 else None 54 | -------------------------------------------------------------------------------- /hypernets/tests/hyperctl/test_batch.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | from pathlib import Path 3 | 4 | from hypernets.hyperctl.appliation import BatchApplication 5 | 
from hypernets.hyperctl.batch import _ShellJob 6 | from hypernets.hyperctl.executor import LocalExecutorManager, RemoteSSHExecutorManager 7 | from hypernets.tests.hyperctl.batch_factory import create_minimum_batch, create_local_batch 8 | 9 | 10 | def test_batch_to_config(): 11 | server_port = 8061 12 | scheduler_interval = 1 13 | # 1. create a batch 14 | batch = create_minimum_batch() 15 | app = BatchApplication(batch, server_port=server_port, 16 | scheduler_exit_on_finish=True, 17 | scheduler_interval=scheduler_interval) 18 | 19 | # 2. to_config 20 | batch_config_dict = app.to_config() 21 | 22 | assert batch_config_dict['job_command'] == 'pwd' 23 | 24 | # 3. assert config content 25 | # 3.1. check jobs 26 | jobs_config = batch_config_dict['jobs'] 27 | assert len(jobs_config) == 1 28 | job_config = jobs_config[0] 29 | 30 | assert job_config['name'] == 'job1' 31 | assert job_config['params']["learning_rate"] == 0.1 32 | 33 | assert job_config['working_dir'] 34 | 35 | # 3.2 TODO check backend 36 | # backend_config = batch_config_dict['backend'] 37 | # assert backend_config['type'] == 'local' 38 | 39 | # 3.3 check server config 40 | server_config = batch_config_dict['server'] 41 | assert server_config['host'] == 'localhost' 42 | assert server_config['port'] == server_port 43 | 44 | # 3.4 check scheduler 45 | scheduler_config = batch_config_dict['scheduler'] 46 | assert scheduler_config['exit_on_finish'] is True 47 | assert scheduler_config['interval'] == 1 48 | 49 | # 3.5 check version 50 | assert batch_config_dict['version'] 51 | 52 | 53 | def test_get_job_by_name(): 54 | batch = create_local_batch() 55 | req_job_name = "job2" 56 | job = batch.get_job_by_name(req_job_name) 57 | assert job.name == req_job_name 58 | assert batch.get_persisted_job_status(req_job_name) == _ShellJob.STATUS_INIT 59 | assert job.params['learning_rate'] == 0.2 60 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/cache_test.py: -------------------------------------------------------------------------------- 1 | from hypernets.tabular import sklearn_ex as skex, get_tool_box 2 | from hypernets.tabular.cache import cache, clear, CacheCallback 3 | from hypernets.tabular.datasets import dsutils 4 | from hypernets.utils import Counter 5 | 6 | 7 | class CacheCounter(CacheCallback): 8 | def __init__(self): 9 | super(CacheCounter, self).__init__() 10 | 11 | self.enter_counter = Counter() 12 | self.apply_counter = Counter() 13 | self.store_counter = Counter() 14 | 15 | def on_enter(self, fn, *args, **kwargs): 16 | self.enter_counter() 17 | 18 | def on_apply(self, fn, cached_data, *args, **kwargs): 19 | self.apply_counter() 20 | 21 | def on_store(self, fn, cached_data, *args, **kwargs): 22 | self.store_counter() 23 | 24 | def reset(self): 25 | self.enter_counter.reset() 26 | self.apply_counter.reset() 27 | self.store_counter.reset() 28 | 29 | 30 | class CachedMultiLabelEncoder(skex.MultiLabelEncoder): 31 | @cache(attr_keys='columns', attrs_to_restore='columns,encoders') 32 | def fit_transform(self, X, *args): 33 | return super().fit_transform(X, *args) 34 | 35 | @cache(attr_keys='columns', attrs_to_restore='columns,encoders') 36 | def fit_transform_as_tuple_result(self, X, *args): 37 | Xt = super().fit_transform(X.copy(), *args) 38 | return X, Xt 39 | 40 | 41 | def test_cache(): 42 | clear() 43 | 44 | df = dsutils.load_bank() 45 | t = skex.MultiLabelEncoder() 46 | X = t.fit_transform(df.copy()) 47 | 48 | t1 = CachedMultiLabelEncoder() 49 | X1 = 
t1.fit_transform(df.copy()) 50 | t2 = CachedMultiLabelEncoder() 51 | X2 = t2.fit_transform(df.copy()) 52 | 53 | hasher = get_tool_box(df).data_hasher() 54 | assert hasher(X) == hasher(X1) == hasher(X2) 55 | 56 | t3 = CachedMultiLabelEncoder() 57 | X3 = t3.fit_transform_as_tuple_result(df.copy()) 58 | t4 = CachedMultiLabelEncoder() 59 | X4 = t4.fit_transform_as_tuple_result(df.copy()) 60 | assert isinstance(X3, (tuple, list)) 61 | assert isinstance(X4, (tuple, list)) 62 | assert hasher(X3[1]) == hasher(X4[1]) 63 | -------------------------------------------------------------------------------- /hypernets/experiment/general.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | from sklearn.model_selection import train_test_split 8 | 9 | from hypernets.utils import logging, const 10 | from . import Experiment 11 | 12 | logger = logging.get_logger(__name__) 13 | 14 | 15 | class GeneralExperiment(Experiment): 16 | def __init__(self, hyper_model, X_train, y_train, X_eval=None, y_eval=None, X_test=None, eval_size=0.3, 17 | task=None, id=None, callbacks=None, random_state=9527): 18 | super(GeneralExperiment, self).__init__(hyper_model, X_train, y_train, X_eval=X_eval, 19 | y_eval=y_eval, X_test=X_test, eval_size=eval_size, task=task, 20 | id=id, callbacks=callbacks, random_state=random_state) 21 | 22 | def train(self, hyper_model, X_train, y_train, X_test, X_eval=None, y_eval=None, **kwargs): 23 | """Run an experiment 24 | """ 25 | self.step_start('data split') 26 | if X_eval is None or y_eval is None: 27 | stratify = y_train 28 | if self.task == const.TASK_REGRESSION: 29 | stratify = None 30 | X_train, X_eval, y_train, y_eval = train_test_split(X_train, y_train, test_size=self.eval_size, 31 | random_state=self.random_state, stratify=stratify) 32 | self.step_end(output={'X_train.shape': X_train.shape, 33 | 'y_train.shape': y_train.shape, 34 | 'X_eval.shape': X_eval.shape, 35 | 'y_eval.shape': y_eval.shape, 36 | 'X_test.shape': None if X_test is None else X_test.shape}) 37 | 38 | self.step_start('search') 39 | hyper_model.search(X_train, y_train, X_eval, y_eval, **kwargs) 40 | best_trial = hyper_model.get_best_trial() 41 | self.step_end(output={'best_trial': best_trial}) 42 | 43 | self.step_start('load estimator') 44 | estimator = hyper_model.load_estimator(best_trial.model_file) 45 | self.step_end(output={'estimator': estimator}) 46 | 47 | return estimator 48 | -------------------------------------------------------------------------------- /hypernets/tests/experiment/general_experiment_test.py: -------------------------------------------------------------------------------- 1 | from sklearn.model_selection import train_test_split 2 | 3 | from hypernets.examples.plain_model import PlainModel, PlainSearchSpace 4 | from hypernets.experiment import GeneralExperiment 5 | from hypernets.searchers import make_searcher 6 | from hypernets.tabular.datasets import dsutils 7 | 8 | 9 | def create_hyper_model(reward_metric='auc', optimize_direction='max'): 10 | search_space = PlainSearchSpace() 11 | searcher = make_searcher('random', search_space_fn=search_space, optimize_direction=optimize_direction) 12 | hyper_model = PlainModel(searcher=searcher, reward_metric=reward_metric, callbacks=[]) 13 | 14 | return hyper_model 15 | 16 | 17 | def test_general_experiment_of_heart_disease_simple(): 18 | hyper_model = create_hyper_model() 19 | 20 | X = dsutils.load_heart_disease_uci() 21 | y = X.pop('target') 
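# note: when X_eval/y_eval are omitted, GeneralExperiment.train() (see general.py above)
# splits an evaluation set out of X_train itself using eval_size, so this simple case
# only needs one held-out test split for the final evaluate() call.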
22 | 23 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) 24 | 25 | experiment = GeneralExperiment(hyper_model, X_train, y_train, eval_size=0.3) 26 | estimator = experiment.run(max_trials=5) 27 | trials = hyper_model.get_top_trials(5) 28 | 29 | assert estimator 30 | assert 1 < len(trials) <= 5 31 | 32 | score = estimator.evaluate(X_test, y_test, metrics=['auc', 'accuracy', 'f1', 'recall', 'precision']) 33 | print('evaluate score:', score) 34 | assert score 35 | 36 | 37 | def test_general_experiment_of_heart_disease_with_eval_and_cv(): 38 | hyper_model = create_hyper_model() 39 | 40 | X = dsutils.load_heart_disease_uci() 41 | y = X.pop('target') 42 | 43 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) 44 | X_train, X_eval, y_train, y_eval = train_test_split(X_train, y_train, test_size=0.3) 45 | 46 | experiment = GeneralExperiment(hyper_model, X_train, y_train, X_eval=X_eval, y_eval=y_eval, X_test=X_test) 47 | estimator = experiment.run(max_trials=5, cv=True) 48 | trials = hyper_model.get_top_trials(5) 49 | 50 | assert estimator 51 | assert 1 < len(trials) <= 5 52 | 53 | score = estimator.evaluate(X_test, y_test, metrics=['auc', 'accuracy', 'f1', 'recall', 'precision']) 54 | print('evaluate score:', score) 55 | assert score 56 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | import os 21 | import sys 22 | from datetime import datetime 23 | 24 | sys.path.insert(0, os.path.abspath('../..')) 25 | 26 | 27 | def setup(app): 28 | app.add_css_file('css/my_theme.css') 29 | 30 | 31 | now = datetime.now() 32 | project = 'Hypernets' 33 | copyright = f'{now.year}, DataCanvas.com' 34 | author = 'DataCanvas.com' 35 | 36 | # The full version, including alpha/beta/rc tags 37 | # release = '0.2.5' 38 | extensions = ['recommonmark', 39 | 'sphinx.ext.autodoc', 40 | 'sphinx.ext.napoleon', 41 | 'sphinx.ext.viewcode' 42 | # 'sphinx.ext.autodoc', 43 | # 'sphinx.ext.mathjax', 44 | # 'sphinx.ext.ifconfig', 45 | # 'sphinx.ext.viewcode', 46 | # 'sphinx.ext.githubpages', 47 | ] 48 | exclude_patterns = [] 49 | # html_theme = 'alabaster' 50 | html_theme = 'sphinx_rtd_theme' 51 | pygments_style = 'sphinx' 52 | templates_path = ['_templates'] 53 | source_suffix = ['.rst', '.md'] 54 | master_doc = 'index' 55 | html_static_path = ['_static'] 56 | 57 | # One entry per manual page. List of tuples 58 | # (source start file, name, description, authors, manual section). 
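# note: the man/texinfo entries below are consumed by Sphinx's man and texinfo builders;
# a typical local build of this documentation (a sketch, assuming Sphinx and the packages
# listed in docs/requirements.txt are installed) is:
#   cd docs && make html    # targets provided by the Makefile alongside this source tree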
59 | man_pages = [ 60 | (master_doc, 'Hypernets', 'Hypernets Documentation', 61 | [author], 1) 62 | ] 63 | 64 | texinfo_documents = [ 65 | (master_doc, 'Hypernets', 'Hypernets Documentation', 66 | author, 'Hypernets', 'One line description of project.', 67 | 'Miscellaneous'), 68 | ] 69 | -------------------------------------------------------------------------------- /docs/source/overview.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | Hypernets is a general automated search framework. On top of it, automatic optimization tools can be implemented for various machine learning frameworks and libraries, including deep learning frameworks such as tensorflow, keras and pytorch, and machine learning libraries like sklearn, lightgbm and xgboost. 4 | We introduced an abstract search space representation that takes into account the requirements of both hyperparameter optimization and neural architecture search (NAS), making Hypernets a general framework that adapts to various automated machine learning needs. 5 | 6 | The figure below shows the conceptual model of Hypernets. 7 | 8 | ![hypernets_conceptual_model](images/hypernets_conceptual_model.png) 9 | 10 | ## Key Components 11 | 12 | ### HyperSpace 13 | The space of all feasible solutions for a model is called the **Search Space**. HyperSpace is an abstract representation of the search space composed of `Parameter Space`, `Connection Space`, and `Module Space`. The general form of HyperSpace is a DAG, so it can represent ML pipelines and neural network architectures very flexibly. 14 | 15 | ### Searcher 16 | Search algorithms that look for an optimal solution in `HyperSpace` and generate samples for `HyperModel`. 17 | 18 | ### HyperModel 19 | The high-level interface for users to perform model search and training: pass in the defined search space and training data, and get back the best model. HyperModel is an abstract class; a dedicated HyperModel needs to be implemented for each framework or domain. For example, `HyperKeras` is used to automatically search for neural networks built with keras, and `HyperGBM` is used to automatically optimize ML pipelines composed of sklearn, xgboost and lightgbm. 20 | 21 | ### Estimator 22 | A specific `HyperModel` needs to be paired with a dedicated `Estimator` to fit and evaluate the samples given by the `HyperModel`. Such a sample may be a set of hyperparameters, a network architecture, or a mixture of them. 23 | 24 | ### Experiment 25 | The playground to prepare training and testing data and to search for the optimized estimator with a HyperModel. 26 | 27 | ### Tabular Toolbox 28 | A general tabular data computing layer. At present, we provide implementations for pandas, cudf and dask data types. 29 | -------------------------------------------------------------------------------- /docs/source/hypernets.hyperctl.rst: -------------------------------------------------------------------------------- 1 | hypernets.hyperctl package 2 | ========================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.hyperctl.api module 8 | ----------------------------- 9 | 10 | .. automodule:: hypernets.hyperctl.api 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | hypernets.hyperctl.appliation module 16 | ------------------------------------ 17 | 18 | .. 
--------------------------------------------------------------------------------
/docs/source/hypernets.hyperctl.rst:
--------------------------------------------------------------------------------
1 | hypernets.hyperctl package
2 | ==========================
3 | 
4 | Submodules
5 | ----------
6 | 
7 | hypernets.hyperctl.api module
8 | -----------------------------
9 | 
10 | .. automodule:: hypernets.hyperctl.api
11 |    :members:
12 |    :undoc-members:
13 |    :show-inheritance:
14 | 
15 | hypernets.hyperctl.appliation module
16 | ------------------------------------
17 | 
18 | .. automodule:: hypernets.hyperctl.appliation
19 |    :members:
20 |    :undoc-members:
21 |    :show-inheritance:
22 | 
23 | hypernets.hyperctl.batch module
24 | -------------------------------
25 | 
26 | .. automodule:: hypernets.hyperctl.batch
27 |    :members:
28 |    :undoc-members:
29 |    :show-inheritance:
30 | 
31 | hypernets.hyperctl.callbacks module
32 | -----------------------------------
33 | 
34 | .. automodule:: hypernets.hyperctl.callbacks
35 |    :members:
36 |    :undoc-members:
37 |    :show-inheritance:
38 | 
39 | hypernets.hyperctl.cli module
40 | -----------------------------
41 | 
42 | .. automodule:: hypernets.hyperctl.cli
43 |    :members:
44 |    :undoc-members:
45 |    :show-inheritance:
46 | 
47 | hypernets.hyperctl.consts module
48 | --------------------------------
49 | 
50 | .. automodule:: hypernets.hyperctl.consts
51 |    :members:
52 |    :undoc-members:
53 |    :show-inheritance:
54 | 
55 | hypernets.hyperctl.executor module
56 | ----------------------------------
57 | 
58 | .. automodule:: hypernets.hyperctl.executor
59 |    :members:
60 |    :undoc-members:
61 |    :show-inheritance:
62 | 
63 | hypernets.hyperctl.scheduler module
64 | -----------------------------------
65 | 
66 | .. automodule:: hypernets.hyperctl.scheduler
67 |    :members:
68 |    :undoc-members:
69 |    :show-inheritance:
70 | 
71 | hypernets.hyperctl.server module
72 | --------------------------------
73 | 
74 | .. automodule:: hypernets.hyperctl.server
75 |    :members:
76 |    :undoc-members:
77 |    :show-inheritance:
78 | 
79 | hypernets.hyperctl.utils module
80 | -------------------------------
81 | 
82 | .. automodule:: hypernets.hyperctl.utils
83 |    :members:
84 |    :undoc-members:
85 |    :show-inheritance:
86 | 
87 | Module contents
88 | ---------------
89 | 
90 | .. automodule:: hypernets.hyperctl
91 |    :members:
92 |    :undoc-members:
93 |    :show-inheritance:
94 | 
--------------------------------------------------------------------------------
/hypernets/hyperctl/api.py:
--------------------------------------------------------------------------------
1 | import os
2 | import tempfile
3 | 
4 | from hypernets.hyperctl import consts, utils
5 | from hypernets.utils import logging as hyn_logging
6 | 
7 | logger = hyn_logging.get_logger(__name__)
8 | 
9 | _job_dict = {}
10 | 
11 | 
12 | def get_job(job_name, api_server_portal):
13 |     url_get_job = f"{api_server_portal}/hyperctl/api/job/{job_name}"
14 |     data = utils.get_request(url_get_job)
15 |     return data
16 | 
17 | 
18 | def _get_job_name_and_daemon_portal():
19 |     job_name = os.getenv(consts.KEY_ENV_JOB_NAME)
20 |     api_server_portal = os.getenv(consts.KEY_ENV_SERVER_PORTAL)
21 | 
22 |     assert job_name
23 |     assert api_server_portal
24 | 
25 |     return job_name, api_server_portal
26 | 
27 | 
28 | def get_job_params():
29 |     global _job_dict
30 |     dev_job_params = _job_dict.get('params')
31 |     if dev_job_params is not None:
32 |         return dev_job_params
33 | 
34 |     job_name, api_server_portal = _get_job_name_and_daemon_portal()
35 |     return get_job(job_name, api_server_portal)['params']
36 | 
37 | 
38 | def get_job_data_dir():
39 |     global _job_dict
40 |     dev_job_data_dir = _job_dict.get('job_data_dir')
41 |     if dev_job_data_dir is not None:
42 |         return dev_job_data_dir
43 | 
44 |     job_working_dir = os.getenv(consts.KEY_ENV_JOB_WORKING_DIR)
45 |     return job_working_dir
46 | 
47 | 
48 | def inject(params, job_data_dir=None):  # inject job params locally, for development and tests
49 |     global _job_dict
50 |     job_dict = _job_dict
51 |     job_dict['params'] = params
52 |     if job_data_dir is None:
53 |         job_data_dir = tempfile.mkdtemp(prefix='hyperctl-')  # fall back to a fresh temp dir
54 |     job_dict['job_data_dir'] = job_data_dir
55 | 
56 | 
57 | def
reset_dev_params(): 58 | global _job_dict 59 | _job_dict = {} 60 | 61 | 62 | def list_jobs(api_server_portal): 63 | # if api_server_portal is None: 64 | # api_server_portal = os.getenv(consts.KEY_ENV_api_server_portal) 65 | assert api_server_portal 66 | url_get_jobs = f"{api_server_portal}/hyperctl/api/job" 67 | data = utils.get_request(url_get_jobs) 68 | return data['jobs'] 69 | 70 | 71 | def kill_job(api_server_portal, job_name): 72 | url_kill_job = f"{api_server_portal}/hyperctl/api/job/{job_name}/kill" 73 | data = utils.post_request(url_kill_job, request_data=None) 74 | return data 75 | -------------------------------------------------------------------------------- /hypernets/tests/searchers/test_genetic.py: -------------------------------------------------------------------------------- 1 | from hypernets.core import get_random_state, set_random_state, HyperSpace, Identity, Bool, Optional, Real, HyperInput, Choice, Int 2 | from hypernets.searchers.genetic import SinglePointCrossOver, ShuffleCrossOver, UniformCrossover, Individual 3 | 4 | 5 | class TestCrossOver: 6 | 7 | @classmethod 8 | def setup_class(cls): 9 | set_random_state(1234) 10 | cls.random_state = get_random_state() 11 | 12 | def test_shuffle_crossover(self): 13 | co = ShuffleCrossOver(random_state=self.random_state) 14 | self.run_crossover(co) 15 | 16 | def test_single_point_crossover(self): 17 | co = SinglePointCrossOver(random_state=self.random_state) 18 | self.run_crossover(co) 19 | 20 | def test_uniform_crossover(self): 21 | co = UniformCrossover(random_state=self.random_state) 22 | try: 23 | self.run_crossover(co) 24 | # P(off=[A or B]) = 0.5 ^ 3 * 2 25 | except Exception as e: 26 | print(e) 27 | 28 | def run_crossover(self, crossover): 29 | # 1. prepare data 30 | random_state = self.random_state 31 | 32 | # 2. construct a search space 33 | def get_space(): 34 | space = HyperSpace() 35 | with space.as_default(): 36 | input1 = HyperInput(name="input1") 37 | id1 = Identity(p1=Choice([1, 2, 3, 4]), p2=Int(1, 100), name="id1") 38 | id2 = Identity(p3=Real(0, 1), name="id2") 39 | id1(input1) 40 | id2(id1) 41 | return space 42 | out = get_space() 43 | print(out) 44 | 45 | # 3. 
construct individuals 46 | dna1 = get_space() 47 | dna1.assign_by_vectors([0, 50, 0.2]) 48 | ind1 = Individual(dna=dna1, scores=[1, 1], random_state=random_state) 49 | 50 | dna2 = get_space() 51 | dna2.assign_by_vectors([1, 30, 0.5]) 52 | ind2 = Individual(dna=dna2, scores=[1, 1], random_state=random_state) 53 | 54 | output = crossover(ind1=ind1, ind2=ind2, out_space=get_space()) 55 | assert output.all_assigned 56 | 57 | # the offspring is not same as any parents 58 | assert output.vectors != ind1.dna.vectors 59 | assert output.vectors != ind2.dna.vectors 60 | 61 | -------------------------------------------------------------------------------- /hypernets/searchers/grid_searcher.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | from ..core.searcher import Searcher, OptimizeDirection 7 | from ..core import EarlyStoppingError 8 | from sklearn.model_selection import ParameterGrid 9 | 10 | 11 | class GridSearcher(Searcher): 12 | def __init__(self, space_fn, optimize_direction=OptimizeDirection.Minimize, space_sample_validation_fn=None, 13 | n_expansion=5): 14 | Searcher.__init__(self, space_fn, optimize_direction, space_sample_validation_fn=space_sample_validation_fn) 15 | space = space_fn() 16 | assignable_params = space.get_unassigned_params() 17 | self.grid = {} 18 | self.n_expansion = n_expansion 19 | for p in assignable_params: 20 | self.grid[p.id] = [s.value for s in p.expansion(n_expansion)] 21 | self.all_combinations = list(ParameterGrid(self.grid)) 22 | self.position_ = -1 23 | 24 | @property 25 | def parallelizable(self): 26 | return True 27 | 28 | def sample(self, space_options=None): 29 | sample = self._sample_and_check(self._get_sample) 30 | return sample 31 | 32 | def _get_sample(self): 33 | self.position_ += 1 34 | 35 | if self.position_ >= len(self.all_combinations): 36 | raise EarlyStoppingError('no more samples.') 37 | sample = self.space_fn() 38 | for k, v in self.all_combinations[self.position_].items(): 39 | sample.__dict__[k].assign(v) 40 | assert sample.all_assigned == True 41 | return sample 42 | 43 | def get_best(self): 44 | raise NotImplementedError 45 | 46 | def update_result(self, space, result): 47 | pass 48 | 49 | def reset(self): 50 | self.position_ = -1 51 | 52 | def export(self): 53 | raise NotImplementedError 54 | 55 | 56 | def test_parameter_grid(self): 57 | space = self.get_space() 58 | ps = space.get_unassigned_params() 59 | grid = {} 60 | for p in ps: 61 | grid[p.name] = [s.value for s in p.expansion(2)] 62 | all_vectors = list(ParameterGrid(grid)) 63 | for ps in all_vectors: 64 | space = self.get_space() 65 | for k, v in ps.items(): 66 | space.__dict__[k].assign(v) 67 | assert space.all_assigned == True 68 | -------------------------------------------------------------------------------- /hypernets/tabular/ensemble/stacking.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | from sklearn.linear_model import LogisticRegression, LinearRegression 7 | 8 | from .base_ensemble import BaseEnsemble 9 | 10 | 11 | class StackingEnsemble(BaseEnsemble): 12 | def __init__(self, task, estimators, need_fit=False, n_folds=5, method='soft', meta_model=None, fit_kwargs=None): 13 | super(StackingEnsemble, self).__init__(task, estimators, need_fit, n_folds, method) 14 | if meta_model is None: 15 | if task == 'regression': 16 | self.meta_model = 
LinearRegression() 17 | else: 18 | self.meta_model = LogisticRegression() 19 | else: 20 | self.meta_model = meta_model 21 | self.fit_kwargs = fit_kwargs if fit_kwargs is not None else {} 22 | 23 | def fit_predictions(self, predictions, y_true): 24 | X = self.__predictions2X(predictions) 25 | self.meta_model.fit(X, y_true, **self.fit_kwargs) 26 | 27 | def __predictions2X(self, predictions): 28 | X = predictions 29 | if len(X.shape) == 3: 30 | if self.task == 'binary': 31 | X = X[:, :, -1] 32 | elif self.task == 'multiclass': 33 | np = self.np 34 | X = np.argmax(X, axis=2) 35 | else: 36 | raise ValueError( 37 | f"The shape of `predictions` and the `task` don't match. shape:{predictions.shape}, task:{self.task}") 38 | return X 39 | 40 | def predictions2predict(self, predictions): 41 | assert self.meta_model is not None 42 | X = self.__predictions2X(predictions) 43 | pred = self.meta_model.predict(X) 44 | if self.task == 'binary': 45 | np = self.np 46 | pred = np.clip(pred, 0, 1) 47 | return pred 48 | 49 | def predictions2predict_proba(self, predictions): 50 | assert self.meta_model is not None 51 | X = self.__predictions2X(predictions) 52 | if hasattr(self.meta_model, 'predict_proba'): 53 | pred = self.meta_model.predict_proba(X) 54 | else: 55 | pred = self.meta_model.predict(X) 56 | 57 | if self.task == 'binary': 58 | np = self.np 59 | pred = np.clip(pred, 0, 1) 60 | return pred 61 | -------------------------------------------------------------------------------- /hypernets/tabular/cuml_ex/_dataframe_mapper.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | import cudf 6 | import cupy 7 | import numpy as np 8 | from sklearn.pipeline import _name_estimators 9 | 10 | from hypernets.tabular.dataframe_mapper import DataFrameMapper, TransformerPipeline 11 | from ._transformer import Localizable 12 | 13 | 14 | class CumlTransformerPipeline(TransformerPipeline): 15 | def as_local(self): 16 | steps = [(name, tf.as_local()) for name, tf in self.steps] 17 | target = TransformerPipeline(steps) 18 | return target 19 | 20 | 21 | def make_transformer_pipeline(*steps): 22 | """Construct a TransformerPipeline from the given estimators. 
23 | """ 24 | return CumlTransformerPipeline(_name_estimators(steps)) 25 | 26 | 27 | class CumlDataFrameMapper(DataFrameMapper, Localizable): 28 | @staticmethod 29 | def _build_transformer(transformers): 30 | if isinstance(transformers, list): 31 | transformers = make_transformer_pipeline(*transformers) 32 | return transformers 33 | 34 | def _to_df(self, X, extracted, columns): 35 | dfs = [cudf.DataFrame(arr, index=None) for arr in extracted] 36 | for df, pos in zip(dfs, np.cumsum([d.shape[1] for d in dfs])): 37 | df.reset_index(drop=True, inplace=True) 38 | df.columns = [f'c{i}' for i in range(pos - df.shape[1], pos)] 39 | df_out = cudf.concat(dfs, axis=1, ignore_index=True) if len(dfs) > 1 else dfs[0] 40 | if len(X) == len(df_out): 41 | df_out.index = X.index 42 | df_out.columns = columns 43 | 44 | return df_out 45 | 46 | @staticmethod 47 | def _hstack_array(extracted): 48 | arrs = [arr.values if isinstance(arr, cudf.DataFrame) else arr for arr in extracted] 49 | return cupy.hstack(arrs) 50 | 51 | @staticmethod 52 | def _fix_feature(fea): 53 | if isinstance(fea, (np.ndarray, cupy.ndarray)) and len(fea.shape) == 1: 54 | fea = fea.reshape(-1, 1) 55 | return fea 56 | 57 | def as_local(self): 58 | target = DataFrameMapper([], default=None, df_out=self.df_out, input_df=self.input_df, 59 | df_out_dtype_transforms=self.df_out_dtype_transforms) 60 | target.fitted_features_ = [(cols, t.as_local(), opts) for cols, t, opts in self.fitted_features_] 61 | return target 62 | -------------------------------------------------------------------------------- /hypernets/tabular/cuml_ex/_data_cleaner.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | 6 | import cudf 7 | import cupy 8 | 9 | from ._transformer import Localizable, copy_attrs_as_local 10 | from ..data_cleaner import DataCleaner, _CleanerHelper 11 | 12 | 13 | class CumlDataCleaner(DataCleaner, Localizable): 14 | @staticmethod 15 | def get_helper(X, y): 16 | if isinstance(X, (cudf.DataFrame, cudf.Series)): 17 | return _CumlCleanerHelper() 18 | else: 19 | return DataCleaner.get_helper(X, y) 20 | 21 | def as_local(self): 22 | target = DataCleaner(nan_chars=self.nan_chars, correct_object_dtype=self.correct_object_dtype, 23 | drop_constant_columns=self.drop_constant_columns, 24 | drop_duplicated_columns=self.drop_duplicated_columns, 25 | drop_label_nan_rows=self.drop_label_nan_rows, 26 | drop_idness_columns=self.drop_idness_columns, 27 | replace_inf_values=self.replace_inf_values, 28 | drop_columns=self.drop_columns, 29 | reserve_columns=self.reserve_columns, 30 | reduce_mem_usage=self.reduce_mem_usage, 31 | int_convert_to=self.int_convert_to) 32 | copy_attrs_as_local(self, target, 'df_meta_', 'columns_', 'dropped_constant_columns_', 33 | 'dropped_idness_columns_', 'dropped_duplicated_columns_') 34 | 35 | return target 36 | 37 | 38 | class _CumlCleanerHelper(_CleanerHelper): 39 | @staticmethod 40 | def _get_duplicated_columns(df): 41 | columns = df.columns.to_list() 42 | duplicates = set() 43 | 44 | for i, c in enumerate(columns[:-1]): 45 | if c in duplicates: 46 | continue 47 | for nc in columns[i + 1:]: 48 | if df[c].equals(df[nc]): 49 | duplicates.add(nc) 50 | 51 | return {c: c in duplicates for c in columns} 52 | 53 | @staticmethod 54 | def replace_nan_chars(X: cudf.DataFrame, nan_chars): 55 | cat_cols = X.select_dtypes(['object', 'string', ]) 56 | if cat_cols.shape[1] > 0: 57 | cat_cols = cat_cols.replace(nan_chars, cupy.nan) 58 | X[cat_cols.columns] = cat_cols 
59 | return X 60 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/tb_cuml/cache_test.py: -------------------------------------------------------------------------------- 1 | from hypernets.tabular.cache import cache, clear 2 | from hypernets.tabular.datasets import dsutils 3 | from . import if_cuml_ready, is_cuml_installed 4 | from ..cache_test import CacheCounter 5 | 6 | if is_cuml_installed: 7 | import cudf 8 | from hypernets.tabular.cuml_ex import CumlToolBox 9 | from hypernets.tabular.cuml_ex._transformer import MultiLabelEncoder 10 | 11 | 12 | class CachedCumlMultiLabelEncoder(MultiLabelEncoder): 13 | cache_counter = CacheCounter() 14 | 15 | @cache(attr_keys='columns', 16 | attrs_to_restore='columns,dtype,encoders', 17 | callbacks=cache_counter) 18 | def fit_transform(self, X, *args): 19 | return super().fit_transform(X, *args) 20 | 21 | @cache(attr_keys='columns', 22 | attrs_to_restore='columns,dtype,encoders', 23 | callbacks=cache_counter) 24 | def fit_transform_as_array(self, X, *args): 25 | X = super().fit_transform(X, *args) 26 | return X.values 27 | 28 | 29 | @if_cuml_ready 30 | def test_cache_cuml(): 31 | clear() 32 | 33 | cache_counter = CachedCumlMultiLabelEncoder.cache_counter 34 | df = cudf.from_pandas(dsutils.load_bank()) 35 | 36 | t = MultiLabelEncoder() 37 | X = t.fit_transform(df.copy()) 38 | 39 | cache_counter.reset() 40 | t1 = CachedCumlMultiLabelEncoder() 41 | X1 = t1.fit_transform(df.copy()) 42 | t2 = CachedCumlMultiLabelEncoder() 43 | X2 = t2.fit_transform(df.copy()) 44 | 45 | hasher = CumlToolBox.data_hasher() 46 | assert hasher(X) == hasher(X1) == hasher(X2) 47 | assert cache_counter.enter_counter.value == 2 48 | assert cache_counter.apply_counter.value <= 2 49 | assert cache_counter.store_counter.value <= 2 50 | assert cache_counter.apply_counter.value + cache_counter.store_counter.value == 2 51 | 52 | cache_counter.reset() 53 | t3 = CachedCumlMultiLabelEncoder() 54 | X3 = t3.fit_transform_as_array(df.copy()) 55 | t4 = CachedCumlMultiLabelEncoder() 56 | X4 = t4.fit_transform_as_array(df.copy()) 57 | 58 | assert hasher(X3) == hasher(X4) 59 | assert cache_counter.enter_counter.value == 2 60 | assert cache_counter.apply_counter.value <= 2 61 | assert cache_counter.store_counter.value <= 2 62 | assert cache_counter.apply_counter.value + cache_counter.store_counter.value == 2 63 | -------------------------------------------------------------------------------- /hypernets/experiment/cfg.py: -------------------------------------------------------------------------------- 1 | from hypernets.conf import configure, Configurable, Bool, Int, String, List, Dict 2 | 3 | 4 | @configure() 5 | class ExperimentCfg(Configurable): 6 | experiment_callbacks_console = \ 7 | List(default_value=[], 8 | allow_none=True, config=True, 9 | help='ExperimentCallback instance or name list.' 10 | ) 11 | experiment_callbacks_notebook = \ 12 | List(default_value=['hypernets.experiment.SimpleNotebookCallback', ], 13 | allow_none=True, config=True, 14 | help='ExperimentCallback instance or name list.' 15 | ) 16 | 17 | experiment_default_target_set = \ 18 | List(default_value=['y', 'target', 'class'], 19 | allow_none=True, config=True, 20 | help='Default target names.' 
21 | ) 22 | experiment_auto_down_sample_enabled = \ 23 | Bool(False, 24 | allow_none=True, config=True, 25 | help='' 26 | ) 27 | experiment_auto_down_sample_rows_threshold = \ 28 | Int(10000, 29 | allow_none=True, config=True, 30 | help='' 31 | ) 32 | experiment_discriminator = \ 33 | String('once_percentile', 34 | allow_none=True, config=True, 35 | help='discriminator identity, "percentile" or "progressive"', 36 | ) 37 | experiment_discriminator_options = \ 38 | Dict(default_value={'percentile': 50, 'min_trials': 5, 'min_steps': 5, 'stride': 1}, 39 | key_trait=String(), 40 | allow_none=True, config=True, 41 | help='discriminator settings', 42 | ) 43 | experiment_data_adaption_min_cols_limit = \ 44 | Int(10, 45 | allow_none=True, config=True, 46 | help='' 47 | ) 48 | 49 | hyper_model_callbacks_console = \ 50 | List(default_value=['hypernets.core.callbacks.SummaryCallback', ], 51 | allow_none=True, config=True, 52 | help='Callback instance or name list.' 53 | ) 54 | hyper_model_callbacks_notebook = \ 55 | List(default_value=['hypernets.core.callbacks.NotebookCallback', 56 | 'hypernets.core.callbacks.ProgressiveCallback', ], 57 | allow_none=True, config=True, 58 | help='Callback instance or name list.' 59 | ) 60 | -------------------------------------------------------------------------------- /hypernets/tabular/dask_ex/_data_cleaner.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | from functools import partial 6 | 7 | import numpy as np 8 | import pandas as pd 9 | from dask import dataframe as dd, array as da 10 | 11 | from hypernets.utils import logging 12 | from ..data_cleaner import DataCleaner, _CleanerHelper 13 | 14 | logger = logging.get_logger(__name__) 15 | 16 | 17 | class DaskDataCleaner(DataCleaner): 18 | @staticmethod 19 | def get_helper(X, y): 20 | if isinstance(X, (dd.DataFrame, dd.Series, da.Array)): 21 | return _DaskCleanerHelper() 22 | else: 23 | return DataCleaner.get_helper(X, y) 24 | 25 | 26 | class _DaskCleanerHelper(_CleanerHelper): 27 | @staticmethod 28 | def reduce_mem_usage(df, excludes=None): 29 | raise NotImplementedError('"reduce_mem_usage" is not supported for Dask DataFrame.') 30 | 31 | @staticmethod 32 | def _get_duplicated_columns(df): 33 | duplicates = df.reduction(chunk=lambda c: pd.DataFrame(c.T.duplicated()).T, 34 | aggregate=lambda a: np.all(a, axis=0)).compute() 35 | return duplicates 36 | 37 | @staticmethod 38 | def _detect_dtype(dtype, df): 39 | result = {} 40 | df = df.copy() 41 | for col in df.columns.to_list(): 42 | try: 43 | df[col] = df[col].astype(dtype) 44 | result[col] = [True] # as-able 45 | except: 46 | result[col] = [False] 47 | return pd.DataFrame(result) 48 | 49 | def _correct_object_dtype_as(self, X, df_meta): 50 | for dtype, columns in df_meta.items(): 51 | columns = [c for c in columns if str(X[c].dtype) != dtype] 52 | if len(columns) == 0: 53 | continue 54 | 55 | correctable = X[columns].reduction(chunk=partial(self._detect_dtype, dtype), 56 | aggregate=lambda a: np.all(a, axis=0), 57 | meta={c: 'bool' for c in columns}).compute() 58 | correctable = [i for i, v in correctable.items() if v] 59 | # for col in correctable: 60 | # X[col] = X[col].astype(dtype) 61 | if correctable: 62 | X[correctable] = X[correctable].astype(dtype) 63 | logger.info(f'Correct columns [{",".join(correctable)}] to {dtype}.') 64 | 65 | return X 66 | -------------------------------------------------------------------------------- 
/hypernets/tests/tabular/psudo_labeling_test.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | from math import ceil 3 | 4 | from hypernets.tabular import get_tool_box 5 | from hypernets.tabular import sklearn_ex as skex 6 | from hypernets.tabular.datasets import dsutils 7 | 8 | 9 | class TestPseudoLabeling: 10 | @classmethod 11 | def setup_class(cls): 12 | cls.df = cls.load_data() 13 | 14 | @staticmethod 15 | def load_data(): 16 | df = dsutils.load_bank() 17 | return skex.MultiLabelEncoder().fit_transform(df) 18 | 19 | def run_sample(self, X, y): 20 | tb = get_tool_box(X, y) 21 | model = tb.general_estimator(X, y) 22 | 23 | X_train, X_test, y_train, y_test = \ 24 | tb.train_test_split(X, y, test_size=0.5, random_state=7) 25 | model.fit(X_train, y_train) 26 | proba = model.predict_proba(X_test) 27 | 28 | preds = model.predict(X_test) 29 | preds, = tb.to_local(preds) 30 | c0 = Counter(preds) 31 | print('original samples:', c0) 32 | 33 | options = dict(threshold=0.8, number=10, quantile=0.8) 34 | for strategy in ['threshold', 'number', 'quantile', ]: 35 | pl = tb.pseudo_labeling(strategy=strategy, **options) 36 | X_pseudo, y_pseudo = pl.select(X_test.copy(), model.classes_, proba.copy()) 37 | 38 | y_pseudo, = tb.to_local(y_pseudo) 39 | 40 | # validate result data 41 | if len(y_pseudo) > 0: 42 | expected_y_pseudo = model.predict(X_pseudo) 43 | expected_y_pseudo, = tb.to_local(expected_y_pseudo) 44 | assert (expected_y_pseudo == y_pseudo).all() 45 | 46 | # validate sample numbers 47 | c = Counter(y_pseudo) 48 | if strategy == 'number': 49 | assert all([v <= options['number'] for k, v in c.items()]) 50 | elif strategy == 'quantile': 51 | if self.is_quantile_exact(): 52 | expected_c = {k: ceil(c0[k] * (1 - options['quantile'])) for k, v in c0.items()} 53 | assert c == expected_c 54 | 55 | @staticmethod 56 | def is_quantile_exact(): 57 | return True 58 | 59 | def test_binary(self): 60 | X = self.df.copy() 61 | y = X.pop('y') 62 | self.run_sample(X, y) 63 | 64 | def test_multiclass(self): 65 | X = self.df.copy() 66 | y = X.pop('education') 67 | self.run_sample(X, y) 68 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/tb_dask/cache_test.py: -------------------------------------------------------------------------------- 1 | from hypernets.tabular.cache import cache, clear 2 | from hypernets.tabular.datasets import dsutils 3 | from . 
import if_dask_ready, is_dask_installed 4 | from ..cache_test import CacheCounter 5 | 6 | if is_dask_installed: 7 | import dask.dataframe as dd 8 | from hypernets.tabular import dask_ex as dex 9 | 10 | 11 | class CachedDaskMultiLabelEncoder(dex.SafeOrdinalEncoder): 12 | cache_counter = CacheCounter() 13 | 14 | @cache(attr_keys='columns', 15 | attrs_to_restore='columns,dtype,categorical_columns_,non_categorical_columns_,categories_', 16 | callbacks=cache_counter) 17 | def fit_transform(self, X, *args): 18 | return super().fit_transform(X, *args) 19 | 20 | @cache(attr_keys='columns', 21 | attrs_to_restore='columns,dtype,categorical_columns_,non_categorical_columns_,categories_', 22 | callbacks=cache_counter) 23 | def fit_transform_as_array(self, X, *args): 24 | X = super().fit_transform(X, *args) 25 | return X.to_dask_array(lengths=True) 26 | 27 | 28 | @if_dask_ready 29 | def test_cache_dask(): 30 | clear() 31 | 32 | cache_counter = CachedDaskMultiLabelEncoder.cache_counter 33 | df = dd.from_pandas(dsutils.load_bank(), npartitions=2) 34 | 35 | t = dex.SafeOrdinalEncoder() 36 | X = t.fit_transform(df.copy()) 37 | 38 | cache_counter.reset() 39 | t1 = CachedDaskMultiLabelEncoder() 40 | X1 = t1.fit_transform(df.copy()) 41 | t2 = CachedDaskMultiLabelEncoder() 42 | X2 = t2.fit_transform(df.copy()) 43 | 44 | hasher = dex.DaskToolBox.data_hasher() 45 | assert hasher(X) == hasher(X1) == hasher(X2) 46 | assert cache_counter.enter_counter.value == 2 47 | assert cache_counter.apply_counter.value <= 2 48 | assert cache_counter.store_counter.value <= 2 49 | assert cache_counter.apply_counter.value + cache_counter.store_counter.value == 2 50 | 51 | cache_counter.reset() 52 | t3 = CachedDaskMultiLabelEncoder() 53 | X3 = t3.fit_transform_as_array(df.copy()) 54 | t4 = CachedDaskMultiLabelEncoder() 55 | X4 = t4.fit_transform_as_array(df.copy()) 56 | 57 | assert hasher(X3) == hasher(X4) 58 | assert cache_counter.enter_counter.value == 2 59 | assert cache_counter.apply_counter.value <= 2 60 | assert cache_counter.store_counter.value <= 2 61 | assert cache_counter.apply_counter.value + cache_counter.store_counter.value == 2 62 | -------------------------------------------------------------------------------- /docs/source/hypernets.searchers.rst: -------------------------------------------------------------------------------- 1 | hypernets.searchers package 2 | =========================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.searchers.evolution\_searcher module 8 | ---------------------------------------------- 9 | 10 | .. automodule:: hypernets.searchers.evolution_searcher 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | hypernets.searchers.genetic module 16 | ---------------------------------- 17 | 18 | .. automodule:: hypernets.searchers.genetic 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | hypernets.searchers.grid\_searcher module 24 | ----------------------------------------- 25 | 26 | .. automodule:: hypernets.searchers.grid_searcher 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | hypernets.searchers.mcts\_core module 32 | ------------------------------------- 33 | 34 | .. automodule:: hypernets.searchers.mcts_core 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | hypernets.searchers.mcts\_searcher module 40 | ----------------------------------------- 41 | 42 | .. 
automodule:: hypernets.searchers.mcts_searcher 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | hypernets.searchers.moead\_searcher module 48 | ------------------------------------------ 49 | 50 | .. automodule:: hypernets.searchers.moead_searcher 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | hypernets.searchers.moo module 56 | ------------------------------ 57 | 58 | .. automodule:: hypernets.searchers.moo 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | hypernets.searchers.nsga\_searcher module 64 | ----------------------------------------- 65 | 66 | .. automodule:: hypernets.searchers.nsga_searcher 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | 71 | hypernets.searchers.playback\_searcher module 72 | --------------------------------------------- 73 | 74 | .. automodule:: hypernets.searchers.playback_searcher 75 | :members: 76 | :undoc-members: 77 | :show-inheritance: 78 | 79 | hypernets.searchers.random\_searcher module 80 | ------------------------------------------- 81 | 82 | .. automodule:: hypernets.searchers.random_searcher 83 | :members: 84 | :undoc-members: 85 | :show-inheritance: 86 | 87 | Module contents 88 | --------------- 89 | 90 | .. automodule:: hypernets.searchers 91 | :members: 92 | :undoc-members: 93 | :show-inheritance: 94 | -------------------------------------------------------------------------------- /hypernets/tabular/evaluator/hyperdt.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | import numpy as np 7 | from deeptables.models.hyper_dt import HyperDT 8 | from deeptables.models.hyper_dt import mini_dt_space 9 | from hypernets.core import EarlyStoppingCallback 10 | from hypernets.core.searcher import OptimizeDirection 11 | from hypernets.searchers import EvolutionSearcher 12 | from sklearn.model_selection import train_test_split 13 | 14 | from . 
import BaseEstimator 15 | 16 | 17 | class HyperDTEstimator(BaseEstimator): 18 | def __init__(self, task, reward_metric, max_trials=30, epochs=100, earlystop_rounds=30, time_limit=3600, 19 | expected_reward=None, **kwargs): 20 | super(HyperDTEstimator, self).__init__(task) 21 | self.name = 'HyperDT' 22 | self.kwargs = kwargs 23 | self.estimator = None 24 | self.max_trials = max_trials 25 | self.reward_metric = reward_metric 26 | self.epochs = epochs 27 | self.earlystop_rounds = earlystop_rounds 28 | self.time_limit = time_limit 29 | self.expected_reward = expected_reward 30 | 31 | def train(self, X, y, X_test): 32 | searcher = EvolutionSearcher(mini_dt_space, optimize_direction=OptimizeDirection.Maximize, population_size=30, 33 | sample_size=10, regularized=True, candidates_size=10) 34 | es = EarlyStoppingCallback(self.earlystop_rounds, 'max', time_limit=self.time_limit, 35 | expected_reward=self.expected_reward) 36 | 37 | hdt = HyperDT(searcher, 38 | callbacks=[es], 39 | reward_metric=self.reward_metric, 40 | cache_preprocessed_data=True, 41 | ) 42 | stratify = y 43 | if self.task == 'regression': 44 | stratify = None 45 | X_train, X_eval, y_train, y_eval = train_test_split(X, y, test_size=0.3, 46 | random_state=9527, stratify=stratify) 47 | 48 | hdt.search(X_train, y_train, X_eval, y_eval, max_trials=self.max_trials, epochs=self.epochs) 49 | best_trial = hdt.get_best_trial() 50 | self.estimator = hdt.load_estimator(best_trial.model_file) 51 | 52 | def predict_proba(self, X): 53 | proba = self.estimator.predict_proba(X) 54 | return proba 55 | 56 | def predict(self, X): 57 | return self.estimator.predict(X) 58 | -------------------------------------------------------------------------------- /hypernets/core/mutables.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | from collections import OrderedDict 6 | 7 | 8 | class MutableScope: 9 | def __init__(self): 10 | self.reset() 11 | self.stack = [] 12 | 13 | @property 14 | def current_path(self): 15 | return '.'.join(self.stack) 16 | 17 | def entry(self, name): 18 | self.stack.append(name) 19 | 20 | def exit(self): 21 | self.stack.pop() 22 | 23 | def reset(self): 24 | self.id_dict = OrderedDict() 25 | self.name_dict = OrderedDict() 26 | 27 | def register(self, mutable): 28 | assert isinstance(mutable, Mutable) 29 | 30 | if mutable.name is None: 31 | mutable.id = self.assign_id(mutable) 32 | mutable.name = mutable.id 33 | else: 34 | if self.name_dict.get(mutable.name) is not None: 35 | raise ValueError(f'name `{mutable.name}` is duplicate.') 36 | mutable.id = f'ID_{mutable.name}' 37 | 38 | self.name_dict[mutable.name] = mutable 39 | self.id_dict[mutable.id] = mutable 40 | 41 | def assign_id(self, mutable): 42 | prefix = mutable.__class__.__name__ 43 | if mutable.type is not None: 44 | prefix = mutable.type + '_' + prefix 45 | i = 1 46 | while True: 47 | id = f'{prefix}_{i}' 48 | if id not in self.id_dict: 49 | break 50 | i += 1 51 | return id 52 | 53 | def get_mutable(self, id): 54 | return self.id_dict[id] 55 | 56 | def get_mutable_by_name(self, name): 57 | return self.name_dict[name] 58 | 59 | 60 | class Mutable(object): 61 | def __init__(self, scope, name=None): 62 | self.attach_to_scope(scope, name) 63 | 64 | def attach_to_scope(self, scope, name=None): 65 | assert scope is not None, 'scope cannot be None' 66 | self.scope = scope 67 | self.name = name 68 | self.alias = None 69 | self.scope.register(self) 70 | self.path = scope.current_path 71 | 72 | def 
__repr__(self): 73 | # if self.alias is not None: 74 | # return 'ALIAS:' + self.alias 75 | # else: 76 | # return 'ID:' + self._id 77 | return self._id 78 | 79 | @property 80 | def type(self): 81 | return None 82 | 83 | @property 84 | def id(self): 85 | return self._id 86 | 87 | @id.setter 88 | def id(self, id): 89 | self._id = id 90 | 91 | def update(self): 92 | pass 93 | -------------------------------------------------------------------------------- /hypernets/tests/utils/tuning_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | from hypernets.utils.param_tuning import search_params 8 | from hypernets.core.search_space import Choice, Real, Int 9 | import numpy as np 10 | 11 | 12 | def func1(p1=Choice(['a', 'b'], random_state=np.random.RandomState(9527)), 13 | p2=Int(1, 10, 2, random_state=np.random.RandomState(9527)), 14 | p3=Real(1.0, 5.0, random_state=np.random.RandomState(9527)), p4=9): 15 | print(f'p1:{p1},p2:{p2},p3{p3},p4:{p4}') 16 | return p2 * p3 17 | 18 | 19 | def func_early_stopping(p1=Choice(['a', 'b'], random_state=np.random.RandomState(9527)), 20 | p2=Int(1, 10, 2, random_state=np.random.RandomState(9527)), 21 | p3=Real(1.0, 5.0, random_state=np.random.RandomState(9527)), 22 | p4=9): 23 | print(f'p1:{p1},p2:{p2},p3{p3},p4:{p4}') 24 | return 0.6 25 | 26 | 27 | class Test_ParamTuning(): 28 | def test_search_params(self): 29 | print('start') 30 | history = search_params(func1, 'grid', max_trials=10, optimize_direction='max') 31 | best = history.get_best() 32 | assert best.reward[0] == 14.370000000000001 33 | assert best.trial_no == 10 34 | 35 | def test_trigger_by_trials(self): 36 | from hypernets.core import EarlyStoppingCallback 37 | es = EarlyStoppingCallback(3, 'max', 38 | time_limit=3600, 39 | expected_reward=1) 40 | 41 | history = search_params(func_early_stopping, 'grid', max_trials=10, optimize_direction='max', callbacks=[es]) 42 | best = history.get_best() 43 | assert best.reward[0] == 0.6 44 | assert best.trial_no == 1 45 | assert len(history.trials) == 4 46 | assert es.triggered_reason == EarlyStoppingCallback.REASON_TRIAL_LIMIT 47 | 48 | def test_trigger_by_reward(self): 49 | from hypernets.core import EarlyStoppingCallback 50 | es = EarlyStoppingCallback(3, 'max', 51 | time_limit=3600, 52 | expected_reward=0.5) 53 | 54 | history = search_params(func_early_stopping, 'grid', max_trials=10, optimize_direction='max', callbacks=[es]) 55 | best = history.get_best() 56 | assert best.reward[0] == 0.6 57 | assert best.trial_no == 1 58 | assert len(history.trials) == 1 59 | assert es.triggered_reason == EarlyStoppingCallback.REASON_EXPECTED_REWARD 60 | 61 | -------------------------------------------------------------------------------- /hypernets/core/searcher.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | import enum 6 | 7 | from hypernets.utils import to_repr 8 | from .stateful import Stateful 9 | 10 | 11 | class OptimizeDirection(enum.Enum): 12 | Minimize = 'min' 13 | Maximize = 'max' 14 | 15 | 16 | class Searcher(Stateful): 17 | def __init__(self, space_fn, optimize_direction=OptimizeDirection.Minimize, use_meta_learner=True, 18 | space_sample_validation_fn=None): 19 | self.space_fn = space_fn 20 | self.use_meta_learner = use_meta_learner 21 | self.optimize_direction = optimize_direction 22 | self.meta_learner = None 23 | self.space_sample_validation_fn = 
space_sample_validation_fn
24 | 
25 |     def set_meta_learner(self, meta_learner):
26 |         self.meta_learner = meta_learner
27 | 
28 |     @property
29 |     def parallelizable(self):
30 |         return False
31 | 
32 |     def sample(self, space_options=None):
33 |         raise NotImplementedError
34 | 
35 |     def _random_sample(self, **space_kwargs):
36 |         if space_kwargs is None:
37 |             space_kwargs = {}
38 |         space_sample = self.space_fn(**space_kwargs)
39 |         space_sample.random_sample()
40 |         return space_sample
41 | 
42 |     def _sample_and_check(self, sample_fn, space_options=None):
43 |         if space_options is None:
44 |             space_options = {}
45 | 
46 |         counter = 0
47 |         while True:
48 |             space_sample = sample_fn(**space_options)
49 |             counter += 1
50 |             if counter >= 1000:
51 |                 raise ValueError('Unable to take a valid sample within the retry limit (1000).')
52 |             if self.space_sample_validation_fn is not None:
53 |                 if self.space_sample_validation_fn(space_sample):
54 |                     break
55 |             else:
56 |                 break
57 |         return space_sample
58 | 
59 |     def get_best(self):
60 |         raise NotImplementedError
61 | 
62 |     def update_result(self, space, result):
63 |         raise NotImplementedError
64 | 
65 |     def summary(self):
66 |         return 'No Summary'
67 | 
68 |     def reset(self):
69 |         raise NotImplementedError
70 | 
71 |     def export(self):
72 |         raise NotImplementedError
73 | 
74 |     def kind(self):
75 |         """Kind of the Searcher; one of 'soo' (single-objective) or 'moo' (multi-objective).
76 |         This method is used to avoid having to import MOOSearcher when detecting the Searcher type.
77 |         """
78 |         return 'soo'
79 | 
80 |     def __repr__(self):
81 |         return to_repr(self)
82 | 
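83 | 
84 | # Usage sketch (illustrative only, never executed here): a concrete subclass
85 | # such as GridSearcher implements `sample`/`update_result`, and a search engine
86 | # drives it roughly as below. `evaluate_sample` is a hypothetical stand-in for
87 | # fitting and scoring an Estimator on the sampled space:
88 | #
89 | #     searcher = GridSearcher(space_fn, optimize_direction=OptimizeDirection.Maximize)
90 | #     for _ in range(max_trials):
91 | #         space_sample = searcher.sample()              # a fully assigned HyperSpace
92 | #         reward = evaluate_sample(space_sample)        # hypothetical evaluation step
93 | #         searcher.update_result(space_sample, reward)  # feed the reward back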
--------------------------------------------------------------------------------
/hypernets/tests/tabular/toolbox_test.py:
--------------------------------------------------------------------------------
1 | import os.path as path
2 | 
3 | import numpy as np
4 | import pandas as pd
5 | 
6 | from hypernets.tabular import get_tool_box
7 | from hypernets.tabular.datasets import dsutils
8 | from hypernets.utils import const
9 | 
10 | 
11 | class TestToolBox:
12 | 
13 |     def test_infer_task_type(self):
14 |         y1 = np.random.randint(0, 2, size=(1000), dtype='int')
15 |         y2 = np.random.randint(0, 2, size=(1000)).astype('str')
16 |         y3 = np.random.randint(0, 20, size=(1000)).astype('object')
17 |         y4 = np.random.random(size=(1000)).astype('float')
18 |         y5 = np.array([1, 1, 2, 2, 'na'])
19 | 
20 |         tb = get_tool_box(y1)
21 | 
22 |         task, _ = tb.infer_task_type(y1)
23 |         assert task == const.TASK_BINARY
24 | 
25 |         task, _ = tb.infer_task_type(y2)
26 |         assert task == const.TASK_BINARY
27 | 
28 |         task, _ = tb.infer_task_type(y3)
29 |         assert task == const.TASK_MULTICLASS
30 | 
31 |         task, _ = tb.infer_task_type(y4)
32 |         assert task == const.TASK_REGRESSION
33 | 
34 |         task, _ = tb.infer_task_type(y5, excludes=['na'])
35 |         assert task == const.TASK_BINARY
36 | 
37 |     def test_detect_estimator_lightgbm(self):
38 |         tb = get_tool_box(pd.DataFrame)
39 |         detector = tb.estimator_detector('lightgbm.LGBMClassifier', 'binary')
40 |         r = detector()
41 |         assert r == {'installed', 'initialized', 'fitted'}
42 | 
43 |     def test_concat_df(self):
44 |         df = pd.DataFrame(dict(
45 |             x1=['a', 'b', 'c'],
46 |             x2=[1, 2, 3]
47 |         ))
48 |         tb = get_tool_box(pd.DataFrame)
49 | 
50 |         # DataFrame + DataFrame
51 |         df1 = tb.concat_df([df, df], axis=0)
52 |         df2 = pd.concat([df, df], axis=0)
53 |         assert (df1 == df2).all().all()
54 | 
55 |         # DataFrame + ndarray
56 |         df1 = tb.concat_df([df, df.values], axis=0)
57 |         df2 = pd.concat([df, df], axis=0)
58 |         assert isinstance(df1, pd.DataFrame)
59 |         assert (df1 == df2).all().all()
60 | 
61 |         # Series + ndarray
62 |         s = df['x1']
63 |         df1 = tb.concat_df([s, s.values], axis=0)
64 |         df2 = pd.concat([s, s], axis=0)
65 |         assert isinstance(df1, pd.Series)
66 |         assert (df1 == df2).all()
67 | 
68 |     def test_load_data(self):
69 |         data_dir = path.split(dsutils.__file__)[0]
70 |         data_file = f'{data_dir}/blood.csv'
71 |         tb = get_tool_box(pd.DataFrame)
72 |         df = tb.load_data(data_file)
73 |         assert isinstance(df, pd.DataFrame)
74 | 
--------------------------------------------------------------------------------
/hypernets/core/objective.py:
--------------------------------------------------------------------------------
1 | import abc
2 | from hypernets.core.searcher import OptimizeDirection
3 | 
4 | 
5 | class Objective(metaclass=abc.ABCMeta):
6 |     """ Objective = Indicator metric + Direction"""
7 | 
8 |     def __init__(self, name, direction, need_train_data=False, need_val_data=True, need_test_data=False):
9 |         self.name = name
10 |         self.direction = direction
11 |         self.need_train_data = need_train_data
12 |         self.need_val_data = need_val_data
13 |         self.need_test_data = need_test_data
14 | 
15 |     def evaluate(self, trial, estimator, X_train, y_train, X_val, y_val, X_test=None, **kwargs) -> float:
16 |         if self.need_test_data:
17 |             assert X_test is not None, "need test data"
18 | 
19 |         if self.need_train_data:
20 |             assert X_train is not None and y_train is not None, "need train data"
21 | 
22 |         if self.need_val_data:
23 |             assert X_val is not None and y_val is not None, "need validation data"
24 | 
25 |         return self._evaluate(trial, estimator, X_train, y_train, X_val, y_val, X_test=X_test, **kwargs)
26 | 
27 |     @abc.abstractmethod
28 |     def _evaluate(self, trial, estimator, X_train, y_train, X_val, y_val, X_test=None, **kwargs) -> float:
29 |         raise NotImplementedError
30 | 
31 |     def evaluate_cv(self, trial, estimator, X_trains, y_trains,
32 |                     X_vals, y_vals, X_test=None, **kwargs) -> float:
33 | 
34 |         if self.need_test_data:
35 |             assert X_test is not None, "need test data"
36 | 
37 |         if self.need_train_data:
38 |             assert X_trains is not None and y_trains is not None, "need train data"
39 |             assert len(X_trains) == len(y_trains)
40 | 
41 |         if self.need_val_data:
42 |             assert X_vals is not None and y_vals is not None, "need validation data"
43 |             assert len(X_vals) == len(y_vals)
44 | 
45 |         return self._evaluate_cv(trial=trial, estimator=estimator, X_trains=X_trains, y_trains=y_trains,
46 |                                  X_vals=X_vals, y_vals=y_vals, X_test=X_test, **kwargs)
47 | 
48 |     @abc.abstractmethod
49 |     def _evaluate_cv(self, trial, estimator, X_trains, y_trains, X_vals, y_vals, X_test=None, **kwargs) -> float:
50 |         raise NotImplementedError
51 | 
52 |     def __repr__(self):
53 |         return f"{self.__class__.__name__}(name={self.name}, direction={self.direction}," \
54 |                f" need_train_data={self.need_train_data}," \
55 |                f" need_val_data={self.need_val_data}," \
56 |                f" need_test_data={self.need_test_data})"
57 | 
--------------------------------------------------------------------------------
/hypernets/core/meta_learner.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'yangjian'
3 | """
4 | 
5 | """
6 | from lightgbm import LGBMRegressor
7 | import numpy as np
8 | from ..utils import logging
9 | 
10 | logger = logging.get_logger(__name__)
11 | 
12 | 
13 | class MetaLearner(object):
14 |     def __init__(self, history, dataset_id, trial_store):
15 |         self.trial_store = trial_store
16 |         self.dataset_id = dataset_id
17 |         self.history = history
18 |         self.regressors = {}
19 | 
self.store_history = {} 20 | 21 | if logger.is_info_enabled(): 22 | logger.info(f'Initialize Meta Learner: dataset_id:{dataset_id}') 23 | 24 | def new_sample(self, space_sample): 25 | self.fit(space_sample.signature) 26 | 27 | def fit(self, space_signature): 28 | 29 | features = self.extract_features_and_labels(space_signature) 30 | x = [] 31 | y = [] 32 | for feature, label in features: 33 | if label != 0: 34 | x.append(feature) 35 | y.append(label) 36 | 37 | store_history = self.store_history.get(space_signature) 38 | 39 | if self.trial_store is not None and store_history is None: 40 | trials = self.trial_store.get_all(self.dataset_id, space_signature) 41 | store_x = [] 42 | store_y = [] 43 | for t in trials: 44 | store_x.append(t.space_sample_vectors) 45 | store_y.append(t.reward) 46 | store_history = (store_x, store_y) 47 | self.store_history[space_signature] = store_history 48 | 49 | if store_history is None: 50 | store_history = ([], []) 51 | 52 | store_x, store_y = store_history 53 | x = x + store_x 54 | y = y + store_y 55 | if len(x) >= 2: 56 | regressor = LGBMRegressor(min_data=1, min_data_in_bin=1, verbosity=-1) 57 | regressor.fit(x, y) 58 | # if logger.is_info_enabled(): 59 | # logger.info(regressor.predict(x)) 60 | self.regressors[space_signature] = regressor 61 | 62 | def predict(self, space_sample, default_value=np.inf): 63 | regressor = self.regressors.get(space_sample.signature) 64 | if regressor is not None: 65 | score = regressor.predict([space_sample.vectors]) 66 | else: 67 | score = default_value 68 | return score 69 | 70 | def extract_features_and_labels(self, signature): 71 | features = [(t.space_sample.vectors, t.reward) for t in self.history.trials if 72 | t.space_sample.signature == signature] 73 | return features 74 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | .idea 3 | 4 | ### Python template 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | dt_output/ 134 | log/ 135 | trial_store/ 136 | tmp/ 137 | catboost_info/ 138 | 139 | #dispatchers 140 | logs/ 141 | workdir/ 142 | dask-worker-space/ 143 | 144 | -------------------------------------------------------------------------------- /hypernets/tests/discriminators/percentile.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | from hypernets.discriminators import PercentileDiscriminator, ProgressivePercentileDiscriminator 8 | 9 | from . 
import history, group_id
10 | 
11 | 
12 | class Test_PercentileDiscriminator():
13 |     def test_percentile(self):
14 |         d = PercentileDiscriminator(50, min_trials=5, min_steps=5, stride=1, history=history, optimize_direction='min')
15 |         p1 = d.is_promising([0.9, 0.9, 0.9, 0.9], group_id)
16 |         assert p1 == True
17 | 
18 |         p2 = d.is_promising([0.9, 0.9, 0.9, 0.9, 0.9], group_id)
19 |         assert p2 == False
20 | 
21 |         p2 = d.is_promising([0.9, 0.9, 0.9, 0.9, 0.525], group_id)
22 |         assert p2 == False
23 | 
24 |         p2 = d.is_promising([0.9, 0.9, 0.9, 0.9, 0.524], group_id)
25 |         assert p2 == True
26 | 
27 |         d = PercentileDiscriminator(0, min_trials=5, min_steps=5, stride=1, history=history, optimize_direction='min')
28 |         p1 = d.is_promising([0.9, 0.9, 0.9, 0.9, 0.50], group_id)
29 |         assert p1 == True
30 |         p1 = d.is_promising([0.9, 0.9, 0.9, 0.9, 0.56], group_id)
31 |         assert p1 == False
32 | 
33 |         d = PercentileDiscriminator(100, min_trials=5, min_steps=5, stride=1, history=history, optimize_direction='min')
34 |         p1 = d.is_promising([0.9, 0.9, 0.9, 0.9, 0.55], group_id)
35 |         assert p1 == False
36 |         p1 = d.is_promising([0.9, 0.9, 0.9, 0.9, 0.49], group_id)
37 |         assert p1 == True
38 | 
39 |     def test_progressive_percentile(self):
40 |         d = ProgressivePercentileDiscriminator([100, 90, 80, 60, 50, 40, 30, 20, 10, 0], min_trials=5, min_steps=5,
41 |                                                stride=1,
42 |                                                history=history, optimize_direction='min')
43 |         p1 = d.is_promising([0.1, 0.1, 0.1, 0.1, 0.1], group_id)
44 |         assert p1 == True
45 | 
46 |         p1 = d.is_promising([0.1, 0.1, 0.1, 0.1, 0.56], group_id)
47 |         assert p1 == False
48 | 
49 |         p1 = d.is_promising([0.1, 0.1, 0.1, 0.1, 0.1, 0.1], group_id)
50 |         assert p1 == True
51 | 
52 |         p1 = d.is_promising([0.1, 0.1, 0.1, 0.1, 0.1, 0.45], group_id)
53 |         assert p1 == False
54 | 
55 |         p1 = d.is_promising([0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1], group_id)
56 |         assert p1 == True
57 | 
58 |         p1 = d.is_promising([0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.345], group_id)
59 |         assert p1 == False
60 | 
61 |         p1 = d.is_promising([0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1], group_id)
62 |         assert p1 == True
63 | 
64 |         p1 = d.is_promising([0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.34], group_id)
65 |         assert p1 == False
66 | 
--------------------------------------------------------------------------------
/hypernets/dispatchers/run.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | import argparse
3 | 
4 | from hypernets.dispatchers.cluster import Cluster
5 | 
6 | 
7 | def main():
8 |     parser = argparse.ArgumentParser('run HyperNets experiment in cluster.')
9 |     parser.add_argument('--experiment', '-experiment',
10 |                         default=None,
11 |                         help='experiment id, default current timestamp')
12 |     parser.add_argument('--driver-broker', '-driver-broker',
13 |                         help='address of the driver broker'
14 |                              ', eg: grpc://<ip>:<port> to use grpc process broker'
15 |                              ', or just <ip> to use ssh process')
16 |     parser.add_argument('--driver-port', '-driver-port',
17 |                         type=int, default=8001,
18 |                         help='tcp port of the driver'
19 |                              ', the executors will connect to this port with grpc'
20 |                              ', default 8001')
21 |     parser.add_argument('--executor-brokers', '-executor-brokers',
22 |                         required=True,
23 |                         help='addresses of the executor nodes, separated by comma. '
24 |                              'eg: "grpc://<ip1>:<port1>,'
25 |                              'grpc://<ip2>:<port2>"')
26 |     parser.add_argument('--with-driver', '-with-driver',
27 |                         type=int, default=1,
28 |                         help='start driver process or not, default 1')
29 |     parser.add_argument('--spaces-dir', '-spaces-dir',
30 |                         default='tmp',
31 |                         help='driver directory to store space files, default "tmp"')
32 |     parser.add_argument('--logs-dir', '-logs-dir',
33 |                         default='logs',
34 |                         help='local directory to store log files')
35 |     parser.add_argument('--report-interval', '-report-interval',
36 |                         type=int, default=60,
37 |                         help='interval in seconds for reporting cluster processes, default 60')
38 |     args, argv = parser.parse_known_args()
39 | 
40 |     cluster = Cluster(args.experiment,
41 |                       args.driver_broker,
42 |                       args.driver_port,
43 |                       args.with_driver,
44 |                       args.executor_brokers.split(','),
45 |                       args.spaces_dir,
46 |                       args.logs_dir,
47 |                       args.report_interval,
48 |                       *argv)
49 |     cluster.run()
50 | 
51 | 
52 | if __name__ == '__main__':
53 |     try:
54 |         main()
55 |         print('done')
56 |     except KeyboardInterrupt:
57 |         print('KeyboardInterrupt')
58 | 
--------------------------------------------------------------------------------