├── setup.cfg ├── hypernets ├── pipeline │ └── __init__.py ├── tests │ ├── board │ │ └── __init__.py │ ├── model │ │ ├── __init__.py │ │ └── plain_model_test.py │ ├── experiment │ │ ├── __init__.py │ │ ├── run_export_experiment_report.py │ │ └── general_experiment_test.py │ ├── hyperctl │ │ ├── __init__.py │ │ ├── minimum_batch.json │ │ ├── plain_job_script.py │ │ ├── job_template.yml │ │ ├── test_cli.py │ │ ├── local_batch.json │ │ ├── remote_batch.json │ │ └── test_batch.py │ ├── pipeline │ │ └── __init__.py │ ├── tabular │ │ ├── __init__.py │ │ ├── ensemble │ │ │ └── __init__.py │ │ ├── lifelong_learning │ │ │ └── __init__.py │ │ ├── tb_cuml │ │ │ ├── __init__.py │ │ │ ├── data_cleaner_test.py │ │ │ ├── psudo_labeling_test.py │ │ │ ├── feature_importance_test.py │ │ │ └── cache_test.py │ │ ├── tb_dask │ │ │ ├── data_cleaner_test.py │ │ │ ├── psudo_labeling_test.py │ │ │ ├── feature_importance_test.py │ │ │ ├── __init__.py │ │ │ ├── toolbox_test.py │ │ │ └── cache_test.py │ │ ├── utils_test.py │ │ ├── cache_test.py │ │ ├── psudo_labeling_test.py │ │ └── toolbox_test.py │ ├── core │ │ ├── __init__.py │ │ └── mutable_test.py │ ├── searchers │ │ ├── __init__.py │ │ ├── test_moo.py │ │ ├── playback_test.py │ │ └── test_genetic.py │ ├── trial │ │ ├── __init__.py │ │ └── trial_store_test.py │ ├── dispatchers │ │ ├── __init__.py │ │ └── process_test.py │ ├── utils │ │ ├── __init__.py │ │ ├── perf_test.py │ │ ├── estimators_test.py │ │ ├── tic_toc_test.py │ │ ├── common_test.py │ │ └── tuning_test.py │ ├── __init__.py │ └── discriminators │ │ ├── base_test.py │ │ ├── __init__.py │ │ └── percentile.py ├── dispatchers │ ├── dask │ │ └── __init__.py │ ├── predict │ │ ├── __init__.py │ │ └── grpc │ │ │ ├── __init__.py │ │ │ ├── proto │ │ │ ├── __init__.py │ │ │ ├── readme.txt │ │ │ ├── predict.proto │ │ │ └── predict_pb2_grpc.py │ │ │ ├── predict_client.py │ │ │ └── predict_service.py │ ├── cluster │ │ ├── grpc │ │ │ ├── __init__.py │ │ │ └── proto │ │ │ │ ├── __init__.py │ │ │ │ ├── readme.txt │ │ │ │ └── spec.proto │ │ └── __init__.py │ ├── process │ │ ├── grpc │ │ │ ├── __init__.py │ │ │ └── proto │ │ │ │ ├── __init__.py │ │ │ │ ├── readme.txt │ │ │ │ └── proc.proto │ │ ├── __init__.py │ │ ├── grpc_process.py │ │ └── local_process.py │ ├── run_predict_server.py │ ├── run_broker.py │ ├── run_predict.py │ ├── __init__.py │ ├── cfg.py │ └── run.py ├── tabular │ ├── datasets │ │ ├── __init__.py │ │ ├── boston.csv.gz │ │ ├── bank-uci.csv.gz │ │ ├── adult-uci.csv.gz │ │ └── dsutils.py │ ├── lifelong_learning │ │ ├── _validation.py │ │ └── __init__.py │ ├── evaluator │ │ ├── __init__.py │ │ ├── h2o.py │ │ ├── tpot.py │ │ ├── auto_sklearn.py │ │ └── hyperdt.py │ ├── cuml_ex │ │ ├── __init__.py │ │ ├── _estimator_detector.py │ │ ├── _pseudo_labeling.py │ │ ├── _drift_detection.py │ │ ├── _data_hasher.py │ │ ├── _ensemble.py │ │ ├── _persistence.py │ │ ├── _dataframe_mapper.py │ │ └── _data_cleaner.py │ ├── ensemble │ │ ├── __init__.py │ │ ├── misc.py │ │ └── stacking.py │ ├── feature_generators │ │ ├── _base.py │ │ └── __init__.py │ ├── dask_ex │ │ ├── _collinearity.py │ │ ├── _feature_generators.py │ │ ├── _drift_detection.py │ │ ├── _model_selection.py │ │ ├── __init__.py │ │ ├── _data_hasher.py │ │ ├── _dataframe_mapper.py │ │ └── _data_cleaner.py │ └── __init__.py ├── hyperctl │ ├── __init__.py │ ├── consts.py │ ├── utils.py │ └── api.py ├── core │ ├── config.py │ ├── stateful.py │ ├── dispatcher.py │ ├── random_state.py │ ├── context.py │ ├── __init__.py │ ├── pareto.py │ ├── mutables.py │ ├── 
searcher.py │ ├── objective.py │ └── meta_learner.py ├── __init__.py ├── examples │ ├── __init__.py │ └── smoke_testing.py ├── server │ └── __init__.py ├── model │ └── __init__.py ├── conf │ └── __init__.py ├── experiment │ ├── __init__.py │ ├── general.py │ └── cfg.py ├── utils │ ├── const.py │ └── __init__.py ├── searchers │ ├── random_searcher.py │ ├── playback_searcher.py │ └── grid_searcher.py └── discriminators │ └── __init__.py ├── requirements-zhcn.txt ├── docs ├── source │ ├── examples.md │ ├── faq.md │ ├── tuning.md │ ├── _static │ │ └── css │ │ │ └── my_theme.css │ ├── images │ │ ├── DAT2.1.png │ │ ├── DAT2.5.png │ │ ├── Hypernets.png │ │ ├── moead_pbi.png │ │ ├── DAT_latest.png │ │ ├── enas_arch_1.png │ │ ├── enas_arch_2.png │ │ ├── crowding_distance.png │ │ ├── enas_arch_sample.png │ │ ├── nsga2_procedure.png │ │ ├── compete_experiment.png │ │ ├── connection_space_or.png │ │ ├── r_dominance_sorting.png │ │ ├── hypernets_search_space.png │ │ ├── notebook_plot_dataset.png │ │ ├── connection_space_repeat.png │ │ ├── excel_experiment_report.png │ │ ├── connection_space_optional.png │ │ ├── connection_space_permuation.png │ │ ├── connection_space_sequential.png │ │ ├── hyper_model_search_sequence.png │ │ ├── hypernets_conceptual_model.png │ │ ├── notebook_experiment_config.png │ │ ├── abstract_illustration_of_nas.png │ │ └── connection_space_inputchoice.png │ ├── modules.rst │ ├── release_notes.rst │ ├── hypernets.conf.rst │ ├── hypernets.server.rst │ ├── hypernets.tabular.cuml_ex.rst │ ├── hypernets.tabular.dask_ex.rst │ ├── hypernets.tabular.lifelong_learning.rst │ ├── hypernets.tabular.feature_generators.rst │ ├── hypernets.rst │ ├── hypernets.discriminators.rst │ ├── hypernets.tabular.datasets.rst │ ├── hypernets.dispatchers.dask.rst │ ├── hypernets.pipeline.rst │ ├── hypernets.dispatchers.predict.rst │ ├── hypernets.examples.rst │ ├── release_note_025.rst │ ├── hypernets.model.rst │ ├── release_note_030.rst │ ├── hypernets.dispatchers.cluster.grpc.proto.rst │ ├── hypernets.dispatchers.process.grpc.proto.rst │ ├── hypernets.dispatchers.predict.grpc.proto.rst │ ├── index.rst │ ├── hypernets.tabular.ensemble.rst │ ├── hypernets.dispatchers.predict.grpc.rst │ ├── hypernets.dispatchers.cluster.grpc.rst │ ├── hypernets.dispatchers.process.grpc.rst │ ├── hypernets.dispatchers.process.rst │ ├── hypernets.dispatchers.cluster.rst │ ├── hypermodels.rst │ ├── hypernets.experiment.rst │ ├── hypernets.utils.rst │ ├── hypernets.tabular.evaluator.rst │ ├── hypernets.dispatchers.rst │ ├── conf.py │ ├── overview.md │ ├── hypernets.hyperctl.rst │ └── hypernets.searchers.rst ├── requirements.txt ├── Makefile └── make.bat ├── requirements-fg.txt ├── requirements-tests.txt ├── requirements-board.txt ├── requirements-cuml.txt ├── DAT2.5.png ├── requirements-notebook.txt ├── requirements-extra.txt ├── requirements-dask.txt ├── .github ├── ISSUE_TEMPLATE │ ├── 90-other-issues.md │ ├── 30-feature-request.md │ └── 00-bug-issue.md └── workflows │ └── dist-builder.yml ├── requirements.txt ├── .readthedocs.yml ├── CONTRIBUTING.md └── .gitignore /setup.cfg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hypernets/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hypernets/tests/board/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hypernets/tests/model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements-zhcn.txt: -------------------------------------------------------------------------------- 1 | jieba 2 | -------------------------------------------------------------------------------- /docs/source/examples.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | -------------------------------------------------------------------------------- /hypernets/dispatchers/dask/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hypernets/tabular/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hypernets/tests/experiment/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hypernets/tests/hyperctl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hypernets/tests/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/source/faq.md: -------------------------------------------------------------------------------- 1 | # FAQ 2 | 3 | ## How...
-------------------------------------------------------------------------------- /hypernets/dispatchers/predict/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hypernets/dispatchers/cluster/grpc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hypernets/dispatchers/predict/grpc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hypernets/dispatchers/process/grpc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements-fg.txt: -------------------------------------------------------------------------------- 1 | featuretools>=0.23.0 2 | -------------------------------------------------------------------------------- /requirements-tests.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pytest-cov 3 | -------------------------------------------------------------------------------- /docs/source/tuning.md: -------------------------------------------------------------------------------- 1 | # Hyper-parameter Tuning 2 | -------------------------------------------------------------------------------- /hypernets/dispatchers/cluster/grpc/proto/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hypernets/dispatchers/predict/grpc/proto/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hypernets/dispatchers/process/grpc/proto/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements-board.txt: -------------------------------------------------------------------------------- 1 | # hboard 2 | ipywidgets 3 | -------------------------------------------------------------------------------- /requirements-cuml.txt: -------------------------------------------------------------------------------- 1 | cupy 2 | cudf 3 | cuml 4 | pynvml 5 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx_rtd_theme 2 | recommonmark 3 | 4 | -------------------------------------------------------------------------------- /DAT2.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/DAT2.5.png -------------------------------------------------------------------------------- /hypernets/hyperctl/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import api 2 | from .api import get_job_params 3 | -------------------------------------------------------------------------------- /docs/source/_static/css/my_theme.css: -------------------------------------------------------------------------------- 1 | .wy-nav-content { 2 | max-width: 1080px !important; 3 | } -------------------------------------------------------------------------------- /hypernets/core/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ -------------------------------------------------------------------------------- /requirements-notebook.txt: -------------------------------------------------------------------------------- 1 | jupyterlab 2 | ipywidgets 3 | jupyterlab_widgets 4 | # hboard-widget 5 | -------------------------------------------------------------------------------- /hypernets/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | __version__ = '0.3.2' 4 | -------------------------------------------------------------------------------- /hypernets/examples/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ -------------------------------------------------------------------------------- /hypernets/server/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ -------------------------------------------------------------------------------- /hypernets/tests/core/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ -------------------------------------------------------------------------------- /hypernets/tests/searchers/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ -------------------------------------------------------------------------------- /hypernets/tests/trial/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ -------------------------------------------------------------------------------- /hypernets/tests/dispatchers/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ -------------------------------------------------------------------------------- /hypernets/tests/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | -------------------------------------------------------------------------------- /docs/source/images/DAT2.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/DAT2.1.png -------------------------------------------------------------------------------- /docs/source/images/DAT2.5.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/DAT2.5.png -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | hypernets 2 | ========= 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | hypernets 8 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/ensemble/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ -------------------------------------------------------------------------------- /requirements-extra.txt: -------------------------------------------------------------------------------- 1 | paramiko 2 | #protobuf<4.0 3 | #grpcio>=1.24.0 4 | s3fs 5 | python-geohash 6 | #pyarrow 7 | -------------------------------------------------------------------------------- /docs/source/images/Hypernets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/Hypernets.png -------------------------------------------------------------------------------- /docs/source/images/moead_pbi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/moead_pbi.png -------------------------------------------------------------------------------- /docs/source/images/DAT_latest.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/DAT_latest.png -------------------------------------------------------------------------------- /docs/source/images/enas_arch_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/enas_arch_1.png -------------------------------------------------------------------------------- /docs/source/images/enas_arch_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/enas_arch_2.png -------------------------------------------------------------------------------- /hypernets/tests/tabular/lifelong_learning/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ -------------------------------------------------------------------------------- /hypernets/tabular/lifelong_learning/_validation.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | -------------------------------------------------------------------------------- /docs/source/images/crowding_distance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/crowding_distance.png -------------------------------------------------------------------------------- /docs/source/images/enas_arch_sample.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/enas_arch_sample.png -------------------------------------------------------------------------------- /docs/source/images/nsga2_procedure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/nsga2_procedure.png -------------------------------------------------------------------------------- /hypernets/tabular/datasets/boston.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/hypernets/tabular/datasets/boston.csv.gz -------------------------------------------------------------------------------- /docs/source/images/compete_experiment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/compete_experiment.png -------------------------------------------------------------------------------- /docs/source/images/connection_space_or.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/connection_space_or.png -------------------------------------------------------------------------------- /docs/source/images/r_dominance_sorting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/r_dominance_sorting.png -------------------------------------------------------------------------------- /hypernets/tabular/datasets/bank-uci.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/hypernets/tabular/datasets/bank-uci.csv.gz -------------------------------------------------------------------------------- /docs/source/images/hypernets_search_space.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/hypernets_search_space.png -------------------------------------------------------------------------------- /docs/source/images/notebook_plot_dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/notebook_plot_dataset.png -------------------------------------------------------------------------------- /hypernets/tabular/datasets/adult-uci.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/hypernets/tabular/datasets/adult-uci.csv.gz -------------------------------------------------------------------------------- /docs/source/images/connection_space_repeat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/connection_space_repeat.png -------------------------------------------------------------------------------- /docs/source/images/excel_experiment_report.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/excel_experiment_report.png 
-------------------------------------------------------------------------------- /docs/source/images/connection_space_optional.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/connection_space_optional.png -------------------------------------------------------------------------------- /docs/source/images/connection_space_permuation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/connection_space_permuation.png -------------------------------------------------------------------------------- /docs/source/images/connection_space_sequential.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/connection_space_sequential.png -------------------------------------------------------------------------------- /docs/source/images/hyper_model_search_sequence.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/hyper_model_search_sequence.png -------------------------------------------------------------------------------- /docs/source/images/hypernets_conceptual_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/hypernets_conceptual_model.png -------------------------------------------------------------------------------- /docs/source/images/notebook_experiment_config.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/notebook_experiment_config.png -------------------------------------------------------------------------------- /docs/source/images/abstract_illustration_of_nas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/abstract_illustration_of_nas.png -------------------------------------------------------------------------------- /docs/source/images/connection_space_inputchoice.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataCanvasIO/Hypernets/HEAD/docs/source/images/connection_space_inputchoice.png -------------------------------------------------------------------------------- /hypernets/tabular/evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | from ._base import BaseEstimator, Evaluator 8 | -------------------------------------------------------------------------------- /hypernets/dispatchers/cluster/__init__.py: -------------------------------------------------------------------------------- 1 | from .cluster import Cluster 2 | from .driver_dispatcher import DriverDispatcher 3 | from .executor_dispatcher import ExecutorDispatcher 4 | -------------------------------------------------------------------------------- /hypernets/tabular/lifelong_learning/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 
__author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | from ._split import PrequentialSplit, select_valid_oof -------------------------------------------------------------------------------- /hypernets/model/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | from .estimator import CrossValidationEstimator, Estimator 8 | from .hyper_model import HyperModel -------------------------------------------------------------------------------- /hypernets/tabular/cuml_ex/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | from ._toolbox import CumlToolBox 6 | from ._transformer import Localizable, copy_attrs_as_local, as_local_if_possible 7 | -------------------------------------------------------------------------------- /requirements-dask.txt: -------------------------------------------------------------------------------- 1 | dask!=2023.2.1,!=2023.3.*,!=2023.4.*,!=2023.5.*,<2024.5.0 2 | distributed!=2023.2.1,!=2023.3.*,!=2023.4.*,!=2023.5.*,<2024.5.0 3 | #dask<=2023.2.0 4 | #distributed<=2023.2.0 5 | dask-ml<2025.0.0 6 | -------------------------------------------------------------------------------- /docs/source/release_notes.rst: -------------------------------------------------------------------------------- 1 | Release Notes 2 | ============= 3 | 4 | Release history: 5 | 6 | .. toctree:: 7 | :maxdepth: 1 8 | 9 | v0.2.5 10 | v0.3.0 11 | -------------------------------------------------------------------------------- /hypernets/conf/__init__.py: -------------------------------------------------------------------------------- 1 | from traitlets import Unicode, Unicode as String, Bool, Int, Float, Enum, List, Dict, Union 2 | 3 | from ._configuration import Configurable, configure, observe, configure_and_observe, generate_config_file 4 | -------------------------------------------------------------------------------- /hypernets/tests/hyperctl/minimum_batch.json: -------------------------------------------------------------------------------- 1 | { 2 | "job_command": "pwd", 3 | "jobs": [ 4 | { 5 | "params": { 6 | "learning_rate": 0.1 7 | } 8 | 9 | } 10 | ] 11 | } -------------------------------------------------------------------------------- /docs/source/hypernets.conf.rst: -------------------------------------------------------------------------------- 1 | hypernets.conf package 2 | ====================== 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. automodule:: hypernets.conf 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | -------------------------------------------------------------------------------- /docs/source/hypernets.server.rst: -------------------------------------------------------------------------------- 1 | hypernets.server package 2 | ======================== 3 | 4 | Module contents 5 | --------------- 6 | 7 | ..
automodule:: hypernets.server 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | -------------------------------------------------------------------------------- /hypernets/tests/hyperctl/plain_job_script.py: -------------------------------------------------------------------------------- 1 | from hypernets import hyperctl 2 | 3 | 4 | def main(): 5 | params = hyperctl.get_job_params() 6 | assert params 7 | print(params) 8 | 9 | 10 | if __name__ == '__main__': 11 | main() 12 | -------------------------------------------------------------------------------- /hypernets/tabular/ensemble/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | from .base_ensemble import BaseEnsemble 8 | from .stacking import StackingEnsemble 9 | from .voting import AveragingEnsemble, GreedyEnsemble 10 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/90-other-issues.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Other Issues 3 | about: Use this template for any other non-support related issues 4 | labels: 'type:others' 5 | 6 | --- 7 | 8 | This template is for miscellaneous issues not covered by the other issue categories. 9 | 10 | -------------------------------------------------------------------------------- /docs/source/hypernets.tabular.cuml_ex.rst: -------------------------------------------------------------------------------- 1 | hypernets.tabular.cuml\_ex package 2 | ================================== 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. automodule:: hypernets.tabular.cuml_ex 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | -------------------------------------------------------------------------------- /docs/source/hypernets.tabular.dask_ex.rst: -------------------------------------------------------------------------------- 1 | hypernets.tabular.dask\_ex package 2 | ================================== 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. automodule:: hypernets.tabular.dask_ex 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | -------------------------------------------------------------------------------- /hypernets/core/stateful.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | 6 | 7 | class Stateful: 8 | def __init__(self): 9 | pass 10 | 11 | def load_state(self): 12 | pass 13 | 14 | def save_state(self): 15 | pass 16 | 17 | def get_state_path(self): 18 | pass 19 | -------------------------------------------------------------------------------- /docs/source/hypernets.tabular.lifelong_learning.rst: -------------------------------------------------------------------------------- 1 | hypernets.tabular.lifelong\_learning package 2 | ============================================ 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. automodule:: hypernets.tabular.lifelong_learning 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | -------------------------------------------------------------------------------- /docs/source/hypernets.tabular.feature_generators.rst: -------------------------------------------------------------------------------- 1 | hypernets.tabular.feature\_generators package 2 | ============================================= 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. 
automodule:: hypernets.tabular.feature_generators 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | -------------------------------------------------------------------------------- /hypernets/dispatchers/cluster/grpc/proto/readme.txt: -------------------------------------------------------------------------------- 1 | requirements: 2 | grpcio 3 | grpcio-tools [ needed to run protoc ] 4 | 5 | run the following command to re-generate protobuf stub code for python: 6 | 7 | python -m grpc_tools.protoc --python_out=. --grpc_python_out=. -I. hypernets/dispatchers/cluster/grpc/proto/spec.proto 8 | -------------------------------------------------------------------------------- /hypernets/dispatchers/process/grpc/proto/readme.txt: -------------------------------------------------------------------------------- 1 | requirements: 2 | grpcio 3 | grpcio-tools [ needed to run protoc ] 4 | 5 | run the following command to re-generate protobuf stub code for python: 6 | 7 | python -m grpc_tools.protoc --python_out=. --grpc_python_out=. -I. hypernets/dispatchers/process/grpc/proto/proc.proto 8 | -------------------------------------------------------------------------------- /hypernets/dispatchers/predict/grpc/proto/readme.txt: -------------------------------------------------------------------------------- 1 | requirements: 2 | grpcio 3 | grpcio-tools [ needed to run protoc ] 4 | 5 | run the following command to re-generate protobuf stub code for python: 6 | 7 | python -m grpc_tools.protoc --python_out=. --grpc_python_out=. -I. hypernets/dispatchers/predict/grpc/proto/predict.proto 8 | -------------------------------------------------------------------------------- /hypernets/tabular/ensemble/misc.py: -------------------------------------------------------------------------------- 1 | try: 2 | from sklearn.metrics._scorer import _PredictScorer 3 | 4 | 5 | def is_predict_scorer(s): 6 | return isinstance(s, _PredictScorer) 7 | except ImportError: 8 | # sklearn 1.4.0 + 9 | def is_predict_scorer(s): 10 | return getattr(s, '_response_method', '') == 'predict' 11 | -------------------------------------------------------------------------------- /hypernets/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | import os 7 | import tempfile 8 | import time 9 | 10 | test_output_dir = tempfile.mkdtemp(prefix=time.strftime("hyn_test_%m%d%H%M_")) 11 | 12 | os.environ['DEEPTABLES_HOME'] = test_output_dir 13 | os.environ['HYPERNETS_HOME'] = test_output_dir 14 | -------------------------------------------------------------------------------- /hypernets/tests/utils/perf_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | 4 | import psutil 5 | 6 | from hypernets.utils import get_perf 7 | 8 | 9 | def test_get_perf(): 10 | proc = psutil.Process(os.getpid()) 11 | perf = get_perf(proc) 12 | assert isinstance(perf, OrderedDict) 13 | assert 'cpu_total' in perf.keys() 14 | -------------------------------------------------------------------------------- /docs/source/hypernets.rst: -------------------------------------------------------------------------------- 1 | hypernets package 2 | ================= 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | hypernets.experiment 11 | hypernets.searchers 12 | 13 | 14 | Module contents 15 | --------------- 16 | 17 | ..
automodule:: hypernets 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.16.5,<2.0.0 2 | pandas>=0.25.3 3 | scikit-learn>=0.22.1,<1.6.0 4 | scipy 5 | lightgbm>=2.2.0 6 | fsspec>=0.8.0 7 | ipython 8 | traitlets 9 | XlsxWriter>=3.0.2 10 | psutil 11 | joblib; python_version >= '3.8' or platform_system != 'Windows' 12 | joblib<1.3.0; python_version < '3.8' and platform_system == 'Windows' 13 | pyyaml 14 | paramiko 15 | requests 16 | tornado 17 | prettytable 18 | tqdm 19 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/tb_cuml/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | import pytest 6 | 7 | from hypernets.tabular import is_cuml_installed 8 | 9 | if is_cuml_installed: 10 | import cupy 11 | 12 | if_cuml_ready = pytest.mark.skipif(not cupy.cuda.is_available(), reason='Cuda is not available') 13 | else: 14 | if_cuml_ready = pytest.mark.skipif(not is_cuml_installed, reason='Cuml is not installed') 15 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/tb_cuml/data_cleaner_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | from . import if_cuml_ready, is_cuml_installed 6 | from ..data_cleaner_test import TestDataCleaner as _TestDataCleaner 7 | 8 | if is_cuml_installed: 9 | import cudf 10 | 11 | 12 | @if_cuml_ready 13 | class TestCumlDataCleaner(_TestDataCleaner): 14 | @staticmethod 15 | def load_data(): 16 | return cudf.from_pandas(_TestDataCleaner.load_data()) 17 | -------------------------------------------------------------------------------- /hypernets/core/dispatcher.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | 6 | 7 | class Dispatcher(object): 8 | def __init__(self): 9 | super(Dispatcher, self).__init__() 10 | 11 | def dispatch(self, hyper_model, X, y, X_val, y_val, X_test, cv, num_folds, max_trials, dataset_id, trial_store, 12 | **fit_kwargs): 13 | raise NotImplementedError() 14 | 15 | # def run_trial(self, space_sample, trial_no, X, y, X_val, y_val, **fit_kwargs): 16 | # pass 17 | -------------------------------------------------------------------------------- /hypernets/dispatchers/predict/grpc/proto/predict.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | 4 | package hypernets.dispatchers.predict.grpc.proto; 5 | 6 | 7 | service PredictService { 8 | rpc predict(PredictRequest) returns (PredictResponse) {} 9 | } 10 | 11 | message PredictRequest { 12 | string data_file = 1; 13 | string result_file = 2; 14 | } 15 | 16 | message PredictResponse { 17 | string data_file = 1; 18 | string result_file = 2; 19 | int32 code = 3; 20 | string message = 4; 21 | } 22 | -------------------------------------------------------------------------------- /hypernets/dispatchers/process/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from .local_process import LocalProcess 4 | 5 | try: 6 | from .grpc_process import GrpcProcess 7 | except ImportError: 8 | pass 9 | except: 10 |
from hypernets.utils import logging 11 | import sys 12 | 13 | logger = logging.get_logger(__name__) 14 | logger.warning('Failed to load GrpcProcess', exc_info=sys.exc_info()) 15 | 16 | try: 17 | from .ssh_process import SshProcess 18 | except ImportError: 19 | pass 20 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/tb_dask/data_cleaner_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | from . import if_dask_ready, is_dask_installed 6 | from ..data_cleaner_test import TestDataCleaner as _TestDataCleaner 7 | 8 | if is_dask_installed: 9 | import dask.dataframe as dd 10 | 11 | 12 | @if_dask_ready 13 | class TestDaskDataCleaner(_TestDataCleaner): 14 | @staticmethod 15 | def load_data(): 16 | df = _TestDataCleaner.load_data() 17 | return dd.from_pandas(df, npartitions=2) 18 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | # Build documentation in the docs/ directory with Sphinx 4 | sphinx: 5 | configuration: docs/source/conf.py 6 | 7 | # Build documentation with MkDocs 8 | #mkdocs: 9 | # configuration: mkdocs.yml 10 | 11 | # Optionally build your docs in additional formats such as PDF and ePub 12 | formats: all 13 | 14 | # Optionally set the version of Python and requirements required to build your docs 15 | python: 16 | version: 3.6 17 | install: 18 | - requirements: requirements.txt 19 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/tb_dask/psudo_labeling_test.py: -------------------------------------------------------------------------------- 1 | from . import if_dask_ready, is_dask_installed, setup_dask 2 | from ..psudo_labeling_test import TestPseudoLabeling as _TestPseudoLabeling 3 | 4 | if is_dask_installed: 5 | import dask.dataframe as dd 6 | 7 | 8 | @if_dask_ready 9 | class TestDaskPseudoLabeling(_TestPseudoLabeling): 10 | @staticmethod 11 | def load_data(): 12 | setup_dask(None) 13 | df = _TestPseudoLabeling.load_data() 14 | return dd.from_pandas(df, npartitions=2) 15 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/30-feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Request 3 | about: Use this template for raising a feature request 4 | labels: 'type:feature' 5 | 6 | --- 7 | 8 | Please make sure that this is a feature request. 9 | 10 | **System information** 11 | - Hypernets version (you are using): 12 | - Are you willing to contribute it (Yes/No): 13 | 14 | 15 | **Describe the feature and the current behavior/state.** 16 | 17 | 18 | **Will this change the current api? How?** 19 | 20 | 21 | **Any Other info.** 22 | -------------------------------------------------------------------------------- /docs/source/hypernets.discriminators.rst: -------------------------------------------------------------------------------- 1 | hypernets.discriminators package 2 | ================================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.discriminators.percentile module 8 | ------------------------------------------ 9 | 10 | .. 
automodule:: hypernets.discriminators.percentile 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: hypernets.discriminators 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /docs/source/hypernets.tabular.datasets.rst: -------------------------------------------------------------------------------- 1 | hypernets.tabular.datasets package 2 | ================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.tabular.datasets.dsutils module 8 | ----------------------------------------- 9 | 10 | .. automodule:: hypernets.tabular.datasets.dsutils 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: hypernets.tabular.datasets 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/tb_cuml/psudo_labeling_test.py: -------------------------------------------------------------------------------- 1 | from . import if_cuml_ready, is_cuml_installed 2 | from ..psudo_labeling_test import TestPseudoLabeling as _TestPseudoLabeling 3 | 4 | if is_cuml_installed: 5 | import cudf 6 | 7 | 8 | @if_cuml_ready 9 | class TestCumlPseudoLabeling(_TestPseudoLabeling): 10 | 11 | @staticmethod 12 | def load_data(): 13 | df = _TestPseudoLabeling.load_data() 14 | return cudf.from_pandas(df) 15 | 16 | @staticmethod 17 | def is_quantile_exact(): 18 | return False 19 | -------------------------------------------------------------------------------- /docs/source/hypernets.dispatchers.dask.rst: -------------------------------------------------------------------------------- 1 | hypernets.dispatchers.dask package 2 | ================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.dispatchers.dask.dask\_dispatcher module 8 | -------------------------------------------------- 9 | 10 | .. automodule:: hypernets.dispatchers.dask.dask_dispatcher 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: hypernets.dispatchers.dask 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /hypernets/tabular/feature_generators/_base.py: -------------------------------------------------------------------------------- 1 | import featuretools as ft 2 | from hypernets.utils import Version 3 | 4 | FT_V0 = Version(ft.__version__) < Version('1.0') 5 | 6 | if FT_V0: 7 | from featuretools.variable_types import Categorical, LatLong, NaturalLanguage, Datetime, Numeric, Unknown 8 | 9 | 10 | def ColumnSchema(*, logical_type, semantic_tags=None): 11 | return logical_type 12 | 13 | else: 14 | from woodwork.logical_types import Categorical, LatLong, NaturalLanguage, Datetime, Double as Numeric, Unknown 15 | from woodwork.column_schema import ColumnSchema 16 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/tb_cuml/feature_importance_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | from ..feature_importance_test import TestPermutationImportance as _TestPermutationImportance 7 | from . 
import if_cuml_ready, is_cuml_installed 8 | 9 | if is_cuml_installed: 10 | import cudf 11 | 12 | 13 | @if_cuml_ready 14 | class TestCumlPermutationImportance(_TestPermutationImportance): 15 | @staticmethod 16 | def load_data(): 17 | df = _TestPermutationImportance.load_data() 18 | df = cudf.from_pandas(df) 19 | return df 20 | -------------------------------------------------------------------------------- /hypernets/tabular/dask_ex/_collinearity.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | 6 | import dask 7 | 8 | from ._transformers import SafeOrdinalEncoder 9 | from ..collinearity import MultiCollinearityDetector 10 | 11 | 12 | class DaskMultiCollinearityDetector(MultiCollinearityDetector): 13 | def _value_counts(self, X): 14 | n_values = super()._value_counts(X) 15 | return dask.compute(*n_values) 16 | 17 | def _corr(self, X, method=None): 18 | Xt = SafeOrdinalEncoder().fit_transform(X) 19 | corr = Xt.corr(method='pearson' if method is None else method).compute().values 20 | return corr 21 | -------------------------------------------------------------------------------- /hypernets/tests/searchers/test_moo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from hypernets.core.pareto import pareto_dominate 4 | from hypernets.searchers.genetic import Individual 5 | 6 | 7 | def test_dominate(): 8 | s1 = np.array([0.5, 0.6]) 9 | s2 = np.array([0.4, 0.6]) 10 | assert pareto_dominate(s2, s1) 11 | 12 | s3 = np.array([0.3, 0.7]) 13 | assert not pareto_dominate(s2, s3) 14 | 15 | s4 = np.array([0.2, 0.5]) 16 | assert not pareto_dominate(s3, s4) 17 | 18 | # different direction 19 | s5 = np.array([0.8, 100]) 20 | s6 = np.array([0.7, 101]) 21 | assert pareto_dominate(s5, s6, directions=('max', 'min')) 22 | -------------------------------------------------------------------------------- /hypernets/tabular/cuml_ex/_estimator_detector.py: -------------------------------------------------------------------------------- 1 | from ..estimator_detector import EstimatorDetector 2 | 3 | 4 | class CumlEstimatorDetector(EstimatorDetector): 5 | def __call__(self, *args, **kwargs): 6 | from .. 
import CumlToolBox 7 | result = super(CumlEstimatorDetector, self).__call__(*args, **kwargs) 8 | 9 | estimator = self.create_estimator(self.get_estimator_cls()) 10 | X, y = self.prepare_data() 11 | X, y = CumlToolBox.from_local(X, y) 12 | 13 | try: 14 | self.fit_estimator(estimator, X, y) 15 | result.add('fitted_with_cuml') 16 | except: 17 | pass 18 | 19 | return result 20 | -------------------------------------------------------------------------------- /hypernets/tests/hyperctl/job_template.yml: -------------------------------------------------------------------------------- 1 | params: 2 | learning_rate: [0.1,0.5] 3 | dataset: ['path/d1.csv','path/d2.csv'] 4 | 5 | command: python3 cli.py 6 | working_dir: /tmp/code 7 | 8 | resource: 9 | cpu: 2 10 | ram: 1024 11 | gpu: 1 12 | 13 | server: 14 | port: 8060 15 | 16 | scheduler: 17 | interval: 1 18 | exit_on_finish: True 19 | 20 | backend: 21 | type: remote 22 | conf: 23 | machines: 24 | - connection: 25 | hostname: host1 26 | username: hyperctl 27 | ssh_rsa_file: ~/.ssh/id_rsa 28 | - connection: 29 | hostname: host2 30 | username: hyperctl 31 | password: hyperctl 32 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/tb_dask/feature_importance_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | import pytest 7 | 8 | from . import if_dask_ready, is_dask_installed 9 | from ..feature_importance_test import TestPermutationImportance as _TestPermutationImportance 10 | 11 | if is_dask_installed: 12 | import dask.dataframe as dd 13 | 14 | 15 | @if_dask_ready 16 | @pytest.mark.xfail(reason='to be fixed') 17 | class TestDaskPermutationImportance(_TestPermutationImportance): 18 | @staticmethod 19 | def load_data(): 20 | df = _TestPermutationImportance.load_data() 21 | df = dd.from_pandas(df, npartitions=2) 22 | return df 23 | -------------------------------------------------------------------------------- /docs/source/hypernets.pipeline.rst: -------------------------------------------------------------------------------- 1 | hypernets.pipeline package 2 | ========================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.pipeline.base module 8 | ------------------------------ 9 | 10 | .. automodule:: hypernets.pipeline.base 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | hypernets.pipeline.transformers module 16 | -------------------------------------- 17 | 18 | .. automodule:: hypernets.pipeline.transformers 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | ..
automodule:: hypernets.pipeline 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /hypernets/core/random_state.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | import numpy as np 7 | 8 | _hypernets_random_state = None 9 | 10 | 11 | def set_random_state(seed): 12 | global _hypernets_random_state 13 | if seed is None: 14 | _hypernets_random_state = None 15 | else: 16 | _hypernets_random_state = np.random.RandomState(seed=seed) 17 | 18 | 19 | def get_random_state(): 20 | global _hypernets_random_state 21 | if _hypernets_random_state is None: 22 | return np.random.RandomState() 23 | else: 24 | return _hypernets_random_state 25 | 26 | 27 | def randint(): 28 | return get_random_state().randint(0, 65535) 29 | -------------------------------------------------------------------------------- /hypernets/tabular/dask_ex/_feature_generators.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | 6 | from ..feature_generators import FeatureGenerationTransformer 7 | from ..feature_generators import is_feature_generator_ready as _is_feature_generator_ready 8 | 9 | is_feature_generator_ready = _is_feature_generator_ready 10 | 11 | 12 | class DaskFeatureGenerationTransformer(FeatureGenerationTransformer): 13 | def _fix_input(self, X, y, for_fit=True): 14 | from ._toolbox import DaskToolBox 15 | 16 | X, y = super()._fix_input(X, y, for_fit=for_fit) 17 | X, y = [DaskToolBox.make_divisions_known(t) if DaskToolBox.is_dask_object(t) else t for t in (X, y)] 18 | 19 | return X, y 20 | -------------------------------------------------------------------------------- /docs/source/hypernets.dispatchers.predict.rst: -------------------------------------------------------------------------------- 1 | hypernets.dispatchers.predict package 2 | ===================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | hypernets.dispatchers.predict.grpc 11 | 12 | Submodules 13 | ---------- 14 | 15 | hypernets.dispatchers.predict.predict\_helper module 16 | ---------------------------------------------------- 17 | 18 | .. automodule:: hypernets.dispatchers.predict.predict_helper 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: hypernets.dispatchers.predict 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/source/hypernets.examples.rst: -------------------------------------------------------------------------------- 1 | hypernets.examples package 2 | ========================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.examples.plain\_model module 8 | -------------------------------------- 9 | 10 | .. automodule:: hypernets.examples.plain_model 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | hypernets.examples.smoke\_testing module 16 | ---------------------------------------- 17 | 18 | .. automodule:: hypernets.examples.smoke_testing 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. 
automodule:: hypernets.examples 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /hypernets/dispatchers/run_predict_server.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import argparse 3 | 4 | from hypernets.dispatchers.predict.grpc.predict_service import serve 5 | 6 | 7 | def main(): 8 | parser = argparse.ArgumentParser('start predict server.') 9 | parser.add_argument('--port', '-port', 10 | type=int, default=8030, 11 | help='tcp port of the predict server') 12 | args, argv = parser.parse_known_args() 13 | 14 | server, _ = serve(f'0.0.0.0:{args.port}', ' '.join(argv)) 15 | server.wait_for_termination() 16 | 17 | 18 | if __name__ == '__main__': 19 | try: 20 | main() 21 | print('done') 22 | except KeyboardInterrupt as e: 23 | print(e) 24 | -------------------------------------------------------------------------------- /hypernets/tabular/dask_ex/_drift_detection.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | 6 | from hypernets.utils import logging 7 | from ..drift_detection import FeatureSelectorWithDriftDetection, DriftDetector 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | class DaskFeatureSelectionWithDriftDetector(FeatureSelectorWithDriftDetection): 13 | parallelizable = False 14 | 15 | @staticmethod 16 | def get_detector(preprocessor=None, estimator=None, random_state=9527): 17 | return DaskDriftDetector(preprocessor=preprocessor, estimator=estimator, random_state=random_state) 18 | 19 | 20 | class DaskDriftDetector(DriftDetector): 21 | @staticmethod 22 | def _copy_data(X): 23 | return X.copy() 24 | -------------------------------------------------------------------------------- /hypernets/core/context.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | 4 | class Context(metaclass=abc.ABCMeta): 5 | 6 | def get(self, key): 7 | raise NotImplementedError 8 | 9 | def put(self, key, value): 10 | raise NotImplementedError 11 | 12 | 13 | class DefaultContext(Context): 14 | 15 | def __init__(self): 16 | super(DefaultContext, self).__init__() 17 | self._map = {} 18 | 19 | def put(self, key, value): 20 | self._map[key] = value 21 | 22 | def get(self, key): 23 | return self._map.get(key) 24 | 25 | # def __getstate__(self): 26 | # states = dict(self.__dict__) 27 | # if '_map' in states: # mark _map as transient 28 | # states['_map'] = {} 29 | # return states 30 | 
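31 | # Usage sketch (illustrative comments only; the key names below are hypothetical): 32 | # ctx = DefaultContext() 33 | # ctx.put('n_trials', 10) 34 | # assert ctx.get('n_trials') == 10 35 | # assert ctx.get('missing_key') is None # backed by dict.get(), so absent keys return None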
-------------------------------------------------------------------------------- /hypernets/experiment/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | from ._experiment import Experiment, ExperimentCallback 8 | from .general import GeneralExperiment 9 | from .compete import CompeteExperiment, SteppedExperiment, StepNames 10 | from ._extractor import ExperimentExtractor, ExperimentMeta, DatasetMeta, StepMeta, \ 11 | StepType, EarlyStoppingStatusMeta, EarlyStoppingConfigMeta, ConfusionMatrixMeta 12 | from ._callback import ConsoleCallback, SimpleNotebookCallback, MLReportCallback, \ 13 | MLEvaluateCallback, ResourceUsageMonitor, ABSExpVisExperimentCallback, ABSExpVisHyperModelCallback, ActionType 14 | from ._maker import make_experiment, default_experiment_callbacks, default_search_callbacks 15 | -------------------------------------------------------------------------------- /docs/source/release_note_025.rst: -------------------------------------------------------------------------------- 1 | Version 0.2.5 2 | ------------- 3 | 4 | We added a few new features in this version: 5 | 6 | * Toolbox: A general computing layer for tabular data 7 | - Provides implementations for pandas, dask and cudf data types 8 | - DefaultToolbox (Numpy + Pandas + Sklearn) 9 | - DaskToolbox (DaskCore + DaskML) 10 | - CumlToolBox (Cupy + Cudf + Cuml) 11 | 12 | 13 | * HyperCtl: A tool package for multi-job management 14 | - Supports sequential jobs with multi-parameter settings 15 | - Supports parallel jobs across multiple remote machines 16 | 17 | 18 | * Export experiment report (.xlsx) 19 | - Includes information on engineered features, ensembled models, evaluation scores, resource usage, etc.
20 | - Generate plots automatically 21 | -------------------------------------------------------------------------------- /hypernets/utils/const.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | TASK_AUTO = 'auto' 8 | TASK_BINARY = 'binary' 9 | TASK_MULTICLASS = 'multiclass' 10 | TASK_REGRESSION = 'regression' 11 | TASK_MULTILABEL = 'multilabel' 12 | 13 | 14 | COLUMNNAME_POSTFIX_DISCRETE = '_discrete' 15 | COLUMNNAME_POSTFIX_CATEGORIZE = '_cat' 16 | 17 | # DATATYPE_TENSOR_FLOAT = 'float32' 18 | # DATATYPE_PREDICT_CLASS = 'int32' 19 | DATATYPE_LABEL = 'int16' 20 | 21 | 22 | SEARCHER_SOO = "soo" 23 | SEARCHER_MOO = "moo" 24 | 25 | COMBINATION_SHUFFLE = "shuffle" 26 | COMBINATION_UNIFORM = "uniform" 27 | COMBINATION_SINGLE_POINT = "single_point" 28 | 29 | DECOMPOSITION_TCHE = "tchebicheff" 30 | DECOMPOSITION_WS = "weighted_sum" 31 | DECOMPOSITION_PBI = "pbi" 32 | -------------------------------------------------------------------------------- /hypernets/tests/hyperctl/test_cli.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | from pathlib import Path 4 | 5 | from hypernets.hyperctl import cli, utils 6 | 7 | SRC_DIR = Path(__file__).parent 8 | 9 | 10 | def test_run_generate_job_specs(): 11 | batch_config_path = (SRC_DIR / "job_template.yml").as_posix() 12 | fd, fp = tempfile.mkstemp(prefix="jobs_spec_", suffix=".json") 13 | os.close(fd) 14 | os.remove(fp) 15 | 16 | cli.run_generate_job_specs(batch_config_path, fp) 17 | fp_ = Path(fp) 18 | 19 | assert fp_.exists() 20 | jobs_spec = utils.load_json(fp) 21 | assert len(jobs_spec['jobs']) == 4 22 | assert 'server' in jobs_spec 23 | assert 'name' in jobs_spec 24 | assert len(jobs_spec['backend']['conf']['machines']) == 2 25 | os.remove(fp_) 26 | -------------------------------------------------------------------------------- /hypernets/core/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | from .search_space import HyperNode, HyperSpace, ParameterSpace, ModuleSpace, \ 6 | Int, Real, Bool, Constant, Choice, MultipleChoice, Dynamic, Cascade, get_default_space 7 | from .ops import HyperInput, Identity, ConnectionSpace, Optional, ModuleChoice, Sequential, Permutation, \ 8 | Repeat, InputChoice, ConnectLooseEnd, Reduction 9 | from .searcher import OptimizeDirection, Searcher 10 | from .callbacks import Callback, FileStorageLoggingCallback, SummaryCallback, \ 11 | EarlyStoppingCallback, EarlyStoppingError, NotebookCallback, ProgressiveCallback 12 | from .trial import Trial, TrialStore, TrialHistory, DiskTrialStore 13 | from .dispatcher import Dispatcher 14 | from .random_state import set_random_state, get_random_state, randint 15 | -------------------------------------------------------------------------------- /hypernets/tabular/cuml_ex/_pseudo_labeling.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | 6 | from hypernets.utils import logging 7 | from ..pseudo_labeling import PseudoLabeling 8 | 9 | logger = logging.get_logger(__name__) 10 | 11 | 12 | class CumlPseudoLabeling(PseudoLabeling): 13 | import cupy as np 14 | 15 | def _filter_by_quantile(self, proba): 16 | """ 17 | cupy does not support *nanquantile* 18 | """ 19 | np = self.np 20 | 21 | q = [] 22 | for i in range(proba.shape[1]): 
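# per class column: keep only the positive probabilities before taking the quantile,
# since cupy lacks *nanquantile*; a column with no positive entries falls back to a
# threshold of 1.0, so effectively no rows are selected from it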
23 | p = proba[:, i] 24 | p = p[p > 0.] 25 | if len(p) > 0: 26 | q.append(np.quantile(p, self.quantile)) 27 | else: 28 | q.append(1.) 29 | selected = (proba >= np.array(q)) 30 | return selected 31 | -------------------------------------------------------------------------------- /hypernets/tests/utils/estimators_test.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from sklearn.datasets import load_iris 4 | from sklearn.tree import DecisionTreeClassifier 5 | 6 | from hypernets.utils import get_tree_importances 7 | 8 | 9 | def test_get_tree_importances(): 10 | X, y = load_iris(return_X_y=True) 11 | rfc = DecisionTreeClassifier().fit(X, y) 12 | print(rfc) 13 | imps_dict = get_tree_importances(rfc) 14 | assert len(imps_dict.keys()) == 4 15 | for c in ['col_1', 'col_2', 'col_3', 'col_0']: 16 | assert c in imps_dict.keys() 17 | 18 | values_type = list(set(map(lambda v: type(v), imps_dict.values()))) 19 | 20 | assert len(values_type) == 1 21 | assert values_type[0] == int or values_type[0] == float # not numpy type 22 | assert json.dumps(imps_dict) # has only python base type 23 | -------------------------------------------------------------------------------- /hypernets/tabular/feature_generators/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | # from ._primitives import CrossCategorical, GeoHashPrimitive, DaskCompatibleHaversine, TfidfPrimitive 6 | # from ._transformers import FeatureGenerationTransformer, is_geohash_installed 7 | 8 | try: 9 | from ._transformers import FeatureGenerationTransformer, is_geohash_installed 10 | 11 | is_feature_generator_ready = True 12 | except ImportError as e: 13 | _msg = f'{e}, install featuretools and try again' 14 | 15 | is_geohash_installed = False 16 | is_feature_generator_ready = False 17 | 18 | from sklearn.base import BaseEstimator as _BaseEstimator 19 | 20 | 21 | class FeatureGenerationTransformer(_BaseEstimator): 22 | def __init__(self, *args, **kwargs): 23 | raise ImportError(_msg) 24 | -------------------------------------------------------------------------------- /hypernets/tests/hyperctl/local_batch.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "local-batch-example", 3 | "job_command": "sleep 100;echo \"finished\"", 4 | "jobs": [ 5 | { 6 | "name": "job1", 7 | "params": { 8 | "learning_rate": 0.1 9 | }, 10 | 11 | "assets":[ 12 | "/tmp/file-a", 13 | "/tmp/dir-a" 14 | ] 15 | },{ 16 | "name": "job2", 17 | "params": { 18 | "learning_rate": 0.2 19 | } 20 | } 21 | ], 22 | "backend": { 23 | "type": "local", 24 | "conf": {} 25 | }, 26 | "scheduler": { 27 | "interval": 5000, 28 | "exit_on_finish": true 29 | }, 30 | "server": { 31 | "host": "localhost", 32 | "port": 8060 33 | } 34 | } -------------------------------------------------------------------------------- /hypernets/dispatchers/process/grpc/proto/proc.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | 4 | package hypernets.dispatchers.process.grpc.proto; 5 | 6 | 7 | service ProcessBroker { 8 | rpc run(stream ProcessRequest) returns (stream DataChunk) {} 9 | rpc download(DownloadRequest) returns (stream DataChunk) {} 10 | } 11 | 12 | message ProcessRequest { 13 | string program = 1; 14 | repeated string args = 2; 15 | string cwd = 3; 16 | int32 buffer_size = 4; 17 | string encoding = 5; 18 | } 19 | 20 | message 
DownloadRequest { 21 | string peer = 1; 22 | string path = 2; 23 | int32 buffer_size = 3; 24 | string encoding = 4; 25 | } 26 | 27 | message DataChunk { 28 | enum DataKind { 29 | IN = 0; 30 | OUT = 1; 31 | ERR = 2; 32 | DATA = 10; 33 | END = 99; 34 | EXCEPTION = 400; 35 | } 36 | DataKind kind = 1; 37 | bytes data = 2; 38 | } 39 | -------------------------------------------------------------------------------- /hypernets/tabular/cuml_ex/_drift_detection.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | from ..drift_detection import FeatureSelectorWithDriftDetection, DriftDetector 6 | 7 | 8 | class CumlFeatureSelectorWithDriftDetection(FeatureSelectorWithDriftDetection): 9 | # parallelizable = False 10 | def _score_features(self, X_merged, y, scorer, cv): 11 | from . import CumlToolBox 12 | X_merged, y = CumlToolBox.to_local(X_merged, y) 13 | return super()._score_features(X_merged, y, scorer, cv) 14 | 15 | @staticmethod 16 | def get_detector(preprocessor=None, estimator=None, random_state=None): 17 | return CumlDriftDetector(preprocessor=preprocessor, estimator=estimator, random_state=random_state) 18 | 19 | 20 | class CumlDriftDetector(DriftDetector): 21 | @staticmethod 22 | def _copy_data(X): 23 | return X.copy() 24 | -------------------------------------------------------------------------------- /docs/source/hypernets.model.rst: -------------------------------------------------------------------------------- 1 | hypernets.model package 2 | ======================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.model.estimator module 8 | -------------------------------- 9 | 10 | .. automodule:: hypernets.model.estimator 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | hypernets.model.hyper\_model module 16 | ----------------------------------- 17 | 18 | .. automodule:: hypernets.model.hyper_model 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | hypernets.model.objectives module 24 | --------------------------------- 25 | 26 | .. automodule:: hypernets.model.objectives 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. 
automodule:: hypernets.model 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /hypernets/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | import sys as sys_ 7 | try: 8 | from packaging.version import Version 9 | except ModuleNotFoundError: 10 | from distutils.version import LooseVersion as Version 11 | 12 | is_os_windows = sys_.platform.find('win') == 0 13 | is_os_darwin = sys_.platform.find('darwin') == 0 14 | is_os_linux = sys_.platform.find('linux') == 0 15 | 16 | from ._doc_lens import DocLens 17 | from ._fsutils import filesystem as fs 18 | from ._tic_tok import tic_toc, report as tic_toc_report, report_as_dataframe as tic_toc_report_as_dataframe 19 | from .common import generate_id, combinations, isnotebook, Counter, to_repr, get_params, context, profile 20 | from .common import load_module 21 | from ._estimators import load_estimator, save_estimator, get_tree_importances 22 | from ._perf import get_perf, dump_perf, load_perf 23 | -------------------------------------------------------------------------------- /docs/source/release_note_030.rst: -------------------------------------------------------------------------------- 1 | Version 0.3.0 2 | ------------- 3 | 4 | We added a few new features in this version: 5 | 6 | * Multi-objective optimization 7 | 8 | * optimization algorithms 9 | - add MOEA/D (Multi-objective Evolutionary Algorithm Based on Decomposition) 10 | - add Tchebycheff, Weighted Sum, and Penalty-based Boundary Intersection (PBI) decomposition approaches 11 | - add shuffle crossover, uniform crossover, and single-point crossover strategies for GA-based algorithms 12 | - automatically normalize objectives of different dimensions 13 | - automatically convert maximization problems to minimization problems 14 | - add NSGA-II (Non-dominated Sorting Genetic Algorithm II) 15 | - add R-NSGA-II (NSGA-II with the r-dominance relation for multi-criteria decision making) 16 | 17 | * built-in objectives 18 | - number of features 19 | - prediction performance 20 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /hypernets/dispatchers/cluster/grpc/proto/spec.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package hypernets.dispatchers.cluster.grpc.proto; 4 | 5 | 6 | service SearchDriver { 7 | rpc ping(PingMessage) returns (PingMessage){} 8 | rpc search(stream SearchRequest) returns (stream SearchResponse){} 9 | } 10 | 11 | message PingMessage{ 12 | string message = 1; 13 | } 14 | 15 | message SearchRequest { 16 | string search_id = 1; 17 | string trial_no = 2; 18 | string space_id = 3; 19 | bool success = 4; 20 | float reward = 5; 21 | string message = 6; 22 | } 23 | 24 | 25 | message SearchResponse { 26 | enum SearchResponseCode{ 27 | OK = 0; 28 | WAITING = 11; 29 | FINISHED = 12; 30 | FAILED = 99; 31 | } 32 | 33 | SearchResponseCode code = 1; 34 | string search_id = 2; 35 | string trial_no = 3; 36 | string space_id = 4; 37 | string space_file = 5; 38 | string model_file = 6; 39 | } 40 | 41 | -------------------------------------------------------------------------------- /hypernets/tabular/dask_ex/_model_selection.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | import dask.array as da 6 | import dask.dataframe as dd 7 | from sklearn import model_selection as sk_sel 8 | 9 | 10 | def _fake_X_y(X, y): 11 | if isinstance(X, dd.DataFrame): 12 | X = X.index.to_frame() 13 | X = X.set_index(0)  # set_index is not in-place; assign the returned frame 14 | X = X.compute() 15 | 16 | if isinstance(y, (dd.Series, dd.DataFrame, da.Array)): 17 | y = y.compute() 18 | 19 | return X, y 20 | 21 | 22 | class FakeDaskKFold(sk_sel.KFold): 23 | def split(self, X, y=None, groups=None): 24 | X, y = _fake_X_y(X, y) 25 | yield from super().split(X, y, groups=groups) 26 | 27 | 28 | class FakeDaskStratifiedKFold(sk_sel.StratifiedKFold): 29 | def split(self, X, y, groups=None): 30 | assert y is not None 31 | 32 | X, y = _fake_X_y(X, y) 33 | yield from super().split(X, y, groups=groups) 34 | -------------------------------------------------------------------------------- /docs/source/hypernets.dispatchers.cluster.grpc.proto.rst: -------------------------------------------------------------------------------- 1 | hypernets.dispatchers.cluster.grpc.proto package 2 | ================================================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.dispatchers.cluster.grpc.proto.spec\_pb2 module 8 | --------------------------------------------------------- 9 | 10 | .. automodule:: hypernets.dispatchers.cluster.grpc.proto.spec_pb2 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | hypernets.dispatchers.cluster.grpc.proto.spec\_pb2\_grpc module 16 | --------------------------------------------------------------- 17 | 18 | .. automodule:: hypernets.dispatchers.cluster.grpc.proto.spec_pb2_grpc 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | ..
automodule:: hypernets.dispatchers.cluster.grpc.proto 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/source/hypernets.dispatchers.process.grpc.proto.rst: -------------------------------------------------------------------------------- 1 | hypernets.dispatchers.process.grpc.proto package 2 | ================================================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.dispatchers.process.grpc.proto.proc\_pb2 module 8 | --------------------------------------------------------- 9 | 10 | .. automodule:: hypernets.dispatchers.process.grpc.proto.proc_pb2 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | hypernets.dispatchers.process.grpc.proto.proc\_pb2\_grpc module 16 | --------------------------------------------------------------- 17 | 18 | .. automodule:: hypernets.dispatchers.process.grpc.proto.proc_pb2_grpc 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: hypernets.dispatchers.process.grpc.proto 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /hypernets/searchers/random_searcher.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | from ..core.searcher import Searcher, OptimizeDirection 6 | 7 | 8 | class RandomSearcher(Searcher): 9 | def __init__(self, space_fn, optimize_direction=OptimizeDirection.Minimize, space_sample_validation_fn=None): 10 | Searcher.__init__(self, space_fn, optimize_direction, space_sample_validation_fn=space_sample_validation_fn) 11 | 12 | @property 13 | def parallelizable(self): 14 | return True 15 | 16 | def sample(self, space_options=None): 17 | sample = self._sample_and_check(self._random_sample) 18 | return sample 19 | 20 | def get_best(self): 21 | raise NotImplementedError 22 | 23 | def update_result(self, space, result): 24 | pass 25 | 26 | def reset(self): 27 | raise NotImplementedError 28 | 29 | def export(self): 30 | raise NotImplementedError 31 | -------------------------------------------------------------------------------- /docs/source/hypernets.dispatchers.predict.grpc.proto.rst: -------------------------------------------------------------------------------- 1 | hypernets.dispatchers.predict.grpc.proto package 2 | ================================================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.dispatchers.predict.grpc.proto.predict\_pb2 module 8 | ------------------------------------------------------------ 9 | 10 | .. automodule:: hypernets.dispatchers.predict.grpc.proto.predict_pb2 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | hypernets.dispatchers.predict.grpc.proto.predict\_pb2\_grpc module 16 | ------------------------------------------------------------------ 17 | 18 | .. automodule:: hypernets.dispatchers.predict.grpc.proto.predict_pb2_grpc 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. 
automodule:: hypernets.dispatchers.predict.grpc.proto 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /hypernets/tabular/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | from ._base import get_tool_box, register_toolbox, register_transformer, tb_transformer 6 | from .toolbox import ToolBox 7 | 8 | register_toolbox(ToolBox, aliases=('default', 'pandas')) 9 | 10 | try: 11 | import dask.dataframe as dd 12 | 13 | import dask_ml 14 | from .dask_ex import DaskToolBox 15 | 16 | register_toolbox(DaskToolBox, pos=0, aliases=('dask',)) 17 | is_dask_installed = True 18 | except ImportError: 19 | # import traceback 20 | # traceback.print_exc() 21 | is_dask_installed = False 22 | 23 | try: 24 | import cupy 25 | import cudf 26 | import cuml 27 | from .cuml_ex import CumlToolBox 28 | 29 | register_toolbox(CumlToolBox, pos=0, aliases=('cuml', 'rapids')) 30 | is_cuml_installed = True 31 | except ImportError: 32 | # import traceback 33 | # 34 | # traceback.print_exc() 35 | is_cuml_installed = False 36 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/00-bug-issue.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Issue 3 | about: Use this template for reporting a bug 4 | labels: 'type:bug' 5 | 6 | --- 7 | 8 | Please make sure that this is a bug. 9 | 10 | **System information** 11 | - OS Platform and Distribution (e.g., CentOS 7.6): 12 | - Python version: 13 | - Hypernets version: 14 | - Other Python packages (run `pip list`): 15 | 16 | 17 | **Describe the current behavior** 18 | 19 | 20 | **Describe the expected behavior** 21 | 22 | 23 | **Standalone code to reproduce the issue** 24 | Provide a reproducible test case that is the bare minimum necessary to generate 25 | the problem. If possible, please share a link to a Jupyter notebook. 26 | 27 | 28 | **Are you willing to submit a PR? (Yes/No)** 29 | 30 | 31 | **Other info / logs** 32 | Include any logs or source code that would be helpful to diagnose the problem. 33 | If including tracebacks, please include the full traceback. Large logs and files 34 | should be attached. 35 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to Hypernets 2 | ===================== 3 | 4 | Hypernets: A General Automated Machine Learning Framework 5 | ######################################################### 6 | 7 | Hypernets is a general AutoML framework that can address various needs such as feature engineering, hyperparameter optimization, and neural architecture search, helping users build end-to-end automated machine learning pipelines. 8 | 9 | ..
toctree:: 10 | :maxdepth: 2 11 | :caption: Home: 12 | 13 | Overview 14 | Quick Start 15 | Search Space 16 | Searchers 17 | HyperModels 18 | Neural Architecture Search 19 | Experiment 20 | Hyperctl 21 | API 22 | Release Notes 23 | FAQ 24 | 25 | Indices and tables 26 | ================== 27 | 28 | * :ref:`genindex` 29 | * :ref:`modindex` 30 | * :ref:`search` 31 | -------------------------------------------------------------------------------- /docs/source/hypernets.tabular.ensemble.rst: -------------------------------------------------------------------------------- 1 | hypernets.tabular.ensemble package 2 | ================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.tabular.ensemble.base\_ensemble module 8 | ------------------------------------------------ 9 | 10 | .. automodule:: hypernets.tabular.ensemble.base_ensemble 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | hypernets.tabular.ensemble.stacking module 16 | ------------------------------------------ 17 | 18 | .. automodule:: hypernets.tabular.ensemble.stacking 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | hypernets.tabular.ensemble.voting module 24 | ---------------------------------------- 25 | 26 | .. automodule:: hypernets.tabular.ensemble.voting 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: hypernets.tabular.ensemble 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /docs/source/hypernets.dispatchers.predict.grpc.rst: -------------------------------------------------------------------------------- 1 | hypernets.dispatchers.predict.grpc package 2 | ========================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | hypernets.dispatchers.predict.grpc.proto 11 | 12 | Submodules 13 | ---------- 14 | 15 | hypernets.dispatchers.predict.grpc.predict\_client module 16 | --------------------------------------------------------- 17 | 18 | .. automodule:: hypernets.dispatchers.predict.grpc.predict_client 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | hypernets.dispatchers.predict.grpc.predict\_service module 24 | ---------------------------------------------------------- 25 | 26 | .. automodule:: hypernets.dispatchers.predict.grpc.predict_service 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: hypernets.dispatchers.predict.grpc 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /docs/source/hypernets.dispatchers.cluster.grpc.rst: -------------------------------------------------------------------------------- 1 | hypernets.dispatchers.cluster.grpc package 2 | ========================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | hypernets.dispatchers.cluster.grpc.proto 11 | 12 | Submodules 13 | ---------- 14 | 15 | hypernets.dispatchers.cluster.grpc.search\_driver\_client module 16 | ---------------------------------------------------------------- 17 | 18 | .. 
automodule:: hypernets.dispatchers.cluster.grpc.search_driver_client 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | hypernets.dispatchers.cluster.grpc.search\_driver\_service module 24 | ----------------------------------------------------------------- 25 | 26 | .. automodule:: hypernets.dispatchers.cluster.grpc.search_driver_service 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: hypernets.dispatchers.cluster.grpc 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /hypernets/tabular/dask_ex/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | from ._toolbox import DaskToolBox 6 | 7 | try: 8 | import dask_ml.preprocessing as dm_pre 9 | import dask_ml.model_selection as dm_sel 10 | 11 | from dask_ml.impute import SimpleImputer 12 | from dask_ml.compose import ColumnTransformer 13 | from dask_ml.preprocessing import \ 14 | LabelEncoder, OneHotEncoder, OrdinalEncoder, \ 15 | StandardScaler, MinMaxScaler, RobustScaler 16 | 17 | from ._transformers import \ 18 | SafeOneHotEncoder, TruncatedSVD, \ 19 | MaxAbsScaler, SafeOrdinalEncoder, DataInterceptEncoder, \ 20 | CallableAdapterEncoder, DataCacher, CacheCleaner, \ 21 | LgbmLeavesEncoder, CategorizeEncoder, MultiKBinsDiscretizer, \ 22 | LocalizedTfidfVectorizer, \ 23 | MultiVarLenFeatureEncoder, DataFrameWrapper 24 | 25 | from ..sklearn_ex import PassThroughEstimator 26 | 27 | dask_ml_available = True 28 | except ImportError: 29 | # Not found dask_ml 30 | dask_ml_available = False 31 | -------------------------------------------------------------------------------- /docs/source/hypernets.dispatchers.process.grpc.rst: -------------------------------------------------------------------------------- 1 | hypernets.dispatchers.process.grpc package 2 | ========================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | hypernets.dispatchers.process.grpc.proto 11 | 12 | Submodules 13 | ---------- 14 | 15 | hypernets.dispatchers.process.grpc.process\_broker\_client module 16 | ----------------------------------------------------------------- 17 | 18 | .. automodule:: hypernets.dispatchers.process.grpc.process_broker_client 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | hypernets.dispatchers.process.grpc.process\_broker\_service module 24 | ------------------------------------------------------------------ 25 | 26 | .. automodule:: hypernets.dispatchers.process.grpc.process_broker_service 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. 
automodule:: hypernets.dispatchers.process.grpc 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /hypernets/tests/experiment/run_export_experiment_report.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from sklearn.model_selection import train_test_split 4 | 5 | from hypernets.examples.plain_model import PlainModel, PlainSearchSpace 6 | from hypernets.experiment import make_experiment 7 | from hypernets.tabular.datasets import dsutils 8 | 9 | 10 | def main(): 11 | df = dsutils.load_boston() 12 | 13 | df_train, df_eval = train_test_split(df, test_size=0.2) 14 | search_space = PlainSearchSpace(enable_lr=False, enable_nn=False, enable_dt=False, enable_dtr=True) 15 | 16 | experiment = make_experiment(PlainModel, df_train, 17 | target='target', 18 | search_space=search_space, 19 | log_level='info', 20 | random_state=8086, 21 | report_render='excel') 22 | estimator = experiment.run(max_trials=10) 23 | print(estimator) 24 | 25 | 26 | if __name__ == '__main__': 27 | t = time.time() 28 | main() 29 | print(time.time() - t) 30 | -------------------------------------------------------------------------------- /hypernets/dispatchers/run_broker.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import argparse 3 | 4 | from hypernets.dispatchers.process.grpc.process_broker_service import serve 5 | 6 | 7 | def main(): 8 | parser = argparse.ArgumentParser('run HyperNets process broker.') 9 | parser.add_argument('--host', '-host', 10 | default='0.0.0.0', 11 | help='broker hostname or ip address' 12 | + ', default "0.0.0.0"') 13 | parser.add_argument('--port', '-port', 14 | type=int, default=8010, 15 | help='broker tcp port, default 8010') 16 | parser.add_argument('--workers', '-workers', 17 | type=int, default=10, 18 | help='max worker count, default 10') 19 | 20 | args = parser.parse_args() 21 | 22 | server, _ = serve(f'{args.host}:{args.port}', args.workers) 23 | server.wait_for_termination() 24 | 25 | 26 | if __name__ == '__main__': 27 | try: 28 | main() 29 | print('done') 30 | except KeyboardInterrupt as e: 31 | print(e) 32 | -------------------------------------------------------------------------------- /hypernets/tabular/cuml_ex/_data_hasher.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | import cupy 6 | import cudf 7 | 8 | from ..data_hasher import DataHasher 9 | 10 | 11 | class CumlDataHasher(DataHasher): 12 | 13 | def _iter_data(self, data): 14 | if isinstance(data, cudf.DataFrame): 15 | yield from self._iter_cudf_dataframe(data) 16 | elif isinstance(data, cudf.Series): 17 | yield from self._iter_cudf_dataframe(data.to_frame()) 18 | elif isinstance(data, cupy.ndarray): 19 | yield from self._iter_cudf_dataframe(cudf.DataFrame(data), yield_columns=False) 20 | else: 21 | yield from super()._iter_data(data) 22 | 23 | @staticmethod 24 | def _iter_cudf_dataframe(df, yield_columns=True): 25 | if yield_columns: 26 | yield ','.join(map(str, df.columns.tolist())).encode('utf-8') 27 | 28 | if hasattr(df, 'hash_columns'): 29 | hashed = df.hash_columns() 30 | else: 31 | hashed = df.hash_values().values 32 | # hashed = cudf.DataFrame(hashed).T.hash_columns() 33 | yield cupy.asnumpy(hashed) 34 | -------------------------------------------------------------------------------- 
/.github/workflows/dist-builder.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. 2 | # They are provided by a third-party and are governed by 3 | # separate terms of service, privacy policy, and support 4 | # documentation. 5 | 6 | name: Build Python distribution 7 | 8 | on: workflow_dispatch 9 | 10 | permissions: 11 | contents: read 12 | 13 | jobs: 14 | build_dist: 15 | runs-on: ubuntu-latest 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | python-version: ["3.8", ] 20 | 21 | steps: 22 | - uses: actions/checkout@v3 23 | - name: Set up Python 24 | uses: actions/setup-python@v3 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | 28 | - name: Install dependencies 29 | run: | 30 | python -m pip install --upgrade pip 31 | pip install "setuptools>57.0" wheel 32 | pip list 33 | 34 | - name: Build package 35 | run: | 36 | python setup.py sdist bdist_wheel 37 | 38 | - uses: actions/upload-artifact@v3 39 | with: 40 | name: packages 41 | path: dist/* 42 | -------------------------------------------------------------------------------- /hypernets/dispatchers/run_predict.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import argparse 3 | 4 | from hypernets.dispatchers.predict.predict_helper import PredictHelper 5 | 6 | 7 | def main(): 8 | parser = argparse.ArgumentParser('run predict.') 9 | parser.add_argument('--server', '-server', 10 | default='127.0.0.1:8030', 11 | help='predict server address, separated by comma') 12 | parser.add_argument('--chunk-size', '-chunk-size', 13 | type=int, default=1000, 14 | help='chunk line number') 15 | parser.add_argument('data_file', 16 | help='data file path') 17 | parser.add_argument('result_file', 18 | help='result file path') 19 | args = parser.parse_args() 20 | 21 | servers = list(filter(lambda s: len(s) > 0, args.server.split(','))) 22 | ph = PredictHelper(servers) 23 | ph.predict(args.data_file, args.result_file, args.chunk_size) 24 | 25 | 26 | if __name__ == '__main__': 27 | try: 28 | main() 29 | print('done') 30 | except KeyboardInterrupt as e: 31 | print(e) 32 | -------------------------------------------------------------------------------- /docs/source/hypernets.dispatchers.process.rst: -------------------------------------------------------------------------------- 1 | hypernets.dispatchers.process package 2 | ===================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | hypernets.dispatchers.process.grpc 11 | 12 | Submodules 13 | ---------- 14 | 15 | hypernets.dispatchers.process.grpc\_process module 16 | -------------------------------------------------- 17 | 18 | .. automodule:: hypernets.dispatchers.process.grpc_process 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | hypernets.dispatchers.process.local\_process module 24 | --------------------------------------------------- 25 | 26 | .. automodule:: hypernets.dispatchers.process.local_process 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | hypernets.dispatchers.process.ssh\_process module 32 | ------------------------------------------------- 33 | 34 | .. automodule:: hypernets.dispatchers.process.ssh_process 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | Module contents 40 | --------------- 41 | 42 | .. 
automodule:: hypernets.dispatchers.process 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | -------------------------------------------------------------------------------- /hypernets/tabular/evaluator/h2o.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | from . import BaseEstimator 8 | import h2o 9 | from h2o.automl import H2OAutoML 10 | 11 | 12 | class H2OEstimator(BaseEstimator): 13 | def __init__(self, task, **kwargs): 14 | super(H2OEstimator, self).__init__(task) 15 | self.name = 'H2O AutoML' 16 | self.kwargs = kwargs 17 | self.estimator = None 18 | 19 | def train(self, X, y, X_test): 20 | h2o.init() 21 | target = '__tabular_toolbox_target__' 22 | X.insert(0, target, y) 23 | train = h2o.H2OFrame(X) 24 | x_cols = train.columns 25 | x_cols.remove(target) 26 | train[target] = train[target].asfactor() 27 | self.estimator = H2OAutoML(max_models=20, seed=1) 28 | self.estimator.train(x=x_cols, y=target, training_frame=train) 29 | 30 | def predict_proba(self, X): 31 | x = h2o.H2OFrame(X) 32 | preds = self.estimator.predict(x) 33 | preds = preds[1:].as_data_frame().values  # drop the leading 'predict' label column, keep class probabilities 34 | return preds 35 | 36 | def predict(self, X): 37 | proba = self.predict_proba(X) 38 | return self.proba2predict(proba) 39 | -------------------------------------------------------------------------------- /docs/source/hypernets.dispatchers.cluster.rst: -------------------------------------------------------------------------------- 1 | hypernets.dispatchers.cluster package 2 | ===================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | hypernets.dispatchers.cluster.grpc 11 | 12 | Submodules 13 | ---------- 14 | 15 | hypernets.dispatchers.cluster.cluster module 16 | -------------------------------------------- 17 | 18 | .. automodule:: hypernets.dispatchers.cluster.cluster 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | hypernets.dispatchers.cluster.driver\_dispatcher module 24 | ------------------------------------------------------- 25 | 26 | .. automodule:: hypernets.dispatchers.cluster.driver_dispatcher 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | hypernets.dispatchers.cluster.executor\_dispatcher module 32 | --------------------------------------------------------- 33 | 34 | .. automodule:: hypernets.dispatchers.cluster.executor_dispatcher 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | Module contents 40 | --------------- 41 | 42 | ..
automodule:: hypernets.dispatchers.cluster 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | -------------------------------------------------------------------------------- /hypernets/tests/utils/tic_toc_test.py: -------------------------------------------------------------------------------- 1 | from hypernets.utils import tic_toc, tic_toc_report_as_dataframe 2 | from hypernets.tabular.datasets import dsutils 3 | 4 | 5 | @tic_toc(details=True) 6 | def fn_foo(a1, a2, k1=None, k2='foo'): 7 | pass 8 | 9 | 10 | class ClsBar: 11 | @tic_toc(details=False) 12 | def no_args(self): 13 | pass 14 | 15 | @tic_toc(details=True) 16 | def method_bar(self, a1, a2, k1=None, k2='foo'): 17 | pass 18 | 19 | 20 | def foo(): 21 | fn_foo(1, 2, k1='lalala') 22 | fn_foo('dict', {'a': 'aaa', 'b': 345}) 23 | fn_foo('list', list(range(5))) 24 | fn_foo('big-list', list(range(100))) 25 | fn_foo('big-range', range(100)) 26 | fn_foo('df', dsutils.load_blood()) 27 | fn_foo('ndarray', dsutils.load_blood().values) 28 | fn_foo('fn', foo) 29 | fn_foo('lambda', lambda: print('lambda')) 30 | fn_foo(['aaa', 3, 4, ['aaa', 'bbb']], 2, k2='lalala') 31 | 32 | 33 | def cls_foo(): 34 | x = ClsBar() 35 | x.method_bar(1, 2, k1='foo') 36 | x.method_bar('dict', {'a': 'aaa', 'b': 345}) 37 | x.no_args() 38 | 39 | 40 | def test_tic_toc(): 41 | foo() 42 | cls_foo() 43 | 44 | df = tic_toc_report_as_dataframe() 45 | print(df) 46 | -------------------------------------------------------------------------------- /docs/source/hypermodels.rst: -------------------------------------------------------------------------------- 1 | HyperModel 2 | ============= 3 | 4 | HyperModel is an abstract class; a dedicated HyperModel needs to be implemented for each framework or domain. A HyperModel explores hyper-parameter samples from the Searcher, fits and evaluates Estimators, and then rewards the metric score back to the Searcher for optimization. The figure below shows the HyperModel search sequence. 5 | 6 | .. image:: images/hyper_model_search_sequence.png 7 | :width: 600 8 | :align: center 9 | :alt: search sequence 10 | 11 | 12 | Customize HyperModel 13 | ------------------------- 14 | 15 | To customize HyperModel, two components are required: 16 | 17 | * HyperModel: subclass of *hypernets.model.HyperModel*, creates a new Estimator instance from a searched space sample, and loads trained estimators from storage. 18 | 19 | * Estimator: subclass of *hypernets.model.Estimator*, the core component for model fitting/evaluation/prediction/persistence. 20 | 21 | You can reference *hypernets.examples.plain_model.PlainModel* and *hypernets.examples.plain_model.PlainEstimator* as a starting point. See `DeepTables `_, `HyperGBM `_, `HyperKeras `_ for more details. 22 | -------------------------------------------------------------------------------- /docs/source/hypernets.experiment.rst: -------------------------------------------------------------------------------- 1 | hypernets.experiment package 2 | ============================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.experiment.cfg module 8 | ------------------------------- 9 | 10 | .. automodule:: hypernets.experiment.cfg 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | hypernets.experiment.compete module 16 | ----------------------------------- 17 | 18 | .. automodule:: hypernets.experiment.compete 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | hypernets.experiment.general module 24 | ----------------------------------- 25 | 26 | ..
automodule:: hypernets.experiment.general 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | hypernets.experiment.job module 32 | ------------------------------- 33 | 34 | .. automodule:: hypernets.experiment.job 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | hypernets.experiment.report module 40 | ---------------------------------- 41 | 42 | .. automodule:: hypernets.experiment.report 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | Module contents 48 | --------------- 49 | 50 | .. automodule:: hypernets.experiment 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | -------------------------------------------------------------------------------- /hypernets/tests/core/mutable_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | 6 | from hypernets.core.ops import Identity 7 | from hypernets.core.search_space import * 8 | 9 | 10 | class Test_Mutable: 11 | def test_scope(self): 12 | with HyperSpace().as_default(): 13 | id1 = Identity() 14 | id2 = Identity(name='named_id') 15 | id3 = Identity() 16 | id4 = Identity(name='named_id_2') 17 | 18 | assert id1.name == 'Module_Identity_1' 19 | assert id1.id == 'Module_Identity_1' 20 | 21 | assert id2.name == 'named_id' 22 | assert id2.id == 'ID_named_id' 23 | 24 | assert id3.name == 'Module_Identity_2' 25 | assert id3.id == 'Module_Identity_2' 26 | 27 | assert id4.name == 'named_id_2' 28 | assert id4.id == 'ID_named_id_2' 29 | 30 | hp1 = Int(0, 100) 31 | hp2 = Real(0, 10.0) 32 | hp3 = Choice([1, 2, 3, 4]) 33 | 34 | assert hp1.name == 'Param_Int_1' 35 | assert hp1.id == 'Param_Int_1' 36 | 37 | assert hp2.name == 'Param_Real_1' 38 | assert hp2.id == 'Param_Real_1' 39 | 40 | assert hp3.name == 'Param_Choice_1' 41 | assert hp3.id == 'Param_Choice_1' 42 | -------------------------------------------------------------------------------- /hypernets/tests/trial/trial_store_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | from hypernets.core.trial import * 6 | from hypernets.core.search_space import * 7 | from hypernets.core.ops import * 8 | from hypernets.tests import test_output_dir 9 | 10 | 11 | class Test_TrialStore(): 12 | def get_space(self): 13 | space = HyperSpace() 14 | with space.as_default(): 15 | id1 = Identity(p1=Choice([1, 2]), p2=Int(1, 100)) 16 | id2 = Identity(p3=Real(0, 1, step=0.2))(id1) 17 | id3 = Identity(p4=Dynamic(lambda p5: p5 * 3, p5=Choice([2, 4, 8])))(id2) 18 | return space 19 | 20 | def test_basic(self): 21 | store = DiskTrialStore(f'{test_output_dir}/trial_store') 22 | dataset_id = 'test_dataset' 23 | sample = self.get_space() 24 | sample.random_sample() 25 | 26 | trial = Trial(sample, 1, 0.99, 100) 27 | store.put(dataset_id, trial) 28 | store.reset() 29 | 30 | trial_get = store.get(dataset_id, sample) 31 | assert trial.trial_no == 1 32 | assert trial.reward == 0.99 33 | assert trial.elapsed == 100 34 | assert trial.space_sample.vectors == trial_get.space_sample.vectors 35 | 36 | trials = store.get_all(dataset_id, sample.signature) 37 | assert trials 38 | -------------------------------------------------------------------------------- /hypernets/tests/hyperctl/remote_batch.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "remote-batch-example", 3 | "job_command": "sleep 3;echo \"finished\"", 4 | "jobs": [ 5 | { 6 | "name": 
"job1", 7 | "params": { 8 | "learning_rate": 0.1 9 | } 10 | },{ 11 | "name": "job2", 12 | "params": { 13 | "learning_rate": 0.2 14 | } 15 | } 16 | ], 17 | "backend": { 18 | "type": "remote", 19 | "machines": [ 20 | { 21 | "connection": { 22 | "hostname": "host1", 23 | "username": "hyperctl", 24 | "password": "hyperctl" 25 | }, 26 | "environments": { 27 | "JAVA_HOME": "/usr/local/jdk" 28 | } 29 | }, 30 | { 31 | "connection":{ 32 | "hostname": "host2", 33 | "username": "hyperctl", 34 | "password": "hyperctl" 35 | } 36 | } 37 | ] 38 | }, 39 | "scheduler": { 40 | "interval": 5000, 41 | "exit_on_finish": false 42 | }, 43 | "server": { 44 | "host": "localhost", 45 | "port": 8061 46 | } 47 | } -------------------------------------------------------------------------------- /hypernets/tabular/cuml_ex/_ensemble.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | import cudf 6 | import cupy 7 | 8 | from hypernets.tabular.ensemble import GreedyEnsemble 9 | from ._transformer import Localizable, as_local_if_possible, copy_attrs_as_local 10 | 11 | 12 | class CumlGreedyEnsemble(GreedyEnsemble, Localizable): 13 | np = cupy 14 | 15 | @staticmethod 16 | def _to_local(y): 17 | if isinstance(y, cupy.ndarray): 18 | y = cupy.asnumpy(y) 19 | elif isinstance(y, cudf.Series): 20 | y = y.to_pandas() 21 | 22 | return y 23 | 24 | def _score(self, y_true, y_preds): 25 | y_true = self._to_local(y_true) 26 | y_preds = list(map(self._to_local, y_preds)) 27 | 28 | r = super()._score(y_true, y_preds) 29 | return r 30 | 31 | def as_local(self): 32 | estimators = list(map(as_local_if_possible, self.estimators)) 33 | target = GreedyEnsemble(estimators=estimators, task=self.task, need_fit=self.need_fit, 34 | n_folds=self.n_folds, method=self.method, random_state=self.random_state, 35 | scoring=self.scoring, ensemble_size=self.ensemble_size) 36 | copy_attrs_as_local(self, target, 'weights_', 'scores_', 'hits_', 'best_stack_') 37 | return target 38 | -------------------------------------------------------------------------------- /hypernets/dispatchers/predict/grpc/predict_client.py: -------------------------------------------------------------------------------- 1 | import grpc 2 | 3 | from hypernets.dispatchers.predict.grpc.proto import predict_pb2_grpc 4 | from hypernets.dispatchers.predict.grpc.proto.predict_pb2 import PredictRequest 5 | from hypernets.utils import logging 6 | 7 | logger = logging.get_logger(__name__) 8 | 9 | 10 | class PredictClient(object): 11 | 12 | def __init__(self, server): 13 | super(PredictClient, self).__init__() 14 | self.channel = grpc.insecure_channel(server) 15 | self.stub = predict_pb2_grpc.PredictServiceStub(self.channel) 16 | 17 | self.server = server 18 | self._closed = False 19 | 20 | def __del__(self): 21 | self.close() 22 | 23 | def close(self): 24 | if not self._closed: 25 | self.channel.close() 26 | 27 | def predict(self, data_file, result_file): 28 | try: 29 | request = PredictRequest(data_file=data_file, result_file=result_file) 30 | response = self.stub.predict(request) 31 | code = response.code 32 | return code 33 | except Exception as e: 34 | import traceback 35 | msg = f'[Predict {self.server}] {e.__class__.__name__}:\n' 36 | logger.error(msg + traceback.format_exc()) 37 | 38 | return 98 if isinstance(e, grpc.RpcError) else 99 39 | -------------------------------------------------------------------------------- /hypernets/tests/discriminators/base_test.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | from hypernets.discriminators import get_previous_trials_scores, get_percentile_score 7 | 8 | from . import history, group_id, group_id2 9 | 10 | 11 | def test_base(): 12 | ts = get_previous_trials_scores(history, 0, 9, group_id) 13 | assert ts.shape == (5, 10) 14 | ts = get_previous_trials_scores(history, 0, 8, group_id) 15 | assert ts.shape == (6, 9) 16 | ts2 = get_previous_trials_scores(history, 0, 9, group_id2) 17 | assert ts2.shape == (1, 10) 18 | 19 | def get_0_100_50_percentile_score(n_step, sign=-1): 20 | s1 = get_percentile_score(history, n_step, group_id, 0, sign) 21 | s2 = get_percentile_score(history, n_step, group_id, 100, sign) 22 | s3 = get_percentile_score(history, n_step, group_id, 50, sign) 23 | return s1, s2, s3 24 | 25 | p1 = get_0_100_50_percentile_score(0) 26 | assert p1 == (0.9, 0.9, 0.9) 27 | 28 | p2 = get_0_100_50_percentile_score(1) 29 | assert p2 == (0.85, 0.8, 0.85) 30 | 31 | p3 = get_0_100_50_percentile_score(5) 32 | assert p3 == (0.45, 0.4, 0.425) 33 | 34 | p4 = get_0_100_50_percentile_score(9) 35 | assert p4 == (0.25, 0.21, 0.23) 36 | 37 | p5 = get_0_100_50_percentile_score(9, 1) 38 | assert p5 == (0.21, 0.25, 0.23) 39 | -------------------------------------------------------------------------------- /hypernets/searchers/playback_searcher.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | from ..core import TrialHistory 6 | from ..core.callbacks import EarlyStoppingError 7 | from ..core.searcher import Searcher, OptimizeDirection 8 | 9 | 10 | class PlaybackSearcher(Searcher): 11 | def __init__(self, history: TrialHistory, top_n=None, reverse=False, 12 | optimize_direction=OptimizeDirection.Minimize): 13 | assert history is not None 14 | assert len(history.trials) > 0 15 | 16 | self.history = history 17 | self.top_n = top_n if top_n is not None else len(history.trials) 18 | self.samples = [t.space_sample for t in self.history.get_top(self.top_n)] 19 | self.index = 0 20 | self.reverse = reverse 21 | 22 | if reverse: 23 | self.samples.reverse() 24 | 25 | super(PlaybackSearcher, self).__init__(None, use_meta_learner=False, optimize_direction=optimize_direction) 26 | 27 | @property 28 | def parallelizable(self): 29 | return True 30 | 31 | def sample(self, space_options=None): 32 | if self.index >= len(self.samples): 33 | raise EarlyStoppingError('no more samples.') 34 | sample = self.samples[self.index] 35 | self.index += 1 36 | return sample 37 | 38 | def update_result(self, space, result): 39 | pass 40 | -------------------------------------------------------------------------------- /hypernets/tabular/evaluator/tpot.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | from tpot import TPOTClassifier, TPOTRegressor 8 | from . 
import BaseEstimator 9 | from ..column_selector import column_object_category_bool 10 | from ..sklearn_ex import SafeOrdinalEncoder 11 | 12 | 13 | class TpotEstimator(BaseEstimator): 14 | def __init__(self, task, **kwargs): 15 | super(TpotEstimator, self).__init__(task) 16 | if task == 'regression': 17 | self.tpot = TPOTRegressor(**kwargs) 18 | else: 19 | self.tpot = TPOTClassifier(**kwargs) 20 | self.name = 'tpot' 21 | self.label_encoder = None 22 | self.obj_cols = None 23 | 24 | def train(self, X, y, X_test): 25 | self.obj_cols = column_object_category_bool(X) 26 | self.label_encoder = SafeOrdinalEncoder() 27 | X[self.obj_cols] = self.label_encoder.fit_transform(X[self.obj_cols]) 28 | self.tpot.fit(X, y) 29 | 30 | def predict_proba(self, X): 31 | X[self.obj_cols] = self.label_encoder.transform(X[self.obj_cols]) 32 | proba = self.tpot.predict_proba(X) 33 | print(f'proba.shape:{proba.shape}') 34 | return proba 35 | 36 | def predict(self, X): 37 | X[self.obj_cols] = self.label_encoder.transform(X[self.obj_cols]) 38 | return self.tpot.predict(X) 39 | -------------------------------------------------------------------------------- /hypernets/tabular/dask_ex/_data_hasher.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | import dask.array as da 6 | import dask.dataframe as dd 7 | 8 | from ..data_hasher import DataHasher 9 | 10 | 11 | class DaskDataHasher(DataHasher): 12 | 13 | def _iter_data(self, data): 14 | if isinstance(data, dd.DataFrame): 15 | yield from self._iter_dask_dataframe(data) 16 | elif isinstance(data, dd.Series): 17 | yield from self._iter_dask_dataframe(data.to_frame()) 18 | elif isinstance(data, da.Array): 19 | yield from self._iter_dask_array(data) 20 | else: 21 | yield from super()._iter_data(data) 22 | 23 | @staticmethod 24 | def _iter_dask_dataframe(df): 25 | yield ','.join(map(str, df.columns.tolist())).encode('utf-8') 26 | 27 | # x = df.map_partitions(DataHasher._hash_pd_dataframe, meta=(None, 'u8')).compute() 28 | name = 'hashed' 29 | x = df.map_partitions(lambda part: DataHasher._hash_pd_dataframe(part).to_frame(name), 30 | meta={name: 'u8'}).compute() 31 | yield x.values 32 | 33 | @staticmethod 34 | def _iter_dask_array(arr): 35 | if len(arr.shape) == 1: 36 | arr = arr.compute_chunk_sizes().reshape(-1, 1) 37 | x = arr.map_blocks(DataHasher._hash_ndarray, dtype='u8').compute() 38 | yield x 39 | -------------------------------------------------------------------------------- /hypernets/tabular/cuml_ex/_persistence.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | 6 | import cudf 7 | import cupy 8 | 9 | from ..persistence import ParquetPersistence 10 | 11 | _my_cached_types = (cudf.DataFrame, cudf.Series, cupy.ndarray) 12 | 13 | _META_CUML_KEY = b'cuml_type' 14 | 15 | 16 | class CumlParquetPersistence(ParquetPersistence): 17 | acceptable_types = ParquetPersistence.acceptable_types + _my_cached_types 18 | 19 | def store(self, data, path, *, filesystem=None, **kwargs): 20 | assert isinstance(data, self.acceptable_types) 21 | 22 | metadata = {} 23 | if isinstance(data, _my_cached_types): 24 | from . 
import CumlToolBox 25 | data, = CumlToolBox.to_local(data) 26 | metadata[_META_CUML_KEY] = type(data).__name__.encode() 27 | 28 | return super().store(data, path, filesystem=filesystem, metadata=metadata, **kwargs) 29 | 30 | def load(self, path, *, filesystem=None, return_metadata=False, **kwargs): 31 | data, metadata = super().load(path, filesystem=filesystem, return_metadata=True, **kwargs) 32 | 33 | if metadata is not None and metadata.get(_META_CUML_KEY, None) is not None: 34 | from . import CumlToolBox 35 | data, = CumlToolBox.from_local(data) 36 | 37 | if return_metadata: 38 | return data, metadata 39 | else: 40 | return data 41 | -------------------------------------------------------------------------------- /docs/source/hypernets.utils.rst: -------------------------------------------------------------------------------- 1 | hypernets.utils package 2 | ======================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.utils.common module 8 | ----------------------------- 9 | 10 | .. automodule:: hypernets.utils.common 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | hypernets.utils.const module 16 | ---------------------------- 17 | 18 | .. automodule:: hypernets.utils.const 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | hypernets.utils.df\_utils module 24 | -------------------------------- 25 | 26 | .. automodule:: hypernets.utils.df_utils 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | hypernets.utils.logging module 32 | ------------------------------ 33 | 34 | .. automodule:: hypernets.utils.logging 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | hypernets.utils.param\_tuning module 40 | ------------------------------------ 41 | 42 | .. automodule:: hypernets.utils.param_tuning 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | hypernets.utils.ssh\_utils module 48 | --------------------------------- 49 | 50 | .. automodule:: hypernets.utils.ssh_utils 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | Module contents 56 | --------------- 57 | 58 | .. automodule:: hypernets.utils 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | -------------------------------------------------------------------------------- /hypernets/tabular/evaluator/auto_sklearn.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | import autosklearn.classification 7 | import autosklearn.regression 8 | from . 
import BaseEstimator 9 | from ..column_selector import column_object 10 | 11 | 12 | class AutoSklearnEstimator(BaseEstimator): 13 | def __init__(self, task, **kwargs): 14 | super(AutoSklearnEstimator, self).__init__(task) 15 | if task == 'regression': 16 | self.automl = autosklearn.regression.AutoSklearnRegressor(**kwargs) 17 | else: 18 | self.automl = autosklearn.classification.AutoSklearnClassifier(**kwargs) 19 | self.name = 'auto-sklearn' 20 | 21 | def train(self, X, y, X_test): 22 | target = '__tabular_toolbox_target__' 23 | X.insert(0, target, y) 24 | obj_cols = column_object(X) 25 | if len(obj_cols) > 0: 26 | X[obj_cols] = X[obj_cols].astype('category') 27 | y = X.pop(target) 28 | self.automl.fit(X, y) 29 | 30 | def predict_proba(self, X): 31 | obj_cols = column_object(X) 32 | if len(obj_cols) > 0: 33 | X[obj_cols] = X[obj_cols].astype('category') 34 | return self.automl.predict_proba(X) 35 | 36 | def predict(self, X): 37 | obj_cols = column_object(X) 38 | if len(obj_cols) > 0: 39 | X[obj_cols] = X[obj_cols].astype('category') 40 | return self.automl.predict(X) 41 | -------------------------------------------------------------------------------- /hypernets/tests/discriminators/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | from hypernets.discriminators import PercentileDiscriminator, get_previous_trials_scores, get_percentile_score 7 | from hypernets.core import TrialHistory, Trial 8 | 9 | history = TrialHistory(optimize_direction='min') 10 | group_id = 'lightgbm_cv_1' 11 | group_id2 = 'lightgbm_cv_2' 12 | t1 = Trial(None, 1, 0.9, 0, succeeded=True) 13 | t1.iteration_scores[group_id] = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.3, 0.3] 14 | t2 = Trial(None, 1, 0.8, 0, succeeded=True) 15 | t2.iteration_scores[group_id] = [0.9, 0.85, 0.75, 0.65, 0.55, 0.45, 0.35, 0.35, 0.35, 0.25] 16 | t3 = Trial(None, 1, 0.8, 0, succeeded=True) 17 | t3.iteration_scores[group_id] = [0.9, 0.85, 0.75, 0.65, 0.54, 0.44, 0.34, 0.34, 0.34, 0.24] 18 | t4 = Trial(None, 1, 0.8, 0, succeeded=True) 19 | t4.iteration_scores[group_id] = [0.9, 0.85, 0.75, 0.65, 0.53, 0.43, 0.33, 0.33, 0.33, 0.23] 20 | t5 = Trial(None, 1, 0.8, 0, succeeded=True) 21 | t5.iteration_scores[group_id] = [0.9, 0.85, 0.75, 0.65, 0.52, 0.42, 0.32, 0.32, 0.32, 0.22] 22 | t6 = Trial(None, 1, 0.8, 0, succeeded=True) 23 | t6.iteration_scores[group_id] = [0.9, 0.85, 0.75, 0.65, 0.51, 0.41, 0.31, 0.31, 0.31, 0.21] 24 | t6.iteration_scores[group_id2] = [0.9, 0.85, 0.75, 0.65, 0.51, 0.41, 0.31, 0.31, 0.31, 0.21] 25 | history.append(t1) 26 | history.append(t2) 27 | history.append(t3) 28 | history.append(t4) 29 | history.append(t5) 30 | history.append(t6) -------------------------------------------------------------------------------- /hypernets/tests/searchers/playback_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | import pytest 8 | 9 | from hypernets.core.ops import * 10 | from hypernets.core.search_space import * 11 | from hypernets.searchers import PlaybackSearcher 12 | from hypernets.core import TrialHistory, Trial 13 | from hypernets.core import EarlyStoppingError 14 | 15 | def get_space(): 16 | space = HyperSpace() 17 | with space.as_default(): 18 | id1 = Identity(p1=Choice(['a', 'b']), p2=Int(1, 100), p3=Real(0, 1.0)) 19 | return space 20 | 21 | 22 | th = TrialHistory('min') 23 | sample = get_space() 
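# note: assign_by_vectors() pins each hyperparameter of a freshly built space to a
# concrete value, one vector entry per parameter in definition order (p1, p2, p3 here),
# so the recorded trials can later be replayed by PlaybackSearcher in reward order.
# A further history entry could be built the same way (the values below are illustrative,
# not part of the original test):
#   s = get_space()
#   s.assign_by_vectors([1, 4, 0.4])
#   th.append(Trial(s, 4, 0.95, 120))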
24 | sample.assign_by_vectors([0, 1, 0.1]) 25 | trial = Trial(sample, 1, 0.99, 100) 26 | th.append(trial) 27 | 28 | sample = get_space() 29 | sample.assign_by_vectors([1, 2, 0.2]) 30 | trial = Trial(sample, 2, 0.9, 50) 31 | th.append(trial) 32 | 33 | sample = get_space() 34 | sample.assign_by_vectors([0, 3, 0.3]) 35 | trial = Trial(sample, 3, 0.7, 200) 36 | th.append(trial) 37 | 38 | 39 | class Test_PlaybackSearcher(): 40 | def test_playback_searcher(self): 41 | searcher = PlaybackSearcher(th, top_n=2) 42 | sample1 = searcher.sample() 43 | assert sample1.vectors == [0, 3, 0.3] 44 | sample2 = searcher.sample() 45 | assert sample2.vectors == [1, 2, 0.2] 46 | with pytest.raises(EarlyStoppingError) as ese: 47 | searcher.sample() 48 | assert ese.value.args[0] == 'no more samples.' 49 | -------------------------------------------------------------------------------- /hypernets/dispatchers/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | import time 4 | 5 | from .cfg import DispatchCfg as c 6 | 7 | 8 | def get_dispatcher(hyper_model, **kwargs): 9 | timestamp = time.strftime('%Y%m%d%H%M%S') 10 | experiment = c.experiment if len(c.experiment) > 0 else f'experiment_{timestamp}' 11 | work_dir = c.work_dir if len(c.work_dir) > 0 else f'{experiment}' 12 | 13 | if hyper_model.searcher.parallelizable: 14 | if c.backend == 'dask': 15 | from .dask.dask_dispatcher import DaskDispatcher 16 | return DaskDispatcher(work_dir) 17 | elif c.backend == 'cluster': 18 | driver_address = c.cluster_driver 19 | if c.cluster_role == 'driver': 20 | from hypernets.dispatchers.cluster import DriverDispatcher 21 | return DriverDispatcher(driver_address, work_dir) 22 | elif c.cluster_role == 'executor': 23 | if driver_address is None: 24 | raise Exception('Setting "driver" is required for the executor role.') 25 | from hypernets.dispatchers.cluster import ExecutorDispatcher 26 | return ExecutorDispatcher(driver_address) 27 | 28 | return default_dispatcher(work_dir) 29 | 30 | 31 | def default_dispatcher(work_dir=None): 32 | from .in_process_dispatcher import InProcessDispatcher 33 | 34 | models_dir = f'{work_dir}/models' if work_dir else '' 35 | return InProcessDispatcher(models_dir) 36 | -------------------------------------------------------------------------------- /hypernets/examples/smoke_testing.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | import numpy as np 6 | 7 | from hypernets.core.ops import Choice, Bool, Identity 8 | from hypernets.core.search_space import HyperSpace, Int, Real 9 | from hypernets.searchers.evolution_searcher import EvolutionSearcher 10 | from hypernets.searchers.mcts_searcher import MCTSSearcher 11 | from hypernets.searchers.random_searcher import RandomSearcher 12 | 13 | 14 | def get_space(): 15 | space = HyperSpace() 16 | with space.as_default(): 17 | p1 = Int(1, 100) 18 | p2 = Choice(['a', 'b', 'c']) 19 | p3 = Bool() 20 | p4 = Real(0.0, 1.0) 21 | id1 = Identity(p1=p1) 22 | id2 = Identity(p2=p2)(id1) 23 | id3 = Identity(p3=p3)(id2) 24 | id4 = Identity(p4=p4)(id3) 25 | return space 26 | 27 | 28 | def run_search(): 29 | searchers = ( 30 | RandomSearcher(get_space, space_sample_validation_fn=lambda s: True), 31 | MCTSSearcher(get_space, max_node_space=10), 32 | EvolutionSearcher(get_space, 5, 3, regularized=False) 33 | ) 34 | 35 | for searcher in searchers: 36 | for i in range(100): 37 | space_sample = searcher.sample() 38 | assert 
space_sample.all_assigned == True 39 | print(searcher.__class__.__name__, i, space_sample.params_summary()) 40 | searcher.update_result(space_sample, [np.random.uniform(0.1, 0.9)]) 41 | 42 | 43 | if __name__ == '__main__': 44 | run_search() 45 | -------------------------------------------------------------------------------- /hypernets/tests/model/plain_model_test.py: -------------------------------------------------------------------------------- 1 | from hypernets.core.callbacks import SummaryCallback 2 | from hypernets.examples.plain_model import PlainModel, PlainSearchSpace 3 | from hypernets.examples.plain_model import train_heart_disease 4 | from hypernets.searchers import make_searcher 5 | from hypernets.tabular.sklearn_ex import MultiLabelEncoder 6 | 7 | 8 | class DaskPlainModel(PlainModel): 9 | def _get_estimator(self, space_sample): 10 | from hypernets.tabular import get_tool_box 11 | import dask.dataframe as dd 12 | 13 | estimator = super()._get_estimator(space_sample) 14 | 15 | return get_tool_box(dd.DataFrame).wrap_local_estimator(estimator) 16 | 17 | 18 | def create_plain_model(reward_metric='auc', optimize_direction='max', 19 | with_encoder=False, with_dask=False): 20 | search_space = PlainSearchSpace(enable_dt=True, enable_lr=True, enable_nn=False) 21 | searcher = make_searcher('random', search_space_fn=search_space, optimize_direction=optimize_direction) 22 | 23 | encoder = MultiLabelEncoder if with_encoder else None 24 | cls = DaskPlainModel if with_dask else PlainModel 25 | hyper_model = cls(searcher=searcher, reward_metric=reward_metric, callbacks=[SummaryCallback()], 26 | transformer=encoder) 27 | 28 | return hyper_model 29 | 30 | 31 | def test_train_heart_disease(): 32 | train_heart_disease(cv=False, max_trials=5) 33 | 34 | 35 | def test_train_heart_disease_with_cv(): 36 | train_heart_disease(cv=True, max_trials=5) 37 | -------------------------------------------------------------------------------- /hypernets/hyperctl/consts.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | KEY_ENV_BATCHES_DATA_DIR = 'HYPERCTL_BATCHES_DATA_DIR' 5 | KEY_ENV_JOB_NAME = 'HYPERCTL_JOB_NAME' 6 | KEY_ENV_JOB_DATA_DIR = 'HYPERCTL_JOB_DATA_DIR' 7 | KEY_ENV_JOB_WORKING_DIR = 'HYPERCTL_JOB_WORKING_DIR' 8 | KEY_ENV_SERVER_PORTAL = 'HYPERCTL_SERVER_PORTAL' 9 | KEY_ENV_TMP = 'TMP' 10 | 11 | # placeholder 12 | P_HOST_ENV = 'P_HOST_ENV' 13 | P_TMP_ENV = 'P_TMP_ENV' 14 | 15 | KEY_TEMPLATE_COMMAND = "COMMAND" 16 | 17 | RUN_SH_TEMPLATE = f"""#!/bin/sh 18 | export {KEY_ENV_JOB_NAME}="#{KEY_ENV_JOB_NAME}#" 19 | export {KEY_ENV_JOB_DATA_DIR}="#{KEY_ENV_JOB_DATA_DIR}#" 20 | export {KEY_ENV_SERVER_PORTAL}="#{KEY_ENV_SERVER_PORTAL}#" 21 | export {KEY_ENV_JOB_WORKING_DIR}="#{KEY_ENV_JOB_WORKING_DIR}#" 22 | 23 | #{P_TMP_ENV} 24 | #{P_HOST_ENV} 25 | 26 | if [ -n "$TMP" ]; then 27 | if [ ! 
-d "$TMP" ]; then 28 | mkdir -p $TMP 29 | fi 30 | fi 31 | 32 | cd ${KEY_ENV_JOB_WORKING_DIR} 33 | #{KEY_TEMPLATE_COMMAND}# >"${KEY_ENV_JOB_DATA_DIR}/stdout" 2>"${KEY_ENV_JOB_DATA_DIR}/stderr" 34 | """ 35 | 36 | HOST_LOCALHOST = "localhost" 37 | 38 | BATCH_TEMP = "hynctl_batch_temp" 39 | 40 | JOB_DATA_DIR_PREFIX = "hynctl_job_" 41 | 42 | 43 | def default_batches_data_dir(batches_data_dir): 44 | if batches_data_dir is None: 45 | bdd_env = os.environ.get(KEY_ENV_BATCHES_DATA_DIR) 46 | if bdd_env is None: 47 | bdd_default = Path("~/hyperctl-batches-data-dir").expanduser().as_posix() 48 | return bdd_default 49 | else: 50 | return bdd_env 51 | else: 52 | return batches_data_dir 53 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/utils_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | import copy 7 | import io 8 | 9 | import pandas as pd 10 | 11 | from hypernets.tabular import get_tool_box 12 | 13 | csv_str = '''x1_int_nanchar,x2_all_nan,x3_const_str,x4_const_int,x5_dup_1,x6_dup_2,x7_dup_f1,x8_dup_f2,x9_f,x10,y 14 | 1.0,,const,5,dup,dup,0.1,0.1,1.23,\\N,1 15 | 2.2,,const,5,dupa,dupa,0.111,0.111,4.4,\\N,1 16 | \\N,,const,5,dupb,dupb,0.12323,0.12323,1.233,\\N,1 17 | 4.,,const,5,dupc,dupc,0.14334,0.14334,4534434.2,\\N,0 18 | 5,,const,5,dupd,dupd,0.144,0.144,2302.2,\\N,0 19 | 6,,const,5,dupe,dupe,0.155,0.155,34334.1,\\N,\\N 20 | ''' 21 | 22 | 23 | class Test_DataCleaner(): 24 | def test_basic(self): 25 | hasher = get_tool_box(pd.DataFrame).data_hasher() 26 | df1 = pd.read_csv(io.StringIO(csv_str)) 27 | hash1 = hasher(df1) 28 | 29 | df2 = pd.read_csv(io.StringIO(csv_str)) 30 | hash2 = hasher(df2) 31 | assert hash1 == hash2 32 | 33 | df3 = df1.head(5) 34 | hash3 = hasher(df3) 35 | assert hash1 != hash3 36 | 37 | df4 = pd.concat([df1, df1.head(1)], axis=0) 38 | hash4 = hasher(df4) 39 | assert hash1 != hash4 40 | 41 | df5 = copy.deepcopy(df1) 42 | df5['x1_int_nanchar'] = ['1.0', '2.2', '\\N', '4.', '5', '6'] 43 | hash5 = hasher(df5) 44 | assert hash1 == hash5 45 | 46 | df6 = copy.deepcopy(df1) 47 | df6['x1_int_nanchar'] = ['2.0', '2.2', '\\N', '4.', '5', '6'] 48 | hash6 = hasher(df6) 49 | assert hash1 != hash6 50 | 51 | # TODO @lxf add unit tests for Dask.DataFrame 52 | -------------------------------------------------------------------------------- /docs/source/hypernets.tabular.evaluator.rst: -------------------------------------------------------------------------------- 1 | hypernets.tabular.evaluator package 2 | =================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.tabular.evaluator.auto\_sklearn module 8 | ------------------------------------------------ 9 | 10 | .. automodule:: hypernets.tabular.evaluator.auto_sklearn 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | hypernets.tabular.evaluator.h2o module 16 | -------------------------------------- 17 | 18 | .. automodule:: hypernets.tabular.evaluator.h2o 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | hypernets.tabular.evaluator.hyperdt module 24 | ------------------------------------------ 25 | 26 | .. automodule:: hypernets.tabular.evaluator.hyperdt 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | hypernets.tabular.evaluator.hypergbm module 32 | ------------------------------------------- 33 | 34 | .. 
automodule:: hypernets.tabular.evaluator.hypergbm 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | hypernets.tabular.evaluator.tests module 40 | ---------------------------------------- 41 | 42 | .. automodule:: hypernets.tabular.evaluator.tests 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | hypernets.tabular.evaluator.tpot module 48 | --------------------------------------- 49 | 50 | .. automodule:: hypernets.tabular.evaluator.tpot 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | Module contents 56 | --------------- 57 | 58 | .. automodule:: hypernets.tabular.evaluator 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | -------------------------------------------------------------------------------- /hypernets/discriminators/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | from ._base import get_previous_trials_scores, get_percentile_score, UnPromisingTrial, BaseDiscriminator 8 | from .percentile import PercentileDiscriminator, ProgressivePercentileDiscriminator, OncePercentileDiscriminator 9 | 10 | _discriminators = { 11 | 'percentile': PercentileDiscriminator, 12 | 'once_percentile': OncePercentileDiscriminator, 13 | 'percentile_discriminator': PercentileDiscriminator, 14 | 'progressive': ProgressivePercentileDiscriminator, 15 | 'progressive_percentile': ProgressivePercentileDiscriminator, 16 | 'progressive_percentile_discriminator': ProgressivePercentileDiscriminator, 17 | } 18 | 19 | 20 | def _get_discriminator_cls(identifier): 21 | if isinstance(identifier, str): 22 | cls = _discriminators.get(identifier.lower(), None) 23 | if cls is not None: 24 | return cls 25 | elif isinstance(identifier, type) and issubclass(identifier, BaseDiscriminator): 26 | return identifier 27 | 28 | raise ValueError(f'Illegal discriminator:{identifier}') 29 | 30 | 31 | def make_discriminator(cls, optimize_direction='min', **kwargs): 32 | cls = _get_discriminator_cls(cls) 33 | 34 | if cls == PercentileDiscriminator: 35 | default_kwargs = dict(percentile=0) 36 | elif cls == ProgressivePercentileDiscriminator: 37 | default_kwargs = dict(percentile_list=[0]) 38 | else: 39 | default_kwargs = {} 40 | 41 | kwargs = {**default_kwargs, **kwargs} 42 | discriminator = cls(optimize_direction=optimize_direction, **kwargs) 43 | return discriminator 44 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/tb_dask/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | import math 6 | import os 7 | 8 | import psutil 9 | import pytest 10 | 11 | from hypernets.tabular import is_dask_installed 12 | 13 | if_dask_ready = pytest.mark.skipif(not is_dask_installed, reason='dask or dask_ml are not installed') 14 | 15 | 16 | def _startup_dask(overload): 17 | from dask.distributed import LocalCluster, Client 18 | 19 | if os.environ.get('DASK_SCHEDULER_ADDRESS') is not None: 20 | # use dask default settings 21 | client = Client() 22 | else: 23 | # start local cluster 24 | cores = psutil.cpu_count() 25 | workers = math.ceil(cores / 3) 26 | workers = max(2, workers) 27 | if workers > 1: 28 | if overload <= 0: 29 | overload = 1.0 30 | mem_total = psutil.virtual_memory().available / (1024 ** 3) # GB 31 | mem_per_worker = math.ceil(mem_total / workers * overload) 32 | if mem_per_worker > 
mem_total: 33 | mem_per_worker = mem_total 34 | cluster = LocalCluster(processes=True, n_workers=workers, threads_per_worker=4, 35 | memory_limit=f'{mem_per_worker}GB') 36 | else: 37 | cluster = LocalCluster(processes=False) 38 | 39 | client = Client(cluster) 40 | return client 41 | 42 | 43 | def setup_dask(cls): 44 | try: 45 | from dask.distributed import default_client 46 | client = default_client() 47 | except Exception: 48 | client = _startup_dask(2.0) 49 | print('Dask Client:', client) 50 | 51 | if cls is not None: 52 | setattr(cls, 'dask_client_', client) 53 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | Hypernets has been developed and used by many active community members. Everyone is more than welcome to make the project better and more accessible to more users. Whether it's a bug report, new feature, correction, or additional documentation, we greatly value feedback and contributions from our community. 3 | 4 | We are proud of this project and have been working to make it great since day one. We believe you will love it, and every idea about Hypernets you have in mind helps push this project forward. 5 | 6 | Join Us! 7 | 8 | # Bug Reports and Feature Requests 9 | The single most important contribution that you can make is to report bugs and make feature requests. The development work on Hypernets is largely driven by these, so please make your voice heard! 10 | 11 | Here are the issue templates we recommend when you report bugs or suggest features. 12 | - Bug Issue 13 | - Feature Request 14 | - Other Issues 15 | 16 | Ideally, you can attach some code in your issue to reproduce the bug. 17 | 18 | 19 | # Contributing via Pull Requests 20 | Code contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 21 | 22 | 1. You are working against the latest source on the master branch. 23 | 2. You check existing open, and recently merged, pull requests to make 24 | sure someone else hasn't addressed the problem already. 25 | 3. You open an issue to discuss any significant work - we would hate for 26 | your time to be wasted. 27 | 28 | To send us a pull request, please: 29 | 30 | 1. Fork the repository. 31 | 2. Modify the source. 32 | 3. Ensure local tests pass. 33 | 4. Commit to your fork using clear commit messages. 34 | 5. 
Send us a pull request. 35 | -------------------------------------------------------------------------------- /hypernets/hyperctl/utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Optional 3 | 4 | import yaml 5 | import json 6 | import requests 7 | 8 | 9 | def load_yaml(file_path): 10 | 11 | if not Path(file_path).exists(): 12 | raise FileNotFoundError(file_path) 13 | 14 | with open(file_path, 'r') as f: 15 | content = f.read() 16 | return yaml.load(content, Loader=yaml.CLoader) 17 | 18 | 19 | def load_json(file_path): 20 | if not Path(file_path).exists(): 21 | raise FileNotFoundError(file_path) 22 | 23 | with open(file_path, 'r') as f: 24 | content = f.read() 25 | return json.loads(content) 26 | 27 | 28 | def copy_item(src, dest, key): 29 | v = src.get(key) 30 | if v is not None: 31 | dest[key] = v 32 | 33 | 34 | def http_portal(host, port): 35 | return f"http://{host}:{port}" 36 | 37 | 38 | def get_request(url): 39 | def f(url_, request_data_: str): 40 | return requests.get(url_) 41 | 42 | return _request(url, f, None) 43 | 44 | 45 | def post_request(url, request_data: Optional[str]): 46 | def f(url_, request_data_: str): 47 | return requests.post(url_, data=request_data_) 48 | 49 | return _request(url, f, request_data) 50 | 51 | 52 | def _request(url, req_func, request_data=None): 53 | from hypernets.utils import logging as hyn_logging 54 | logger = hyn_logging.getLogger(__name__) 55 | 56 | logger.debug(f"request data :\n{request_data}\nto {url}") 57 | resp = req_func(url, request_data) 58 | txt_resp = resp.text 59 | logger.debug(f"response text: \n{txt_resp}") 60 | json_resp = json.loads(txt_resp) 61 | code = json_resp['code'] 62 | if code == 0: 63 | return json_resp['data'] 64 | else: 65 | raise RuntimeError(txt_resp) 66 | -------------------------------------------------------------------------------- /hypernets/tests/dispatchers/process_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import pytest 5 | 6 | 7 | def start_broker(host, port): 8 | from hypernets.dispatchers import run_broker 9 | from hypernets.dispatchers.process import LocalProcess 10 | 11 | broker_cmd = f'python -m {run_broker.__name__} --host={host} --port={port}' 12 | broker = LocalProcess(broker_cmd, None, None, None) 13 | broker.start() 14 | 15 | return broker 16 | 17 | 18 | @pytest.mark.xfail(reason='Ignore') 19 | def test_grpc_broker_run(): 20 | try: 21 | from paramiko import SSHClient, AutoAddPolicy 22 | import grpc 23 | package_exists = True 24 | except Exception: 25 | package_exists = False 26 | if not package_exists: 27 | return 28 | 29 | import tempfile 30 | from hypernets.dispatchers.process import GrpcProcess 31 | from hypernets.utils.common import generate_id 32 | 33 | broker_host = '127.0.0.1' 34 | broker_port = 43218 35 | broker = start_broker(broker_host, broker_port) 36 | time.sleep(2) 37 | 38 | # run process 39 | cmd = 'echo 123' 40 | temp_dir = tempfile.gettempdir() 41 | test_id = generate_id() 42 | out_file, err_file = f'{temp_dir}/test_out_{test_id}.out', f'{temp_dir}/test_out_{test_id}.err' 43 | proc = GrpcProcess(f'{broker_host}:{broker_port}', cmd, None, out_file, err_file) 44 | proc.run() 45 | code = proc.exitcode 46 | 47 | with open(out_file, 'r') as f: 48 | out = f.read() 49 | with open(err_file, 'r') as f: 50 | err = f.read() 51 | 52 | # clean up 53 | os.remove(out_file), os.remove(err_file) 54 | broker.terminate() # todo: fix 
LocalProcess 55 | 56 | # assert 57 | assert code == 0 58 | assert out == '123\n' 59 | assert err.startswith('pid:') 60 | -------------------------------------------------------------------------------- /hypernets/dispatchers/predict/grpc/predict_service.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from hypernets.dispatchers.predict.grpc.proto import predict_pb2_grpc 4 | from hypernets.dispatchers.predict.grpc.proto.predict_pb2 import PredictResponse 5 | from hypernets.dispatchers.process import LocalProcess 6 | from hypernets.utils import logging 7 | 8 | logger = logging.get_logger(__name__) 9 | 10 | 11 | class PredictService(predict_pb2_grpc.PredictServiceServicer): 12 | def __init__(self, cmd): 13 | super(PredictService, self).__init__() 14 | assert cmd 15 | 16 | self.cmd = cmd 17 | 18 | def predict(self, request, context): 19 | data_file = request.data_file 20 | result_file = request.result_file 21 | 22 | start_at = time.time() 23 | 24 | if logger.is_info_enabled(): 25 | print(f'predict {data_file} --> {result_file}', end='') 26 | 27 | cmd = f'{self.cmd} {data_file} {result_file}' 28 | p = LocalProcess(cmd, None, None, None) 29 | p.start() 30 | p.join() 31 | code = p.exitcode 32 | 33 | res = PredictResponse(data_file=data_file, result_file=result_file, code=code) 34 | 35 | done_at = time.time() 36 | if logger.is_info_enabled(): 37 | print(' done, elapsed %.3f seconds.' % (done_at - start_at)) 38 | return res 39 | 40 | 41 | def serve(addr, cmd): 42 | import grpc 43 | from concurrent import futures 44 | 45 | if logger.is_info_enabled(): 46 | logger.info(f'start predict service at {addr}') 47 | service = PredictService(cmd) 48 | server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) 49 | predict_pb2_grpc.add_PredictServiceServicer_to_server(service, server) 50 | 51 | server.add_insecure_port(addr) 52 | server.start() 53 | 54 | return server, service 55 | -------------------------------------------------------------------------------- /docs/source/hypernets.dispatchers.rst: -------------------------------------------------------------------------------- 1 | hypernets.dispatchers package 2 | ============================= 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | hypernets.dispatchers.cluster 11 | hypernets.dispatchers.dask 12 | hypernets.dispatchers.predict 13 | hypernets.dispatchers.process 14 | 15 | Submodules 16 | ---------- 17 | 18 | hypernets.dispatchers.cfg module 19 | -------------------------------- 20 | 21 | .. automodule:: hypernets.dispatchers.cfg 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | 26 | hypernets.dispatchers.in\_process\_dispatcher module 27 | ---------------------------------------------------- 28 | 29 | .. automodule:: hypernets.dispatchers.in_process_dispatcher 30 | :members: 31 | :undoc-members: 32 | :show-inheritance: 33 | 34 | hypernets.dispatchers.run module 35 | -------------------------------- 36 | 37 | .. automodule:: hypernets.dispatchers.run 38 | :members: 39 | :undoc-members: 40 | :show-inheritance: 41 | 42 | hypernets.dispatchers.run\_broker module 43 | ---------------------------------------- 44 | 45 | .. automodule:: hypernets.dispatchers.run_broker 46 | :members: 47 | :undoc-members: 48 | :show-inheritance: 49 | 50 | hypernets.dispatchers.run\_predict module 51 | ----------------------------------------- 52 | 53 | .. 
automodule:: hypernets.dispatchers.run_predict 54 | :members: 55 | :undoc-members: 56 | :show-inheritance: 57 | 58 | hypernets.dispatchers.run\_predict\_server module 59 | ------------------------------------------------- 60 | 61 | .. automodule:: hypernets.dispatchers.run_predict_server 62 | :members: 63 | :undoc-members: 64 | :show-inheritance: 65 | 66 | Module contents 67 | --------------- 68 | 69 | .. automodule:: hypernets.dispatchers 70 | :members: 71 | :undoc-members: 72 | :show-inheritance: 73 | -------------------------------------------------------------------------------- /hypernets/core/pareto.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def pareto_dominate(x1, x2, directions=None): 5 | """Pareto dominance: return True if x1 dominates x2. 6 | """ 7 | if not isinstance(x1, np.ndarray): 8 | x1 = np.array(x1) 9 | 10 | if not isinstance(x2, np.ndarray): 11 | x2 = np.array(x2) 12 | 13 | if directions is None: 14 | directions = ['min'] * x1.shape[0] 15 | 16 | ret = [] 17 | for i in range(x1.shape[0]): 18 | if directions[i] == 'min': 19 | if x1[i] < x2[i]: 20 | ret.append(1) 21 | elif x1[i] == x2[i]: 22 | ret.append(0) 23 | else: 24 | return False 25 | else: 26 | if x1[i] > x2[i]: 27 | ret.append(1) 28 | elif x1[i] == x2[i]: 29 | ret.append(0) 30 | else: 31 | return False 32 | 33 | return np.sum(np.array(ret)) >= 1 34 | 35 | 36 | def calc_nondominated_set(solutions: np.ndarray, dominate_func=None, directions=None): 37 | 38 | assert solutions.ndim == 2 39 | 40 | if directions is None: 41 | directions = ['min'] * solutions.shape[1] 42 | 43 | if dominate_func is None: 44 | dominate_func = pareto_dominate 45 | 46 | def is_pareto_optimal(scores_i): 47 | if (scores_i == None).any(): # an individual with any None score is invalid and never Pareto-optimal 48 | return False 49 | for scores_j in solutions: 50 | if (scores_i == scores_j).all(): 51 | continue 52 | if dominate_func(x1=scores_j, x2=scores_i, directions=directions): 53 | return False 54 | return True 55 | 56 | optimal = [] 57 | for i, solution in enumerate(solutions): 58 | if is_pareto_optimal(solution): 59 | optimal.append(i) 60 | return optimal 61 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/tb_dask/toolbox_test.py: -------------------------------------------------------------------------------- 1 | import os.path as path 2 | 3 | import pandas as pd 4 | 5 | from hypernets.tabular import get_tool_box 6 | from hypernets.tabular.datasets import dsutils 7 | from . 
import if_dask_ready, is_dask_installed 8 | 9 | if is_dask_installed: 10 | import dask.dataframe as dd 11 | from hypernets.tabular.dask_ex import DaskToolBox 12 | 13 | 14 | @if_dask_ready 15 | class TestDaskToolBox: 16 | def test_get_tool_box(self): 17 | tb = get_tool_box(dd.DataFrame) 18 | assert tb is DaskToolBox 19 | 20 | ddf = dd.from_pandas(pd.DataFrame(dict( 21 | x1=['a', 'b', 'c'], 22 | x2=[1, 2, 3] 23 | )), npartitions=1) 24 | tb = get_tool_box(ddf) 25 | assert tb is DaskToolBox 26 | 27 | def test_concat_df(self): 28 | df = pd.DataFrame(dict( 29 | x1=['a', 'b', 'c'], 30 | x2=[1, 2, 3] 31 | )) 32 | ddf = dd.from_pandas(df, npartitions=2) 33 | tb = get_tool_box(ddf) 34 | 35 | # DataFrame + DataFrame 36 | df1 = tb.concat_df([ddf, ddf], axis=0) 37 | assert isinstance(df1, dd.DataFrame) 38 | 39 | df1 = df1.compute() 40 | df2 = pd.concat([df, df], axis=0).reset_index(drop=True) 41 | assert (df1 == df2).all().all() 42 | 43 | # DataFrame + array 44 | df1 = tb.concat_df([ddf, ddf.to_dask_array(lengths=True)], axis=0) 45 | assert isinstance(df1, dd.DataFrame) 46 | 47 | df1 = df1.compute() 48 | df2 = pd.concat([df, df], axis=0).reset_index(drop=True) 49 | assert (df1 == df2).all().all() 50 | 51 | def test_load_data(self, ): 52 | data_dir = path.split(dsutils.__file__)[0] 53 | data_file = f'{data_dir}/blood.csv' 54 | 55 | df = DaskToolBox.load_data(data_file, reset_index=True) 56 | assert isinstance(df, dd.DataFrame) 57 | -------------------------------------------------------------------------------- /hypernets/dispatchers/cfg.py: -------------------------------------------------------------------------------- 1 | from hypernets.conf import configure, Configurable, String, Int, Float, Enum 2 | 3 | 4 | @configure() 5 | class DispatchCfg(Configurable): 6 | experiment = String(help='experiment id', 7 | ).tag(config=True) 8 | work_dir = String(help='storage directory path to store running data.' 9 | ).tag(config=True) 10 | backend = Enum(['standalone', 'dask', 'cluster', None], 11 | default_value=None, 12 | help='dispatcher backend' 13 | ).tag(config=True) 14 | trial_retry_limit = Int(1000, min=1, 15 | help='maximum retry number to run trial.' 
16 | ).tag(config=True) 17 | 18 | cluster_driver = String(help='driver address, used if backend="cluster"' 19 | ).tag(config=True) 20 | cluster_role = Enum(['driver', 'executor'], 21 | help='node role, used if backend="cluster"' 22 | ).tag(config=True) 23 | cluster_search_queue = Int(1, min=1, 24 | help='search queue size, used if backend="cluster"' 25 | ).tag(config=True) 26 | cluster_summary_interval = Float(60.0, 27 | help='summary interval seconds', 28 | ).tag(config=True) 29 | 30 | dask_search_queue = Int(1, min=1, 31 | help='search queue size, used if backend="dask"' 32 | ).tag(config=True) 33 | dask_search_executors = Int(3, min=1, 34 | help='search executor number, used if backend="dask"' 35 | ).tag(config=True) 36 | 37 | grpc_worker_count = Int(10, min=1, 38 | help='grpc worker count' 39 | ).tag(config=True) 40 | -------------------------------------------------------------------------------- /hypernets/tabular/dask_ex/_dataframe_mapper.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import numpy as np 3 | from dask import array as da 4 | from dask import dataframe as dd 5 | from scipy import sparse as _sparse 6 | 7 | from hypernets.tabular.dataframe_mapper import DataFrameMapper 8 | from hypernets.utils import logging 9 | 10 | logger = logging.get_logger(__name__) 11 | 12 | 13 | class DaskDataFrameMapper(DataFrameMapper): 14 | @staticmethod 15 | def _fix_feature(fea): 16 | from ._toolbox import DaskToolBox 17 | 18 | if DaskToolBox.is_dask_object(fea): 19 | pass 20 | elif _sparse.issparse(fea): 21 | fea = fea.toarray() 22 | 23 | if len(fea.shape) == 1: 24 | """ 25 | Convert 1-dimensional arrays to 2-dimensional column vectors. 26 | """ 27 | if isinstance(fea, da.Array): 28 | fea = da.stack([fea], axis=-1) 29 | else: 30 | fea = np.array([fea]).T 31 | 32 | return fea 33 | 34 | @staticmethod 35 | def _hstack_array(extracted): 36 | from ._toolbox import DaskToolBox 37 | 38 | if DaskToolBox.exist_dask_object(*extracted): 39 | extracted = [a.values if isinstance(a, dd.DataFrame) else a for a in extracted] 40 | stacked = DaskToolBox.hstack_array(extracted) 41 | else: 42 | stacked = np.hstack(extracted) 43 | return stacked 44 | 45 | def _to_df(self, X, extracted, columns): 46 | if isinstance(X, dd.DataFrame): 47 | from ._toolbox import DaskToolBox 48 | 49 | dfs = [dd.from_dask_array(arr, index=None) if isinstance(arr, da.Array) else arr for arr in extracted] 50 | df = DaskToolBox.concat_df(dfs, axis=1) if len(dfs) > 1 else dfs[0] 51 | df.columns = columns 52 | else: 53 | df = super()._to_df(X, extracted, columns) 54 | 55 | return df 56 | -------------------------------------------------------------------------------- /hypernets/tests/utils/common_test.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import os 3 | 4 | from hypernets.utils import common as common_util 5 | 6 | 7 | def test_camel_keys_to_snake(): 8 | input_dict = { 9 | 'datasetConf': { 10 | 'trainData': './train.csv' 11 | }, 12 | 'name': 'with-feature-selection', 13 | 'jobs': [ 14 | { 15 | 'featureSelection': { 16 | 'leastFeatures': 10 17 | }, 18 | 'callbackSetting': [{ 19 | 'className': 'hypernets.core.ConsoleCallback' 20 | }] 21 | } 22 | ] 23 | } 24 | 25 | ret_dict = common_util.camel_keys_to_snake(input_dict) 26 | assert ret_dict['dataset_conf']['train_data'] == input_dict['datasetConf']['trainData'] 27 | assert ret_dict['name'] == input_dict['name'] 28 | 29 | input_job_conf_dict = 
input_dict['jobs'][0] 30 | ret_job_conf_dict = ret_dict['jobs'][0] 31 | 32 | assert ret_job_conf_dict['feature_selection']['least_features'] == \ 33 | input_job_conf_dict['featureSelection']['leastFeatures'] 34 | 35 | assert ret_job_conf_dict['callback_setting'][0]['class_name'] == \ 36 | input_job_conf_dict['callbackSetting'][0]['className'] 37 | 38 | 39 | def test_make_tempfile(): 40 | 41 | temp_file_path: str = common_util.get_temp_file_path(prefix='prefix', suffix='.txt') 42 | assert not os.path.exists(temp_file_path) 43 | 44 | assert os.path.basename(temp_file_path).startswith('prefix') 45 | assert os.path.basename(temp_file_path).endswith('.txt') 46 | 47 | temp_file_dir_created = common_util.get_temp_dir_path(prefix='prefix', suffix='prefix', create=True) 48 | assert os.path.exists(temp_file_dir_created) 49 | 50 | temp_file_dir_not_created = common_util.get_temp_dir_path(prefix='prefix', suffix='prefix', create=False) 51 | assert not os.path.exists(temp_file_dir_not_created) 52 | -------------------------------------------------------------------------------- /hypernets/dispatchers/process/grpc_process.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | import sys 4 | from multiprocessing import Process, Value as PValue 5 | 6 | from hypernets.dispatchers.process.grpc.process_broker_client import ProcessBrokerClient 7 | from hypernets.utils import logging 8 | 9 | logger = logging.get_logger(__name__) 10 | 11 | 12 | class GrpcProcess(Process): 13 | def __init__(self, grpc_broker, cmd, in_file, out_file, err_file, environment=None): 14 | super(GrpcProcess, self).__init__() 15 | 16 | self.grpc_broker = grpc_broker 17 | self.cmd = cmd 18 | self.in_file = in_file 19 | self.out_file = out_file 20 | self.err_file = err_file 21 | self.environment = environment 22 | self._exit_code = PValue('i', -1) 23 | 24 | def run(self, verbose=False): 25 | if verbose and logger.is_info_enabled(): 26 | msg = f'[{self.name}] [GRPC {self.grpc_broker}] {self.cmd}, out={self.out_file}, err={self.err_file}' 27 | logger.info(msg) 28 | 29 | try: 30 | client = ProcessBrokerClient(self.grpc_broker) 31 | buffer_size = 16 32 | if self.out_file and self.err_file: 33 | with open(self.out_file, 'wb', buffering=0)as o, open(self.err_file, 'wb', buffering=0) as e: 34 | code = client.run(self.cmd.split(' '), stdout=o, stderr=e, buffer_size=buffer_size) 35 | else: 36 | code = client.run(self.cmd.split(' '), stdout=sys.stdout, stderr=sys.stderr, buffer_size=buffer_size) 37 | except KeyboardInterrupt: 38 | code = 137 39 | 40 | if verbose and logger.is_info_enabled(): 41 | logger.info(f'[{self.name}] [GRPC {self.grpc_broker}] {self.cmd} done with {code}') 42 | self._exit_code.value = code 43 | 44 | @property 45 | def exitcode(self): 46 | code = self._exit_code.value 47 | return code if code >= 0 else None 48 | -------------------------------------------------------------------------------- /hypernets/tabular/datasets/dsutils.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import os 3 | 4 | basedir = os.path.dirname(__file__) 5 | 6 | 7 | def load_boston(): 8 | import pandas as pd 9 | from sklearn import datasets 10 | # boston_dataset = datasets.load_boston() 11 | # data = pd.DataFrame(boston_dataset.data) 12 | # data.columns = boston_dataset.feature_names 13 | # data.insert(0, 'target', boston_dataset.target) 14 | data = pd.read_csv(f'{basedir}/boston.csv.gz', compression='gzip') 15 | return 
data 16 | 17 | 18 | def load_heart_disease_uci(): 19 | import pandas as pd 20 | data = pd.read_csv(f'{basedir}/heart-disease-uci.csv') 21 | return data 22 | 23 | 24 | def load_bank(): 25 | import pandas as pd 26 | data = pd.read_csv(f'{basedir}/bank-uci.csv.gz') 27 | return data 28 | 29 | 30 | def load_bank_by_dask(): 31 | from dask import dataframe as dd 32 | data = dd.read_csv(f'{basedir}/bank-uci.csv.gz', compression='gzip', blocksize=None) 33 | return data 34 | 35 | 36 | def load_adult(): 37 | import pandas as pd 38 | # print(f'Base dir:{basedir}') 39 | data = pd.read_csv(f'{basedir}/adult-uci.csv.gz', compression='gzip', header=None) 40 | return data 41 | 42 | 43 | def load_glass_uci(): 44 | import pandas as pd 45 | # print(f'Base dir:{basedir}') 46 | data = pd.read_csv(f'{basedir}/glass_uci.csv', header=None) 47 | return data 48 | 49 | 50 | def load_blood(): 51 | import pandas as pd 52 | data = pd.read_csv(f'{basedir}/blood.csv') 53 | return data 54 | 55 | 56 | def load_telescope(): 57 | import pandas as pd 58 | data = pd.read_csv(f'{basedir}/telescope.csv') 59 | return data 60 | 61 | 62 | def load_Bike_Sharing(): 63 | import pandas as pd 64 | data = pd.read_csv(f'{basedir}/Bike_Sharing.csv') 65 | return data 66 | 67 | 68 | def load_movielens(): 69 | import pandas as pd 70 | data = pd.read_csv(f'{basedir}/movielens_sample.txt') 71 | return data 72 | -------------------------------------------------------------------------------- /hypernets/dispatchers/predict/grpc/proto/predict_pb2_grpc.py: -------------------------------------------------------------------------------- 1 | # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! 2 | import grpc 3 | 4 | from hypernets.dispatchers.predict.grpc.proto import predict_pb2 as hypernets_dot_dispatchers_dot_predict_dot_grpc_dot_proto_dot_predict__pb2 5 | 6 | 7 | class PredictServiceStub(object): 8 | # missing associated documentation comment in .proto file 9 | pass 10 | 11 | def __init__(self, channel): 12 | """Constructor. 13 | 14 | Args: 15 | channel: A grpc.Channel. 
16 | """ 17 | self.predict = channel.unary_unary( 18 | '/hypernets.dispatchers.predict.grpc.proto.PredictService/predict', 19 | request_serializer=hypernets_dot_dispatchers_dot_predict_dot_grpc_dot_proto_dot_predict__pb2.PredictRequest.SerializeToString, 20 | response_deserializer=hypernets_dot_dispatchers_dot_predict_dot_grpc_dot_proto_dot_predict__pb2.PredictResponse.FromString, 21 | ) 22 | 23 | 24 | class PredictServiceServicer(object): 25 | # missing associated documentation comment in .proto file 26 | pass 27 | 28 | def predict(self, request, context): 29 | # missing associated documentation comment in .proto file 30 | pass 31 | context.set_code(grpc.StatusCode.UNIMPLEMENTED) 32 | context.set_details('Method not implemented!') 33 | raise NotImplementedError('Method not implemented!') 34 | 35 | 36 | def add_PredictServiceServicer_to_server(servicer, server): 37 | rpc_method_handlers = { 38 | 'predict': grpc.unary_unary_rpc_method_handler( 39 | servicer.predict, 40 | request_deserializer=hypernets_dot_dispatchers_dot_predict_dot_grpc_dot_proto_dot_predict__pb2.PredictRequest.FromString, 41 | response_serializer=hypernets_dot_dispatchers_dot_predict_dot_grpc_dot_proto_dot_predict__pb2.PredictResponse.SerializeToString, 42 | ), 43 | } 44 | generic_handler = grpc.method_handlers_generic_handler( 45 | 'hypernets.dispatchers.predict.grpc.proto.PredictService', rpc_method_handlers) 46 | server.add_generic_rpc_handlers((generic_handler,)) 47 | -------------------------------------------------------------------------------- /hypernets/dispatchers/process/local_process.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | import subprocess 4 | import sys 5 | from multiprocessing import Process, Value as PValue 6 | 7 | from hypernets.utils import logging 8 | 9 | logger = logging.get_logger(__name__) 10 | 11 | 12 | class LocalProcess(Process): 13 | def __init__(self, cmd, in_file, out_file, err_file, environment=None): 14 | super(LocalProcess, self).__init__() 15 | self.cmd = cmd 16 | self.in_file = in_file 17 | self.out_file = out_file 18 | self.err_file = err_file 19 | self.environment = environment 20 | self._exit_code = PValue('i', -1) 21 | 22 | def run(self, verbose=False): 23 | if verbose and logger.is_info_enabled(): 24 | logger.info(f'[{self.name}] [CMD] {self.cmd}, out={self.out_file}, err={self.err_file}') 25 | 26 | try: 27 | if self.out_file and self.err_file: 28 | with open(self.out_file, 'wb', buffering=0)as o, open(self.err_file, 'wb', buffering=0) as e: 29 | p = subprocess.run(self.cmd.split(' '), 30 | shell=False, 31 | stdin=subprocess.DEVNULL, 32 | stdout=o, 33 | stderr=e) 34 | code = p.returncode 35 | else: 36 | p = subprocess.run(self.cmd.split(' '), 37 | shell=False, 38 | stdin=subprocess.DEVNULL, 39 | stdout=sys.stdout, 40 | stderr=sys.stderr) 41 | code = p.returncode 42 | except KeyboardInterrupt: 43 | code = 137 44 | 45 | if verbose and logger.is_info_enabled(): 46 | logger.info(f'[{self.name}] [CMD] {self.cmd} done with {code}') 47 | 48 | self._exit_code.value = code 49 | 50 | @property 51 | def exitcode(self): 52 | code = self._exit_code.value 53 | return code if code >= 0 else None 54 | -------------------------------------------------------------------------------- /hypernets/tests/hyperctl/test_batch.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | from pathlib import Path 3 | 4 | from hypernets.hyperctl.appliation import BatchApplication 5 | 
from hypernets.hyperctl.batch import _ShellJob 6 | from hypernets.hyperctl.executor import LocalExecutorManager, RemoteSSHExecutorManager 7 | from hypernets.tests.hyperctl.batch_factory import create_minimum_batch, create_local_batch 8 | 9 | 10 | def test_batch_to_config(): 11 | server_port = 8061 12 | scheduler_interval = 1 13 | # 1. create a batch 14 | batch = create_minimum_batch() 15 | app = BatchApplication(batch, server_port=server_port, 16 | scheduler_exit_on_finish=True, 17 | scheduler_interval=scheduler_interval) 18 | 19 | # 2. to_config 20 | batch_config_dict = app.to_config() 21 | 22 | assert batch_config_dict['job_command'] == 'pwd' 23 | 24 | # 3. assert config content 25 | # 3.1. check jobs 26 | jobs_config = batch_config_dict['jobs'] 27 | assert len(jobs_config) == 1 28 | job_config = jobs_config[0] 29 | 30 | assert job_config['name'] == 'job1' 31 | assert job_config['params']["learning_rate"] == 0.1 32 | 33 | assert job_config['working_dir'] 34 | 35 | # 3.2 TODO check backend 36 | # backend_config = batch_config_dict['backend'] 37 | # assert backend_config['type'] == 'local' 38 | 39 | # 3.3 check server config 40 | server_config = batch_config_dict['server'] 41 | assert server_config['host'] == 'localhost' 42 | assert server_config['port'] == server_port 43 | 44 | # 3.4 check scheduler 45 | scheduler_config = batch_config_dict['scheduler'] 46 | assert scheduler_config['exit_on_finish'] is True 47 | assert scheduler_config['interval'] == 1 48 | 49 | # 3.5 check version 50 | assert batch_config_dict['version'] 51 | 52 | 53 | def test_get_job_by_name(): 54 | batch = create_local_batch() 55 | req_job_name = "job2" 56 | job = batch.get_job_by_name(req_job_name) 57 | assert job.name == req_job_name 58 | assert batch.get_persisted_job_status(req_job_name) == _ShellJob.STATUS_INIT 59 | assert job.params['learning_rate'] == 0.2 60 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/cache_test.py: -------------------------------------------------------------------------------- 1 | from hypernets.tabular import sklearn_ex as skex, get_tool_box 2 | from hypernets.tabular.cache import cache, clear, CacheCallback 3 | from hypernets.tabular.datasets import dsutils 4 | from hypernets.utils import Counter 5 | 6 | 7 | class CacheCounter(CacheCallback): 8 | def __init__(self): 9 | super(CacheCounter, self).__init__() 10 | 11 | self.enter_counter = Counter() 12 | self.apply_counter = Counter() 13 | self.store_counter = Counter() 14 | 15 | def on_enter(self, fn, *args, **kwargs): 16 | self.enter_counter() 17 | 18 | def on_apply(self, fn, cached_data, *args, **kwargs): 19 | self.apply_counter() 20 | 21 | def on_store(self, fn, cached_data, *args, **kwargs): 22 | self.store_counter() 23 | 24 | def reset(self): 25 | self.enter_counter.reset() 26 | self.apply_counter.reset() 27 | self.store_counter.reset() 28 | 29 | 30 | class CachedMultiLabelEncoder(skex.MultiLabelEncoder): 31 | @cache(attr_keys='columns', attrs_to_restore='columns,encoders') 32 | def fit_transform(self, X, *args): 33 | return super().fit_transform(X, *args) 34 | 35 | @cache(attr_keys='columns', attrs_to_restore='columns,encoders') 36 | def fit_transform_as_tuple_result(self, X, *args): 37 | Xt = super().fit_transform(X.copy(), *args) 38 | return X, Xt 39 | 40 | 41 | def test_cache(): 42 | clear() 43 | 44 | df = dsutils.load_bank() 45 | t = skex.MultiLabelEncoder() 46 | X = t.fit_transform(df.copy()) 47 | 48 | t1 = CachedMultiLabelEncoder() 49 | X1 = 
t1.fit_transform(df.copy()) 50 | t2 = CachedMultiLabelEncoder() 51 | X2 = t2.fit_transform(df.copy()) 52 | 53 | hasher = get_tool_box(df).data_hasher() 54 | assert hasher(X) == hasher(X1) == hasher(X2) 55 | 56 | t3 = CachedMultiLabelEncoder() 57 | X3 = t3.fit_transform_as_tuple_result(df.copy()) 58 | t4 = CachedMultiLabelEncoder() 59 | X4 = t4.fit_transform_as_tuple_result(df.copy()) 60 | assert isinstance(X3, (tuple, list)) 61 | assert isinstance(X4, (tuple, list)) 62 | assert hasher(X3[1]) == hasher(X4[1]) 63 | -------------------------------------------------------------------------------- /hypernets/experiment/general.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | from sklearn.model_selection import train_test_split 8 | 9 | from hypernets.utils import logging, const 10 | from . import Experiment 11 | 12 | logger = logging.get_logger(__name__) 13 | 14 | 15 | class GeneralExperiment(Experiment): 16 | def __init__(self, hyper_model, X_train, y_train, X_eval=None, y_eval=None, X_test=None, eval_size=0.3, 17 | task=None, id=None, callbacks=None, random_state=9527): 18 | super(GeneralExperiment, self).__init__(hyper_model, X_train, y_train, X_eval=X_eval, 19 | y_eval=y_eval, X_test=X_test, eval_size=eval_size, task=task, 20 | id=id, callbacks=callbacks, random_state=random_state) 21 | 22 | def train(self, hyper_model, X_train, y_train, X_test, X_eval=None, y_eval=None, **kwargs): 23 | """Run an experiment 24 | """ 25 | self.step_start('data split') 26 | if X_eval is None or y_eval is None: 27 | stratify = y_train 28 | if self.task == const.TASK_REGRESSION: 29 | stratify = None 30 | X_train, X_eval, y_train, y_eval = train_test_split(X_train, y_train, test_size=self.eval_size, 31 | random_state=self.random_state, stratify=stratify) 32 | self.step_end(output={'X_train.shape': X_train.shape, 33 | 'y_train.shape': y_train.shape, 34 | 'X_eval.shape': X_eval.shape, 35 | 'y_eval.shape': y_eval.shape, 36 | 'X_test.shape': None if X_test is None else X_test.shape}) 37 | 38 | self.step_start('search') 39 | hyper_model.search(X_train, y_train, X_eval, y_eval, **kwargs) 40 | best_trial = hyper_model.get_best_trial() 41 | self.step_end(output={'best_trial': best_trial}) 42 | 43 | self.step_start('load estimator') 44 | estimator = hyper_model.load_estimator(best_trial.model_file) 45 | self.step_end(output={'estimator': estimator}) 46 | 47 | return estimator 48 | -------------------------------------------------------------------------------- /hypernets/tests/experiment/general_experiment_test.py: -------------------------------------------------------------------------------- 1 | from sklearn.model_selection import train_test_split 2 | 3 | from hypernets.examples.plain_model import PlainModel, PlainSearchSpace 4 | from hypernets.experiment import GeneralExperiment 5 | from hypernets.searchers import make_searcher 6 | from hypernets.tabular.datasets import dsutils 7 | 8 | 9 | def create_hyper_model(reward_metric='auc', optimize_direction='max'): 10 | search_space = PlainSearchSpace() 11 | searcher = make_searcher('random', search_space_fn=search_space, optimize_direction=optimize_direction) 12 | hyper_model = PlainModel(searcher=searcher, reward_metric=reward_metric, callbacks=[]) 13 | 14 | return hyper_model 15 | 16 | 17 | def test_general_experiment_of_heart_disease_simple(): 18 | hyper_model = create_hyper_model() 19 | 20 | X = dsutils.load_heart_disease_uci() 21 | y = X.pop('target') 
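# note: when X_eval/y_eval are omitted, GeneralExperiment.train() (see general.py above)
# splits an evaluation set out of X_train itself using eval_size, so this simple case
# only needs one held-out test split for the final evaluate() call.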
22 | 23 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) 24 | 25 | experiment = GeneralExperiment(hyper_model, X_train, y_train, eval_size=0.3) 26 | estimator = experiment.run(max_trials=5) 27 | trials = hyper_model.get_top_trials(5) 28 | 29 | assert estimator 30 | assert 1 < len(trials) <= 5 31 | 32 | score = estimator.evaluate(X_test, y_test, metrics=['auc', 'accuracy', 'f1', 'recall', 'precision']) 33 | print('evaluate score:', score) 34 | assert score 35 | 36 | 37 | def test_general_experiment_of_heart_disease_with_eval_and_cv(): 38 | hyper_model = create_hyper_model() 39 | 40 | X = dsutils.load_heart_disease_uci() 41 | y = X.pop('target') 42 | 43 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) 44 | X_train, X_eval, y_train, y_eval = train_test_split(X_train, y_train, test_size=0.3) 45 | 46 | experiment = GeneralExperiment(hyper_model, X_train, y_train, X_eval=X_eval, y_eval=y_eval, X_test=X_test) 47 | estimator = experiment.run(max_trials=5, cv=True) 48 | trials = hyper_model.get_top_trials(5) 49 | 50 | assert estimator 51 | assert 1 < len(trials) <= 5 52 | 53 | score = estimator.evaluate(X_test, y_test, metrics=['auc', 'accuracy', 'f1', 'recall', 'precision']) 54 | print('evaluate score:', score) 55 | assert score 56 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | import os 21 | import sys 22 | from datetime import datetime 23 | 24 | sys.path.insert(0, os.path.abspath('../..')) 25 | 26 | 27 | def setup(app): 28 | app.add_css_file('css/my_theme.css') 29 | 30 | 31 | now = datetime.now() 32 | project = 'Hypernets' 33 | copyright = f'{now.year}, DataCanvas.com' 34 | author = 'DataCanvas.com' 35 | 36 | # The full version, including alpha/beta/rc tags 37 | # release = '0.2.5' 38 | extensions = ['recommonmark', 39 | 'sphinx.ext.autodoc', 40 | 'sphinx.ext.napoleon', 41 | 'sphinx.ext.viewcode' 42 | # 'sphinx.ext.autodoc', 43 | # 'sphinx.ext.mathjax', 44 | # 'sphinx.ext.ifconfig', 45 | # 'sphinx.ext.viewcode', 46 | # 'sphinx.ext.githubpages', 47 | ] 48 | exclude_patterns = [] 49 | # html_theme = 'alabaster' 50 | html_theme = 'sphinx_rtd_theme' 51 | pygments_style = 'sphinx' 52 | templates_path = ['_templates'] 53 | source_suffix = ['.rst', '.md'] 54 | master_doc = 'index' 55 | html_static_path = ['_static'] 56 | 57 | # One entry per manual page. List of tuples 58 | # (source start file, name, description, authors, manual section). 
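# note: the man/texinfo entries below are consumed by Sphinx's man and texinfo builders;
# a typical local build of this documentation (a sketch, assuming Sphinx and the packages
# listed in docs/requirements.txt are installed) is:
#   cd docs && make html    # targets provided by the Makefile alongside this source tree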
59 | man_pages = [ 60 | (master_doc, 'Hypernets', 'Hypernets Documentation', 61 | [author], 1) 62 | ] 63 | 64 | texinfo_documents = [ 65 | (master_doc, 'Hypernets', 'Hypernets Documentation', 66 | author, 'Hypernets', 'One line description of project.', 67 | 'Miscellaneous'), 68 | ] 69 | -------------------------------------------------------------------------------- /docs/source/overview.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | Hypernets is a general automated search framework. On top of it, automatic optimization tools can be implemented for various machine learning frameworks and libraries, including deep learning frameworks such as tensorflow, keras and pytorch, and machine learning libraries like sklearn, lightgbm and xgboost. 4 | We introduced an abstract search space representation that takes into account the requirements of both hyperparameter optimization and neural architecture search (NAS), making Hypernets a general framework that adapts to various automated machine learning needs. 5 | 6 | The figure below shows the conceptual model of Hypernets. 7 | 8 | ![hypernets_conceptual_model](images/hypernets_conceptual_model.png) 9 | 10 | ## Key Components 11 | 12 | ### HyperSpace 13 | The space of all feasible solutions for a model is called the **Search Space**. HyperSpace is an abstract representation of the search space composed of `Parameter Space`, `Connection Space`, and `Module Space`. The general form of HyperSpace is a DAG, so it can represent ML pipelines and neural network architectures very flexibly. 14 | 15 | ### Searcher 16 | Search algorithms that look for an optimal solution in `HyperSpace` and generate samples for `HyperModel`. 17 | 18 | ### HyperModel 19 | The high-level interface for users to perform model search and training: pass in the defined search space and training data, and get back the best model. HyperModel is an abstract class; a dedicated HyperModel needs to be implemented for each framework or domain. For example, `HyperKeras` is used to automatically search for neural networks built with keras, and `HyperGBM` is used to automatically optimize ML pipelines composed of sklearn, xgboost and lightgbm. 20 | 21 | ### Estimator 22 | A specific `HyperModel` needs to be paired with a dedicated `Estimator` to fit and evaluate the samples given by the `HyperModel`. Such a sample may be a set of hyperparameters, a network architecture, or a mixture of them. 23 | 24 | ### Experiment 25 | The playground to prepare training and testing data and to search for the optimized estimator with a HyperModel. 26 | 27 | ### Tabular Toolbox 28 | A general tabular data computing layer. At present, we provide implementations for pandas, cudf and dask data types. 29 | -------------------------------------------------------------------------------- /docs/source/hypernets.hyperctl.rst: -------------------------------------------------------------------------------- 1 | hypernets.hyperctl package 2 | ========================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.hyperctl.api module 8 | ----------------------------- 9 | 10 | .. automodule:: hypernets.hyperctl.api 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | hypernets.hyperctl.appliation module 16 | ------------------------------------ 17 | 18 | .. 
--------------------------------------------------------------------------------
/docs/source/hypernets.hyperctl.rst:
--------------------------------------------------------------------------------
1 | hypernets.hyperctl package
2 | ==========================
3 | 
4 | Submodules
5 | ----------
6 | 
7 | hypernets.hyperctl.api module
8 | -----------------------------
9 | 
10 | .. automodule:: hypernets.hyperctl.api
11 |    :members:
12 |    :undoc-members:
13 |    :show-inheritance:
14 | 
15 | hypernets.hyperctl.appliation module
16 | ------------------------------------
17 | 
18 | .. automodule:: hypernets.hyperctl.appliation
19 |    :members:
20 |    :undoc-members:
21 |    :show-inheritance:
22 | 
23 | hypernets.hyperctl.batch module
24 | -------------------------------
25 | 
26 | .. automodule:: hypernets.hyperctl.batch
27 |    :members:
28 |    :undoc-members:
29 |    :show-inheritance:
30 | 
31 | hypernets.hyperctl.callbacks module
32 | -----------------------------------
33 | 
34 | .. automodule:: hypernets.hyperctl.callbacks
35 |    :members:
36 |    :undoc-members:
37 |    :show-inheritance:
38 | 
39 | hypernets.hyperctl.cli module
40 | -----------------------------
41 | 
42 | .. automodule:: hypernets.hyperctl.cli
43 |    :members:
44 |    :undoc-members:
45 |    :show-inheritance:
46 | 
47 | hypernets.hyperctl.consts module
48 | --------------------------------
49 | 
50 | .. automodule:: hypernets.hyperctl.consts
51 |    :members:
52 |    :undoc-members:
53 |    :show-inheritance:
54 | 
55 | hypernets.hyperctl.executor module
56 | ----------------------------------
57 | 
58 | .. automodule:: hypernets.hyperctl.executor
59 |    :members:
60 |    :undoc-members:
61 |    :show-inheritance:
62 | 
63 | hypernets.hyperctl.scheduler module
64 | -----------------------------------
65 | 
66 | .. automodule:: hypernets.hyperctl.scheduler
67 |    :members:
68 |    :undoc-members:
69 |    :show-inheritance:
70 | 
71 | hypernets.hyperctl.server module
72 | --------------------------------
73 | 
74 | .. automodule:: hypernets.hyperctl.server
75 |    :members:
76 |    :undoc-members:
77 |    :show-inheritance:
78 | 
79 | hypernets.hyperctl.utils module
80 | -------------------------------
81 | 
82 | .. automodule:: hypernets.hyperctl.utils
83 |    :members:
84 |    :undoc-members:
85 |    :show-inheritance:
86 | 
87 | Module contents
88 | ---------------
89 | 
90 | .. automodule:: hypernets.hyperctl
91 |    :members:
92 |    :undoc-members:
93 |    :show-inheritance:
94 | 
--------------------------------------------------------------------------------
/hypernets/hyperctl/api.py:
--------------------------------------------------------------------------------
1 | import os
2 | import tempfile
3 | 
4 | from hypernets.hyperctl import consts, utils
5 | from hypernets.utils import logging as hyn_logging
6 | 
7 | logger = hyn_logging.get_logger(__name__)
8 | 
9 | _job_dict = {}
10 | 
11 | 
12 | def get_job(job_name, api_server_portal):
13 |     url_get_job = f"{api_server_portal}/hyperctl/api/job/{job_name}"
14 |     data = utils.get_request(url_get_job)
15 |     return data
16 | 
17 | 
18 | def _get_job_name_and_daemon_portal():
19 |     job_name = os.getenv(consts.KEY_ENV_JOB_NAME)
20 |     api_server_portal = os.getenv(consts.KEY_ENV_SERVER_PORTAL)
21 | 
22 |     assert job_name
23 |     assert api_server_portal
24 | 
25 |     return job_name, api_server_portal
26 | 
27 | 
28 | def get_job_params():
29 |     global _job_dict
30 |     dev_job_params = _job_dict.get('params')
31 |     if dev_job_params is not None:
32 |         return dev_job_params
33 | 
34 |     job_name, api_server_portal = _get_job_name_and_daemon_portal()
35 |     return get_job(job_name, api_server_portal)['params']
36 | 
37 | 
38 | def get_job_data_dir():
39 |     global _job_dict
40 |     dev_job_data_dir = _job_dict.get('job_data_dir')
41 |     if dev_job_data_dir is not None:
42 |         return dev_job_data_dir
43 | 
44 |     job_working_dir = os.getenv(consts.KEY_ENV_JOB_WORKING_DIR)
45 |     return job_working_dir
46 | 
47 | 
48 | def inject(params, job_data_dir=None):  # inject job params locally, for development and tests
49 |     global _job_dict
50 |     job_dict = _job_dict
51 |     job_dict['params'] = params
52 |     if job_data_dir is None:
53 |         job_data_dir = tempfile.mkdtemp(prefix='hyperctl-')  # fall back to a fresh temp dir
54 |     job_dict['job_data_dir'] = job_data_dir
55 | 
56 | 
57 | def
reset_dev_params(): 58 | global _job_dict 59 | _job_dict = {} 60 | 61 | 62 | def list_jobs(api_server_portal): 63 | # if api_server_portal is None: 64 | # api_server_portal = os.getenv(consts.KEY_ENV_api_server_portal) 65 | assert api_server_portal 66 | url_get_jobs = f"{api_server_portal}/hyperctl/api/job" 67 | data = utils.get_request(url_get_jobs) 68 | return data['jobs'] 69 | 70 | 71 | def kill_job(api_server_portal, job_name): 72 | url_kill_job = f"{api_server_portal}/hyperctl/api/job/{job_name}/kill" 73 | data = utils.post_request(url_kill_job, request_data=None) 74 | return data 75 | -------------------------------------------------------------------------------- /hypernets/tests/searchers/test_genetic.py: -------------------------------------------------------------------------------- 1 | from hypernets.core import get_random_state, set_random_state, HyperSpace, Identity, Bool, Optional, Real, HyperInput, Choice, Int 2 | from hypernets.searchers.genetic import SinglePointCrossOver, ShuffleCrossOver, UniformCrossover, Individual 3 | 4 | 5 | class TestCrossOver: 6 | 7 | @classmethod 8 | def setup_class(cls): 9 | set_random_state(1234) 10 | cls.random_state = get_random_state() 11 | 12 | def test_shuffle_crossover(self): 13 | co = ShuffleCrossOver(random_state=self.random_state) 14 | self.run_crossover(co) 15 | 16 | def test_single_point_crossover(self): 17 | co = SinglePointCrossOver(random_state=self.random_state) 18 | self.run_crossover(co) 19 | 20 | def test_uniform_crossover(self): 21 | co = UniformCrossover(random_state=self.random_state) 22 | try: 23 | self.run_crossover(co) 24 | # P(off=[A or B]) = 0.5 ^ 3 * 2 25 | except Exception as e: 26 | print(e) 27 | 28 | def run_crossover(self, crossover): 29 | # 1. prepare data 30 | random_state = self.random_state 31 | 32 | # 2. construct a search space 33 | def get_space(): 34 | space = HyperSpace() 35 | with space.as_default(): 36 | input1 = HyperInput(name="input1") 37 | id1 = Identity(p1=Choice([1, 2, 3, 4]), p2=Int(1, 100), name="id1") 38 | id2 = Identity(p3=Real(0, 1), name="id2") 39 | id1(input1) 40 | id2(id1) 41 | return space 42 | out = get_space() 43 | print(out) 44 | 45 | # 3. 
construct individuals 46 | dna1 = get_space() 47 | dna1.assign_by_vectors([0, 50, 0.2]) 48 | ind1 = Individual(dna=dna1, scores=[1, 1], random_state=random_state) 49 | 50 | dna2 = get_space() 51 | dna2.assign_by_vectors([1, 30, 0.5]) 52 | ind2 = Individual(dna=dna2, scores=[1, 1], random_state=random_state) 53 | 54 | output = crossover(ind1=ind1, ind2=ind2, out_space=get_space()) 55 | assert output.all_assigned 56 | 57 | # the offspring is not same as any parents 58 | assert output.vectors != ind1.dna.vectors 59 | assert output.vectors != ind2.dna.vectors 60 | 61 | -------------------------------------------------------------------------------- /hypernets/searchers/grid_searcher.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | from ..core.searcher import Searcher, OptimizeDirection 7 | from ..core import EarlyStoppingError 8 | from sklearn.model_selection import ParameterGrid 9 | 10 | 11 | class GridSearcher(Searcher): 12 | def __init__(self, space_fn, optimize_direction=OptimizeDirection.Minimize, space_sample_validation_fn=None, 13 | n_expansion=5): 14 | Searcher.__init__(self, space_fn, optimize_direction, space_sample_validation_fn=space_sample_validation_fn) 15 | space = space_fn() 16 | assignable_params = space.get_unassigned_params() 17 | self.grid = {} 18 | self.n_expansion = n_expansion 19 | for p in assignable_params: 20 | self.grid[p.id] = [s.value for s in p.expansion(n_expansion)] 21 | self.all_combinations = list(ParameterGrid(self.grid)) 22 | self.position_ = -1 23 | 24 | @property 25 | def parallelizable(self): 26 | return True 27 | 28 | def sample(self, space_options=None): 29 | sample = self._sample_and_check(self._get_sample) 30 | return sample 31 | 32 | def _get_sample(self): 33 | self.position_ += 1 34 | 35 | if self.position_ >= len(self.all_combinations): 36 | raise EarlyStoppingError('no more samples.') 37 | sample = self.space_fn() 38 | for k, v in self.all_combinations[self.position_].items(): 39 | sample.__dict__[k].assign(v) 40 | assert sample.all_assigned == True 41 | return sample 42 | 43 | def get_best(self): 44 | raise NotImplementedError 45 | 46 | def update_result(self, space, result): 47 | pass 48 | 49 | def reset(self): 50 | self.position_ = -1 51 | 52 | def export(self): 53 | raise NotImplementedError 54 | 55 | 56 | def test_parameter_grid(self): 57 | space = self.get_space() 58 | ps = space.get_unassigned_params() 59 | grid = {} 60 | for p in ps: 61 | grid[p.name] = [s.value for s in p.expansion(2)] 62 | all_vectors = list(ParameterGrid(grid)) 63 | for ps in all_vectors: 64 | space = self.get_space() 65 | for k, v in ps.items(): 66 | space.__dict__[k].assign(v) 67 | assert space.all_assigned == True 68 | -------------------------------------------------------------------------------- /hypernets/tabular/ensemble/stacking.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | from sklearn.linear_model import LogisticRegression, LinearRegression 7 | 8 | from .base_ensemble import BaseEnsemble 9 | 10 | 11 | class StackingEnsemble(BaseEnsemble): 12 | def __init__(self, task, estimators, need_fit=False, n_folds=5, method='soft', meta_model=None, fit_kwargs=None): 13 | super(StackingEnsemble, self).__init__(task, estimators, need_fit, n_folds, method) 14 | if meta_model is None: 15 | if task == 'regression': 16 | self.meta_model = 
LinearRegression() 17 | else: 18 | self.meta_model = LogisticRegression() 19 | else: 20 | self.meta_model = meta_model 21 | self.fit_kwargs = fit_kwargs if fit_kwargs is not None else {} 22 | 23 | def fit_predictions(self, predictions, y_true): 24 | X = self.__predictions2X(predictions) 25 | self.meta_model.fit(X, y_true, **self.fit_kwargs) 26 | 27 | def __predictions2X(self, predictions): 28 | X = predictions 29 | if len(X.shape) == 3: 30 | if self.task == 'binary': 31 | X = X[:, :, -1] 32 | elif self.task == 'multiclass': 33 | np = self.np 34 | X = np.argmax(X, axis=2) 35 | else: 36 | raise ValueError( 37 | f"The shape of `predictions` and the `task` don't match. shape:{predictions.shape}, task:{self.task}") 38 | return X 39 | 40 | def predictions2predict(self, predictions): 41 | assert self.meta_model is not None 42 | X = self.__predictions2X(predictions) 43 | pred = self.meta_model.predict(X) 44 | if self.task == 'binary': 45 | np = self.np 46 | pred = np.clip(pred, 0, 1) 47 | return pred 48 | 49 | def predictions2predict_proba(self, predictions): 50 | assert self.meta_model is not None 51 | X = self.__predictions2X(predictions) 52 | if hasattr(self.meta_model, 'predict_proba'): 53 | pred = self.meta_model.predict_proba(X) 54 | else: 55 | pred = self.meta_model.predict(X) 56 | 57 | if self.task == 'binary': 58 | np = self.np 59 | pred = np.clip(pred, 0, 1) 60 | return pred 61 | -------------------------------------------------------------------------------- /hypernets/tabular/cuml_ex/_dataframe_mapper.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | import cudf 6 | import cupy 7 | import numpy as np 8 | from sklearn.pipeline import _name_estimators 9 | 10 | from hypernets.tabular.dataframe_mapper import DataFrameMapper, TransformerPipeline 11 | from ._transformer import Localizable 12 | 13 | 14 | class CumlTransformerPipeline(TransformerPipeline): 15 | def as_local(self): 16 | steps = [(name, tf.as_local()) for name, tf in self.steps] 17 | target = TransformerPipeline(steps) 18 | return target 19 | 20 | 21 | def make_transformer_pipeline(*steps): 22 | """Construct a TransformerPipeline from the given estimators. 
23 | """ 24 | return CumlTransformerPipeline(_name_estimators(steps)) 25 | 26 | 27 | class CumlDataFrameMapper(DataFrameMapper, Localizable): 28 | @staticmethod 29 | def _build_transformer(transformers): 30 | if isinstance(transformers, list): 31 | transformers = make_transformer_pipeline(*transformers) 32 | return transformers 33 | 34 | def _to_df(self, X, extracted, columns): 35 | dfs = [cudf.DataFrame(arr, index=None) for arr in extracted] 36 | for df, pos in zip(dfs, np.cumsum([d.shape[1] for d in dfs])): 37 | df.reset_index(drop=True, inplace=True) 38 | df.columns = [f'c{i}' for i in range(pos - df.shape[1], pos)] 39 | df_out = cudf.concat(dfs, axis=1, ignore_index=True) if len(dfs) > 1 else dfs[0] 40 | if len(X) == len(df_out): 41 | df_out.index = X.index 42 | df_out.columns = columns 43 | 44 | return df_out 45 | 46 | @staticmethod 47 | def _hstack_array(extracted): 48 | arrs = [arr.values if isinstance(arr, cudf.DataFrame) else arr for arr in extracted] 49 | return cupy.hstack(arrs) 50 | 51 | @staticmethod 52 | def _fix_feature(fea): 53 | if isinstance(fea, (np.ndarray, cupy.ndarray)) and len(fea.shape) == 1: 54 | fea = fea.reshape(-1, 1) 55 | return fea 56 | 57 | def as_local(self): 58 | target = DataFrameMapper([], default=None, df_out=self.df_out, input_df=self.input_df, 59 | df_out_dtype_transforms=self.df_out_dtype_transforms) 60 | target.fitted_features_ = [(cols, t.as_local(), opts) for cols, t, opts in self.fitted_features_] 61 | return target 62 | -------------------------------------------------------------------------------- /hypernets/tabular/cuml_ex/_data_cleaner.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | 6 | import cudf 7 | import cupy 8 | 9 | from ._transformer import Localizable, copy_attrs_as_local 10 | from ..data_cleaner import DataCleaner, _CleanerHelper 11 | 12 | 13 | class CumlDataCleaner(DataCleaner, Localizable): 14 | @staticmethod 15 | def get_helper(X, y): 16 | if isinstance(X, (cudf.DataFrame, cudf.Series)): 17 | return _CumlCleanerHelper() 18 | else: 19 | return DataCleaner.get_helper(X, y) 20 | 21 | def as_local(self): 22 | target = DataCleaner(nan_chars=self.nan_chars, correct_object_dtype=self.correct_object_dtype, 23 | drop_constant_columns=self.drop_constant_columns, 24 | drop_duplicated_columns=self.drop_duplicated_columns, 25 | drop_label_nan_rows=self.drop_label_nan_rows, 26 | drop_idness_columns=self.drop_idness_columns, 27 | replace_inf_values=self.replace_inf_values, 28 | drop_columns=self.drop_columns, 29 | reserve_columns=self.reserve_columns, 30 | reduce_mem_usage=self.reduce_mem_usage, 31 | int_convert_to=self.int_convert_to) 32 | copy_attrs_as_local(self, target, 'df_meta_', 'columns_', 'dropped_constant_columns_', 33 | 'dropped_idness_columns_', 'dropped_duplicated_columns_') 34 | 35 | return target 36 | 37 | 38 | class _CumlCleanerHelper(_CleanerHelper): 39 | @staticmethod 40 | def _get_duplicated_columns(df): 41 | columns = df.columns.to_list() 42 | duplicates = set() 43 | 44 | for i, c in enumerate(columns[:-1]): 45 | if c in duplicates: 46 | continue 47 | for nc in columns[i + 1:]: 48 | if df[c].equals(df[nc]): 49 | duplicates.add(nc) 50 | 51 | return {c: c in duplicates for c in columns} 52 | 53 | @staticmethod 54 | def replace_nan_chars(X: cudf.DataFrame, nan_chars): 55 | cat_cols = X.select_dtypes(['object', 'string', ]) 56 | if cat_cols.shape[1] > 0: 57 | cat_cols = cat_cols.replace(nan_chars, cupy.nan) 58 | X[cat_cols.columns] = cat_cols 
59 | return X 60 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/tb_cuml/cache_test.py: -------------------------------------------------------------------------------- 1 | from hypernets.tabular.cache import cache, clear 2 | from hypernets.tabular.datasets import dsutils 3 | from . import if_cuml_ready, is_cuml_installed 4 | from ..cache_test import CacheCounter 5 | 6 | if is_cuml_installed: 7 | import cudf 8 | from hypernets.tabular.cuml_ex import CumlToolBox 9 | from hypernets.tabular.cuml_ex._transformer import MultiLabelEncoder 10 | 11 | 12 | class CachedCumlMultiLabelEncoder(MultiLabelEncoder): 13 | cache_counter = CacheCounter() 14 | 15 | @cache(attr_keys='columns', 16 | attrs_to_restore='columns,dtype,encoders', 17 | callbacks=cache_counter) 18 | def fit_transform(self, X, *args): 19 | return super().fit_transform(X, *args) 20 | 21 | @cache(attr_keys='columns', 22 | attrs_to_restore='columns,dtype,encoders', 23 | callbacks=cache_counter) 24 | def fit_transform_as_array(self, X, *args): 25 | X = super().fit_transform(X, *args) 26 | return X.values 27 | 28 | 29 | @if_cuml_ready 30 | def test_cache_cuml(): 31 | clear() 32 | 33 | cache_counter = CachedCumlMultiLabelEncoder.cache_counter 34 | df = cudf.from_pandas(dsutils.load_bank()) 35 | 36 | t = MultiLabelEncoder() 37 | X = t.fit_transform(df.copy()) 38 | 39 | cache_counter.reset() 40 | t1 = CachedCumlMultiLabelEncoder() 41 | X1 = t1.fit_transform(df.copy()) 42 | t2 = CachedCumlMultiLabelEncoder() 43 | X2 = t2.fit_transform(df.copy()) 44 | 45 | hasher = CumlToolBox.data_hasher() 46 | assert hasher(X) == hasher(X1) == hasher(X2) 47 | assert cache_counter.enter_counter.value == 2 48 | assert cache_counter.apply_counter.value <= 2 49 | assert cache_counter.store_counter.value <= 2 50 | assert cache_counter.apply_counter.value + cache_counter.store_counter.value == 2 51 | 52 | cache_counter.reset() 53 | t3 = CachedCumlMultiLabelEncoder() 54 | X3 = t3.fit_transform_as_array(df.copy()) 55 | t4 = CachedCumlMultiLabelEncoder() 56 | X4 = t4.fit_transform_as_array(df.copy()) 57 | 58 | assert hasher(X3) == hasher(X4) 59 | assert cache_counter.enter_counter.value == 2 60 | assert cache_counter.apply_counter.value <= 2 61 | assert cache_counter.store_counter.value <= 2 62 | assert cache_counter.apply_counter.value + cache_counter.store_counter.value == 2 63 | -------------------------------------------------------------------------------- /hypernets/experiment/cfg.py: -------------------------------------------------------------------------------- 1 | from hypernets.conf import configure, Configurable, Bool, Int, String, List, Dict 2 | 3 | 4 | @configure() 5 | class ExperimentCfg(Configurable): 6 | experiment_callbacks_console = \ 7 | List(default_value=[], 8 | allow_none=True, config=True, 9 | help='ExperimentCallback instance or name list.' 10 | ) 11 | experiment_callbacks_notebook = \ 12 | List(default_value=['hypernets.experiment.SimpleNotebookCallback', ], 13 | allow_none=True, config=True, 14 | help='ExperimentCallback instance or name list.' 15 | ) 16 | 17 | experiment_default_target_set = \ 18 | List(default_value=['y', 'target', 'class'], 19 | allow_none=True, config=True, 20 | help='Default target names.' 
21 | ) 22 | experiment_auto_down_sample_enabled = \ 23 | Bool(False, 24 | allow_none=True, config=True, 25 | help='' 26 | ) 27 | experiment_auto_down_sample_rows_threshold = \ 28 | Int(10000, 29 | allow_none=True, config=True, 30 | help='' 31 | ) 32 | experiment_discriminator = \ 33 | String('once_percentile', 34 | allow_none=True, config=True, 35 | help='discriminator identity, "percentile" or "progressive"', 36 | ) 37 | experiment_discriminator_options = \ 38 | Dict(default_value={'percentile': 50, 'min_trials': 5, 'min_steps': 5, 'stride': 1}, 39 | key_trait=String(), 40 | allow_none=True, config=True, 41 | help='discriminator settings', 42 | ) 43 | experiment_data_adaption_min_cols_limit = \ 44 | Int(10, 45 | allow_none=True, config=True, 46 | help='' 47 | ) 48 | 49 | hyper_model_callbacks_console = \ 50 | List(default_value=['hypernets.core.callbacks.SummaryCallback', ], 51 | allow_none=True, config=True, 52 | help='Callback instance or name list.' 53 | ) 54 | hyper_model_callbacks_notebook = \ 55 | List(default_value=['hypernets.core.callbacks.NotebookCallback', 56 | 'hypernets.core.callbacks.ProgressiveCallback', ], 57 | allow_none=True, config=True, 58 | help='Callback instance or name list.' 59 | ) 60 | -------------------------------------------------------------------------------- /hypernets/tabular/dask_ex/_data_cleaner.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | from functools import partial 6 | 7 | import numpy as np 8 | import pandas as pd 9 | from dask import dataframe as dd, array as da 10 | 11 | from hypernets.utils import logging 12 | from ..data_cleaner import DataCleaner, _CleanerHelper 13 | 14 | logger = logging.get_logger(__name__) 15 | 16 | 17 | class DaskDataCleaner(DataCleaner): 18 | @staticmethod 19 | def get_helper(X, y): 20 | if isinstance(X, (dd.DataFrame, dd.Series, da.Array)): 21 | return _DaskCleanerHelper() 22 | else: 23 | return DataCleaner.get_helper(X, y) 24 | 25 | 26 | class _DaskCleanerHelper(_CleanerHelper): 27 | @staticmethod 28 | def reduce_mem_usage(df, excludes=None): 29 | raise NotImplementedError('"reduce_mem_usage" is not supported for Dask DataFrame.') 30 | 31 | @staticmethod 32 | def _get_duplicated_columns(df): 33 | duplicates = df.reduction(chunk=lambda c: pd.DataFrame(c.T.duplicated()).T, 34 | aggregate=lambda a: np.all(a, axis=0)).compute() 35 | return duplicates 36 | 37 | @staticmethod 38 | def _detect_dtype(dtype, df): 39 | result = {} 40 | df = df.copy() 41 | for col in df.columns.to_list(): 42 | try: 43 | df[col] = df[col].astype(dtype) 44 | result[col] = [True] # as-able 45 | except: 46 | result[col] = [False] 47 | return pd.DataFrame(result) 48 | 49 | def _correct_object_dtype_as(self, X, df_meta): 50 | for dtype, columns in df_meta.items(): 51 | columns = [c for c in columns if str(X[c].dtype) != dtype] 52 | if len(columns) == 0: 53 | continue 54 | 55 | correctable = X[columns].reduction(chunk=partial(self._detect_dtype, dtype), 56 | aggregate=lambda a: np.all(a, axis=0), 57 | meta={c: 'bool' for c in columns}).compute() 58 | correctable = [i for i, v in correctable.items() if v] 59 | # for col in correctable: 60 | # X[col] = X[col].astype(dtype) 61 | if correctable: 62 | X[correctable] = X[correctable].astype(dtype) 63 | logger.info(f'Correct columns [{",".join(correctable)}] to {dtype}.') 64 | 65 | return X 66 | -------------------------------------------------------------------------------- 
/hypernets/tests/tabular/psudo_labeling_test.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | from math import ceil 3 | 4 | from hypernets.tabular import get_tool_box 5 | from hypernets.tabular import sklearn_ex as skex 6 | from hypernets.tabular.datasets import dsutils 7 | 8 | 9 | class TestPseudoLabeling: 10 | @classmethod 11 | def setup_class(cls): 12 | cls.df = cls.load_data() 13 | 14 | @staticmethod 15 | def load_data(): 16 | df = dsutils.load_bank() 17 | return skex.MultiLabelEncoder().fit_transform(df) 18 | 19 | def run_sample(self, X, y): 20 | tb = get_tool_box(X, y) 21 | model = tb.general_estimator(X, y) 22 | 23 | X_train, X_test, y_train, y_test = \ 24 | tb.train_test_split(X, y, test_size=0.5, random_state=7) 25 | model.fit(X_train, y_train) 26 | proba = model.predict_proba(X_test) 27 | 28 | preds = model.predict(X_test) 29 | preds, = tb.to_local(preds) 30 | c0 = Counter(preds) 31 | print('original samples:', c0) 32 | 33 | options = dict(threshold=0.8, number=10, quantile=0.8) 34 | for strategy in ['threshold', 'number', 'quantile', ]: 35 | pl = tb.pseudo_labeling(strategy=strategy, **options) 36 | X_pseudo, y_pseudo = pl.select(X_test.copy(), model.classes_, proba.copy()) 37 | 38 | y_pseudo, = tb.to_local(y_pseudo) 39 | 40 | # validate result data 41 | if len(y_pseudo) > 0: 42 | expected_y_pseudo = model.predict(X_pseudo) 43 | expected_y_pseudo, = tb.to_local(expected_y_pseudo) 44 | assert (expected_y_pseudo == y_pseudo).all() 45 | 46 | # validate sample numbers 47 | c = Counter(y_pseudo) 48 | if strategy == 'number': 49 | assert all([v <= options['number'] for k, v in c.items()]) 50 | elif strategy == 'quantile': 51 | if self.is_quantile_exact(): 52 | expected_c = {k: ceil(c0[k] * (1 - options['quantile'])) for k, v in c0.items()} 53 | assert c == expected_c 54 | 55 | @staticmethod 56 | def is_quantile_exact(): 57 | return True 58 | 59 | def test_binary(self): 60 | X = self.df.copy() 61 | y = X.pop('y') 62 | self.run_sample(X, y) 63 | 64 | def test_multiclass(self): 65 | X = self.df.copy() 66 | y = X.pop('education') 67 | self.run_sample(X, y) 68 | -------------------------------------------------------------------------------- /hypernets/tests/tabular/tb_dask/cache_test.py: -------------------------------------------------------------------------------- 1 | from hypernets.tabular.cache import cache, clear 2 | from hypernets.tabular.datasets import dsutils 3 | from . 
import if_dask_ready, is_dask_installed 4 | from ..cache_test import CacheCounter 5 | 6 | if is_dask_installed: 7 | import dask.dataframe as dd 8 | from hypernets.tabular import dask_ex as dex 9 | 10 | 11 | class CachedDaskMultiLabelEncoder(dex.SafeOrdinalEncoder): 12 | cache_counter = CacheCounter() 13 | 14 | @cache(attr_keys='columns', 15 | attrs_to_restore='columns,dtype,categorical_columns_,non_categorical_columns_,categories_', 16 | callbacks=cache_counter) 17 | def fit_transform(self, X, *args): 18 | return super().fit_transform(X, *args) 19 | 20 | @cache(attr_keys='columns', 21 | attrs_to_restore='columns,dtype,categorical_columns_,non_categorical_columns_,categories_', 22 | callbacks=cache_counter) 23 | def fit_transform_as_array(self, X, *args): 24 | X = super().fit_transform(X, *args) 25 | return X.to_dask_array(lengths=True) 26 | 27 | 28 | @if_dask_ready 29 | def test_cache_dask(): 30 | clear() 31 | 32 | cache_counter = CachedDaskMultiLabelEncoder.cache_counter 33 | df = dd.from_pandas(dsutils.load_bank(), npartitions=2) 34 | 35 | t = dex.SafeOrdinalEncoder() 36 | X = t.fit_transform(df.copy()) 37 | 38 | cache_counter.reset() 39 | t1 = CachedDaskMultiLabelEncoder() 40 | X1 = t1.fit_transform(df.copy()) 41 | t2 = CachedDaskMultiLabelEncoder() 42 | X2 = t2.fit_transform(df.copy()) 43 | 44 | hasher = dex.DaskToolBox.data_hasher() 45 | assert hasher(X) == hasher(X1) == hasher(X2) 46 | assert cache_counter.enter_counter.value == 2 47 | assert cache_counter.apply_counter.value <= 2 48 | assert cache_counter.store_counter.value <= 2 49 | assert cache_counter.apply_counter.value + cache_counter.store_counter.value == 2 50 | 51 | cache_counter.reset() 52 | t3 = CachedDaskMultiLabelEncoder() 53 | X3 = t3.fit_transform_as_array(df.copy()) 54 | t4 = CachedDaskMultiLabelEncoder() 55 | X4 = t4.fit_transform_as_array(df.copy()) 56 | 57 | assert hasher(X3) == hasher(X4) 58 | assert cache_counter.enter_counter.value == 2 59 | assert cache_counter.apply_counter.value <= 2 60 | assert cache_counter.store_counter.value <= 2 61 | assert cache_counter.apply_counter.value + cache_counter.store_counter.value == 2 62 | -------------------------------------------------------------------------------- /docs/source/hypernets.searchers.rst: -------------------------------------------------------------------------------- 1 | hypernets.searchers package 2 | =========================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | hypernets.searchers.evolution\_searcher module 8 | ---------------------------------------------- 9 | 10 | .. automodule:: hypernets.searchers.evolution_searcher 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | hypernets.searchers.genetic module 16 | ---------------------------------- 17 | 18 | .. automodule:: hypernets.searchers.genetic 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | hypernets.searchers.grid\_searcher module 24 | ----------------------------------------- 25 | 26 | .. automodule:: hypernets.searchers.grid_searcher 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | hypernets.searchers.mcts\_core module 32 | ------------------------------------- 33 | 34 | .. automodule:: hypernets.searchers.mcts_core 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | hypernets.searchers.mcts\_searcher module 40 | ----------------------------------------- 41 | 42 | .. 
automodule:: hypernets.searchers.mcts_searcher 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | hypernets.searchers.moead\_searcher module 48 | ------------------------------------------ 49 | 50 | .. automodule:: hypernets.searchers.moead_searcher 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | hypernets.searchers.moo module 56 | ------------------------------ 57 | 58 | .. automodule:: hypernets.searchers.moo 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | hypernets.searchers.nsga\_searcher module 64 | ----------------------------------------- 65 | 66 | .. automodule:: hypernets.searchers.nsga_searcher 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | 71 | hypernets.searchers.playback\_searcher module 72 | --------------------------------------------- 73 | 74 | .. automodule:: hypernets.searchers.playback_searcher 75 | :members: 76 | :undoc-members: 77 | :show-inheritance: 78 | 79 | hypernets.searchers.random\_searcher module 80 | ------------------------------------------- 81 | 82 | .. automodule:: hypernets.searchers.random_searcher 83 | :members: 84 | :undoc-members: 85 | :show-inheritance: 86 | 87 | Module contents 88 | --------------- 89 | 90 | .. automodule:: hypernets.searchers 91 | :members: 92 | :undoc-members: 93 | :show-inheritance: 94 | -------------------------------------------------------------------------------- /hypernets/tabular/evaluator/hyperdt.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | import numpy as np 7 | from deeptables.models.hyper_dt import HyperDT 8 | from deeptables.models.hyper_dt import mini_dt_space 9 | from hypernets.core import EarlyStoppingCallback 10 | from hypernets.core.searcher import OptimizeDirection 11 | from hypernets.searchers import EvolutionSearcher 12 | from sklearn.model_selection import train_test_split 13 | 14 | from . 
import BaseEstimator 15 | 16 | 17 | class HyperDTEstimator(BaseEstimator): 18 | def __init__(self, task, reward_metric, max_trials=30, epochs=100, earlystop_rounds=30, time_limit=3600, 19 | expected_reward=None, **kwargs): 20 | super(HyperDTEstimator, self).__init__(task) 21 | self.name = 'HyperDT' 22 | self.kwargs = kwargs 23 | self.estimator = None 24 | self.max_trials = max_trials 25 | self.reward_metric = reward_metric 26 | self.epochs = epochs 27 | self.earlystop_rounds = earlystop_rounds 28 | self.time_limit = time_limit 29 | self.expected_reward = expected_reward 30 | 31 | def train(self, X, y, X_test): 32 | searcher = EvolutionSearcher(mini_dt_space, optimize_direction=OptimizeDirection.Maximize, population_size=30, 33 | sample_size=10, regularized=True, candidates_size=10) 34 | es = EarlyStoppingCallback(self.earlystop_rounds, 'max', time_limit=self.time_limit, 35 | expected_reward=self.expected_reward) 36 | 37 | hdt = HyperDT(searcher, 38 | callbacks=[es], 39 | reward_metric=self.reward_metric, 40 | cache_preprocessed_data=True, 41 | ) 42 | stratify = y 43 | if self.task == 'regression': 44 | stratify = None 45 | X_train, X_eval, y_train, y_eval = train_test_split(X, y, test_size=0.3, 46 | random_state=9527, stratify=stratify) 47 | 48 | hdt.search(X_train, y_train, X_eval, y_eval, max_trials=self.max_trials, epochs=self.epochs) 49 | best_trial = hdt.get_best_trial() 50 | self.estimator = hdt.load_estimator(best_trial.model_file) 51 | 52 | def predict_proba(self, X): 53 | proba = self.estimator.predict_proba(X) 54 | return proba 55 | 56 | def predict(self, X): 57 | return self.estimator.predict(X) 58 | -------------------------------------------------------------------------------- /hypernets/core/mutables.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | from collections import OrderedDict 6 | 7 | 8 | class MutableScope: 9 | def __init__(self): 10 | self.reset() 11 | self.stack = [] 12 | 13 | @property 14 | def current_path(self): 15 | return '.'.join(self.stack) 16 | 17 | def entry(self, name): 18 | self.stack.append(name) 19 | 20 | def exit(self): 21 | self.stack.pop() 22 | 23 | def reset(self): 24 | self.id_dict = OrderedDict() 25 | self.name_dict = OrderedDict() 26 | 27 | def register(self, mutable): 28 | assert isinstance(mutable, Mutable) 29 | 30 | if mutable.name is None: 31 | mutable.id = self.assign_id(mutable) 32 | mutable.name = mutable.id 33 | else: 34 | if self.name_dict.get(mutable.name) is not None: 35 | raise ValueError(f'name `{mutable.name}` is duplicate.') 36 | mutable.id = f'ID_{mutable.name}' 37 | 38 | self.name_dict[mutable.name] = mutable 39 | self.id_dict[mutable.id] = mutable 40 | 41 | def assign_id(self, mutable): 42 | prefix = mutable.__class__.__name__ 43 | if mutable.type is not None: 44 | prefix = mutable.type + '_' + prefix 45 | i = 1 46 | while True: 47 | id = f'{prefix}_{i}' 48 | if id not in self.id_dict: 49 | break 50 | i += 1 51 | return id 52 | 53 | def get_mutable(self, id): 54 | return self.id_dict[id] 55 | 56 | def get_mutable_by_name(self, name): 57 | return self.name_dict[name] 58 | 59 | 60 | class Mutable(object): 61 | def __init__(self, scope, name=None): 62 | self.attach_to_scope(scope, name) 63 | 64 | def attach_to_scope(self, scope, name=None): 65 | assert scope is not None, 'scope cannot be None' 66 | self.scope = scope 67 | self.name = name 68 | self.alias = None 69 | self.scope.register(self) 70 | self.path = scope.current_path 71 | 72 | def 
__repr__(self): 73 | # if self.alias is not None: 74 | # return 'ALIAS:' + self.alias 75 | # else: 76 | # return 'ID:' + self._id 77 | return self._id 78 | 79 | @property 80 | def type(self): 81 | return None 82 | 83 | @property 84 | def id(self): 85 | return self._id 86 | 87 | @id.setter 88 | def id(self, id): 89 | self._id = id 90 | 91 | def update(self): 92 | pass 93 | -------------------------------------------------------------------------------- /hypernets/tests/utils/tuning_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | from hypernets.utils.param_tuning import search_params 8 | from hypernets.core.search_space import Choice, Real, Int 9 | import numpy as np 10 | 11 | 12 | def func1(p1=Choice(['a', 'b'], random_state=np.random.RandomState(9527)), 13 | p2=Int(1, 10, 2, random_state=np.random.RandomState(9527)), 14 | p3=Real(1.0, 5.0, random_state=np.random.RandomState(9527)), p4=9): 15 | print(f'p1:{p1},p2:{p2},p3{p3},p4:{p4}') 16 | return p2 * p3 17 | 18 | 19 | def func_early_stopping(p1=Choice(['a', 'b'], random_state=np.random.RandomState(9527)), 20 | p2=Int(1, 10, 2, random_state=np.random.RandomState(9527)), 21 | p3=Real(1.0, 5.0, random_state=np.random.RandomState(9527)), 22 | p4=9): 23 | print(f'p1:{p1},p2:{p2},p3{p3},p4:{p4}') 24 | return 0.6 25 | 26 | 27 | class Test_ParamTuning(): 28 | def test_search_params(self): 29 | print('start') 30 | history = search_params(func1, 'grid', max_trials=10, optimize_direction='max') 31 | best = history.get_best() 32 | assert best.reward[0] == 14.370000000000001 33 | assert best.trial_no == 10 34 | 35 | def test_trigger_by_trials(self): 36 | from hypernets.core import EarlyStoppingCallback 37 | es = EarlyStoppingCallback(3, 'max', 38 | time_limit=3600, 39 | expected_reward=1) 40 | 41 | history = search_params(func_early_stopping, 'grid', max_trials=10, optimize_direction='max', callbacks=[es]) 42 | best = history.get_best() 43 | assert best.reward[0] == 0.6 44 | assert best.trial_no == 1 45 | assert len(history.trials) == 4 46 | assert es.triggered_reason == EarlyStoppingCallback.REASON_TRIAL_LIMIT 47 | 48 | def test_trigger_by_reward(self): 49 | from hypernets.core import EarlyStoppingCallback 50 | es = EarlyStoppingCallback(3, 'max', 51 | time_limit=3600, 52 | expected_reward=0.5) 53 | 54 | history = search_params(func_early_stopping, 'grid', max_trials=10, optimize_direction='max', callbacks=[es]) 55 | best = history.get_best() 56 | assert best.reward[0] == 0.6 57 | assert best.trial_no == 1 58 | assert len(history.trials) == 1 59 | assert es.triggered_reason == EarlyStoppingCallback.REASON_EXPECTED_REWARD 60 | 61 | -------------------------------------------------------------------------------- /hypernets/core/searcher.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | """ 5 | import enum 6 | 7 | from hypernets.utils import to_repr 8 | from .stateful import Stateful 9 | 10 | 11 | class OptimizeDirection(enum.Enum): 12 | Minimize = 'min' 13 | Maximize = 'max' 14 | 15 | 16 | class Searcher(Stateful): 17 | def __init__(self, space_fn, optimize_direction=OptimizeDirection.Minimize, use_meta_learner=True, 18 | space_sample_validation_fn=None): 19 | self.space_fn = space_fn 20 | self.use_meta_learner = use_meta_learner 21 | self.optimize_direction = optimize_direction 22 | self.meta_learner = None 23 | self.space_sample_validation_fn = 
space_sample_validation_fn
24 | 
25 |     def set_meta_learner(self, meta_learner):
26 |         self.meta_learner = meta_learner
27 | 
28 |     @property
29 |     def parallelizable(self):
30 |         return False
31 | 
32 |     def sample(self, space_options=None):
33 |         raise NotImplementedError
34 | 
35 |     def _random_sample(self, **space_kwargs):
36 |         if space_kwargs is None:
37 |             space_kwargs = {}
38 |         space_sample = self.space_fn(**space_kwargs)
39 |         space_sample.random_sample()
40 |         return space_sample
41 | 
42 |     def _sample_and_check(self, sample_fn, space_options=None):
43 |         if space_options is None:
44 |             space_options = {}
45 | 
46 |         counter = 0
47 |         while True:
48 |             space_sample = sample_fn(**space_options)
49 |             counter += 1
50 |             if counter >= 1000:
51 |                 raise ValueError('Unable to take a valid sample within the retry limit (1000).')
52 |             if self.space_sample_validation_fn is not None:
53 |                 if self.space_sample_validation_fn(space_sample):
54 |                     break
55 |             else:
56 |                 break
57 |         return space_sample
58 | 
59 |     def get_best(self):
60 |         raise NotImplementedError
61 | 
62 |     def update_result(self, space, result):
63 |         raise NotImplementedError
64 | 
65 |     def summary(self):
66 |         return 'No Summary'
67 | 
68 |     def reset(self):
69 |         raise NotImplementedError
70 | 
71 |     def export(self):
72 |         raise NotImplementedError
73 | 
74 |     def kind(self):
75 |         """Kind of the Searcher; one of 'soo' (single-objective) or 'moo' (multi-objective).
76 |         This method is used to avoid having to import MOOSearcher when detecting the Searcher type.
77 |         """
78 |         return 'soo'
79 | 
80 |     def __repr__(self):
81 |         return to_repr(self)
82 | 
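83 | 
84 | # Usage sketch (illustrative only, never executed here): a concrete subclass
85 | # such as GridSearcher implements `sample`/`update_result`, and a search engine
86 | # drives it roughly as below. `evaluate_sample` is a hypothetical stand-in for
87 | # fitting and scoring an Estimator on the sampled space:
88 | #
89 | #     searcher = GridSearcher(space_fn, optimize_direction=OptimizeDirection.Maximize)
90 | #     for _ in range(max_trials):
91 | #         space_sample = searcher.sample()              # a fully assigned HyperSpace
92 | #         reward = evaluate_sample(space_sample)        # hypothetical evaluation step
93 | #         searcher.update_result(space_sample, reward)  # feed the reward back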
--------------------------------------------------------------------------------
/hypernets/tests/tabular/toolbox_test.py:
--------------------------------------------------------------------------------
1 | import os.path as path
2 | 
3 | import numpy as np
4 | import pandas as pd
5 | 
6 | from hypernets.tabular import get_tool_box
7 | from hypernets.tabular.datasets import dsutils
8 | from hypernets.utils import const
9 | 
10 | 
11 | class TestToolBox:
12 | 
13 |     def test_infer_task_type(self):
14 |         y1 = np.random.randint(0, 2, size=(1000), dtype='int')
15 |         y2 = np.random.randint(0, 2, size=(1000)).astype('str')
16 |         y3 = np.random.randint(0, 20, size=(1000)).astype('object')
17 |         y4 = np.random.random(size=(1000)).astype('float')
18 |         y5 = np.array([1, 1, 2, 2, 'na'])
19 | 
20 |         tb = get_tool_box(y1)
21 | 
22 |         task, _ = tb.infer_task_type(y1)
23 |         assert task == const.TASK_BINARY
24 | 
25 |         task, _ = tb.infer_task_type(y2)
26 |         assert task == const.TASK_BINARY
27 | 
28 |         task, _ = tb.infer_task_type(y3)
29 |         assert task == const.TASK_MULTICLASS
30 | 
31 |         task, _ = tb.infer_task_type(y4)
32 |         assert task == const.TASK_REGRESSION
33 | 
34 |         task, _ = tb.infer_task_type(y5, excludes=['na'])
35 |         assert task == const.TASK_BINARY
36 | 
37 |     def test_detect_estimator_lightgbm(self):
38 |         tb = get_tool_box(pd.DataFrame)
39 |         detector = tb.estimator_detector('lightgbm.LGBMClassifier', 'binary')
40 |         r = detector()
41 |         assert r == {'installed', 'initialized', 'fitted'}
42 | 
43 |     def test_concat_df(self):
44 |         df = pd.DataFrame(dict(
45 |             x1=['a', 'b', 'c'],
46 |             x2=[1, 2, 3]
47 |         ))
48 |         tb = get_tool_box(pd.DataFrame)
49 | 
50 |         # DataFrame + DataFrame
51 |         df1 = tb.concat_df([df, df], axis=0)
52 |         df2 = pd.concat([df, df], axis=0)
53 |         assert (df1 == df2).all().all()
54 | 
55 |         # DataFrame + ndarray
56 |         df1 = tb.concat_df([df, df.values], axis=0)
57 |         df2 = pd.concat([df, df], axis=0)
58 |         assert isinstance(df1, pd.DataFrame)
59 |         assert (df1 == df2).all().all()
60 | 
61 |         # Series + ndarray
62 |         s = df['x1']
63 |         df1 = tb.concat_df([s, s.values], axis=0)
64 |         df2 = pd.concat([s, s], axis=0)
65 |         assert isinstance(df1, pd.Series)
66 |         assert (df1 == df2).all()
67 | 
68 |     def test_load_data(self):
69 |         data_dir = path.split(dsutils.__file__)[0]
70 |         data_file = f'{data_dir}/blood.csv'
71 |         tb = get_tool_box(pd.DataFrame)
72 |         df = tb.load_data(data_file)
73 |         assert isinstance(df, pd.DataFrame)
74 | 
--------------------------------------------------------------------------------
/hypernets/core/objective.py:
--------------------------------------------------------------------------------
1 | import abc
2 | from hypernets.core.searcher import OptimizeDirection
3 | 
4 | 
5 | class Objective(metaclass=abc.ABCMeta):
6 |     """ Objective = Indicator metric + Direction"""
7 | 
8 |     def __init__(self, name, direction, need_train_data=False, need_val_data=True, need_test_data=False):
9 |         self.name = name
10 |         self.direction = direction
11 |         self.need_train_data = need_train_data
12 |         self.need_val_data = need_val_data
13 |         self.need_test_data = need_test_data
14 | 
15 |     def evaluate(self, trial, estimator, X_train, y_train, X_val, y_val, X_test=None, **kwargs) -> float:
16 |         if self.need_test_data:
17 |             assert X_test is not None, "need test data"
18 | 
19 |         if self.need_train_data:
20 |             assert X_train is not None and y_train is not None, "need train data"
21 | 
22 |         if self.need_val_data:
23 |             assert X_val is not None and y_val is not None, "need validation data"
24 | 
25 |         return self._evaluate(trial, estimator, X_train, y_train, X_val, y_val, X_test=X_test, **kwargs)
26 | 
27 |     @abc.abstractmethod
28 |     def _evaluate(self, trial, estimator, X_train, y_train, X_val, y_val, X_test=None, **kwargs) -> float:
29 |         raise NotImplementedError
30 | 
31 |     def evaluate_cv(self, trial, estimator, X_trains, y_trains,
32 |                     X_vals, y_vals, X_test=None, **kwargs) -> float:
33 | 
34 |         if self.need_test_data:
35 |             assert X_test is not None, "need test data"
36 | 
37 |         if self.need_train_data:
38 |             assert X_trains is not None and y_trains is not None, "need train data"
39 |             assert len(X_trains) == len(y_trains)
40 | 
41 |         if self.need_val_data:
42 |             assert X_vals is not None and y_vals is not None, "need validation data"
43 |             assert len(X_vals) == len(y_vals)
44 | 
45 |         return self._evaluate_cv(trial=trial, estimator=estimator, X_trains=X_trains, y_trains=y_trains,
46 |                                  X_vals=X_vals, y_vals=y_vals, X_test=X_test, **kwargs)
47 | 
48 |     @abc.abstractmethod
49 |     def _evaluate_cv(self, trial, estimator, X_trains, y_trains, X_vals, y_vals, X_test=None, **kwargs) -> float:
50 |         raise NotImplementedError
51 | 
52 |     def __repr__(self):
53 |         return f"{self.__class__.__name__}(name={self.name}, direction={self.direction}," \
54 |                f" need_train_data={self.need_train_data}," \
55 |                f" need_val_data={self.need_val_data}," \
56 |                f" need_test_data={self.need_test_data})"
57 | 
--------------------------------------------------------------------------------
/hypernets/core/meta_learner.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'yangjian'
3 | """
4 | 
5 | """
6 | from lightgbm import LGBMRegressor
7 | import numpy as np
8 | from ..utils import logging
9 | 
10 | logger = logging.get_logger(__name__)
11 | 
12 | 
13 | class MetaLearner(object):
14 |     def __init__(self, history, dataset_id, trial_store):
15 |         self.trial_store = trial_store
16 |         self.dataset_id = dataset_id
17 |         self.history = history
18 |         self.regressors = {}
19 | 
self.store_history = {} 20 | 21 | if logger.is_info_enabled(): 22 | logger.info(f'Initialize Meta Learner: dataset_id:{dataset_id}') 23 | 24 | def new_sample(self, space_sample): 25 | self.fit(space_sample.signature) 26 | 27 | def fit(self, space_signature): 28 | 29 | features = self.extract_features_and_labels(space_signature) 30 | x = [] 31 | y = [] 32 | for feature, label in features: 33 | if label != 0: 34 | x.append(feature) 35 | y.append(label) 36 | 37 | store_history = self.store_history.get(space_signature) 38 | 39 | if self.trial_store is not None and store_history is None: 40 | trials = self.trial_store.get_all(self.dataset_id, space_signature) 41 | store_x = [] 42 | store_y = [] 43 | for t in trials: 44 | store_x.append(t.space_sample_vectors) 45 | store_y.append(t.reward) 46 | store_history = (store_x, store_y) 47 | self.store_history[space_signature] = store_history 48 | 49 | if store_history is None: 50 | store_history = ([], []) 51 | 52 | store_x, store_y = store_history 53 | x = x + store_x 54 | y = y + store_y 55 | if len(x) >= 2: 56 | regressor = LGBMRegressor(min_data=1, min_data_in_bin=1, verbosity=-1) 57 | regressor.fit(x, y) 58 | # if logger.is_info_enabled(): 59 | # logger.info(regressor.predict(x)) 60 | self.regressors[space_signature] = regressor 61 | 62 | def predict(self, space_sample, default_value=np.inf): 63 | regressor = self.regressors.get(space_sample.signature) 64 | if regressor is not None: 65 | score = regressor.predict([space_sample.vectors]) 66 | else: 67 | score = default_value 68 | return score 69 | 70 | def extract_features_and_labels(self, signature): 71 | features = [(t.space_sample.vectors, t.reward) for t in self.history.trials if 72 | t.space_sample.signature == signature] 73 | return features 74 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | .idea 3 | 4 | ### Python template 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | dt_output/ 134 | log/ 135 | trial_store/ 136 | tmp/ 137 | catboost_info/ 138 | 139 | #dispatchers 140 | logs/ 141 | workdir/ 142 | dask-worker-space/ 143 | 144 | -------------------------------------------------------------------------------- /hypernets/tests/discriminators/percentile.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'yangjian' 3 | """ 4 | 5 | """ 6 | 7 | from hypernets.discriminators import PercentileDiscriminator, ProgressivePercentileDiscriminator 8 | 9 | from . 
import history, group_id
10 | 
11 | 
12 | class Test_PercentileDiscriminator():
13 |     def test_percentile(self):
14 |         d = PercentileDiscriminator(50, min_trials=5, min_steps=5, stride=1, history=history, optimize_direction='min')
15 |         p1 = d.is_promising([0.9, 0.9, 0.9, 0.9], group_id)
16 |         assert p1 == True
17 | 
18 |         p2 = d.is_promising([0.9, 0.9, 0.9, 0.9, 0.9], group_id)
19 |         assert p2 == False
20 | 
21 |         p2 = d.is_promising([0.9, 0.9, 0.9, 0.9, 0.525], group_id)
22 |         assert p2 == False
23 | 
24 |         p2 = d.is_promising([0.9, 0.9, 0.9, 0.9, 0.524], group_id)
25 |         assert p2 == True
26 | 
27 |         d = PercentileDiscriminator(0, min_trials=5, min_steps=5, stride=1, history=history, optimize_direction='min')
28 |         p1 = d.is_promising([0.9, 0.9, 0.9, 0.9, 0.50], group_id)
29 |         assert p1 == True
30 |         p1 = d.is_promising([0.9, 0.9, 0.9, 0.9, 0.56], group_id)
31 |         assert p1 == False
32 | 
33 |         d = PercentileDiscriminator(100, min_trials=5, min_steps=5, stride=1, history=history, optimize_direction='min')
34 |         p1 = d.is_promising([0.9, 0.9, 0.9, 0.9, 0.55], group_id)
35 |         assert p1 == False
36 |         p1 = d.is_promising([0.9, 0.9, 0.9, 0.9, 0.49], group_id)
37 |         assert p1 == True
38 | 
39 |     def test_progressive_percentile(self):
40 |         d = ProgressivePercentileDiscriminator([100, 90, 80, 60, 50, 40, 30, 20, 10, 0], min_trials=5, min_steps=5,
41 |                                                stride=1,
42 |                                                history=history, optimize_direction='min')
43 |         p1 = d.is_promising([0.1, 0.1, 0.1, 0.1, 0.1], group_id)
44 |         assert p1 == True
45 | 
46 |         p1 = d.is_promising([0.1, 0.1, 0.1, 0.1, 0.56], group_id)
47 |         assert p1 == False
48 | 
49 |         p1 = d.is_promising([0.1, 0.1, 0.1, 0.1, 0.1, 0.1], group_id)
50 |         assert p1 == True
51 | 
52 |         p1 = d.is_promising([0.1, 0.1, 0.1, 0.1, 0.1, 0.45], group_id)
53 |         assert p1 == False
54 | 
55 |         p1 = d.is_promising([0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1], group_id)
56 |         assert p1 == True
57 | 
58 |         p1 = d.is_promising([0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.345], group_id)
59 |         assert p1 == False
60 | 
61 |         p1 = d.is_promising([0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1], group_id)
62 |         assert p1 == True
63 | 
64 |         p1 = d.is_promising([0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.34], group_id)
65 |         assert p1 == False
66 | 
--------------------------------------------------------------------------------
/hypernets/dispatchers/run.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | import argparse
3 | 
4 | from hypernets.dispatchers.cluster import Cluster
5 | 
6 | 
7 | def main():
8 |     parser = argparse.ArgumentParser('run HyperNets experiment in cluster.')
9 |     parser.add_argument('--experiment', '-experiment',
10 |                         default=None,
11 |                         help='experiment id, default current timestamp')
12 |     parser.add_argument('--driver-broker', '-driver-broker',
13 |                         help='address of the driver broker'
14 |                              ', eg: grpc://<ip>:<port> to use grpc process broker'
15 |                              ', or just <ip> to use ssh process')
16 |     parser.add_argument('--driver-port', '-driver-port',
17 |                         type=int, default=8001,
18 |                         help='tcp port of the driver'
19 |                              ', the executors will connect to this port with grpc'
20 |                              ', default 8001')
21 |     parser.add_argument('--executor-brokers', '-executor-brokers',
22 |                         required=True,
23 |                         help='addresses of the executor nodes, separated by comma. '
24 |                              'eg: "grpc://<ip1>:<port1>,'
25 |                              'grpc://<ip2>:<port2>"')
26 |     parser.add_argument('--with-driver', '-with-driver',
27 |                         type=int, default=1,
28 |                         help='start driver process or not, default 1')
29 |     parser.add_argument('--spaces-dir', '-spaces-dir',
30 |                         default='tmp',
31 |                         help='driver directory to store space files, default "tmp"')
32 |     parser.add_argument('--logs-dir', '-logs-dir',
33 |                         default='logs',
34 |                         help='local directory to store log files')
35 |     parser.add_argument('--report-interval', '-report-interval',
36 |                         type=int, default=60,
37 |                         help='interval in seconds for reporting cluster processes, default 60')
38 |     args, argv = parser.parse_known_args()
39 | 
40 |     cluster = Cluster(args.experiment,
41 |                       args.driver_broker,
42 |                       args.driver_port,
43 |                       args.with_driver,
44 |                       args.executor_brokers.split(','),
45 |                       args.spaces_dir,
46 |                       args.logs_dir,
47 |                       args.report_interval,
48 |                       *argv)
49 |     cluster.run()
50 | 
51 | 
52 | if __name__ == '__main__':
53 |     try:
54 |         main()
55 |         print('done')
56 |     except KeyboardInterrupt:
57 |         print('KeyboardInterrupt')
58 | 
--------------------------------------------------------------------------------