├── .gitattributes ├── .gitignore ├── .travis.yml ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── docker ├── Dockerfile ├── README.md └── docker-compose.yml ├── docs ├── Makefile ├── _static │ ├── base_learner_origin.png │ ├── baselearner.gif │ ├── create_ensemble.png │ ├── create_open_project.png │ ├── ensemble.gif │ ├── finalized_base_learner.png │ ├── list_base_learners.png │ ├── listbaselearner.gif │ ├── main_data_extraction.png │ ├── meta_feature_extraction.png │ └── verified_base_learner.png ├── advanced.rst ├── conf.py ├── index.rst ├── installation.rst ├── thirdparty.rst └── walkthrough.rst ├── requirements.txt ├── runtestserver.py ├── setup.py └── xcessiv ├── __init__.py ├── automatedruns.py ├── config.py ├── constants.py ├── exceptions.py ├── functions.py ├── models.py ├── presets ├── __init__.py ├── cvsetting.py ├── learnersetting.py ├── learnersource.py ├── metricsetting.py └── tests │ ├── __init__.py │ ├── test_cvsetting.py │ ├── test_learnersetting.py │ └── test_metricsetting.py ├── rqtasks.py ├── scripts ├── __init__.py ├── runapp.py ├── runserver.py └── runworker.py ├── server.py ├── stacker.py ├── tests ├── __init__.py ├── extractmaindataset.py ├── myrf.py ├── test_functions.py ├── test_models.py ├── test_stacker.py └── test_views.py ├── ui ├── .gitignore ├── README.md ├── package.json ├── public │ ├── favicon.ico │ └── index.html └── src │ ├── App.css │ ├── App.js │ ├── App.test.js │ ├── AutomatedRuns │ ├── AutomatedRuns.css │ ├── AutomatedRunsDisplay.js │ └── Modals.js │ ├── BaseLearner │ ├── BaseLearner.css │ ├── BaseLearnerMoreDetailsModal.js │ └── ListBaseLearner.js │ ├── BaseLearnerOrigin │ ├── BaseLearnerOrigin.css │ ├── BaseLearnerOrigin.js │ ├── BaseLearnerOriginModals.js │ ├── ListBaseLearnerOrigin.js │ └── MetricGenerators.js │ ├── DatasetExtraction │ ├── DataExtractionTabs.js │ ├── DataVerificationResult.js │ ├── MainDataExtraction.css │ ├── MainDataExtraction.js │ ├── MetaFeatureExtraction.js │ ├── Modals.js │ ├── 
StackedEnsembleCV.js │ └── TestDataExtraction.js │ ├── Ensemble │ ├── Ensemble.css │ ├── EnsembleBuilder.js │ ├── EnsembleMoreDetailsModal.js │ └── ListEnsemble.js │ ├── containers │ └── ContainerBaseLearner.js │ ├── index.css │ ├── index.js │ └── logo.svg └── views.py /.gitattributes: -------------------------------------------------------------------------------- 1 | xcessiv/ui/src/BaseLearnerOrigin/* linguist-vendored 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/ignore-files/ for more about ignoring files. 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # Distribution / packaging 9 | .Python 10 | build/ 11 | .cache/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | c 27 | 28 | # Unit test / coverage reports 29 | htmlcov/ 30 | .tox/ 31 | coverage 32 | .coverage 33 | .coverage.* 34 | .cache 35 | nosetests.xml 36 | coverage.xml 37 | *.cover 38 | .hypothesis/ 39 | 40 | # IntelliJ/Pycharm/Webstorm files 41 | .idea/ 42 | 43 | # misc 44 | .DS_Store 45 | .env 46 | *~ 47 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | - "3.5" 5 | install: 6 | - pip install --upgrade pip setuptools wheel 7 | - pip install --only-binary=numpy,scipy numpy scipy 8 | - pip install xgboost 9 | - pip install -r requirements.txt 10 | # command to run tests 11 | script: py.test 12 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 
## Steps for contributing 2 | 3 | Fixing a bug you found in Xcessiv? Suggesting a feature? Listed here are some guidelines to keep in mind when contributing. 4 | 5 | 1. **Open an issue** along with detailed explanation. For bug reports, include the code to reproduce the bug. For feature requests, explain why you think the feature could be useful. 6 | 7 | 2. **Fork the repository**. If you're contributing code, clone the forked repository into your local machine. 8 | 9 | 3. **Run the tests** to make sure they pass on your machine. Simply run `pytest` at the root folder and make sure all tests pass. 10 | 11 | 4. **Create a new branch**. Please do not commit directly to the master branch. Create your own branch and place your additions there. 12 | 13 | 5. **Write your code**. For Python, please follow PEP8 coding standards. Also, if you're adding a function, you must [write a docstring using the Google format](http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) detailing the API of your function. Take a look at the docstrings of the other Xcessiv functions to get an idea of what the docstring of yours should look like. 14 | 15 | 6. **Write/modify the corresponding unit tests**. After adding in your code and the corresponding unit tests, run `pytest` again to make sure they pass. 16 | 17 | 7. **Submit a pull request**. After submitting a PR, if all tests pass, your code will be reviewed and merged promptly. 18 | 19 | Thank you for taking the time to make Xcessiv better! 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2017 Reiichiro S. Nakano 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE 3 | include requirements.txt 4 | recursive-include xcessiv/ui/build * 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Xcessiv 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/xcessiv.svg)]() 4 | [![license](https://img.shields.io/github/license/reiinakano/xcessiv.svg)]() 5 | [![PyPI](https://img.shields.io/pypi/pyversions/xcessiv.svg)]() 6 | [![Build Status](https://travis-ci.org/reiinakano/xcessiv.svg?branch=master)](https://travis-ci.org/reiinakano/xcessiv) 7 | 8 | ### Xcessiv is a tool to help you create the biggest, craziest, and most *excessive* stacked ensembles you can think of. 9 | 10 | Stacked ensembles are simple in theory. You combine the predictions of smaller models and feed *those* into another model. However, in practice, implementing them can be a major headache. 
11 | 12 | Xcessiv holds your hand through all the implementation details of creating and optimizing stacked ensembles so you're free to fully define only the things you care about. 13 | 14 | ## The Xcessiv process 15 | 16 | ### Define your base learners and performance metrics 17 | 18 | ![define_base_learner](docs/_static/baselearner.gif) 19 | 20 | ### Keep track of hundreds of different model-hyperparameter combinations 21 | 22 | ![list_base_learner](docs/_static/listbaselearner.gif) 23 | 24 | ### Effortlessly choose your base learners and create an ensemble with the click of a button 25 | 26 | ![ensemble](docs/_static/ensemble.gif) 27 | 28 | ## Features 29 | 30 | * Fully define your data source, cross-validation process, relevant metrics, and base learners with Python code 31 | * Any model following the Scikit-learn API can be used as a base learner 32 | * Task queue based architecture lets you take full advantage of multiple cores and embarrassingly parallel hyperparameter searches 33 | * Direct integration with [TPOT](https://github.com/rhiever/tpot) for automated pipeline construction 34 | * Automated hyperparameter search through Bayesian optimization 35 | * Easy management and comparison of hundreds of different model-hyperparameter combinations 36 | * Automatic saving of generated secondary meta-features 37 | * Stacked ensemble creation in a few clicks 38 | * Automated ensemble construction through greedy forward model selection 39 | * Export your stacked ensemble as a standalone Python file to support multiple levels of stacking 40 | 41 | ## Installation and Documentation 42 | 43 | You can find installation instructions and detailed documentation hosted [here](http://xcessiv.readthedocs.io/). 44 | 45 | ## FAQ 46 | 47 | #### Where does Xcessiv fit in the machine learning process? 48 | 49 | Xcessiv fits in the model building part of the process after data preparation and feature engineering. 
At this point, there is no universally acknowledged way of determining which algorithm will work best for a particular dataset (see [No Free Lunch Theorem](https://en.wikipedia.org/wiki/No_free_lunch_theorem)), and while heuristic optimization methods do exist, things often break down into trial and error as you try to find the best model-hyperparameter combinations. 50 | 51 | Stacking is an almost surefire method to improve performance beyond that of any single model, however, the complexity of proper implementation often makes it impractical to apply them in practice outside of Kaggle competitions. Xcessiv aims to make the construction of stacked ensembles as painless as possible and lower the barrier for entry. 52 | 53 | #### I don't care about fancy stacked ensembles and what not, should I still use Xcessiv? 54 | 55 | Absolutely! Even without the ensembling functionality, the sheer amount of utility provided by keeping track of the performance of hundreds, and even thousands of ML models and hyperparameter combinations is a huge boon. 56 | 57 | #### How does Xcessiv generate meta-features for stacking? 58 | 59 | You can choose whether to generate meta-features through cross-validation (stacked generalization) or with a holdout set (blending). You can read about these two methods and a lot more about stacked ensembles in the [Kaggle Ensembling Guide](https://mlwave.com/kaggle-ensembling-guide/). It's a great article and provides most of the inspiration for this project. 60 | 61 | ## Contributing 62 | 63 | Xcessiv is in its very early stages and needs the open-source community to guide it along. 64 | 65 | There are many ways to contribute to Xcessiv. You could report a bug, suggest a feature, submit a pull request, improve documentation, and many more. 66 | 67 | If you would like to contribute something, please visit our [Contributor Guidelines](CONTRIBUTING.md). 68 | 69 | ## Project Status 70 | 71 | Xcessiv is currently in alpha and is unstable. 
Future versions are not guaranteed to be backwards-compatible with current project files. 72 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:2 2 | 3 | MAINTAINER Reiichiro Nakano 4 | 5 | RUN pip --no-cache-dir install \ 6 | numpy \ 7 | scipy \ 8 | sklearn \ 9 | pandas \ 10 | SQLAlchemy \ 11 | Flask \ 12 | gevent \ 13 | redis \ 14 | rq \ 15 | six 16 | 17 | RUN pip --no-cache-dir install xcessiv 18 | 19 | RUN mkdir /XcessivProjects 20 | 21 | EXPOSE 1994 22 | 23 | WORKDIR "/XcessivProjects" 24 | 25 | CMD ["xcessiv"] 26 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # Using Xcessiv via Docker 2 | 3 | This directory contains a `Dockerfile` for Xcessiv to work regardless of platform. 4 | 5 | ## Install Docker 6 | 7 | The first step is to [install Docker](https://docs.docker.com/installation/) for your operating system. 8 | 9 | ## Steps to use this image 10 | 11 | First, you must run a Redis server that Xcessiv will be able to connect to. You can run the [Redis Docker image](https://hub.docker.com/_/redis/) here if you want. Ensure that your Docker container will be able to communicate with the Redis server by properly configuring [container networking](https://docs.docker.com/engine/userguide/networking/). Additionally, here is a great [StackOverflow post](https://stackoverflow.com/questions/24319662/from-inside-of-a-docker-container-how-do-i-connect-to-the-localhost-of-the-mach) that covers communication from within a Docker container. 12 | 13 | Let's say you've figured out that your Docker container will be able to communicate with Redis at `172.17.42.1:6379`.
14 | 15 | To start Xcessiv with Redis at `172.17.42.1:6379`, simply run: 16 | 17 | `$ docker run -P --name='xcessiv' reiinakano/xcessiv xcessiv -H "172.17.42.1" -P 6379` 18 | 19 | The `-P` flag for Docker is used to expose port 1994 to the host, so you can use your web browser to interact with Xcessiv at `localhost:1994`. 20 | 21 | ### Mounting projects folder into container for persistence 22 | 23 | To save any projects you make with Xcessiv, you'll want to mount your own project folder into the Xcessiv Docker container's project folder. To do this, run: 24 | 25 | `$ docker run -P --name='xcessiv' -v /myxcessiv/XcessivProjects/:/XcessivProjects/ reiinakano/xcessiv` 26 | 27 | where `/myxcessiv/XcessivProjects/` is the host directory you want to save projects to. 28 | 29 | ### Using your own configuration file 30 | 31 | To start Xcessiv with your own configuration file, run: 32 | 33 | `$ docker run -P --name='xcessiv' -v /myxcessiv/myconf/config.py:/root/.xcessiv/config.py reiinakano/xcessiv` 34 | 35 | where `/myxcessiv/myconf/` is a local directory containing the `config.py` file you want to use. 36 | 37 | ## Use docker-compose 38 | 39 | The simplest way to start Xcessiv is with docker-compose. This will start Redis and Xcessiv and define a shared data directory in `./data`.
Simply type: 40 | 41 | `$ docker-compose up` 42 | 43 | -------------------------------------------------------------------------------- /docker/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | 4 | redis: 5 | image: "redis:alpine" 6 | 7 | xcessiv: 8 | image: reiinakano/xcessiv 9 | command: bash -c "xcessiv -H redis -P 6379" 10 | depends_on: 11 | - "redis" 12 | ports: 13 | - "1994:1994" 14 | volumes: 15 | - ./data:/XcessivProjects 16 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = Xcessiv 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/_static/base_learner_origin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reiinakano/xcessiv/a48dff7d370c84eb5c243bde87164c1f5fd096d5/docs/_static/base_learner_origin.png -------------------------------------------------------------------------------- /docs/_static/baselearner.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reiinakano/xcessiv/a48dff7d370c84eb5c243bde87164c1f5fd096d5/docs/_static/baselearner.gif -------------------------------------------------------------------------------- /docs/_static/create_ensemble.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reiinakano/xcessiv/a48dff7d370c84eb5c243bde87164c1f5fd096d5/docs/_static/create_ensemble.png -------------------------------------------------------------------------------- /docs/_static/create_open_project.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reiinakano/xcessiv/a48dff7d370c84eb5c243bde87164c1f5fd096d5/docs/_static/create_open_project.png -------------------------------------------------------------------------------- /docs/_static/ensemble.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reiinakano/xcessiv/a48dff7d370c84eb5c243bde87164c1f5fd096d5/docs/_static/ensemble.gif -------------------------------------------------------------------------------- /docs/_static/finalized_base_learner.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/reiinakano/xcessiv/a48dff7d370c84eb5c243bde87164c1f5fd096d5/docs/_static/finalized_base_learner.png -------------------------------------------------------------------------------- /docs/_static/list_base_learners.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reiinakano/xcessiv/a48dff7d370c84eb5c243bde87164c1f5fd096d5/docs/_static/list_base_learners.png -------------------------------------------------------------------------------- /docs/_static/listbaselearner.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reiinakano/xcessiv/a48dff7d370c84eb5c243bde87164c1f5fd096d5/docs/_static/listbaselearner.gif -------------------------------------------------------------------------------- /docs/_static/main_data_extraction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reiinakano/xcessiv/a48dff7d370c84eb5c243bde87164c1f5fd096d5/docs/_static/main_data_extraction.png -------------------------------------------------------------------------------- /docs/_static/meta_feature_extraction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reiinakano/xcessiv/a48dff7d370c84eb5c243bde87164c1f5fd096d5/docs/_static/meta_feature_extraction.png -------------------------------------------------------------------------------- /docs/_static/verified_base_learner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reiinakano/xcessiv/a48dff7d370c84eb5c243bde87164c1f5fd096d5/docs/_static/verified_base_learner.png -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 
-*- 2 | # 3 | # Xcessiv documentation build configuration file, created by 4 | # sphinx-quickstart on Sat May 20 16:04:29 2017. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 18 | # 19 | # import os 20 | # import sys 21 | # sys.path.insert(0, os.path.abspath('.')) 22 | 23 | 24 | # -- General configuration ------------------------------------------------ 25 | 26 | # If your documentation needs a minimal Sphinx version, state it here. 27 | # 28 | # needs_sphinx = '1.0' 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.autosectionlabel'] 34 | 35 | # Add any paths that contain templates here, relative to this directory. 36 | templates_path = ['_templates'] 37 | 38 | # The suffix(es) of source filenames. 39 | # You can specify multiple suffix as a list of string: 40 | # 41 | # source_suffix = ['.rst', '.md'] 42 | source_suffix = '.rst' 43 | 44 | # The master toctree document. 45 | master_doc = 'index' 46 | 47 | # General information about the project. 48 | project = u'Xcessiv' 49 | copyright = u'2017, Reiichiro Nakano' 50 | author = u'Reiichiro Nakano' 51 | 52 | # The version info for the project you're documenting, acts as replacement for 53 | # |version| and |release|, also used in various other places throughout the 54 | # built documents. 55 | # 56 | # The short X.Y version. 
57 | version = u'0.1.0' 58 | # The full version, including alpha/beta/rc tags. 59 | release = u'0.1.0' 60 | 61 | # The language for content autogenerated by Sphinx. Refer to documentation 62 | # for a list of supported languages. 63 | # 64 | # This is also used if you do content translation via gettext catalogs. 65 | # Usually you set "language" from the command line for these cases. 66 | language = None 67 | 68 | # List of patterns, relative to source directory, that match files and 69 | # directories to ignore when looking for source files. 70 | # This patterns also effect to html_static_path and html_extra_path 71 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 72 | 73 | # The name of the Pygments (syntax highlighting) style to use. 74 | pygments_style = 'sphinx' 75 | 76 | # If true, `todo` and `todoList` produce output, else they produce nothing. 77 | todo_include_todos = False 78 | 79 | 80 | # -- Options for HTML output ---------------------------------------------- 81 | 82 | # The theme to use for HTML and HTML Help pages. See the documentation for 83 | # a list of builtin themes. 84 | # 85 | html_theme = 'sphinx_rtd_theme' 86 | 87 | # Theme options are theme-specific and customize the look and feel of a theme 88 | # further. For a list of options available for each theme, see the 89 | # documentation. 90 | # 91 | # html_theme_options = {} 92 | 93 | # Add any paths that contain custom static files (such as style sheets) here, 94 | # relative to this directory. They are copied after the builtin static files, 95 | # so a file named "default.css" will overwrite the builtin "default.css". 96 | html_static_path = ['_static'] 97 | 98 | 99 | # -- Options for HTMLHelp output ------------------------------------------ 100 | 101 | # Output file base name for HTML help builder. 
102 | htmlhelp_basename = 'Xcessivdoc' 103 | 104 | 105 | # -- Options for LaTeX output --------------------------------------------- 106 | 107 | latex_elements = { 108 | # The paper size ('letterpaper' or 'a4paper'). 109 | # 110 | # 'papersize': 'letterpaper', 111 | 112 | # The font size ('10pt', '11pt' or '12pt'). 113 | # 114 | # 'pointsize': '10pt', 115 | 116 | # Additional stuff for the LaTeX preamble. 117 | # 118 | # 'preamble': '', 119 | 120 | # Latex figure (float) alignment 121 | # 122 | # 'figure_align': 'htbp', 123 | } 124 | 125 | # Grouping the document tree into LaTeX files. List of tuples 126 | # (source start file, target name, title, 127 | # author, documentclass [howto, manual, or own class]). 128 | latex_documents = [ 129 | (master_doc, 'Xcessiv.tex', u'Xcessiv Documentation', 130 | u'Reiichiro Nakano', 'manual'), 131 | ] 132 | 133 | 134 | # -- Options for manual page output --------------------------------------- 135 | 136 | # One entry per manual page. List of tuples 137 | # (source start file, name, description, authors, manual section). 138 | man_pages = [ 139 | (master_doc, 'xcessiv', u'Xcessiv Documentation', 140 | [author], 1) 141 | ] 142 | 143 | 144 | # -- Options for Texinfo output ------------------------------------------- 145 | 146 | # Grouping the document tree into Texinfo files. List of tuples 147 | # (source start file, target name, title, author, 148 | # dir menu entry, description, category) 149 | texinfo_documents = [ 150 | (master_doc, 'Xcessiv', u'Xcessiv Documentation', 151 | author, 'Xcessiv', 'One line description of project.', 152 | 'Miscellaneous'), 153 | ] 154 | 155 | 156 | 157 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. Xcessiv documentation master file, created by 2 | sphinx-quickstart on Sat May 20 16:04:29 2017. 
3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Xcessiv's documentation! 7 | =================================== 8 | 9 | Xcessiv is a web-based application for quick and scalable hyperparameter tuning and stacked ensembling in Python. 10 | 11 | ---------------- 12 | 13 | Features 14 | -------- 15 | 16 | * Fully define your data source, cross-validation process, relevant metrics, and base learners with Python code 17 | * Any model following the Scikit-learn API can be used as a base learner 18 | * Task queue based architecture lets you take full advantage of multiple cores and embarrassingly parallel hyperparameter searches 19 | * Direct integration with `TPOT `_ for automated pipeline construction 20 | * Automated hyperparameter search through Bayesian optimization 21 | * Easy management and comparison of hundreds of different model-hyperparameter combinations 22 | * Automatic saving of generated secondary meta-features 23 | * Stacked ensemble creation in a few clicks 24 | * Automated ensemble construction through greedy forward model selection 25 | * Export your stacked ensemble as a standalone Python file to support multiple levels of stacking 26 | 27 | ---------------- 28 | 29 | Define your base learners and performance metrics 30 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 31 | 32 | .. image:: _static/baselearner.gif 33 | :align: center 34 | :alt: Base learner gif 35 | 36 | ---------------- 37 | 38 | Keep track of hundreds of different model-hyperparameter combinations 39 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 40 | 41 | .. image:: _static/listbaselearner.gif 42 | :align: center 43 | :alt: List base learner gif 44 | 45 | ---------------- 46 | 47 | Effortlessly choose your base learners and create stacked ensembles 48 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 49 | 50 | .. 
image:: _static/ensemble.gif 51 | :align: center 52 | :alt: Ensemble gif 53 | 54 | ---------------- 55 | 56 | Contents 57 | -------- 58 | 59 | .. toctree:: 60 | :maxdepth: 2 61 | :name: mastertoc 62 | 63 | installation 64 | walkthrough 65 | advanced 66 | thirdparty 67 | 68 | 69 | Indices and tables 70 | ================== 71 | 72 | * :ref:`genindex` 73 | * :ref:`modindex` 74 | * :ref:`search` 75 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | Installation and Configuration 2 | ============================== 3 | 4 | Xcessiv is currently tested on Python ``2.7`` and Python ``3.5``. 5 | 6 | .. admonition:: A note about Windows 7 | 8 | Because of its dependency on RQ, Xcessiv does not natively support Windows. At the moment, the only alternative for Windows users to run Xcessiv is to use Docker. See :ref:`Installation through Docker` for details. 9 | 10 | Installing and running Redis 11 | ---------------------------- 12 | 13 | For Xcessiv to work properly, it must be able to access a running Redis server. 14 | 15 | Instructions for installing and running Redis are OS dependent and can be found at https://redis.io/topics/quickstart. 16 | 17 | Make sure to take note of the port at which Redis is running, especially if it is not running at the default Redis port 6379. 18 | 19 | Installing Xcessiv 20 | ------------------ 21 | 22 | Installing with Pip 23 | ~~~~~~~~~~~~~~~~~~~ 24 | 25 | The easiest and recommended way to install Xcessiv is to use pip:: 26 | 27 | pip install xcessiv 28 | 29 | Installing from source 30 | ~~~~~~~~~~~~~~~~~~~~~~ 31 | 32 | If you want to install the latest version of Xcessiv from the master branch, you need some extra JavaScript tools to build the ReactJS application. 33 | 34 | First, you need to `install Node>=6 `_ and `Create React App `_. 
35 | 36 | Then, run the following commands to clone the Xcessiv master branch and build and install Xcessiv.:: 37 | 38 | git clone https://github.com/reiinakano/xcessiv.git 39 | cd xcessiv/xcessiv/ui 40 | npm run build 41 | cd .. 42 | cd .. 43 | python setup.py install 44 | 45 | Installation through Docker 46 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 47 | 48 | An alternative way to run Xcessiv is to run the server inside a Docker container. At the moment, this is the only alternative for Windows users to run Xcessiv. 49 | 50 | There is a full guide for using Docker to run Xcessiv `here `_. 51 | 52 | Configuration 53 | ------------- 54 | 55 | To configure Xcessiv outside the default settings, create a Python file at ``{HOME_FOLDER}/.xcessiv/config.py``. Here are the parameters (at their default values) you can copy / paste in that configuration module.:: 56 | 57 | #--------------------------------------------------- 58 | # Xcessiv config 59 | #--------------------------------------------------- 60 | REDIS_HOST = 'localhost' # Host address of Redis server 61 | REDIS_PORT = 6379 # Port of Redis Server 62 | REDIS_DB = 8 # Redis database number to use 63 | 64 | XCESSIV_PORT = 1994 # Port at which to start the Xcessiv server 65 | NUM_WORKERS = 1 # Number of RQ workers to start 66 | 67 | Please note that aside from this configuration file, another way to configure Xcessiv is to directly pass the parameters when starting Xcessiv from the command line. In this case, the configuration variables passed through the command line override the configuration found in ``config.py``. See :ref:`Starting Xcessiv` for details. 68 | -------------------------------------------------------------------------------- /docs/thirdparty.rst: -------------------------------------------------------------------------------- 1 | Xcessiv and Third Party Libraries 2 | ================================= 3 | 4 | Xcessiv provides an extremely flexible framework for experimentation with your own algorithms.
Anything you can dream of can be used, as long as they conform to the **scikit-learn** interface. 5 | 6 | Here are a few example workflows using third party libraries that work well with Xcessiv. 7 | 8 | --------------------------- 9 | 10 | Xcessiv with TPOT 11 | ----------------- 12 | 13 | .. admonition:: Note 14 | 15 | As of v0.4, Xcessiv now provides direct integration with TPOT. View :ref:`TPOT base learner construction` for details. This section is kept here to demonstrate the power of stacking together different TPOT pipelines. 16 | 17 | Xcessiv is a great tool for tuning different models and pipelines and stacking them into one big ensemble, but with all the possible combinations of pipelines, where would you even begin? 18 | 19 | Enter TPOT. 20 | 21 | TPOT is `a Python tool that automatically creates and optimizes machine learning pipelines using genetic programming `_. 22 | 23 | TPOT will automatically try out hundreds of different machine learning pipelines and pick out the best one it finds. You can then export it as source code that contains a :class:`sklearn.pipeline.Pipeline` object. From there, you can just add your curated and tuned pipeline as a new base learner type, ready for even further improvement from stacking. 24 | 25 | In this example, we'll be using the `Hill-Valley dataset with noise `_ from the UCI Machine Learning Repository. To load it into Xcessiv, we'll use a neat little Python wrapper called `pmlb `_. Start by installing pmlb:: 26 | 27 | pip install pmlb 28 | 29 | Now, start a new Xcessiv project and let's dive in. 
30 | 31 | Set up data entry and cross-validation into Xcessiv 32 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 33 | 34 | Copy the following code into the **Main Dataset Extraction** code block.:: 35 | 36 | from pmlb import fetch_data 37 | 38 | def extract_main_dataset(): 39 | return fetch_data('Hill_Valley_with_noise', local_cache_dir='./', return_X_y=True) 40 | 41 | pmlb has a nice interface that lets you extract datasets in a **scikit-learn** format very easily. You can change the argument to ``local_cache_dir`` above to any directory where you want to store the dataset. This way, the dataset is only downloaded the first time :func:`extract_main_dataset` is run. 42 | 43 | Since the dataset is rather small, we'll use cross-validation. For both **Base learner Cross-validation** and **Stacked Ensemble Cross-validation**, copy the following code.:: 44 | 45 | from sklearn.model_selection import KFold 46 | 47 | def return_splits_iterable(X, y): 48 | """This function returns an iterable that splits the given dataset 49 | K times into different train-test splits. 50 | """ 51 | RANDOM_STATE = 8 52 | N_SPLITS = 5 53 | SHUFFLE = True 54 | 55 | return KFold(n_splits=N_SPLITS, random_state=RANDOM_STATE, shuffle=SHUFFLE).split(X, y) 56 | 57 | Run TPOT to get an optimized pipeline 58 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 59 | 60 | Open up your favorite place to run Python code (I used Jupyter notebook) and run the following TPOT code.:: 61 | 62 | from pmlb import fetch_data 63 | from tpot import TPOTClassifier 64 | 65 | X, y = fetch_data('Hill_Valley_with_noise', local_cache_dir='./', return_X_y=True) 66 | tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2, n_jobs=-1) 67 | tpot.fit(X, y) 68 | 69 | tpot.export('tpot_1.py') 70 | 71 | This snippet will run the TPOT algorithm on the Hill valley with noise dataset and automatically find an optimal pipeline. Then, it will export the found pipeline as Python code in ``tpot_1.py``. 
72 | 73 | Note that this could take a while. On my computer, it took around 30 minutes to an hour to run. If you want, you can just skip this part since the pipelines I found will be available in this documentation anyway. 74 | 75 | .. admonition:: Note 76 | 77 | Note that the TPOT algorithm is stochastic, so different runs will probably result in different pipelines found. It might be best to set the ``random_state`` parameter in :class:`TPOTClassifier` for reproducibility. This randomness is a good thing, because stacking works best when very different base learners are used. 78 | 79 | Once the algorithm is finished running, open up ``tpot_1.py`` and you should see something like the following code.:: 80 | 81 | import numpy as np 82 | 83 | from sklearn.ensemble import ExtraTreesClassifier 84 | from sklearn.model_selection import train_test_split 85 | from sklearn.pipeline import make_pipeline 86 | from sklearn.preprocessing import Normalizer 87 | 88 | # NOTE: Make sure that the class is labeled 'class' in the data file 89 | tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) 90 | features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1) 91 | training_features, testing_features, training_classes, testing_classes = \ 92 | train_test_split(features, tpot_data['class'], random_state=42) 93 | 94 | exported_pipeline = make_pipeline( 95 | Normalizer(norm="max"), 96 | ExtraTreesClassifier(bootstrap=False, criterion="entropy", max_features=0.15, min_samples_leaf=7, min_samples_split=13, n_estimators=100) 97 | ) 98 | 99 | exported_pipeline.fit(training_features, training_classes) 100 | results = exported_pipeline.predict(testing_features) 101 | 102 | You can see that our exported pipeline is in the variable ``exported_pipeline``. This is actually the only part of the code we need to add into Xcessiv. 
103 | 104 | Adding TPOT Pipelines to Xcessiv 105 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 106 | 107 | Create a new base learner setup and copy the following code into Xcessiv.:: 108 | 109 | from sklearn.ensemble import ExtraTreesClassifier 110 | from sklearn.model_selection import train_test_split 111 | from sklearn.pipeline import make_pipeline 112 | from sklearn.preprocessing import Normalizer 113 | 114 | base_learner = make_pipeline( 115 | Normalizer(norm="max"), 116 | ExtraTreesClassifier(bootstrap=False, criterion="entropy", max_features=0.15, min_samples_leaf=7, min_samples_split=13, n_estimators=100, random_state=8) 117 | ) 118 | 119 | This is a stripped down version of the code in ``tpot_1.py``, with only the part we need. Notice two changes: we renamed ``exported_pipeline`` to ``base_learner`` to follow the Xcessiv format, and set the ``random_state`` parameter in the :class:`sklearn.ensemble.ExtraTreesClassifier` object to 8 for determinism. 120 | 121 | Name your base learner "TPOT 1", set ``predict_proba`` as the meta-feature generator, and add the following preset metrics: **Accuracy from Scores/Probabilities**, **Recall from Scores/Probabilities**, **Precision from Scores/Probabilities**, **F1 Score from Scores/Probabilities**, and **AUC from Scores/Probabilities**. 122 | 123 | Since the hill-valley dataset is binary, verify and finalize your base learner on the breast cancer dataset. 124 | 125 | Keep in mind that the pipeline returned by TPOT has already been tuned, so there isn't much need to tune it now. Feel free to do so, though. It's very easy to do this in Xcessiv. For this case, let's just create a single new base learner with default hyperparameters. You should get a pretty good accuracy of about 0.9868. 126 | 127 | As mentioned earlier, different runs of TPOT will probably produce different results. I ran the script two more times, this time with different random seeds set. 
For a random state of 10, TPOT produced the following pipeline (stripped down to Xcessiv format).:: 128 | 129 | from copy import copy 130 | from sklearn.ensemble import VotingClassifier 131 | from sklearn.model_selection import train_test_split 132 | from sklearn.pipeline import make_pipeline, make_union 133 | from sklearn.preprocessing import FunctionTransformer 134 | from sklearn.svm import LinearSVC 135 | 136 | base_learner = make_pipeline( 137 | make_union(VotingClassifier([("est", LinearSVC(C=5.0, loss="hinge", tol=0.0001, random_state=8))]), FunctionTransformer(copy)), 138 | LinearSVC(C=0.0001, random_state=8, loss="squared_hinge") 139 | ) 140 | 141 | This combination of Linear SVCs and a VotingClassifier gets an accuracy of about 0.9612. 142 | 143 | For a random state of 242, the following stripped down pipeline is produced.:: 144 | 145 | from sklearn.model_selection import train_test_split 146 | from sklearn.neighbors import KNeighborsClassifier 147 | from sklearn.pipeline import make_pipeline 148 | from sklearn.preprocessing import Normalizer 149 | 150 | base_learner = make_pipeline( 151 | Normalizer(norm="l1"), 152 | KNeighborsClassifier(n_neighbors=22, p=1) 153 | ) 154 | 155 | This pipeline gets an accuracy of 0.9876, our highest so far. 156 | 157 | Stacking TPOT Pipelines together 158 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 159 | 160 | Once they're in Xcessiv, TPOT pipelines are just regular base learners you can tune or stack. For now, we've got three high-performing base learners with rather different decision models i.e. a tree-based model, a linear SVM, and a nearest neighbors classifier. These should be ideal to stack together. 161 | 162 | Create and finalize a preset Logistic Regression base learner. We'll use this to stack the base learners together. 163 | 164 | Let's begin by stacking together the two highest performers, the ExtraTreesClassifier and the KNeighborsClassifier without the original features. 
Right off the bat, cross-validating on the secondary meta-features yields an accuracy of 0.9975. 165 | 166 | Going further, let's see if adding the less effective (on its own) Linear SVM will prove useful to our small ensemble. Running it, we get an even better 0.9992 accuracy. 167 | 168 | It seems that seeing how the Linear SVM looks at the problem lets our Logistic Regression meta-learner further improve its own understanding of the data. 169 | 170 | Quoting top Kaggler Marios Michailidis: 171 | 172 | Sometimes it is useful to allow XGBoost to see what a KNN-classifier sees. 173 | 174 | And that's it for our guide to using TPOT in Xcessiv. There's loads more you can try if you want to push up model performance even more. For instance, why not see if a TPOT pipeline as your secondary learner will work better? Or try experimenting with adding the original features appended to the meta-features. Xcessiv is built for this kind of crazy exploration. Go get those accuracies up as high as you can! 
175 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | bayesian-optimization>=0.4.0 2 | Flask>=0.11 3 | gevent>=1.1 4 | numpy>=1.12 5 | redis>=2.10 6 | rq>=0.7 7 | scikit-learn>=0.18 8 | scipy>=0.18 9 | six>=1.10 10 | SQLAlchemy>=1.1 11 | TPOT>=0.8 12 | -------------------------------------------------------------------------------- /runtestserver.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division, unicode_literals 2 | from xcessiv import app 3 | import sys 4 | 5 | 6 | if __name__ == '__main__': 7 | port = 8080 if len(sys.argv) < 2 else sys.argv[1] 8 | app.run(debug=True, port=int(port), host='0.0.0.0') 9 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from setuptools import setup, find_packages 3 | from setuptools.command.test import test as TestCommand 4 | import io 5 | import os 6 | import sys 7 | 8 | here = os.path.abspath(os.path.dirname(__file__)) 9 | 10 | 11 | def read(*filenames, **kwargs): 12 | encoding = kwargs.get('encoding', 'utf-8') 13 | sep = kwargs.get('sep', '\n') 14 | buf = [] 15 | for filename in filenames: 16 | with io.open(filename, encoding=encoding) as f: 17 | buf.append(f.read()) 18 | return sep.join(buf) 19 | 20 | long_description = read('README.md') 21 | 22 | 23 | class PyTest(TestCommand): 24 | def finalize_options(self): 25 | TestCommand.finalize_options(self) 26 | self.test_args = [] 27 | self.test_suite = True 28 | 29 | def run_tests(self): 30 | import pytest 31 | errcode = pytest.main(self.test_args) 32 | sys.exit(errcode) 33 | 34 | setup( 35 | name='xcessiv', 36 | version='0.5.1', 37 | url='https://github.com/reiinakano/xcessiv', 
38 | license='Apache License 2.0', 39 | author='Reiichiro Nakano', 40 | tests_require=['pytest'], 41 | install_requires=[ 42 | 'bayesian-optimization', 43 | 'Flask>=0.11.0', 44 | 'gevent>=1.1.0', 45 | 'numpy>=1.12.0', 46 | 'redis>=2.10.0', 47 | 'rq>=0.7.0', 48 | 'scikit-learn>=0.18.0', 49 | 'scipy>=0.18.0', 50 | 'six>=1.10.0', 51 | 'SQLAlchemy>=1.1.0', 52 | 'TPOT>=0.8' 53 | ], 54 | cmdclass={'test': PyTest}, 55 | author_email='reiichiro.s.nakano@gmail.com', 56 | description='A web-based application for quick and ' 57 | 'scalable construction of massive machine learning ensembles.', 58 | long_description=long_description, 59 | packages=find_packages(), 60 | include_package_data=True, 61 | platforms='any', 62 | test_suite='xcessiv.tests', 63 | classifiers = [ 64 | 'Programming Language :: Python', 65 | 'Programming Language :: Python :: 2', 66 | 'Programming Language :: Python :: 2.7', 67 | 'Programming Language :: Python :: 3', 68 | 'Programming Language :: Python :: 3.5', 69 | 'Programming Language :: JavaScript', 70 | 'Natural Language :: English', 71 | 'Intended Audience :: Developers', 72 | 'Intended Audience :: Science/Research', 73 | 'License :: OSI Approved :: Apache Software License', 74 | 'Operating System :: Unix', 75 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 76 | ], 77 | extras_require={ 78 | 'testing': ['pytest'], 79 | }, 80 | entry_points={ 81 | 'console_scripts': [ 82 | 'xcessiv = xcessiv.scripts.runapp:main' 83 | ] 84 | } 85 | ) 86 | -------------------------------------------------------------------------------- /xcessiv/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division, unicode_literals 2 | from flask import Flask 3 | 4 | 5 | __version__ = '0.5.1' 6 | 7 | 8 | app = Flask(__name__, static_url_path='/static', static_folder='ui/build/static') 9 | app.config.from_object('xcessiv.config') 10 | 11 | 12 | import 
xcessiv.views 13 | -------------------------------------------------------------------------------- /xcessiv/config.py: -------------------------------------------------------------------------------- 1 | REDIS_HOST = 'localhost' 2 | REDIS_PORT = 6379 3 | REDIS_DB = 8 4 | QUEUES = ['default'] 5 | 6 | XCESSIV_PORT = 1994 7 | NUM_WORKERS = 1 8 | 9 | XCESSIV_META_FEATURES_FOLDER = 'meta-features' 10 | XCESSIV_NOTEBOOK_NAME = 'xcnb.db' 11 | -------------------------------------------------------------------------------- /xcessiv/constants.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division, unicode_literals 2 | from xcessiv.presets.cvsetting import k_fold 3 | 4 | 5 | extraction_default_source = """\"\"\"In this code block, you must define the function `extract_{}_dataset`. 6 | `extract_{}_dataset` must take no arguments and return a tuple (X, y), where X is a 7 | Numpy array with shape (n_samples, n_features) corresponding to the features of your 8 | {} dataset and y is the Numpy array corresponding to the ground truth labels of each 9 | sample. 10 | \"\"\" 11 | 12 | def extract_{}_dataset(): 13 | return [[1, 2], [2, 1]], [0, 1] 14 | """ 15 | 16 | meta_feature_generation_default_source = k_fold['source'] 17 | 18 | DEFAULT_EXTRACTION_MAIN_DATASET = { 19 | "source": extraction_default_source.format('main', 'main', 'main', 'main') 20 | } 21 | DEFAULT_EXTRACTION_TEST_DATASET = { 22 | "method": None, 23 | "source": extraction_default_source.format('test', 'test', 'test', 'test') 24 | } 25 | DEFAULT_EXTRACTION_META_FEATURE_GENERATION = { 26 | "source": meta_feature_generation_default_source 27 | } 28 | 29 | tpot_learner_docstring = '''""" 30 | The following code is directly from the TPOT learner's `export` function. 31 | You must modify it in order to conform to the Xcessiv format. 32 | The only relevant lines are the lines required to define `exported_pipeline`. 
33 | You may remove all others and rename `exported_pipeline` to `base_learner`. 34 | """ 35 | ''' 36 | -------------------------------------------------------------------------------- /xcessiv/exceptions.py: -------------------------------------------------------------------------------- 1 | """This module contains specific exceptions to be handled by Flask""" 2 | 3 | 4 | class UserError(Exception): 5 | def __init__(self, message, status_code=400, **kwargs): 6 | super(UserError, self).__init__(self) 7 | self.message = message 8 | self.status_code = status_code 9 | self.kwargs = kwargs 10 | 11 | def to_dict(self): 12 | rv = dict(self.kwargs or ()) 13 | rv['message'] = self.message 14 | return rv 15 | 16 | def __repr__(self): 17 | return self.message 18 | -------------------------------------------------------------------------------- /xcessiv/functions.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division, unicode_literals 2 | import imp 3 | import sys 4 | import os 5 | import hashlib 6 | import json 7 | import numpy as np 8 | from sqlalchemy import create_engine 9 | from sqlalchemy.orm import Session 10 | from six import exec_, iteritems 11 | from sklearn import datasets 12 | from sklearn import model_selection 13 | from xcessiv import app, exceptions 14 | 15 | 16 | def hash_file(path, block_size=65536): 17 | """Returns SHA256 checksum of a file 18 | 19 | Args: 20 | path (string): Absolute file path of file to hash 21 | 22 | block_size (int, optional): Number of bytes to read per block 23 | """ 24 | sha256 = hashlib.sha256() 25 | with open(path, 'rb') as f: 26 | for block in iter(lambda: f.read(block_size), b''): 27 | sha256.update(block) 28 | return sha256.hexdigest() 29 | 30 | 31 | def hash_string(string): 32 | """Hashes an input string using SHA256""" 33 | return hashlib.sha256(string).hexdigest() 34 | 35 | 36 | def import_object_from_path(path, object): 37 | 
"""Used to import an object from an absolute path. 38 | 39 | This function takes an absolute path and imports it as a Python module. 40 | It then returns the object with name `object` from the imported module. 41 | 42 | Args: 43 | path (string): Absolute file path of .py file to import 44 | 45 | object (string): Name of object to extract from imported module 46 | """ 47 | with open(path) as f: 48 | return import_object_from_string_code(f.read(), object) 49 | 50 | 51 | def import_object_from_string_code(code, object): 52 | """Used to import an object from arbitrary passed code. 53 | 54 | Passed in code is treated as a module and is imported and added 55 | to `sys.modules` with its SHA256 hash as key. 56 | 57 | Args: 58 | code (string): Python code to import as module 59 | 60 | object (string): Name of object to extract from imported module 61 | """ 62 | sha256 = hashlib.sha256(code.encode('UTF-8')).hexdigest() 63 | module = imp.new_module(sha256) 64 | try: 65 | exec_(code, module.__dict__) 66 | except Exception as e: 67 | raise exceptions.UserError('User code exception', exception_message=str(e)) 68 | sys.modules[sha256] = module 69 | try: 70 | return getattr(module, object) 71 | except AttributeError: 72 | raise exceptions.UserError("{} not found in code".format(object)) 73 | 74 | 75 | def import_string_code_as_module(code): 76 | """Used to run arbitrary passed code as a module 77 | 78 | Args: 79 | code (string): Python code to import as module 80 | 81 | Returns: 82 | module: Python module 83 | """ 84 | sha256 = hashlib.sha256(code.encode('UTF-8')).hexdigest() 85 | module = imp.new_module(sha256) 86 | try: 87 | exec_(code, module.__dict__) 88 | except Exception as e: 89 | raise exceptions.UserError('User code exception', exception_message=str(e)) 90 | sys.modules[sha256] = module 91 | return module 92 | 93 | 94 | def verify_dataset(X, y): 95 | """Verifies if a dataset is valid for use i.e. 
scikit-learn format 96 | 97 | Used to verify a dataset by returning shape and basic statistics of 98 | returned data. This will also provide quick and dirty check on 99 | capability of host machine to process the data. 100 | 101 | Args: 102 | X (array-like): Features array 103 | 104 | y (array-like): Label array 105 | 106 | Returns: 107 | X_shape (2-tuple of int): Shape of X returned 108 | 109 | y_shape (1-tuple of int): Shape of y returned 110 | 111 | Raises: 112 | AssertionError: `X_shape` must be of length 2 and `y_shape` must be of 113 | length 1. `X` must have the same number of elements as `y` 114 | i.e. X_shape[0] == y_shape[0]. If any of these conditions are not met, 115 | an AssertionError is raised. 116 | """ 117 | X_shape, y_shape = np.array(X).shape, np.array(y).shape 118 | if len(X_shape) != 2: 119 | raise exceptions.UserError("X must be 2-dimensional array") 120 | if len(y_shape) != 1: 121 | raise exceptions.UserError("y must be 1-dimensional array") 122 | if X_shape[0] != y_shape[0]: 123 | raise exceptions.UserError("X must have same number of elements as y") 124 | return dict( 125 | features_shape=X_shape, 126 | labels_shape=y_shape 127 | ) 128 | 129 | 130 | def is_valid_json(x): 131 | """Returns true if x can be JSON serialized 132 | 133 | Args: 134 | x: Object to test 135 | """ 136 | try: 137 | json.dumps(x) 138 | return True 139 | except TypeError: 140 | return False 141 | 142 | 143 | def make_serializable(json): 144 | """This function ensures that the dictionary is JSON serializable. If not, 145 | keys with non-serializable values are removed from the return value. 
146 | 147 | Args: 148 | json (dict): Dictionary to convert to serializable 149 | 150 | Returns: 151 | new_dict (dict): New dictionary with non JSON serializable values removed 152 | """ 153 | new_dict = dict() 154 | for key, value in iteritems(json): 155 | if is_valid_json(value): 156 | new_dict[key] = value 157 | 158 | return new_dict 159 | 160 | 161 | def get_sample_dataset(dataset_properties): 162 | """Returns sample dataset 163 | 164 | Args: 165 | dataset_properties (dict): Dictionary corresponding to the properties of the dataset 166 | used to verify the estimator and metric generators. 167 | 168 | Returns: 169 | X (array-like): Features array 170 | 171 | y (array-like): Labels array 172 | 173 | splits (iterator): This is an iterator that returns train test splits for 174 | cross-validation purposes on ``X`` and ``y``. 175 | """ 176 | kwargs = dataset_properties.copy() 177 | data_type = kwargs.pop('type') 178 | if data_type == 'multiclass': 179 | try: 180 | X, y = datasets.make_classification(random_state=8, **kwargs) 181 | splits = model_selection.StratifiedKFold(n_splits=2, random_state=8).split(X, y) 182 | except Exception as e: 183 | raise exceptions.UserError(repr(e)) 184 | elif data_type == 'iris': 185 | X, y = datasets.load_iris(return_X_y=True) 186 | splits = model_selection.StratifiedKFold(n_splits=2, random_state=8).split(X, y) 187 | elif data_type == 'mnist': 188 | X, y = datasets.load_digits(return_X_y=True) 189 | splits = model_selection.StratifiedKFold(n_splits=2, random_state=8).split(X, y) 190 | elif data_type == 'breast_cancer': 191 | X, y = datasets.load_breast_cancer(return_X_y=True) 192 | splits = model_selection.StratifiedKFold(n_splits=2, random_state=8).split(X, y) 193 | elif data_type == 'boston': 194 | X, y = datasets.load_boston(return_X_y=True) 195 | splits = model_selection.KFold(n_splits=2, random_state=8).split(X) 196 | elif data_type == 'diabetes': 197 | X, y = datasets.load_diabetes(return_X_y=True) 198 | splits = 
def verify_estimator_class(est, meta_feature_generator, metric_generators, dataset_properties):
    """Verify that an estimator object follows the scikit-learn format.

    Confirms the estimator exposes `get_params`, `set_params`, and the named
    meta-feature generator method, then fits and predicts on a small sample
    dataset, evaluating every configured metric on the out-of-fold predictions.

    Args:
        est: Estimator object with `fit`, `predict`/`predict_proba`,
            `get_params`, and `set_params` methods.

        meta_feature_generator (str, unicode): Name of the method used by the
            estimator to generate meta-features on a set of data.

        metric_generators (dict): Maps a metric name to Python source code that
            defines the function computing that metric from true labels and
            the generated meta-features.

        dataset_properties (dict): Properties of the sample dataset used to
            verify the estimator and metric generators.

    Returns:
        performance_dict (mapping): Mapping from performance metric
            name to performance metric value e.g. "Accuracy": 0.963

        hyperparameters (mapping): Mapping from the estimator's hyperparameters
            to their default values e.g. "n_estimators": 10

    Raises:
        exceptions.UserError: If a required method is missing, fitting or
            prediction fails, the meta-features have a bad shape, or a metric
            generator raises.
    """
    X, y, splits = get_sample_dataset(dataset_properties)

    # Interface checks before any (potentially expensive) fitting
    if not hasattr(est, "get_params"):
        raise exceptions.UserError('Estimator does not have get_params method')
    if not hasattr(est, "set_params"):
        raise exceptions.UserError('Estimator does not have set_params method')
    if not hasattr(est, meta_feature_generator):
        raise exceptions.UserError('Estimator does not have meta-feature generator'
                                   ' {}'.format(meta_feature_generator))

    fold_labels = []
    fold_meta_features = []

    try:
        for train_index, test_index in splits:
            est.fit(X[train_index], y[train_index])
            fold_labels.append(y[test_index])
            # getattr is resolved after fit, since some estimators only
            # expose their prediction method once fitted
            fold_meta_features.append(getattr(est, meta_feature_generator)(X[test_index]))
        true_labels = np.concatenate(fold_labels)
        preds = np.concatenate(fold_meta_features, axis=0)
    except Exception as e:
        # Any failure during fit/predict is surfaced as a user-facing error
        raise exceptions.UserError(repr(e))

    if preds.shape[0] != true_labels.shape[0]:
        raise exceptions.UserError('Estimator\'s meta-feature generator '
                                   'does not produce valid shape')

    performance_dict = dict()
    for key in metric_generators:
        # Errors while importing the metric source propagate unchanged;
        # only errors while *evaluating* the metric are wrapped.
        metric_generator = import_object_from_string_code(
            metric_generators[key],
            'metric_generator'
        )
        try:
            performance_dict[key] = metric_generator(true_labels, preds)
        except Exception as e:
            raise exceptions.UserError(repr(e))

    return performance_dict, make_serializable(est.get_params())
class DBContextManager():
    """Context manager that opens and reliably closes a database session.

    Examples:
        >>> with DBContextManager('ProjectFolder') as session:
        >>>     # Do stuff with session
    """
    def __init__(self, path):
        """Store the location of the project's notebook database.

        Args:
            path (str, unicode): Path to project folder
        """
        self.path = os.path.join(path, app.config['XCESSIV_NOTEBOOK_NAME'])

    def __enter__(self):
        """Open and return a session bound to the project's SQLite database.

        Raises:
            exceptions.UserError: If the notebook file does not exist.
        """
        if not os.path.exists(self.path):
            raise exceptions.UserError('{} does not exist'.format(self.path))
        db_url = 'sqlite:///{}'.format(self.path)
        self.session = Session(bind=create_engine(db_url))
        return self.session

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the session, rolling back first if an exception occurred."""
        session = getattr(self, 'session', None)
        if session is not None:
            if exc_type is not None:
                session.rollback()
            session.close()
        return False  # re-raise any exception
'time_series_split' 14 | ] 15 | 16 | k_fold = { 17 | 'name': 'K-fold Cross Validation', 18 | 'source': 19 | """from sklearn.model_selection import KFold 20 | 21 | def return_splits_iterable(X, y): 22 | \"\"\"This function returns an iterable that splits the given dataset 23 | K times into different train-test splits. 24 | \"\"\" 25 | RANDOM_STATE = 8 26 | N_SPLITS = 5 27 | SHUFFLE = True 28 | 29 | return KFold(n_splits=N_SPLITS, random_state=RANDOM_STATE, shuffle=SHUFFLE).split(X, y) 30 | """ 31 | } 32 | 33 | stratified_k_fold = { 34 | 'name': 'Stratified K-fold Cross Validation', 35 | 'source': 36 | """from sklearn.model_selection import StratifiedKFold 37 | 38 | def return_splits_iterable(X, y): 39 | \"\"\"This function returns an iterable that splits the given dataset 40 | K times into different stratified train-test splits. 41 | \"\"\" 42 | RANDOM_STATE = 8 43 | N_SPLITS = 5 44 | SHUFFLE = True 45 | 46 | return StratifiedKFold(n_splits=N_SPLITS, random_state=RANDOM_STATE, shuffle=SHUFFLE).split(X, y) 47 | """ 48 | } 49 | 50 | shuffle_split = { 51 | 'name': 'Shuffle Split', 52 | 'source': 53 | """from sklearn.model_selection import ShuffleSplit 54 | 55 | def return_splits_iterable(X, y): 56 | \"\"\"This function returns an iterable that splits the given dataset 57 | randomly into different train-test splits. For applications where you 58 | only need a single train-test split (large datasets), you can do this 59 | by setting `n_splits` to 1. 
60 | \"\"\" 61 | RANDOM_STATE = 8 62 | N_SPLITS = 1 63 | TEST_SIZE = 0.25 64 | TRAIN_SIZE = None 65 | 66 | return ShuffleSplit(n_splits=N_SPLITS, random_state=RANDOM_STATE, 67 | test_size=TEST_SIZE, train_size=TRAIN_SIZE).split(X, y) 68 | """ 69 | } 70 | 71 | stratified_shuffle_split = { 72 | 'name': 'Stratified Shuffle Split', 73 | 'source': 74 | """from sklearn.model_selection import StratifiedShuffleSplit 75 | 76 | def return_splits_iterable(X, y): 77 | \"\"\"This function returns an iterable that splits the given dataset 78 | randomly into different stratified train-test splits. For applications where you 79 | only need a single train-test split (large datasets), you can do this 80 | by setting `n_splits` to 1. 81 | \"\"\" 82 | RANDOM_STATE = 8 83 | N_SPLITS = 1 84 | TEST_SIZE = 0.25 85 | TRAIN_SIZE = None 86 | 87 | return StratifiedShuffleSplit(n_splits=N_SPLITS, random_state=RANDOM_STATE, 88 | test_size=TEST_SIZE, train_size=TRAIN_SIZE).split(X, y) 89 | """ 90 | } 91 | 92 | 93 | leave_one_out = { 94 | 'name': 'Leave One Out', 95 | 'source': 96 | """from sklearn.model_selection import LeaveOneOut 97 | 98 | def return_splits_iterable(X, y): 99 | \"\"\" This function returns an iterable that splits the dataset, where 100 | train dataset is n-1, leaving only one out to test. 101 | \"\"\" 102 | 103 | return LeaveOneOut().split(X, y) 104 | """ 105 | } 106 | 107 | leave_p_out = { 108 | 'name': 'Leave P Out', 109 | 'source': 110 | """from sklearn.model_selection import LeavePOut 111 | 112 | def return_splits_iterable(X, y): 113 | \"\"\" This function returns an iterable that splits the dataset, where 114 | train dataset is n-p, leaving p out to test. 
115 | \"\"\" 116 | P=2 117 | 118 | return LeavePOut(p=P).split(X, y) 119 | """ 120 | } 121 | 122 | group_k_fold = { 123 | 'name': 'Group K-fold', 124 | 'source': 125 | """from sklearn.model_selection import GroupKFold 126 | 127 | def return_splits_iterable(X, y): 128 | \"\"\" This function returns a K-fold iterator variant 129 | with non-overlapping groups. The number of distict groups has to be at least 130 | equal to the number of folds 131 | \"\"\" 132 | 133 | N_SPLITS = 3 134 | 135 | GROUPS = None # You must fill this with your own group labels 136 | 137 | return GroupKFold(n_splits=N_SPLITS).split(X, y, groups=GROUPS) 138 | """ 139 | } 140 | 141 | time_series_split = { 142 | 'name': 'Time Series Split', 143 | 'source': 144 | """from sklearn.model_selection import TimeSeriesSplit 145 | 146 | def return_splits_iterable(X, y): 147 | \"\"\" This function is a variation of Kfold where it splits 148 | time-series dataset at fixed time intervals. 149 | \"\"\" 150 | 151 | N_SPLITS = 3 152 | 153 | return TimeSeriesSplit(n_splits=N_SPLITS).split(X, y) 154 | """ 155 | } 156 | 157 | -------------------------------------------------------------------------------- /xcessiv/presets/learnersetting.py: -------------------------------------------------------------------------------- 1 | """This module contains preset settings for defining base learners""" 2 | from __future__ import absolute_import, print_function, division, unicode_literals 3 | from xcessiv.presets import learnersource 4 | 5 | __all__ = [ 6 | 7 | # Classifiers 8 | 'sklearn_random_forest_classifier', 9 | 'sklearn_extra_trees_classifier', 10 | 'sklearn_logistic_regression', 11 | 'sklearn_knn_classifier', 12 | 'sklearn_svm_classifier', 13 | 'sklearn_gaussian_nb', 14 | 'sklearn_adaboost_classifier', 15 | 'xgboost_classifier', 16 | 17 | # Regressors 18 | 'sklearn_random_forest_regressor', 19 | 'sklearn_extra_trees_regressor', 20 | 'sklearn_bagging_regressor', 21 | 'sklearn_GP_regressor', 22 | 
'sklearn_ridge_regressor', 23 | 'sklearn_lasso_regressor', 24 | 'sklearn_kernel_ridge_regressor', 25 | 'sklearn_knn_regressor', 26 | 'sklearn_svr_regressor', 27 | 'sklearn_decision_tree_regressor', 28 | 'sklearn_linear_regression', 29 | 'sklearn_adaboost_regressor', 30 | 'xgboost_regressor', 31 | 32 | # Transformers 33 | 'identity_transformer' 34 | ] 35 | 36 | # Classifiers 37 | sklearn_random_forest_classifier = { 38 | 'name': 'scikit-learn Random Forest Classifier', 39 | 'source': learnersource.sklearn_random_forest_classifier_source, 40 | 'meta_feature_generator': 'predict_proba' 41 | } 42 | 43 | sklearn_extra_trees_classifier = { 44 | 'name': 'scikit-learn Extra Trees Classifier', 45 | 'source': learnersource.sklearn_extra_trees_classifier_source, 46 | 'meta_feature_generator': 'predict_proba' 47 | } 48 | 49 | sklearn_logistic_regression = { 50 | 'name': 'scikit-learn Logistic Regression', 51 | 'source': learnersource.sklearn_logistic_regression_source, 52 | 'meta_feature_generator': 'predict_proba' 53 | } 54 | 55 | sklearn_knn_classifier = { 56 | 'name': 'scikit-learn KNN Classifier', 57 | 'source': learnersource.sklearn_knn_classifier_source, 58 | 'meta_feature_generator': 'predict_proba' 59 | } 60 | 61 | sklearn_svm_classifier = { 62 | 'name': 'scikit-learn SVM Classifier', 63 | 'source': learnersource.sklearn_svm_classifier_source, 64 | 'meta_feature_generator': 'decision_function' 65 | } 66 | 67 | sklearn_gaussian_nb = { 68 | 'name': 'scikit-learn Gaussian Naive Bayes', 69 | 'source': learnersource.sklearn_gaussian_nb_source, 70 | 'meta_feature_generator': 'predict_proba' 71 | } 72 | 73 | sklearn_adaboost_classifier = { 74 | 'name': 'scikit-learn AdaBoost Classifier', 75 | 'source': learnersource.sklearn_adaboost_classifier_source, 76 | 'meta_feature_generator': 'predict_proba' 77 | } 78 | 79 | xgboost_classifier = { 80 | 'name': 'XGBoost Classifier', 81 | 'source': learnersource.xgboost_classifier_source, 82 | 'meta_feature_generator': 'predict_proba' 83 
| } 84 | 85 | # Regressors 86 | sklearn_random_forest_regressor = { 87 | 'name': 'scikit-learn Random Forest Regressor', 88 | 'source': learnersource.sklearn_random_forest_regressor_source, 89 | 'meta_feature_generator': 'predict' 90 | } 91 | 92 | sklearn_extra_trees_regressor = { 93 | 'name': 'scikit-learn Extra Trees Regressor', 94 | 'source': learnersource.sklearn_extra_trees_regressor_source, 95 | 'meta_feature_generator': 'predict' 96 | } 97 | 98 | sklearn_bagging_regressor = { 99 | 'name': 'scikit-learn Bagging Regressor', 100 | 'source': learnersource.sklearn_bagging_regressor_source, 101 | 'meta_feature_generator': 'predict' 102 | } 103 | 104 | sklearn_GP_regressor = { 105 | 'name': 'scikit-learn Gaussian Process Regressor', 106 | 'source': learnersource.sklearn_GP_regressor_source, 107 | 'meta_feature_generator': 'predict' 108 | } 109 | 110 | sklearn_ridge_regressor = { 111 | 'name': 'scikit-learn Ridge Regressor', 112 | 'source': learnersource.sklearn_ridge_regressor_source, 113 | 'meta_feature_generator': 'predict' 114 | } 115 | 116 | sklearn_lasso_regressor = { 117 | 'name': 'scikit-learn Lasso Regressor', 118 | 'source': learnersource.sklearn_lasso_regressor_source, 119 | 'meta_feature_generator': 'predict' 120 | } 121 | 122 | sklearn_kernel_ridge_regressor = { 123 | 'name': 'scikit-learn Kernel Ridge Regressor', 124 | 'source': learnersource.sklearn_kernel_ridge_regressor_source, 125 | 'meta_feature_generator': 'predict' 126 | } 127 | 128 | sklearn_knn_regressor = { 129 | 'name': 'scikit-learn K-NN Regressor', 130 | 'source': learnersource.sklearn_knn_regressor_source, 131 | 'meta_feature_generator': 'predict' 132 | } 133 | 134 | sklearn_svr_regressor = { 135 | 'name': 'scikit-learn Support Vector Regressor', 136 | 'source': learnersource.sklearn_svr_regressor_source, 137 | 'meta_feature_generator': 'predict' 138 | } 139 | 140 | sklearn_decision_tree_regressor = { 141 | 'name': 'scikit-learn Decision Tree Regressor', 142 | 'source': 
learnersource.sklearn_decision_tree_regressor_source, 143 | 'meta_feature_generator': 'predict' 144 | } 145 | 146 | sklearn_linear_regression = { 147 | 'name': 'scikit-learn Linear Regression', 148 | 'source': learnersource.sklearn_linear_regression_source, 149 | 'meta_feature_generator': 'predict' 150 | } 151 | 152 | sklearn_adaboost_regressor = { 153 | 'name': 'scikit-learn AdaBoost Regressor', 154 | 'source': learnersource.sklearn_adaboost_regressor_source, 155 | 'meta_feature_generator': 'predict' 156 | } 157 | 158 | xgboost_regressor = { 159 | 'name': 'XGBoost Regressor', 160 | 'source': learnersource.xgboost_regressor_source, 161 | 'meta_feature_generator': 'predict' 162 | } 163 | 164 | 165 | identity_transformer = { 166 | 'name': 'Identity Transformer', 167 | 'source': learnersource.identity_transformer_source, 168 | 'meta_feature_generator': 'transform' 169 | } 170 | -------------------------------------------------------------------------------- /xcessiv/presets/learnersource.py: -------------------------------------------------------------------------------- 1 | """This module contains preset source codes for base learners""" 2 | from __future__ import absolute_import, print_function, division, unicode_literals 3 | 4 | __all__ = [ 5 | 6 | # Classifiers 7 | 'sklearn_random_forest_classifier_source', 8 | 'sklearn_extra_trees_classifier_source', 9 | 'sklearn_logistic_regression_source', 10 | 'sklearn_knn_classifier_source', 11 | 'sklearn_svm_classifier_source', 12 | 'sklearn_gaussian_nb_source', 13 | 'sklearn_adaboost_classifier_source', 14 | 'xgboost_classifier_source', 15 | 16 | # Regressors 17 | 'sklearn_random_forest_regressor_source', 18 | 'sklearn_extra_trees_regressor_source', 19 | 'sklearn_bagging_regressor_source', 20 | 'sklearn_GP_regressor_source', 21 | 'sklearn_ridge_regressor_source', 22 | 'sklearn_lasso_regressor_source', 23 | 'sklearn_kernel_ridge_regressor_source', 24 | 'sklearn_knn_regressor_source', 25 | 'sklearn_svr_regressor_source', 26 | 
'sklearn_decision_tree_regressor_source', 27 | 'sklearn_linear_regression_source', 28 | 'sklearn_adaboost_regressor_source', 29 | 'xgboost_regressor_source', 30 | 31 | # Transformers 32 | 'identity_transformer_source' 33 | ] 34 | 35 | sklearn_random_forest_classifier_source = \ 36 | """from sklearn.ensemble import RandomForestClassifier 37 | 38 | base_learner = RandomForestClassifier(random_state=8) 39 | """ 40 | 41 | sklearn_extra_trees_classifier_source = \ 42 | """from sklearn.ensemble import ExtraTreesClassifier 43 | 44 | base_learner = ExtraTreesClassifier(random_state=8) 45 | """ 46 | 47 | sklearn_logistic_regression_source = \ 48 | """from sklearn.linear_model import LogisticRegression 49 | 50 | base_learner = LogisticRegression() 51 | """ 52 | 53 | sklearn_knn_classifier_source = \ 54 | """from sklearn.neighbors import KNeighborsClassifier 55 | 56 | base_learner = KNeighborsClassifier() 57 | """ 58 | 59 | sklearn_svm_classifier_source = \ 60 | """from sklearn.svm import SVC 61 | 62 | base_learner = SVC(random_state=8) 63 | """ 64 | 65 | sklearn_gaussian_nb_source = \ 66 | """from sklearn.naive_bayes import GaussianNB 67 | 68 | base_learner = GaussianNB() 69 | """ 70 | 71 | sklearn_adaboost_classifier_source = \ 72 | """from sklearn.ensemble import AdaBoostClassifier 73 | 74 | base_learner = AdaBoostClassifier(random_state=8) 75 | """ 76 | 77 | xgboost_classifier_source = \ 78 | """from xgboost import XGBClassifier 79 | 80 | base_learner = XGBClassifier(seed=8) 81 | """ 82 | 83 | sklearn_random_forest_regressor_source = \ 84 | """from sklearn.ensemble import RandomForestRegressor 85 | 86 | base_learner = RandomForestRegressor(random_state=8) 87 | """ 88 | 89 | sklearn_extra_trees_regressor_source = \ 90 | """from sklearn.ensemble import ExtraTreesRegressor 91 | 92 | base_learner = ExtraTreesRegressor(random_state=8) 93 | """ 94 | 95 | sklearn_bagging_regressor_source = \ 96 | """from sklearn.ensemble import BaggingRegressor 97 | 98 | base_learner = 
BaggingRegressor(random_state=8) 99 | """ 100 | 101 | sklearn_GP_regressor_source = \ 102 | """from sklearn.gaussian_process import GaussianProcessRegressor 103 | 104 | base_learner = GaussianProcessRegressor(random_state=8) 105 | """ 106 | 107 | sklearn_ridge_regressor_source = \ 108 | """from sklearn.linear_model import Ridge 109 | 110 | base_learner = Ridge(random_state=8) 111 | """ 112 | 113 | sklearn_lasso_regressor_source = \ 114 | """from sklearn.linear_model import Lasso 115 | 116 | base_learner = Lasso(random_state=8) 117 | """ 118 | 119 | sklearn_kernel_ridge_regressor_source = \ 120 | """from sklearn.kernel_ridge import KernelRidge 121 | 122 | base_learner = KernelRidge() 123 | """ 124 | 125 | sklearn_knn_regressor_source = \ 126 | """from sklearn.neighbors import KNeighborsRegressor 127 | 128 | base_learner = KNeighborsRegressor() 129 | """ 130 | 131 | sklearn_svr_regressor_source = \ 132 | """from sklearn.svm import SVR 133 | 134 | base_learner = SVR() 135 | """ 136 | 137 | sklearn_decision_tree_regressor_source = \ 138 | """from sklearn.tree import DecisionTreeRegressor 139 | 140 | base_learner = DecisionTreeRegressor(random_state=8) 141 | """ 142 | 143 | sklearn_linear_regression_source = \ 144 | """from sklearn.linear_model import LinearRegression 145 | 146 | base_learner = LinearRegression() 147 | """ 148 | 149 | sklearn_adaboost_regressor_source = \ 150 | """from sklearn.ensemble import AdaBoostRegressor 151 | 152 | base_learner = AdaBoostRegressor(random_state=8) 153 | """ 154 | 155 | xgboost_regressor_source = \ 156 | """from xgboost import XGBRegressor 157 | 158 | base_learner = XGBRegressor(seed=8) 159 | """ 160 | 161 | identity_transformer_source = \ 162 | """from sklearn.preprocessing import FunctionTransformer 163 | 164 | base_learner = FunctionTransformer() 165 | """ 166 | -------------------------------------------------------------------------------- /xcessiv/presets/metricsetting.py: 
-------------------------------------------------------------------------------- 1 | """This module contains preset settings for defining metric generators""" 2 | from __future__ import absolute_import, print_function, division, unicode_literals 3 | 4 | 5 | __all__ = [ 6 | 'accuracy_from_scores', 7 | 'accuracy_from_preds', 8 | 'recall_from_scores', 9 | 'recall_from_preds', 10 | 'precision_from_scores', 11 | 'precision_from_preds', 12 | 'f1_score_from_scores', 13 | 'f1_score_from_preds', 14 | 'mae', 15 | 'mse', 16 | 'median_absolute_error', 17 | 'r2_score', 18 | 'explained_variance_score', 19 | 'roc_auc_score_from_scores' 20 | ] 21 | 22 | 23 | accuracy_from_scores = { 24 | 'name': 'Accuracy', 25 | 'source': 26 | """from sklearn.metrics import accuracy_score 27 | import numpy as np 28 | 29 | def metric_generator(y_true, y_probas): 30 | \"\"\"This function computes the accuracy given the true labels array (y_true) 31 | and the scores/probabilities array (y_probas) with shape (num_samples, num_classes). 32 | For the function to work correctly, the columns of the probabilities array must 33 | correspond to a sorted set of the unique values present in y_true. 34 | \"\"\" 35 | classes_ = np.unique(y_true) 36 | if len(classes_) != y_probas.shape[1]: 37 | raise ValueError('The shape of y_probas does not correspond to the number of unique values in y_true') 38 | argmax = np.argmax(y_probas, axis=1) 39 | y_preds = classes_[argmax] 40 | return accuracy_score(y_true, y_preds) 41 | """, 42 | 'selection_name': 'Accuracy from Scores/Probabilities' 43 | } 44 | 45 | accuracy_from_preds = { 46 | 'name': 'Accuracy', 47 | 'source': 48 | """from sklearn.metrics import accuracy_score 49 | 50 | def metric_generator(y_true, y_preds): 51 | \"\"\"This function computes the accuracy given the true labels array (y_true) 52 | and the predicted labels array (y_preds). 
53 | \"\"\" 54 | return accuracy_score(y_true, y_preds) 55 | """, 56 | 'selection_name': 'Accuracy from Predictions' 57 | } 58 | 59 | recall_from_scores = { 60 | 'name': 'Recall', 61 | 'source': 62 | """from sklearn.metrics import recall_score 63 | import numpy as np 64 | 65 | def metric_generator(y_true, y_probas): 66 | \"\"\"This function computes the recall given the true labels array (y_true) 67 | and the scores/probabilities array (y_probas) with shape (num_samples, num_classes). 68 | For the function to work correctly, the columns of the probabilities array must 69 | correspond to a sorted set of the unique values present in y_true. If there are more than 70 | two classes, micro-averaging is used by default. 71 | \"\"\" 72 | classes_ = np.unique(y_true) 73 | if len(classes_) != np.array(y_probas).shape[1]: 74 | raise ValueError('The shape of y_probas does not correspond to the number of unique values in y_true') 75 | argmax = np.argmax(y_probas, axis=1) 76 | y_preds = classes_[argmax] 77 | if np.array(y_probas).shape[1] > 2: 78 | score = recall_score(y_true, y_preds, average='micro') 79 | else: 80 | score = recall_score(y_true, y_preds) 81 | return score 82 | """, 83 | 'selection_name': 'Recall from Scores/Probabilities' 84 | } 85 | 86 | 87 | recall_from_preds = { 88 | 'name': 'Recall', 89 | 'source': 90 | """from sklearn.metrics import recall_score 91 | import numpy as np 92 | 93 | def metric_generator(y_true, y_preds): 94 | \"\"\"This function computes the recall given the true labels array (y_true) 95 | and the predicted labels array (y_preds). 
96 | \"\"\" 97 | classes_ = np.unique(y_true) 98 | if len(classes_) > 2: 99 | score = recall_score(y_true, y_preds, average='micro') 100 | else: 101 | score = recall_score(y_true, y_preds) 102 | return score 103 | """, 104 | 'selection_name': 'Recall from Predictions' 105 | } 106 | 107 | precision_from_scores = { 108 | 'name': 'Precision', 109 | 'source': 110 | """from sklearn.metrics import precision_score 111 | import numpy as np 112 | 113 | def metric_generator(y_true, y_probas): 114 | \"\"\"This function computes the precision given the true labels array (y_true) 115 | and the scores/probabilities array (y_probas) with shape (num_samples, num_classes). 116 | For the function to work correctly, the columns of the probabilities array must 117 | correspond to a sorted set of the unique values present in y_true. If there are more than 118 | two classes, micro-averaging is used by default. 119 | \"\"\" 120 | classes_ = np.unique(y_true) 121 | if len(classes_) != np.array(y_probas).shape[1]: 122 | raise ValueError('The shape of y_probas does not correspond to the number of unique values in y_true') 123 | argmax = np.argmax(y_probas, axis=1) 124 | y_preds = classes_[argmax] 125 | if np.array(y_probas).shape[1] > 2: 126 | score = precision_score(y_true, y_preds, average='micro') 127 | else: 128 | score = precision_score(y_true, y_preds) 129 | return score 130 | """, 131 | 'selection_name': 'Precision from Scores/Probabilities' 132 | } 133 | 134 | 135 | precision_from_preds = { 136 | 'name': 'Precision', 137 | 'source': 138 | """from sklearn.metrics import precision_score 139 | import numpy as np 140 | 141 | def metric_generator(y_true, y_preds): 142 | \"\"\"This function computes the precision given the true labels array (y_true) 143 | and the predicted labels array (y_preds). 
144 | \"\"\" 145 | classes_ = np.unique(y_true) 146 | if len(classes_) > 2: 147 | score = precision_score(y_true, y_preds, average='micro') 148 | else: 149 | score = precision_score(y_true, y_preds) 150 | return score 151 | """, 152 | 'selection_name': 'Precision from Predictions' 153 | } 154 | 155 | f1_score_from_scores = { 156 | 'name': 'F1 Score', 157 | 'source': 158 | """from sklearn.metrics import f1_score 159 | import numpy as np 160 | 161 | def metric_generator(y_true, y_probas): 162 | \"\"\"This function computes the F1 score given the true labels array (y_true) 163 | and the scores/probabilities array (y_probas) with shape (num_samples, num_classes). 164 | For the function to work correctly, the columns of the probabilities array must 165 | correspond to a sorted set of the unique values present in y_true. If there are more than 166 | two classes, micro-averaging is used by default. 167 | \"\"\" 168 | classes_ = np.unique(y_true) 169 | if len(classes_) != np.array(y_probas).shape[1]: 170 | raise ValueError('The shape of y_probas does not correspond to the number of unique values in y_true') 171 | argmax = np.argmax(y_probas, axis=1) 172 | y_preds = classes_[argmax] 173 | if np.array(y_probas).shape[1] > 2: 174 | score = f1_score(y_true, y_preds, average='micro') 175 | else: 176 | score = f1_score(y_true, y_preds) 177 | return score 178 | """, 179 | 'selection_name': 'F1 Score from Scores/Probabilities' 180 | } 181 | 182 | 183 | f1_score_from_preds = { 184 | 'name': 'F1 Score', 185 | 'source': 186 | """from sklearn.metrics import f1_score 187 | import numpy as np 188 | 189 | def metric_generator(y_true, y_preds): 190 | \"\"\"This function computes the F1 score given the true labels array (y_true) 191 | and the predicted labels array (y_preds). 
192 | \"\"\" 193 | classes_ = np.unique(y_true) 194 | if len(classes_) > 2: 195 | score = f1_score(y_true, y_preds, average='micro') 196 | else: 197 | score = f1_score(y_true, y_preds) 198 | return score 199 | """, 200 | 'selection_name': 'F1 Score from Predictions' 201 | } 202 | 203 | mae = { 204 | 'name': 'Mean Absolute Error', 205 | 'source': 206 | """from sklearn.metrics import mean_absolute_error 207 | 208 | metric_generator = mean_absolute_error 209 | """, 210 | 'selection_name': 'Mean Absolute Error' 211 | } 212 | 213 | mse = { 214 | 'name': 'Mean Squared Error', 215 | 'source': 216 | """from sklearn.metrics import mean_squared_error 217 | 218 | metric_generator = mean_squared_error 219 | """, 220 | 'selection_name': 'Mean Squared Error' 221 | } 222 | 223 | median_absolute_error = { 224 | 'name': 'Median Absolute Error', 225 | 'source': 226 | """from sklearn.metrics import median_absolute_error 227 | 228 | metric_generator = median_absolute_error 229 | """, 230 | 'selection_name': 'Median Absolute Error' 231 | } 232 | 233 | r2_score = { 234 | 'name': 'R-Squared Score', 235 | 'source': 236 | """from sklearn.metrics import r2_score 237 | 238 | metric_generator = r2_score 239 | """, 240 | 'selection_name': 'R-Squared Score' 241 | } 242 | 243 | explained_variance_score = { 244 | 'name': 'Explained Variance Score', 245 | 'source': 246 | """from sklearn.metrics import explained_variance_score 247 | 248 | metric_generator = explained_variance_score 249 | """, 250 | 'selection_name': 'Explained Variance Score' 251 | } 252 | 253 | roc_auc_score_from_scores = { 254 | 'name': 'ROC AUC Score', 255 | 'source': 256 | """from sklearn.metrics import roc_auc_score 257 | from sklearn.preprocessing import label_binarize 258 | import numpy as np 259 | 260 | def metric_generator(y_true, y_probas): 261 | \"\"\"This function computes the Area under Curve of the 262 | Receiver Operating Characteristic given the true labels array (y_true) 263 | and the scores/probabilities array 
(y_probas). In a non-binary classification 264 | task, this will calculate a weighted mean of the AUC for each class. This 265 | behavior can be changed by passing a different parameter to the 266 | `average` argument. 267 | \"\"\" 268 | classes_ = np.unique(y_true) 269 | if len(classes_) != np.array(y_probas).shape[1]: 270 | raise ValueError('The shape of y_probas does not correspond to the number of unique values in y_true') 271 | binarized = label_binarize(y_true, classes_) 272 | if len(classes_) == 2: 273 | binarized = binarized.ravel() 274 | y_probas = y_probas[:, 1] 275 | return roc_auc_score(binarized, y_probas, average='weighted') 276 | """, 277 | 'selection_name': 'ROC AUC Score from Scores/Probabilities' 278 | } 279 | -------------------------------------------------------------------------------- /xcessiv/presets/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reiinakano/xcessiv/a48dff7d370c84eb5c243bde87164c1f5fd096d5/xcessiv/presets/tests/__init__.py -------------------------------------------------------------------------------- /xcessiv/presets/tests/test_cvsetting.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division, unicode_literals 2 | import unittest 3 | from sklearn.datasets import load_iris 4 | from xcessiv import functions 5 | from xcessiv.presets import cvsetting 6 | 7 | 8 | class TestKFold(unittest.TestCase): 9 | def setUp(self): 10 | self.X, self.y = load_iris(return_X_y=True) 11 | 12 | def test_k_fold_source(self): 13 | module = functions.import_string_code_as_module(cvsetting.k_fold['source']) 14 | assert hasattr(module, 'return_splits_iterable') 15 | 16 | list(module.return_splits_iterable(self.X, self.y)) 17 | 18 | del module 19 | 20 | 21 | class TestStratifiedKFold(unittest.TestCase): 22 | def setUp(self): 23 | self.X, self.y = 
load_iris(return_X_y=True) 24 | 25 | def test_source(self): 26 | module = functions.import_string_code_as_module(cvsetting.stratified_k_fold['source']) 27 | assert hasattr(module, 'return_splits_iterable') 28 | 29 | list(module.return_splits_iterable(self.X, self.y)) 30 | 31 | del module 32 | 33 | 34 | class TestShuffleSplit(unittest.TestCase): 35 | def setUp(self): 36 | self.X, self.y = load_iris(return_X_y=True) 37 | 38 | def test_source(self): 39 | module = functions.import_string_code_as_module(cvsetting.shuffle_split['source']) 40 | assert hasattr(module, 'return_splits_iterable') 41 | 42 | list(module.return_splits_iterable(self.X, self.y)) 43 | 44 | del module 45 | 46 | 47 | class TestStratifiedShuffleSplit(unittest.TestCase): 48 | def setUp(self): 49 | self.X, self.y = load_iris(return_X_y=True) 50 | 51 | def test_source(self): 52 | module = functions.import_string_code_as_module(cvsetting.stratified_shuffle_split['source']) 53 | assert hasattr(module, 'return_splits_iterable') 54 | 55 | list(module.return_splits_iterable(self.X, self.y)) 56 | 57 | del module 58 | 59 | 60 | class TestLeaveOneOut(unittest.TestCase): 61 | def setUp(self): 62 | self.X, self.y = load_iris(return_X_y=True) 63 | 64 | def test_source(self): 65 | module = functions.import_string_code_as_module(cvsetting.leave_one_out['source']) 66 | assert hasattr(module, 'return_splits_iterable') 67 | 68 | list(module.return_splits_iterable(self.X, self.y)) 69 | 70 | del module 71 | 72 | 73 | class TestLeavePOut(unittest.TestCase): 74 | def setUp(self): 75 | self.X, self.y = load_iris(return_X_y=True) 76 | 77 | def test_source(self): 78 | module = functions.import_string_code_as_module(cvsetting.leave_p_out['source']) 79 | assert hasattr(module, 'return_splits_iterable') 80 | 81 | list(module.return_splits_iterable(self.X, self.y)) 82 | 83 | del module 84 | 85 | 86 | class TestGroupKFold(unittest.TestCase): 87 | def setUp(self): 88 | self.X, self.y = load_iris(return_X_y=True) 89 | 90 | def 
class TestTimeSeriesSplit(unittest.TestCase):
    """Smoke test: the time-series split CV preset compiles and splits."""

    def setUp(self):
        self.X, self.y = load_iris(return_X_y=True)

    def test_source(self):
        # Bug fix: this test previously imported cvsetting.leave_one_out['source']
        # (copy-paste from TestLeaveOneOut), so the time-series preset was never
        # actually exercised.
        module = functions.import_string_code_as_module(
            cvsetting.time_series_split['source']
        )
        assert hasattr(module, 'return_splits_iterable')

        # Exhaust the generator so every fold is actually produced.
        list(module.return_splits_iterable(self.X, self.y))

        del module
class TestRegressors(unittest.TestCase):
    """Smoke test every preset regressor: importable, sklearn-compatible, fittable."""

    def setUp(self):
        self.X, self.y = load_boston(return_X_y=True)
        self.regressor_settings = [
            'sklearn_random_forest_regressor',
            'sklearn_extra_trees_regressor',
            'sklearn_bagging_regressor',
            'sklearn_GP_regressor',
            'sklearn_ridge_regressor',
            'sklearn_lasso_regressor',
            'sklearn_kernel_ridge_regressor',
            'sklearn_knn_regressor',
            'sklearn_svr_regressor',
            'sklearn_decision_tree_regressor',
            'sklearn_linear_regression',
            'sklearn_adaboost_regressor',
            'xgboost_regressor',
        ]

    def test_learner_settings(self):
        for name in self.regressor_settings:
            preset = getattr(learnersetting, name)
            learner_module = functions.import_string_code_as_module(preset['source'])

            # The learner must expose the sklearn estimator interface plus the
            # meta-feature generator method declared by its own preset.
            for required_attr in ('get_params', 'set_params', 'fit',
                                  preset['meta_feature_generator']):
                self.assertTrue(hasattr(learner_module.base_learner, required_attr))

            learner_module.base_learner.fit(self.X, self.y)

            del learner_module


class TestTransformers(unittest.TestCase):
    """Smoke test every preset transformer: importable, sklearn-compatible, fittable."""

    def setUp(self):
        self.X, self.y = load_boston(return_X_y=True)
        self.transformer_settings = [
            'identity_transformer'
        ]

    def test_learner_settings(self):
        for name in self.transformer_settings:
            preset = getattr(learnersetting, name)
            learner_module = functions.import_string_code_as_module(preset['source'])

            for required_attr in ('get_params', 'set_params', 'fit',
                                  preset['meta_feature_generator']):
                self.assertTrue(hasattr(learner_module.base_learner, required_attr))

            learner_module.base_learner.fit(self.X, self.y)

            del learner_module
class TestAccuracyFromScores(unittest.TestCase):
    """Check the accuracy-from-scores preset against pinned CV predictions."""

    def test_source(self):
        metric_module = functions.import_string_code_as_module(
            metricsetting.accuracy_from_scores['source']
        )

        # Values are pinned regression values computed from the module-level
        # cross_val_predict fixtures above.
        binary_score = metric_module.metric_generator(binary_y, binary_probas)
        multiclass_score = metric_module.metric_generator(multiclass_y, multiclass_probas)
        self.assertEqual(np.round(binary_score, 2), 0.95)
        self.assertEqual(np.round(multiclass_score, 2), 0.95)

        del metric_module
class TestRecallFromScores(unittest.TestCase):
    """Check the recall-from-scores preset against pinned CV predictions."""

    def test_source(self):
        metric_module = functions.import_string_code_as_module(
            metricsetting.recall_from_scores['source']
        )

        self.assertEqual(
            np.round(metric_module.metric_generator(binary_y, binary_probas), 2), 0.97)
        self.assertEqual(
            np.round(metric_module.metric_generator(multiclass_y, multiclass_probas), 2), 0.95)

        del metric_module


class TestRecallFromPreds(unittest.TestCase):
    """Check the recall-from-predictions preset against pinned CV predictions."""

    def test_source(self):
        metric_module = functions.import_string_code_as_module(
            metricsetting.recall_from_preds['source']
        )

        self.assertEqual(
            np.round(metric_module.metric_generator(binary_y, binary_preds), 2), 0.97)
        self.assertEqual(
            np.round(metric_module.metric_generator(multiclass_y, multiclass_preds), 2), 0.95)

        del metric_module


class TestPrecisionFromScores(unittest.TestCase):
    """Check the precision-from-scores preset against pinned CV predictions."""

    def test_source(self):
        metric_module = functions.import_string_code_as_module(
            metricsetting.precision_from_scores['source']
        )

        self.assertEqual(
            np.round(metric_module.metric_generator(binary_y, binary_probas), 2), 0.95)
        self.assertEqual(
            np.round(metric_module.metric_generator(multiclass_y, multiclass_probas), 2), 0.95)

        del metric_module


class TestPrecisionFromPreds(unittest.TestCase):
    """Check the precision-from-predictions preset against pinned CV predictions."""

    def test_source(self):
        metric_module = functions.import_string_code_as_module(
            metricsetting.precision_from_preds['source']
        )

        self.assertEqual(
            np.round(metric_module.metric_generator(binary_y, binary_preds), 2), 0.95)
        self.assertEqual(
            np.round(metric_module.metric_generator(multiclass_y, multiclass_preds), 2), 0.95)

        del metric_module
class TestF1ScoreFromPreds(unittest.TestCase):
    """Check the F1-from-predictions preset against pinned CV predictions."""

    def test_source(self):
        metric_module = functions.import_string_code_as_module(
            metricsetting.f1_score_from_preds['source']
        )

        self.assertEqual(
            np.round(metric_module.metric_generator(binary_y, binary_preds), 2), 0.96)
        self.assertEqual(
            np.round(metric_module.metric_generator(multiclass_y, multiclass_preds), 2), 0.95)

        del metric_module


class TestROCAUCFromScores(unittest.TestCase):
    """Check the ROC-AUC-from-scores preset against pinned CV predictions."""

    def test_source(self):
        metric_module = functions.import_string_code_as_module(
            metricsetting.roc_auc_score_from_scores['source']
        )

        self.assertEqual(
            np.round(metric_module.metric_generator(binary_y, binary_probas), 2), 0.99)
        self.assertEqual(
            np.round(metric_module.metric_generator(multiclass_y, multiclass_probas), 2), 0.99)

        del metric_module


class TestMAE(unittest.TestCase):
    """Check the mean-absolute-error preset against pinned regression output."""

    def test_source(self):
        metric_module = functions.import_string_code_as_module(metricsetting.mae['source'])

        self.assertEqual(
            np.round(metric_module.metric_generator(regression_y, regression_preds), 2), 6.99)

        del metric_module


class TestMSE(unittest.TestCase):
    """Check the mean-squared-error preset against pinned regression output."""

    def test_source(self):
        metric_module = functions.import_string_code_as_module(metricsetting.mse['source'])

        self.assertEqual(
            np.round(metric_module.metric_generator(regression_y, regression_preds), 2), 168.09)

        del metric_module


class TestMedianAbsoluteError(unittest.TestCase):
    """Check the median-absolute-error preset against pinned regression output."""

    def test_source(self):
        metric_module = functions.import_string_code_as_module(
            metricsetting.median_absolute_error['source']
        )

        self.assertEqual(
            np.round(metric_module.metric_generator(regression_y, regression_preds), 2), 3.72)

        del metric_module
class TestExplainedVarianceScore(unittest.TestCase):
    """Check the explained-variance preset against pinned regression output."""

    def test_source(self):
        metric_module = functions.import_string_code_as_module(
            metricsetting.explained_variance_score['source']
        )

        score = metric_module.metric_generator(regression_y, regression_preds)
        self.assertEqual(np.round(score, 2), -0.89)

        del metric_module
from __future__ import absolute_import, print_function, division, unicode_literals
from xcessiv import app
from xcessiv.server import launch


if __name__ == '__main__':
    # Bug fix: xcessiv.server.launch takes the Flask app as a required
    # positional argument; calling launch() with no arguments raised
    # "TypeError: launch() missing 1 required positional argument: 'app'".
    launch(app)
-------------------------------------------------------------------------------- /xcessiv/scripts/runworker.py: -------------------------------------------------------------------------------- 1 | from rq import Connection, Worker 2 | from redis import Redis 3 | 4 | 5 | def runworker(app): 6 | 7 | REDIS_HOST = app.config['REDIS_HOST'] 8 | REDIS_PORT = app.config['REDIS_PORT'] 9 | REDIS_DB = app.config['REDIS_DB'] 10 | QUEUES = app.config['QUEUES'] 11 | 12 | redis_conn = Connection(Redis(REDIS_HOST, 13 | REDIS_PORT, 14 | REDIS_DB)) 15 | with redis_conn: 16 | w = Worker(QUEUES) 17 | w.work() 18 | -------------------------------------------------------------------------------- /xcessiv/server.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division, unicode_literals 2 | from gevent.wsgi import WSGIServer 3 | import webbrowser 4 | 5 | 6 | def launch(app): 7 | http_server = WSGIServer(('', app.config['XCESSIV_PORT']), app) 8 | webbrowser.open_new('http://localhost:' + str(app.config['XCESSIV_PORT'])) 9 | http_server.serve_forever() 10 | -------------------------------------------------------------------------------- /xcessiv/stacker.py: -------------------------------------------------------------------------------- 1 | import sklearn 2 | if sklearn.__version__.startswith('0.18'): 3 | from sklearn.pipeline import _BasePipeline as bp 4 | else: 5 | from sklearn.utils.metaestimators import _BaseComposition as bp 6 | import numpy as np 7 | 8 | 9 | class XcessivStackedEnsemble(bp): 10 | """Contains the class for the Xcessiv stacked ensemble""" 11 | def __init__(self, base_learners, meta_feature_generators, 12 | secondary_learner, cv_function): 13 | super(XcessivStackedEnsemble, self).__init__() 14 | 15 | self.base_learners = base_learners 16 | self.meta_feature_generators = meta_feature_generators 17 | self.secondary_learner = secondary_learner 18 | self.cv_function = cv_function 
    def get_params(self, deep=True):
        """Get parameters for this estimator.

        Args:

            deep (boolean, optional): If True, will return the parameters for this estimator and
                contained subobjects that are estimators.

        Returns
            params: mapping of string to any Parameter names mapped to their values.
        """
        # Delegates to _BaseComposition._get_params over the named learner list
        # built in __init__ (base learners plus the secondary learner).
        return self._get_params('_named_learners', deep=deep)

    def set_params(self, **params):
        """Set the parameters of this estimator."""
        # Mirror of get_params: route nested parameter assignment through
        # the same named-learner composition helper.
        self._set_params('_named_learners', **params)
        return self

    def fit(self, X, y):
        # Fit the stacked ensemble: each base learner is cross-validated with
        # cv_function, its out-of-fold outputs become meta-features, and the
        # secondary learner is fit on the concatenated meta-feature matrix.
        # X and y must support numpy-style fancy indexing (X[train_idx]).
        print('Fitting {} base learners'.format(len(self.base_learners)))

        all_learner_meta_features = []
        for idx, base_learner in enumerate(self.base_learners):

            single_learner_meta_features = []
            test_indices = []
            for num, (train_idx, test_idx) in enumerate(self.cv_function(X, y)):
                print('Fold {} of base learner {}'.format(num+1, idx+1))

                base_learner.fit(X[train_idx], y[train_idx])

                # meta_feature_generators[idx] is a method name string,
                # e.g. 'predict' or 'predict_proba'.
                preds = getattr(base_learner, self.meta_feature_generators[idx])(X[test_idx])

                # Normalize 1-D outputs to a column vector so learners with
                # scalar predictions concatenate cleanly with 2-D outputs.
                if len(preds.shape) == 1:
                    preds = preds.reshape(-1, 1)

                single_learner_meta_features.append(
                    preds
                )

                test_indices.append(test_idx)

            single_learner_meta_features = np.concatenate(single_learner_meta_features)
            all_learner_meta_features.append(single_learner_meta_features)

        # Stack meta-features column-wise: one block of columns per base learner.
        all_learner_meta_features = np.concatenate(all_learner_meta_features, axis=1)
        # NOTE(review): only the *last* base learner's fold order is kept here;
        # this assumes cv_function yields identical splits on every call — confirm.
        test_indices = np.concatenate(test_indices)  # reorganized order due to CV

        print('Fitting meta-learner')

        # y is re-indexed to match the out-of-fold row order of the meta-features.
        self.secondary_learner.fit(all_learner_meta_features, y[test_indices])

        return self
_process_using_meta_feature_generator(self, X, meta_feature_generator): 78 | """Process using secondary learner meta-feature generator 79 | 80 | Since secondary learner meta-feature generator can be anything e.g. predict, predict_proba, 81 | this internal method gives the ability to use any string. Just make sure secondary learner 82 | has the method. 83 | 84 | Args: 85 | X (array-like): Features array 86 | 87 | meta_feature_generator (str, unicode): Method for use by secondary learner 88 | """ 89 | 90 | all_learner_meta_features = [] 91 | for idx, base_learner in enumerate(self.base_learners): 92 | single_learner_meta_features = getattr(base_learner, 93 | self.meta_feature_generators[idx])(X) 94 | 95 | if len(single_learner_meta_features.shape) == 1: 96 | single_learner_meta_features = single_learner_meta_features.reshape(-1, 1) 97 | all_learner_meta_features.append(single_learner_meta_features) 98 | 99 | all_learner_meta_features = np.concatenate(all_learner_meta_features, axis=1) 100 | 101 | out = getattr(self.secondary_learner, meta_feature_generator)(all_learner_meta_features) 102 | 103 | return out 104 | -------------------------------------------------------------------------------- /xcessiv/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reiinakano/xcessiv/a48dff7d370c84eb5c243bde87164c1f5fd096d5/xcessiv/tests/__init__.py -------------------------------------------------------------------------------- /xcessiv/tests/extractmaindataset.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import load_digits 2 | 3 | 4 | def extract_main_dataset(): 5 | X, y = load_digits(return_X_y=True) 6 | return X, y 7 | 8 | dummy_variable = 2 9 | -------------------------------------------------------------------------------- /xcessiv/tests/myrf.py: -------------------------------------------------------------------------------- 1 | 
from sklearn.ensemble import RandomForestClassifier 2 | import joblib 3 | 4 | 5 | class MyClassifier(RandomForestClassifier): 6 | def save(self, filepath): 7 | joblib.dump(self, filepath, 3) 8 | 9 | @staticmethod 10 | def load(filepath): 11 | return joblib.load(filepath) 12 | -------------------------------------------------------------------------------- /xcessiv/tests/test_functions.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division, unicode_literals 2 | import unittest 3 | import os 4 | import numpy as np 5 | from xcessiv import functions, exceptions 6 | from sklearn.datasets import load_digits 7 | from sklearn.ensemble import RandomForestClassifier 8 | from sklearn.decomposition import PCA 9 | from sklearn.pipeline import Pipeline 10 | import pickle 11 | 12 | 13 | filepath = os.path.join(os.path.dirname(__file__), 14 | 'extractmaindataset.py') 15 | 16 | 17 | class TestHashFile(unittest.TestCase): 18 | def test_hash_file(self): 19 | assert functions.hash_file(filepath) == "1c67f8f573b69a9da2f986e1006ff63a" \ 20 | "10fbb70298af45d0293e490b65b34edc" 21 | 22 | assert functions.hash_file(filepath) == functions.hash_file(filepath, 2) 23 | 24 | 25 | class TestImportObjectFromPath(unittest.TestCase): 26 | def test_import_object_from_path(self): 27 | returned_object = functions.import_object_from_path(filepath, 28 | "extract_main_dataset") 29 | assert callable(returned_object) 30 | 31 | pickle.loads(pickle.dumps(returned_object)) # make sure pickle works 32 | 33 | 34 | class TestImportObjectFromStringCode(unittest.TestCase): 35 | def test_import_object_from_string_code(self): 36 | with open(filepath) as f: 37 | returned_object = functions.\ 38 | import_object_from_string_code(f.read(), "extract_main_dataset") 39 | 40 | assert callable(returned_object) 41 | 42 | pickle.loads(pickle.dumps(returned_object)) # make sure pickle works 43 | 44 | 45 | class 
TestImportStringCodeAsModule(unittest.TestCase): 46 | def test_import_string_code_as_module(self): 47 | with open(filepath) as f: 48 | module = functions.\ 49 | import_string_code_as_module(f.read()) 50 | 51 | assert callable(module.extract_main_dataset) 52 | assert module.dummy_variable == 2 53 | 54 | pickle.loads(pickle.dumps(module.extract_main_dataset)) # make sure pickle works 55 | 56 | 57 | class TestVerifyDataset(unittest.TestCase): 58 | def test_correct_dataset(self): 59 | X, y = load_digits(return_X_y=True) 60 | verification_dict = functions.verify_dataset(X, y) 61 | assert verification_dict['features_shape'] == (1797,64) 62 | assert verification_dict['labels_shape'] == (1797,) 63 | 64 | def test_invalid_assertions(self): 65 | self.assertRaises(exceptions.UserError, 66 | functions.verify_dataset, 67 | [[1, 2, 2], [2, 3, 5]], 68 | [1, 2, 3]) 69 | 70 | self.assertRaises(exceptions.UserError, 71 | functions.verify_dataset, 72 | [[1, 2, 2], [2, 3, 5]], 73 | [[1, 2, 3]]) 74 | 75 | self.assertRaises(exceptions.UserError, 76 | functions.verify_dataset, 77 | [[[1, 2, 2]], [[2, 3, 5]]], 78 | [1, 2, 3]) 79 | 80 | 81 | class TestIsValidJSON(unittest.TestCase): 82 | def test_is_valid_json(self): 83 | assert functions.is_valid_json({'x': ['i am serializable', 0.1]}) 84 | assert not functions.is_valid_json({'x': RandomForestClassifier()}) 85 | 86 | 87 | class TestMakeSerializable(unittest.TestCase): 88 | def test_make_serializable(self): 89 | assert functions.is_valid_json({'x': ['i am serializable', 0.1]}) 90 | assert not functions.is_valid_json({'x': RandomForestClassifier()}) 91 | assert functions.make_serializable( 92 | { 93 | 'x': ['i am serializable', 0.1], 94 | 'y': RandomForestClassifier() 95 | } 96 | ) == {'x': ['i am serializable', 0.1]} 97 | 98 | 99 | class GetSampleDataset(unittest.TestCase): 100 | def setUp(self): 101 | self.dataset_properties = { 102 | 'type': 'multiclass', 103 | } 104 | 105 | def test_classification_dataset(self): 106 | X, y, split = 
functions.get_sample_dataset(self.dataset_properties) 107 | assert X.shape == (100, 20) 108 | assert y.shape == (100,) 109 | assert len(np.unique(y)) == 2 110 | 111 | self.dataset_properties['n_classes'] = 4 112 | self.dataset_properties['n_informative'] = 18 113 | X, y, split = functions.get_sample_dataset(self.dataset_properties) 114 | assert X.shape == (100, 20) 115 | assert y.shape == (100,) 116 | assert len(np.unique(y)) == 4 117 | 118 | self.dataset_properties['n_features'] = 100 119 | X, y, split = functions.get_sample_dataset(self.dataset_properties) 120 | assert X.shape == (100, 100) 121 | assert y.shape == (100,) 122 | assert len(np.unique(y)) == 4 123 | 124 | self.dataset_properties['n_samples'] = 24 125 | X, y, split = functions.get_sample_dataset(self.dataset_properties) 126 | assert X.shape == (24, 100) 127 | assert y.shape == (24,) 128 | assert len(np.unique(y)) == 4 129 | 130 | def test_iris_dataset(self): 131 | X, y, split = functions.get_sample_dataset({'type': 'iris'}) 132 | assert X.shape == (150, 4) 133 | assert y.shape == (150,) 134 | 135 | def test_mnist_dataset(self): 136 | X, y, split = functions.get_sample_dataset({'type': 'mnist'}) 137 | assert X.shape == (1797, 64) 138 | assert y.shape == (1797,) 139 | 140 | def test_breast_cancer_dataset(self): 141 | X, y, split = functions.get_sample_dataset({'type': 'breast_cancer'}) 142 | assert X.shape == (569, 30) 143 | assert y.shape == (569,) 144 | 145 | def test_boston_housing(self): 146 | X, y, split = functions.get_sample_dataset({'type': 'boston'}) 147 | assert X.shape == (506, 13) 148 | assert y.shape == (506,) 149 | 150 | def test_diabetes(self): 151 | X, y, split = functions.get_sample_dataset({'type': 'diabetes'}) 152 | assert X.shape == (442, 10) 153 | assert y.shape == (442,) 154 | 155 | 156 | class TestVerifyEstimatorClass(unittest.TestCase): 157 | def setUp(self): 158 | self.source = ''.join([ 159 | "from sklearn.metrics import accuracy_score\n", 160 | "import numpy as np\n", 161 | 
"def metric_generator(y_true, y_probas):\n", 162 | " argmax = np.argmax(y_probas, axis=1)\n", 163 | " return accuracy_score(y_true, argmax)" 164 | ]) 165 | self.wrong_source = "metric_generator = ''" 166 | self.dataset_properties = { 167 | 'type': 'multiclass', 168 | } 169 | 170 | def test_verify_estimator_class(self): 171 | np.random.seed(8) 172 | performance_dict, hyperparameters = functions.verify_estimator_class( 173 | RandomForestClassifier(), 174 | 'predict_proba', 175 | dict(Accuracy=self.source), 176 | self.dataset_properties 177 | ) 178 | assert round(performance_dict['Accuracy'], 3) == 0.8 179 | assert hyperparameters == { 180 | 'warm_start': False, 181 | 'oob_score': False, 182 | 'n_jobs': 1, 183 | 'verbose': 0, 184 | 'max_leaf_nodes': None, 185 | 'bootstrap': True, 186 | 'min_samples_leaf': 1, 187 | 'n_estimators': 10, 188 | 'min_samples_split': 2, 189 | 'min_weight_fraction_leaf': 0.0, 190 | 'criterion': 'gini', 191 | 'random_state': None, 192 | 'min_impurity_split': None, 193 | 'min_impurity_decrease': 0.0, 194 | 'max_features': 'auto', 195 | 'max_depth': None, 196 | 'class_weight': None 197 | } 198 | 199 | def test_non_serializable_parameters(self): 200 | pipeline = Pipeline([('pca', PCA()), ('rf', RandomForestClassifier())]) 201 | performance_dict, hyperparameters = functions.verify_estimator_class( 202 | pipeline, 203 | 'predict_proba', 204 | dict(Accuracy=self.source), 205 | self.dataset_properties 206 | ) 207 | assert functions.is_valid_json(hyperparameters) 208 | 209 | def test_assertion_of_invalid_metric_generator(self): 210 | np.random.seed(8) 211 | self.assertRaises( 212 | exceptions.UserError, 213 | functions.verify_estimator_class, 214 | RandomForestClassifier(), 215 | 'predict_proba', 216 | dict(Accuracy=self.wrong_source), 217 | self.dataset_properties 218 | ) 219 | 220 | def test_assertion_meta_feature_generator(self): 221 | np.random.seed(8) 222 | self.assertRaises( 223 | exceptions.UserError, 224 | functions.verify_estimator_class, 225 
| RandomForestClassifier(), 226 | 'decision_function', 227 | dict(Accuracy=self.source), 228 | self.dataset_properties 229 | ) 230 | -------------------------------------------------------------------------------- /xcessiv/tests/test_models.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division, unicode_literals 2 | import unittest 3 | from xcessiv import models 4 | from sklearn.ensemble import RandomForestClassifier 5 | 6 | 7 | class TestReturnTrainDataFromJSON(unittest.TestCase): 8 | def setUp(self): 9 | self.extraction = models.Extraction() 10 | self.extraction.main_dataset['source'] = ''.join([ 11 | "from sklearn.datasets import load_digits\n", 12 | "\n", 13 | "\n", 14 | "def extract_main_dataset():\n", 15 | " X, y = load_digits(return_X_y=True)\n", 16 | " return X, y" 17 | ]) 18 | 19 | def test_main_is_train(self): 20 | X, y = self.extraction.return_train_dataset() 21 | assert X.shape == (1797, 64) 22 | assert y.shape == (1797,) 23 | 24 | def test_split_main_for_test(self): 25 | self.extraction.test_dataset['method'] = 'split_from_main' 26 | self.extraction.test_dataset['split_ratio'] = 0.1 27 | self.extraction.test_dataset['split_seed'] = 8 28 | X, y = self.extraction.return_train_dataset() 29 | assert X.shape == (1617, 64) 30 | assert y.shape == (1617,) 31 | 32 | 33 | class TestReturnTestDataFromJSON(unittest.TestCase): 34 | def setUp(self): 35 | self.extraction = models.Extraction() 36 | self.extraction.main_dataset['source'] = ''.join([ 37 | "from sklearn.datasets import load_digits\n", 38 | "\n", 39 | "\n", 40 | "def extract_main_dataset():\n", 41 | " X, y = load_digits(return_X_y=True)\n", 42 | " return X, y" 43 | ]) 44 | self.extraction.test_dataset['method'] = 'split_from_main' 45 | self.extraction.test_dataset['split_ratio'] = 0.1 46 | self.extraction.test_dataset['split_seed'] = 8 47 | 48 | def test_split_main_for_test(self): 49 | X, y = 
self.extraction.return_test_dataset() 50 | assert X.shape == (180, 64) 51 | assert y.shape == (180,) 52 | 53 | def test_test_dataset_from_source(self): 54 | self.extraction.test_dataset["method"] = "source" 55 | self.extraction.test_dataset["source"] = ''.join([ 56 | "from sklearn.datasets import load_digits\n", 57 | "def extract_test_dataset():\n", 58 | " X, y = load_digits(return_X_y=True)\n", 59 | " return X, y" 60 | ]) 61 | X, y = self.extraction.return_test_dataset() 62 | assert X.shape == (1797, 64) 63 | assert y.shape == (1797,) 64 | 65 | 66 | class TestReturnEstimator(unittest.TestCase): 67 | def setUp(self): 68 | self.base_learner_origin = models.BaseLearnerOrigin( 69 | source=''.join([ 70 | "from sklearn.ensemble import RandomForestClassifier\n", 71 | "base_learner = RandomForestClassifier(random_state=8)" 72 | ]) 73 | ) 74 | 75 | def test_return_estimator_from_json(self): 76 | est = self.base_learner_origin.return_estimator() 77 | assert isinstance(est, RandomForestClassifier) 78 | -------------------------------------------------------------------------------- /xcessiv/tests/test_stacker.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division, unicode_literals 2 | import unittest 3 | import numpy as np 4 | from xcessiv import stacker 5 | from sklearn.ensemble import RandomForestClassifier 6 | from sklearn.linear_model import LogisticRegression 7 | from sklearn.model_selection import StratifiedKFold, train_test_split 8 | from sklearn.datasets import load_iris 9 | from sklearn.metrics import accuracy_score 10 | 11 | 12 | class TestStacker(unittest.TestCase): 13 | def setUp(self): 14 | bl1 = RandomForestClassifier(random_state=8) 15 | bl2 = LogisticRegression() 16 | bl3 = RandomForestClassifier(max_depth=10, random_state=10) 17 | 18 | meta_est = LogisticRegression() 19 | 20 | skf = StratifiedKFold(random_state=8).split 21 | 22 | self.stacked_ensemble = 
stacker.XcessivStackedEnsemble( 23 | [bl1, bl2, bl3], 24 | ['predict', 'predict_proba', 'predict_proba'], 25 | meta_est, 26 | skf 27 | ) 28 | 29 | def test_fit_and_process_using_meta_feature_generator(self): 30 | X, y = load_iris(return_X_y=True) 31 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=8) 32 | 33 | self.stacked_ensemble.fit(X_train, y_train) 34 | 35 | preds = self.stacked_ensemble._process_using_meta_feature_generator(X_test, 'predict') 36 | assert np.round(accuracy_score(y_test, preds), 3) == 0.868 37 | 38 | probas = self.stacked_ensemble._process_using_meta_feature_generator(X_test, 'predict_proba') 39 | preds = np.argmax(probas, axis=1) 40 | assert np.round(accuracy_score(y_test, preds), 3) == 0.868 41 | 42 | def test_get_params(self): 43 | self.stacked_ensemble.get_params() 44 | 45 | def test_set_params(self): 46 | self.stacked_ensemble.set_params(bl0__random_state=20) 47 | assert self.stacked_ensemble.get_params()['bl0__random_state'] == 20 48 | assert self.stacked_ensemble.get_params()['bl0'].get_params()['random_state'] == 20 49 | 50 | self.stacked_ensemble.set_params(**{'secondary-learner__C': 1.5}) 51 | assert self.stacked_ensemble.get_params()['secondary-learner__C'] == 1.5 52 | assert self.stacked_ensemble.get_params()['secondary-learner'].get_params()['C'] == 1.5 53 | -------------------------------------------------------------------------------- /xcessiv/tests/test_views.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division, unicode_literals 2 | import unittest 3 | import json 4 | import os 5 | from xcessiv import app, functions, models, constants 6 | 7 | 8 | class TestCreateNewEnsemble(unittest.TestCase): 9 | def setUp(self): 10 | self.app = app.test_client() 11 | self.test_location = 'test_folder' 12 | 13 | def tearDown(self): 14 | if os.path.exists(os.path.join(self.test_location, 15 | 
app.config['XCESSIV_NOTEBOOK_NAME'])): 16 | os.remove(os.path.join(self.test_location, 17 | app.config['XCESSIV_NOTEBOOK_NAME'])) 18 | os.rmdir(self.test_location) 19 | 20 | def test_creation(self): 21 | rv = self.app.post( 22 | '/ensemble/', 23 | data=json.dumps( 24 | { 25 | 'ensemble_name': self.test_location 26 | } 27 | ), 28 | content_type='application/json' 29 | ) 30 | assert rv.status_code == 200 31 | assert os.path.exists(os.path.join(self.test_location, 32 | app.config['XCESSIV_NOTEBOOK_NAME'])) 33 | with functions.DBContextManager(self.test_location) as session: 34 | extraction = session.query(models.Extraction).all() 35 | assert len(extraction) == 1 36 | assert extraction[0].main_dataset == constants.DEFAULT_EXTRACTION_MAIN_DATASET 37 | assert extraction[0].test_dataset == constants.DEFAULT_EXTRACTION_TEST_DATASET 38 | assert extraction[0].meta_feature_generation == \ 39 | constants.DEFAULT_EXTRACTION_META_FEATURE_GENERATION 40 | 41 | def test_duplicate(self): 42 | rv = self.app.post( 43 | '/ensemble/', 44 | data=json.dumps( 45 | { 46 | 'ensemble_name': self.test_location 47 | } 48 | ), 49 | content_type='application/json' 50 | ) 51 | 52 | rv = self.app.post( 53 | '/ensemble/', 54 | data=json.dumps( 55 | { 56 | 'ensemble_name': self.test_location 57 | } 58 | ), 59 | content_type='application/json' 60 | ) 61 | 62 | assert rv.status_code == 400 63 | -------------------------------------------------------------------------------- /xcessiv/ui/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/ignore-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules 5 | 6 | # testing 7 | /coverage 8 | 9 | # production 10 | /build 11 | 12 | # misc 13 | .DS_Store 14 | .env 15 | npm-debug.log* 16 | yarn-debug.log* 17 | yarn-error.log* 18 | 19 | -------------------------------------------------------------------------------- /xcessiv/ui/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ui", 3 | "version": "0.1.0", 4 | "private": true, 5 | "proxy": "http://localhost:8080", 6 | "dependencies": { 7 | "bootstrap": "^3.3.7", 8 | "fixed-data-table": "^0.6.4", 9 | "immutable": "^3.8.1", 10 | "jquery": "^3.2.1", 11 | "lodash": "^4.17.4", 12 | "rc-collapse": "^1.6.12", 13 | "react": "^15.4.2", 14 | "react-bootstrap": "^0.31.0", 15 | "react-codemirror": "^0.3.0", 16 | "react-contenteditable": "^2.0.2", 17 | "react-dimensions": "^1.3.0", 18 | "react-dom": "^15.4.2", 19 | "react-height": "^2.2.0", 20 | "react-icons": "^2.2.3", 21 | "react-motion": "^0.4.7", 22 | "react-notification-system": "^0.2.13", 23 | "react-scrollbar": "^0.5.1", 24 | "react-select": "^1.0.0-rc.3", 25 | "react-syntax-highlighter": "^5.1.2", 26 | "react-tabs": "^0.8.3", 27 | "react-virtualized-select": "^3.0.1" 28 | }, 29 | "devDependencies": { 30 | "eslint": "^3.19.0", 31 | "eslint-config-google": "^0.7.1", 32 | "react-scripts": "0.9.5" 33 | }, 34 | "scripts": { 35 | "start": "react-scripts start", 36 | "build": "react-scripts build", 37 | "test": "react-scripts test --env=jsdom", 38 | "eject": "react-scripts eject" 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /xcessiv/ui/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reiinakano/xcessiv/a48dff7d370c84eb5c243bde87164c1f5fd096d5/xcessiv/ui/public/favicon.ico -------------------------------------------------------------------------------- /xcessiv/ui/public/index.html: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 16 | Xcessiv 17 | 18 | 19 |
20 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /xcessiv/ui/src/App.css: -------------------------------------------------------------------------------- 1 | .App { 2 | background-color: #eee; 3 | } 4 | 5 | .App-header { 6 | background-color: #222; 7 | height: 150px; 8 | padding: 20px; 9 | color: white; 10 | text-align: center; 11 | } 12 | -------------------------------------------------------------------------------- /xcessiv/ui/src/App.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from 'react'; 2 | import './App.css'; 3 | import ContainerBaseLearner from './containers/ContainerBaseLearner'; 4 | import NotificationSystem from 'react-notification-system'; 5 | import { Button, Modal, Form, FormGroup, FormControl, 6 | ControlLabel, Radio, Glyphicon } from 'react-bootstrap'; 7 | 8 | function handleErrors(response) { 9 | if (!response.ok) { 10 | var error = new Error(response.statusText); 11 | 12 | // Unexpected error 13 | if (response.status === 500) { 14 | error.errMessage = 'Unexpected error'; 15 | throw error; 16 | } 17 | return response.json() 18 | .then(errorBody => { 19 | error.errMessage = JSON.stringify(errorBody); 20 | throw error; 21 | }); 22 | } 23 | return response; 24 | } 25 | 26 | class CreateProjectModal extends Component { 27 | constructor(props) { 28 | super(props); 29 | this.state = { 30 | name: '', 31 | selected: null 32 | }; 33 | } 34 | 35 | render() { 36 | return ( 37 | 41 | 42 | Create New Project 43 | 44 | 45 |

Existing folders

46 |
47 | {this.props.folders.map((x) => { 48 | return ( 49 | this.setState({selected: x})}> 54 | {x} 55 | 56 | ) 57 | })} 58 |
59 |
e.preventDefault()}> 60 | 63 | {'Name new project '} 64 | {' '} 65 | this.setState({name: evt.target.value})} 69 | /> 70 | 71 | {' '} 72 | 81 |
82 |
83 | 84 | 99 | 100 | 101 |
102 | ) 103 | } 104 | } 105 | 106 | class NotebookWithToolbar extends Component { 107 | constructor(props) { 108 | super(props); 109 | this.state = { 110 | path: '', 111 | folders: [], 112 | showCreateProjectModal: false 113 | }; 114 | } 115 | 116 | // Get request from server to populate fields 117 | componentDidMount() { 118 | this.getFolders(); 119 | } 120 | 121 | // Get list of folders in current working directory 122 | getFolders() { 123 | fetch('/folders/') 124 | .then(response => response.json()) 125 | .then(json => { 126 | console.log(json); 127 | this.setState({ 128 | folders: json 129 | }); 130 | }); 131 | } 132 | 133 | // Creates new project folder with initialized xcnb.db 134 | createProject(name) { 135 | var payload = {ensemble_name: name}; 136 | fetch( 137 | '/ensemble/', 138 | { 139 | method: "POST", 140 | body: JSON.stringify( payload ), 141 | headers: new Headers({ 142 | 'Content-Type': 'application/json' 143 | }) 144 | } 145 | ) 146 | .then(handleErrors) 147 | .then(response => response.json()) 148 | .then(json => { 149 | console.log(json); 150 | this.props.addNotification({ 151 | title: 'Success', 152 | message: 'Created new project folder', 153 | level: 'success' 154 | }); 155 | this.getFolders(); 156 | }) 157 | .catch(error => { 158 | console.log(error.message); 159 | console.log(error.errMessage); 160 | this.props.addNotification({ 161 | title: error.message, 162 | message: error.errMessage, 163 | level: 'error' 164 | }); 165 | }); 166 | } 167 | 168 | render() { 169 | return ( 170 |
171 | 176 |

{'Current open project folder: ' + this.state.path}

177 | {Boolean(this.state.path) && 178 | this.props.addNotification(notif)} 181 | /> 182 | } 183 | this.createProject(name)} 185 | folders={this.state.folders} 186 | isOpen={this.state.showCreateProjectModal} 187 | onRequestClose={() => this.setState({showCreateProjectModal: false})} 188 | changePath={(path) => this.setState({path})} 189 | addNotification={(notif) => this.props.addNotification(notif)} 190 | /> 191 |
192 | ); 193 | } 194 | } 195 | 196 | function Notebook(props) { 197 | return ( 198 |
199 | props.addNotification(notif)} 202 | /> 203 |
204 | ) 205 | } 206 | 207 | class App extends Component { 208 | 209 | constructor(props) { 210 | super(props); 211 | this._notificationSystem = null; 212 | } 213 | 214 | componentDidMount() { 215 | this._notificationSystem = this.refs.notificationSystem; 216 | } 217 | 218 | render() { 219 | return ( 220 |
221 |
222 |

Xcessiv

223 |
224 | this._notificationSystem.addNotification(notif)} 226 | /> 227 | 228 |
229 | ) 230 | } 231 | } 232 | 233 | export default App; 234 | -------------------------------------------------------------------------------- /xcessiv/ui/src/App.test.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import ReactDOM from 'react-dom'; 3 | import App from './App'; 4 | 5 | it('renders without crashing', () => { 6 | const div = document.createElement('div'); 7 | ReactDOM.render(, div); 8 | }); 9 | -------------------------------------------------------------------------------- /xcessiv/ui/src/AutomatedRuns/AutomatedRuns.css: -------------------------------------------------------------------------------- 1 | .AutomatedRunsDisplay { 2 | padding-left: 50px; 3 | padding-right: 50px; 4 | } 5 | -------------------------------------------------------------------------------- /xcessiv/ui/src/AutomatedRuns/AutomatedRunsDisplay.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from 'react'; 2 | import { Button, Panel } from 'react-bootstrap'; 3 | import './AutomatedRuns.css'; 4 | import 'fixed-data-table/dist/fixed-data-table.min.css'; 5 | import { Table, Column, Cell } from 'fixed-data-table'; 6 | import Dimensions from 'react-dimensions'; 7 | import FaCheck from 'react-icons/lib/fa/check'; 8 | import FaTrash from 'react-icons/lib/fa/trash'; 9 | import FaSpinner from 'react-icons/lib/fa/spinner'; 10 | import FaExclamationCircle from 'react-icons/lib/fa/exclamation-circle' 11 | import FaInfo from 'react-icons/lib/fa/info'; 12 | import FaCogs from 'react-icons/lib/fa/cogs'; 13 | import { DetailsModal, DeleteModal } from './Modals' 14 | 15 | class AutomatedRunsDisplay extends Component { 16 | constructor(props) { 17 | super(props); 18 | this.state = { 19 | open: false, 20 | moreDetailsId: null, 21 | idToDelete: null 22 | }; 23 | } 24 | 25 | render() { 26 | const header = ( 27 | 28 | 35 | 37 | 39 | 41 | 43 |
29 | 34 | {'Succeeded: ' + this.props.automatedRuns.filter((el) => 36 | el.job_status === 'finished').length}{'Failed: ' + this.props.automatedRuns.filter((el) => 38 | el.job_status === 'errored').length}{'Queued: ' + this.props.automatedRuns.filter((el) => 40 | el.job_status === 'queued').length}{'In Progress: ' + this.props.automatedRuns.filter((el) => 42 | el.job_status === 'started').length}
44 | ); 45 | 46 | const automatedRunsReversed = this.props.automatedRuns.slice().reverse(); 47 | 48 | return ( 49 |
50 | 51 | 53 | 59 | { 62 | return ( 63 | 64 | {automatedRunsReversed[props.rowIndex].id} 65 | 66 | ) 67 | }} 68 | width={50} 69 | flexGrow={1} 70 | /> 71 | { 74 | return ( 75 | 76 | {automatedRunsReversed[props.rowIndex].category} 77 | 78 | ) 79 | }} 80 | width={50} 81 | flexGrow={1} 82 | /> 83 | { 86 | return ( 87 | 88 | {automatedRunsReversed[props.rowIndex].base_learner_origin_id} 89 | 90 | ) 91 | }} 92 | width={50} 93 | flexGrow={1} 94 | /> 95 | { 98 | return ( 99 | 100 | {automatedRunsReversed[props.rowIndex].job_id} 101 | 102 | ) 103 | }} 104 | width={200} 105 | flexGrow={1} 106 | /> 107 | { 110 | if (automatedRunsReversed[props.rowIndex] === undefined) { 111 | return () 112 | } 113 | 114 | var status_icon; 115 | if (automatedRunsReversed[props.rowIndex].job_status === 'errored') { 116 | status_icon = 117 | } 118 | else if (automatedRunsReversed[props.rowIndex].job_status === 'finished') { 119 | status_icon = 120 | } 121 | else if (automatedRunsReversed[props.rowIndex].job_status === 'queued') { 122 | status_icon = 'Queued' 123 | } 124 | else { 125 | status_icon = 126 | } 127 | 128 | return ( 129 | 130 | {status_icon} 131 | 132 | ) 133 | }} 134 | width={50} 135 | flexGrow={1} 136 | /> 137 | { 139 | 140 | return ( 141 | 142 | 145 | this.setState({moreDetailsId: automatedRunsReversed[props.rowIndex].id})} 146 | /> 147 | 148 | ) 149 | }} 150 | width={50} 151 | /> 152 | { 154 | 155 | return ( 156 | 157 | this.setState({idToDelete: automatedRunsReversed[props.rowIndex].id})} 160 | /> 161 | 162 | ) 163 | }} 164 | width={50} 165 | /> 166 |
167 |
168 | this.setState({moreDetailsId: null})} 170 | automatedRuns={this.props.automatedRuns} 171 | moreDetailsId={this.state.moreDetailsId} 172 | /> 173 | this.setState({idToDelete: null})} 176 | handleYes={() => this.props.deleteAutomatedRun(this.state.idToDelete)} 177 | /> 178 |
179 | ) 180 | } 181 | } 182 | 183 | module.exports = Dimensions({ 184 | getHeight: function(element) { 185 | return window.innerHeight - 200; 186 | }, 187 | getWidth: function(element) { 188 | var widthOffset = window.innerWidth < 680 ? 0 : 145; 189 | return window.innerWidth - widthOffset; 190 | } 191 | })(AutomatedRunsDisplay); 192 | 193 | export default AutomatedRunsDisplay; 194 | -------------------------------------------------------------------------------- /xcessiv/ui/src/AutomatedRuns/Modals.js: -------------------------------------------------------------------------------- 1 | import React, {Component} from 'react'; 2 | import './AutomatedRuns.css'; 3 | import 'react-select/dist/react-select.css'; 4 | import { Modal, Button, Alert } from 'react-bootstrap'; 5 | import CodeMirror from 'react-codemirror'; 6 | import 'codemirror/lib/codemirror.css'; 7 | import 'codemirror/mode/python/python'; 8 | 9 | function DisplayError(props) { 10 | return 11 | {props.description['error_traceback'].join('').split("\n").map((i, index) => { 12 | return
{i}
; 13 | })} 14 |
15 | } 16 | 17 | export class DetailsModal extends Component { 18 | render() { 19 | 20 | const automatedRun = this.props.automatedRuns.find(el => el.id === this.props.moreDetailsId); 21 | 22 | if (automatedRun === undefined) { 23 | return null; 24 | } 25 | 26 | var options = { 27 | lineNumbers: true, 28 | indentUnit: 4, 29 | readOnly: true 30 | }; 31 | 32 | return ( 33 | 38 | 39 | {'Details of Automated Run ID ' + automatedRun.id} 40 | 41 | 42 | Configuration Source 43 | 45 | {'Base Learner Type ID: '} 46 | {automatedRun.base_learner_origin_id} 47 |
48 | {'Job ID: '} 49 | {automatedRun.job_id} 50 | {(automatedRun.job_status === 'errored') && 51 | } 52 |
53 |
54 | ) 55 | } 56 | } 57 | 58 | export class DeleteModal extends Component { 59 | 60 | handleYesAndClose() { 61 | this.props.handleYes(); 62 | this.props.onRequestClose(); 63 | } 64 | 65 | render() { 66 | return ( 67 | 71 | 72 | Delete Automated Run 73 | 74 | 75 |

Are you sure you want to delete record of this automated run?

76 |

This action is irreversible.

77 |
78 | 79 | 82 | 83 | 84 |
85 | ) 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /xcessiv/ui/src/BaseLearner/BaseLearner.css: -------------------------------------------------------------------------------- 1 | .BaseLearnerPadding { 2 | padding-top: 30px; 3 | padding-bottom: 30px; 4 | padding-left: 50px; 5 | padding-right: 50px; 6 | } 7 | 8 | .BaseLearner * { 9 | margin: 0; 10 | } 11 | 12 | .BaseLearner table { 13 | border-collapse: collapse; 14 | width: 100%; 15 | } 16 | 17 | .BaseLearner th, td { 18 | text-align: left; 19 | } 20 | 21 | .BaseLearner tr:nth-child(odd) { 22 | cursor: pointer; 23 | } 24 | 25 | .BaseLearner tr:nth-child(odd):hover { 26 | background-color: #f5f5f5 27 | } 28 | 29 | .DualList ul { 30 | columns: 2; 31 | -webkit-columns: 2; 32 | -moz-columns: 2; 33 | } 34 | 35 | .collapse { 36 | background-color: #f5f5f5; 37 | } 38 | 39 | .load-animate { 40 | -animation: spin .7s infinite linear; 41 | -webkit-animation: spin2 .7s infinite linear; 42 | } 43 | 44 | @-webkit-keyframes spin2 { 45 | from { -webkit-transform: rotate(0deg);} 46 | to { -webkit-transform: rotate(360deg);} 47 | } 48 | 49 | @keyframes spin { 50 | from { transform: scale(1) rotate(0deg);} 51 | to { transform: scale(1) rotate(360deg);} 52 | } 53 | 54 | input[type=checkbox] { 55 | transform: scale(1.25); 56 | } -------------------------------------------------------------------------------- /xcessiv/ui/src/BaseLearner/BaseLearnerMoreDetailsModal.js: -------------------------------------------------------------------------------- 1 | import React, {Component} from 'react'; 2 | import './BaseLearner.css'; 3 | import 'react-select/dist/react-select.css'; 4 | import { Modal, Panel, Button, Alert } from 'react-bootstrap'; 5 | 6 | function DisplayError(props) { 7 | return 8 | {props.description['error_traceback'].join('').split("\n").map((i, index) => { 9 | return
{i}
; 10 | })} 11 |
12 | } 13 | 14 | class DetailsModal extends Component { 15 | render() { 16 | if (this.props.moreDetailsId === null) { 17 | return null; 18 | } 19 | 20 | const baseLearner = this.props.baseLearners.find(el => el.id === this.props.moreDetailsId); 21 | 22 | if (baseLearner === undefined) { 23 | return null; 24 | } 25 | 26 | return ( 27 | 31 | 32 | {'Details of base learner ID ' + baseLearner.id} 33 | 34 | 35 | Metrics}> 36 |
    37 | {Object.keys(baseLearner.individual_score).map((key) => { 38 | return
  • {key + ': ' + baseLearner.individual_score[key]}
  • 39 | })} 40 |
41 |
42 | Base Learner Hyperparameters}> 43 |
    44 | {Object.keys(baseLearner.hyperparameters).map((key) => { 45 | return ( 46 |
  • 47 | {key + ': ' + baseLearner.hyperparameters[key]} 48 |
  • 49 | ); 50 | })} 51 |
52 |
53 | {'Base Learner Type ID: '} 54 | {baseLearner.base_learner_origin_id} 55 |
56 | {'Job ID: '} 57 | {baseLearner.job_id} 58 | {(baseLearner.job_status === 'errored') && 59 | } 60 |
61 |
62 | ) 63 | } 64 | } 65 | 66 | export class DeleteModal extends Component { 67 | 68 | handleYesAndClose() { 69 | this.props.handleYes(); 70 | this.props.onRequestClose(); 71 | } 72 | 73 | render() { 74 | return ( 75 | 79 | 80 | Delete base learner 81 | 82 | 83 |

Are you sure you want to delete this base learner?

84 |

All ensembles containing this base learner will be lost as well.

85 |

This action is irreversible.

86 |
87 | 88 | 91 | 92 | 93 |
/* NOTE(review): this span is one garbled dump line holding the tail of
   BaseLearnerMoreDetailsModal.js, all of BaseLearnerOrigin.css, and the
   head of ListBaseLearnerOrigin.js. Non-CSS bytes are preserved as found.
   CSS fixes applied:
   (1) ".DeleteButton:onhover" — ":onhover" is not a CSS pseudo-class, so
       the rule never matched; replaced with the standard ":hover".
   (2) "-animation" is not a valid CSS property name (a vendor prefix needs
       a vendor, e.g. -webkit-); replaced with the standard "animation". */
94 | ) 95 | } 96 | } 97 | 98 | export default DetailsModal; 99 | -------------------------------------------------------------------------------- /xcessiv/ui/src/BaseLearnerOrigin/BaseLearnerOrigin.css: -------------------------------------------------------------------------------- 1 | .BaseLearnerOrigin { 2 | padding-top: 30px; 3 | padding-bottom: 30px; 4 | padding-left: 50px; 5 | padding-right: 50px; 6 | } 7 | 8 | .CodeMirror { 9 | height: auto; 10 | } 11 | 12 | .CodeMirror-scroll { 13 | max-height: 500px; 14 | } 15 | 16 | .SplitFormLabel { 17 | padding-top: 10px; 18 | padding-bottom: 10px; 19 | } 20 | 21 | .DeleteButton { 22 | padding-right: 5px; 23 | float: right; 24 | color: gray; 25 | } 26 | 27 | .DualList ul { 28 | columns: 2; 29 | -webkit-columns: 2; 30 | -moz-columns: 2; 31 | } 32 | 33 | .DeleteButton:hover { 34 | color: black; 35 | } 36 | 37 | .load-animate { 38 | animation: spin .7s infinite linear; 39 | -webkit-animation: spin2 .7s infinite linear; 40 | } 41 | 42 | @-webkit-keyframes spin2 { 43 | from { -webkit-transform: rotate(0deg);} 44 | to { -webkit-transform: rotate(360deg);} 45 | } 46 | 47 | @keyframes spin { 48 | from { transform: scale(1) rotate(0deg);} 49 | to { transform: scale(1) rotate(360deg);} 50 | } -------------------------------------------------------------------------------- /xcessiv/ui/src/BaseLearnerOrigin/ListBaseLearnerOrigin.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from 'react'; 2 | import './BaseLearnerOrigin.css'; 3 | import BaseLearnerOrigin from './BaseLearnerOrigin'; 4 | import { Button, Glyphicon, ButtonGroup } from 'react-bootstrap'; 5 | import { TpotModal } from './BaseLearnerOriginModals' 6 | import FaCogs from 'react-icons/lib/fa/cogs'; 7 | 8 | class ListBaseLearnerOrigin extends Component { 9 | 10 | constructor(props) { 11 | super(props); 12 | this.state = { 13 | showTpotModal: false 14 | }; 15 | } 16 | 17 | getItems() { 18 | 19 | const
items = this.props.baseLearnerOrigins.map((el, index) => { 20 | return ( 21 | this.props.updateBaseLearnerOrigin(el.id, newData)} 26 | deleteLearner={() => this.props.deleteBaseLearnerOrigin(el.id)} 27 | createBaseLearner={(source) => this.props.createBaseLearner(el.id, source)} 28 | gridSearch={(source) => this.props.gridSearch(el.id, source)} 29 | randomSearch={(source, n) => this.props.randomSearch(el.id, source, n)} 30 | startBayesianRun={(source) => this.props.startBayesianRun(el.id, source)} 31 | addNotification={(notif) => this.props.addNotification(notif)} 32 | presetBaseLearnerOrigins={this.props.presetBaseLearnerOrigins} 33 | presetMetricGenerators={this.props.presetMetricGenerators} 34 | /> 35 | ); 36 | }); 37 | 38 | return items; 39 | } 40 | 41 | render() { 42 | return
43 |

Base Learner Types

44 | {this.getItems()} 45 | 46 | 50 | 54 | 55 | this.setState({showTpotModal: false})} 57 | handleYes={(source) => this.props.startTpotRun(source)} /> 58 |
; 59 | } 60 | } 61 | 62 | 63 | export default ListBaseLearnerOrigin; 64 | -------------------------------------------------------------------------------- /xcessiv/ui/src/BaseLearnerOrigin/MetricGenerators.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from 'react'; 2 | import './BaseLearnerOrigin.css'; 3 | import CodeMirror from 'react-codemirror'; 4 | import 'codemirror/lib/codemirror.css'; 5 | import 'codemirror/mode/python/python'; 6 | import 'rc-collapse/assets/index.css'; 7 | import {omit} from 'lodash'; 8 | import Collapse, { Panel } from 'rc-collapse'; 9 | import { Button, Glyphicon, Modal, Form, FormGroup, 10 | FormControl, ControlLabel } from 'react-bootstrap'; 11 | import Select from 'react-select'; 12 | 13 | const default_metric_generator_code = `def metric_generator(y_true, y_probas): 14 | """This function must return a numerical value given two numpy arrays 15 | containing the ground truth labels and generated meta-features, in that order. 16 | (In this example, \`y_true\` and \`y_probas\`) 17 | """ 18 | return 0.88 19 | ` 20 | 21 | class AddNewModal extends Component { 22 | constructor(props) { 23 | super(props); 24 | this.state = { 25 | name: '' 26 | }; 27 | } 28 | 29 | render() { 30 | return ( 31 | 35 | 36 | Add new metric generator 37 | 38 | 39 |
{ 40 | e.preventDefault(); 41 | this.props.onAdd(this.state.name); 42 | this.props.onRequestClose(); 43 | }}> 44 | 47 | Name new metric 48 | this.setState({name: evt.target.value})} 51 | /> 52 | 53 |
54 |
55 | 56 | 57 | 66 | 67 |
68 | ) 69 | } 70 | } 71 | 72 | class PresetMetricGeneratorsModal extends Component { 73 | constructor(props) { 74 | super(props); 75 | this.state = { 76 | selectedValue: [] 77 | }; 78 | } 79 | 80 | render() { 81 | const options = this.props.presetMetricGenerators.map((obj) => { 82 | return { 83 | label: obj.selection_name, 84 | value: obj 85 | } 86 | }); 87 | return ( 88 | 92 | 93 | Select a preset metric generator 94 | 95 | 96 | this.setState({selectedValue})} 56 | placeholder="Select preset CV method" 57 | /> 58 | 59 | 60 | 69 | 70 | 71 | 72 | ) 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /xcessiv/ui/src/DatasetExtraction/StackedEnsembleCV.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from 'react'; 2 | import './MainDataExtraction.css'; 3 | import CodeMirror from 'react-codemirror'; 4 | import 'codemirror/lib/codemirror.css'; 5 | import 'codemirror/mode/python/python'; 6 | import { isEqual } from 'lodash'; 7 | import { ClearModal, PresetCVSettingsModal } from './Modals'; 8 | import { Button, ButtonToolbar, Glyphicon } from 'react-bootstrap'; 9 | 10 | class StackedEnsembleCV extends Component { 11 | constructor(props) { 12 | super(props); 13 | this.state = { 14 | unsavedConfig: this.props.config, 15 | showClearModal: false, 16 | showPresetCVSettingsModal: false 17 | }; 18 | } 19 | 20 | componentWillReceiveProps(nextProps) { 21 | if (this.props.config !== nextProps.config) { 22 | this.setState({ 23 | unsavedConfig: nextProps.config 24 | }) 25 | } 26 | } 27 | 28 | handleConfigChange(option, val) { 29 | var config = JSON.parse(JSON.stringify(this.state.unsavedConfig)); 30 | config[option] = val; 31 | this.props.setSame(isEqual(this.props.config, config)); 32 | this.setState({unsavedConfig: config}) 33 | } 34 | 35 | clearChanges() { 36 | this.setState({unsavedConfig: this.props.config}); 37 | this.props.setSame(true); 38 | 
this.props.addNotification({ 39 | title: 'Success', 40 | message: 'Cleared all unsaved changes', 41 | level: 'success' 42 | }); 43 | } 44 | 45 | render() { 46 | var options = { 47 | lineNumbers: true, 48 | indentUnit: 4 49 | }; 50 | return
51 |
Cross-validation iterator Source Code
52 | this.handleConfigChange('source', src)} 54 | options={options}/> 55 | 56 | 66 | 71 | 75 | 76 | this.setState({showClearModal: false})} 79 | handleYes={() => this.clearChanges()} 80 | /> 81 | this.setState({showPresetCVSettingsModal: false})} 84 | presetCVs={this.props.presetCVs} 85 | apply={(obj) => { 86 | this.handleConfigChange('source', obj.value.source); 87 | }} /> 88 |
89 | } 90 | } 91 | 92 | export default StackedEnsembleCV; 93 | -------------------------------------------------------------------------------- /xcessiv/ui/src/DatasetExtraction/TestDataExtraction.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from 'react'; 2 | import './MainDataExtraction.css'; 3 | import CodeMirror from 'react-codemirror'; 4 | import 'codemirror/lib/codemirror.css'; 5 | import 'codemirror/mode/python/python'; 6 | import { isEqual } from 'lodash'; 7 | import $ from 'jquery'; 8 | import { ClearModal } from './Modals'; 9 | import { Button, ButtonToolbar, DropdownButton, MenuItem, Glyphicon, 10 | Form, FormGroup, FormControl, ControlLabel } from 'react-bootstrap'; 11 | 12 | function NoTestMessage(props) { 13 | return

14 | } 15 | 16 | class SplitForm extends Component { 17 | render() { 18 | 19 | return ( 20 |
21 |
22 | 25 | Test Dataset Ratio 26 | this.props.handleConfigChange('split_ratio', parseFloat(evt.target.value))} 33 | /> 34 | 35 | 38 | Random Seed 39 | this.props.handleConfigChange('split_seed', parseInt(evt.target.value, 10))} 43 | /> 44 | 45 |
46 |
47 | ) 48 | } 49 | } 50 | 51 | class SourceForm extends Component { 52 | render() { 53 | var options = { 54 | lineNumbers: true, 55 | indentUnit: 4 56 | }; 57 | return ( 58 |
59 | 61 |
62 | ) 63 | } 64 | } 65 | 66 | class TestDataExtraction extends Component { 67 | constructor(props) { 68 | super(props); 69 | this.state = { 70 | config: { 71 | "method": null, 72 | "split_ratio": 0.1, 73 | "split_seed": 8, 74 | "source": '' 75 | }, 76 | showClearModal: false 77 | }; 78 | } 79 | 80 | fetchTde(path) { 81 | fetch('/ensemble/extraction/test-dataset/?path=' + path) 82 | .then(response => response.json()) 83 | .then(json => { 84 | console.log(json); 85 | this.savedConfig = $.extend({}, this.state.config, json); 86 | this.setState({ 87 | config: this.savedConfig 88 | }); 89 | }); 90 | } 91 | 92 | componentDidMount() { 93 | this.fetchTde(this.props.path); 94 | } 95 | 96 | componentWillReceiveProps(nextProps) { 97 | if (this.props.path !== nextProps.path) { 98 | this.fetchTde(nextProps.path); 99 | } 100 | } 101 | 102 | handleConfigChange(option, val) { 103 | console.log(option); 104 | console.log(val); 105 | var config = JSON.parse(JSON.stringify(this.state.config)); 106 | config[option] = val; 107 | this.props.setSame(isEqual(config, this.savedConfig)); 108 | this.setState({config}); 109 | } 110 | 111 | clearChanges() { 112 | this.setState({config: this.savedConfig}); 113 | this.props.setSame(true); 114 | this.props.addNotification({ 115 | title: 'Success', 116 | message: 'Cleared all unsaved changes', 117 | level: 'success' 118 | }); 119 | } 120 | 121 | // Save all changes to server 122 | saveSetup() { 123 | var payload = this.state.config; 124 | 125 | fetch( 126 | '/ensemble/extraction/test-dataset/?path=' + this.props.path, 127 | { 128 | method: "PATCH", 129 | body: JSON.stringify( payload ), 130 | headers: new Headers({ 131 | 'Content-Type': 'application/json' 132 | }) 133 | } 134 | ) 135 | .then(response => response.json()) 136 | .then(json => { 137 | console.log(json); 138 | this.savedConfig = json; 139 | this.props.setSame(true); 140 | this.setState({ 141 | config: json 142 | }); 143 | this.props.addNotification({ 144 | title: 'Success', 145 | 
message: 'Saved test dataset extraction method', 146 | level: 'success' 147 | }); 148 | }); 149 | } 150 | 151 | render() { 152 | const options = { 153 | null: 'No test dataset', 154 | split_from_main: 'Split from main dataset', 155 | source: 'Extract with source code' 156 | } 157 | 158 | return
159 |
Choose how to extract your test dataset
160 | this.handleConfigChange('method', x)}> 164 | {options[null]} 165 | {options['split_from_main']} 166 | {options['source']} 167 | 168 | {this.state.config.method === 'split_from_main' && 169 | this.handleConfigChange(option, val)}/> 172 | } 173 | {this.state.config.method === null && } 174 | {this.state.config.method === 'source' && 175 | this.handleConfigChange('source', x)} /> 177 | } 178 | 179 | 187 | 192 | 193 | this.setState({showClearModal: false})} 196 | handleYes={() => this.clearChanges()} 197 | /> 198 |
/* NOTE(review): this span is one garbled dump line holding the tail of
   TestDataExtraction.js, all of Ensemble.css, and the head of
   EnsembleBuilder.js. Non-CSS bytes are preserved as found.
   CSS fix applied: "-animation" is not a valid CSS property name (a vendor
   prefix needs a vendor, e.g. -webkit-); replaced with the standard
   "animation", matching the -webkit-animation fallback that follows. */
199 | } 200 | } 201 | 202 | export default TestDataExtraction; 203 | -------------------------------------------------------------------------------- /xcessiv/ui/src/Ensemble/Ensemble.css: -------------------------------------------------------------------------------- 1 | .Ensemble { 2 | padding-top: 30px; 3 | padding-bottom: 30px; 4 | padding-left: 50px; 5 | padding-right: 50px; 6 | } 7 | 8 | .Ensemble table { table-layout: fixed; } 9 | 10 | .CodeMirror { z-index: 0 } 11 | 12 | .Ensemble .Select {z-index: 100} 13 | 14 | .load-animate { 15 | animation: spin .7s infinite linear; 16 | -webkit-animation: spin2 .7s infinite linear; 17 | } 18 | 19 | @-webkit-keyframes spin2 { 20 | from { -webkit-transform: rotate(0deg);} 21 | to { -webkit-transform: rotate(360deg);} 22 | } 23 | 24 | @keyframes spin { 25 | from { transform: scale(1) rotate(0deg);} 26 | to { transform: scale(1) rotate(360deg);} 27 | } -------------------------------------------------------------------------------- /xcessiv/ui/src/Ensemble/EnsembleBuilder.js: -------------------------------------------------------------------------------- 1 | import React, {Component} from 'react'; 2 | import './Ensemble.css'; 3 | import 'react-select/dist/react-select.css' 4 | import 'react-virtualized/styles.css' 5 | import 'react-virtualized-select/styles.css' 6 | import VirtualizedSelect from 'react-virtualized-select' 7 | import Select from 'react-select' 8 | import CodeMirror from 'react-codemirror'; 9 | import 'codemirror/lib/codemirror.css'; 10 | import 'codemirror/mode/python/python'; 11 | import { Button, Glyphicon } from 'react-bootstrap'; 12 | import FaCogs from 'react-icons/lib/fa/cogs'; 13 | import { GreedyRunModal } from './EnsembleMoreDetailsModal' 14 | 15 | 16 | const defaultSourceParams = [ 17 | '"""This code block should ', 18 | 'contain the hyperparameters to be used for ', 19 | 'the secondary base learner in the variable `params`"""\n', 20 | 'params = {}' 21 | ].join('') 22 | 23 | class
EnsembleBuilder extends Component { 24 | 25 | constructor(props) { 26 | super(props); 27 | this.state = { 28 | selectedValue: null, 29 | source: defaultSourceParams, 30 | showGreedyModal: false 31 | }; 32 | } 33 | 34 | render() { 35 | var options = { 36 | lineNumbers: true, 37 | indentUnit: 4 38 | }; 39 | 40 | const buttonDisabled = (!this.state.selectedValue || 41 | !(this.props.checkedOptions.length > 0)); 42 | 43 | return ( 44 |
45 |

Create a stacked ensemble

46 | 47 | 57 | 65 |
48 | this.props.setCheckedBaseLearners( 53 | selectValue.map((val) => val.value))} 54 | placeholder="Insert/Check base learners to add to the ensemble" 55 | /> 56 | 58 |
66 | this.setState({source: src})} 69 | options={options} 70 | /> 71 | 81 | 88 | this.setState({showGreedyModal: false})} 90 | handleYes={(id, source) => this.props.startGreedyRun(id, source)} 91 | optionsBaseLearnerOrigins={this.props.optionsBaseLearnerOrigins}/> 92 |
93 | ) 94 | } 95 | } 96 | 97 | 98 | export default EnsembleBuilder; 99 | -------------------------------------------------------------------------------- /xcessiv/ui/src/Ensemble/EnsembleMoreDetailsModal.js: -------------------------------------------------------------------------------- 1 | import React, {Component} from 'react'; 2 | import './Ensemble.css'; 3 | import 'react-select/dist/react-select.css'; 4 | import { Modal, Panel, Button, Alert, Form, 5 | FormGroup, ControlLabel, FormControl } from 'react-bootstrap'; 6 | import CodeMirror from 'react-codemirror'; 7 | import 'codemirror/lib/codemirror.css'; 8 | import 'codemirror/mode/python/python'; 9 | import Select from 'react-select' 10 | import 'react-select/dist/react-select.css' 11 | 12 | 13 | const defaultGreedyRunSource = `secondary_learner_hyperparameters = {} # hyperparameters of secondary learner 14 | 15 | metric_to_optimize = 'Accuracy' # metric to optimize 16 | 17 | invert_metric = False # Whether or not to invert metric e.g. optimizing a loss 18 | 19 | max_num_base_learners = 5 # Maximum size of ensemble to consider (the higher this is, the longer the run will take) 20 | ` 21 | 22 | 23 | function DisplayError(props) { 24 | return 25 | {props.description['error_traceback'].join('').split("\n").map((i, index) => { 26 | return
// NOTE(review): extraction-garbled tail of the ensemble DisplayError()
// (EnsembleMoreDetailsModal.js) — maps each traceback line `i` into the
// alert body. JSX tags stripped by the dump; bytes left identical. Restore
// from the original repository file rather than hand-editing this fragment.
{i}
; 27 | })} 28 |
29 | } 30 | 31 | class DetailsModal extends Component { 32 | render() { 33 | if (this.props.moreDetailsId === null) { 34 | return null; 35 | } 36 | 37 | const stackedEnsemble = this.props.stackedEnsembles.find(el => el.id === this.props.moreDetailsId); 38 | 39 | if (stackedEnsemble === undefined) { 40 | return null; 41 | } 42 | 43 | return ( 44 | 48 | 49 | {'Details of ensemble ID ' + stackedEnsemble.id} 50 | 51 | 52 | Base Learners}> 53 |
    54 | {stackedEnsemble.base_learner_ids.map((id) => { 55 | return ( 56 |
  • {id}
  • 57 | ) 58 | })} 59 |
60 |
61 | Metrics}> 62 |
    63 | {Object.keys(stackedEnsemble.individual_score).map((key) => { 64 | return
  • {key + ': ' + stackedEnsemble.individual_score[key]}
  • 65 | })} 66 |
67 |
68 | Secondary Learner Hyperparameters}> 69 |
    70 | {Object.keys(stackedEnsemble.secondary_learner_hyperparameters).map((key) => { 71 | return ( 72 |
  • 73 | {key + ': ' + stackedEnsemble.secondary_learner_hyperparameters[key]} 74 |
  • 75 | ); 76 | })} 77 |
78 |
79 | {'Base Learner Type ID: '} 80 | {stackedEnsemble.base_learner_origin_id} 81 |
82 | {'Job ID: '} 83 | {stackedEnsemble.job_id} 84 | {(stackedEnsemble.job_status === 'errored') && 85 | } 86 |
87 |
88 | ) 89 | } 90 | } 91 | 92 | export class DeleteModal extends Component { 93 | 94 | handleYesAndClose() { 95 | this.props.handleYes(); 96 | this.props.onRequestClose(); 97 | } 98 | 99 | render() { 100 | return ( 101 | 105 | 106 | Delete ensemble 107 | 108 | 109 |
// NOTE(review): extraction-garbled Modal.Body of the ensemble DeleteModal
// (EnsembleMoreDetailsModal.js). Wrapping JSX tags stripped by the dump;
// only the two user-facing confirmation strings and fused dump line
// numbers ("110 |", "111 |") survive. These strings are runtime UI text —
// left byte-identical. Restore markup from the original repository file.

Are you sure you want to delete this ensemble?

110 |

This action is irreversible.

111 |
112 | 113 | 116 | 117 | 118 |
119 | ) 120 | } 121 | } 122 | 123 | export class ExportModal extends Component { 124 | constructor(props) { 125 | super(props); 126 | this.state = { 127 | name: '' 128 | }; 129 | } 130 | 131 | render() { 132 | 133 | return ( 134 | 138 | 139 | Export ensemble as Python file 140 | 141 | 142 |
{ 143 | e.preventDefault(); 144 | this.handleYesAndClose(); 145 | }}> 146 | 149 | Name to use as filename 150 | this.setState({name: evt.target.value})} 153 | /> 154 | 155 |
156 |
157 | 158 | 164 | 170 | 171 | 172 |
173 | ) 174 | } 175 | } 176 | 177 | export class GreedyRunModal extends Component { 178 | constructor(props) { 179 | super(props); 180 | this.state = { 181 | source: defaultGreedyRunSource, 182 | selectedValue: null 183 | }; 184 | } 185 | 186 | handleYesAndClose() { 187 | this.props.handleYes(this.state.selectedValue.value, this.state.source); 188 | this.props.onRequestClose(); 189 | } 190 | 191 | render() { 192 | var options = { 193 | lineNumbers: true, 194 | indentUnit: 4 195 | }; 196 | 197 | return ( 198 | 203 | 204 | Greedy Forward Model Selection 205 | 206 | 207 |

{'Designate configuration for greedy forward model selection'}

208 | this.setState({source: src})} 210 | options={options} 211 | /> 212 |