├── .circleci └── config.yml ├── .coveragerc ├── .github ├── ISSUE_TEMPLATE.md └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── .pre-commit-config.yaml ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── app.py ├── automatminer ├── README.md ├── __init__.py ├── automl │ ├── __init__.py │ ├── adaptors.py │ ├── base.py │ ├── config │ │ ├── __init__.py │ │ └── tpot_configs.py │ └── tests │ │ ├── __init__.py │ │ ├── mini_automl_df.csv │ │ ├── test_adaptors.py │ │ └── test_base.py ├── base.py ├── featurization │ ├── __init__.py │ ├── base.py │ ├── core.py │ ├── sets.py │ └── tests │ │ ├── __init__.py │ │ ├── mp_data_with_dos_bandstructure.pickle │ │ ├── test_base.py │ │ ├── test_core.py │ │ └── test_sets.py ├── pipeline.py ├── preprocessing │ ├── __init__.py │ ├── core.py │ ├── feature_selection.py │ └── tests │ │ ├── __init__.py │ │ ├── test_core.py │ │ └── test_featurized_df.csv ├── presets.py ├── tests │ ├── __init__.py │ ├── test_base.py │ ├── test_pipeline.py │ └── test_presets.py └── utils │ ├── __init__.py │ ├── log.py │ ├── ml.py │ ├── pkg.py │ └── tests │ ├── __init__.py │ ├── test_log.py │ ├── test_ml.py │ └── test_pkg.py ├── automatminer_dev ├── README.md ├── __init__.py ├── config.py ├── graphnet │ ├── __init__.py │ ├── cgcnn.py │ └── megnet.py ├── local │ └── dummy.py ├── matbench │ ├── __init__.py │ ├── castelli.py │ ├── dielectric.py │ ├── expt_gap.py │ ├── expt_is_metal.py │ ├── get_info.py │ ├── glass.py │ ├── jdft2d.py │ ├── mp_eform.py │ ├── mp_elasticity.py │ ├── mp_gaps.py │ ├── phonons.py │ └── steels.py ├── submit.py ├── tasks │ ├── __init__.py │ ├── bench.py │ └── single.py └── workflows │ ├── __init__.py │ ├── bench.py │ ├── single.py │ └── util.py ├── automatminer_web ├── README.md ├── __init__.py ├── app.py ├── assets │ └── ammw.css ├── index.py └── info.py ├── dev_scripts ├── run_code_style_check.sh ├── run_intensive_circleci.sh ├── run_tests.sh ├── setup_env.sh └── setup_env_dev.sh ├── docs ├── .nojekyll ├── 
Makefile ├── _images │ ├── cv_nested.png │ ├── dataframe_pipe.png │ ├── forum.png │ ├── logo.png │ ├── matbench_pie_charts.png │ ├── matminer_examples.png │ └── pipe.png ├── _sources │ ├── advanced.rst.txt │ ├── automatminer.automl.config.rst.txt │ ├── automatminer.automl.rst.txt │ ├── automatminer.automl.tests.rst.txt │ ├── automatminer.featurization.rst.txt │ ├── automatminer.featurization.tests.rst.txt │ ├── automatminer.preprocessing.rst.txt │ ├── automatminer.preprocessing.tests.rst.txt │ ├── automatminer.rst.txt │ ├── automatminer.tests.rst.txt │ ├── automatminer.utils.rst.txt │ ├── automatminer.utils.tests.rst.txt │ ├── basic.rst.txt │ ├── datasets.rst.txt │ ├── index.rst.txt │ ├── installation.rst.txt │ ├── license.rst.txt │ ├── modules.rst.txt │ ├── tutorials.rst.txt │ └── using.rst.txt ├── _static │ ├── alabaster.css │ ├── basic.css │ ├── custom.css │ ├── cv_nested.png │ ├── dataframe_pipe.png │ ├── doctools.js │ ├── documentation_options.js │ ├── favicon.ico │ ├── file.png │ ├── forum.png │ ├── jquery-3.2.1.js │ ├── jquery.js │ ├── language_data.js │ ├── logo.png │ ├── logo_header.png │ ├── logo_lowres.png │ ├── matbench_pie_charts.png │ ├── matminer_examples.png │ ├── minus.png │ ├── nature.css │ ├── nature.css-e │ ├── pipe.png │ ├── plus.png │ ├── pygments.css │ ├── searchtools.js │ ├── underscore-1.3.1.js │ └── underscore.js ├── advanced.html ├── automatminer.automl.config.html ├── automatminer.automl.html ├── automatminer.automl.tests.html ├── automatminer.featurization.html ├── automatminer.featurization.tests.html ├── automatminer.html ├── automatminer.preprocessing.html ├── automatminer.preprocessing.tests.html ├── automatminer.tests.html ├── automatminer.utils.html ├── automatminer.utils.tests.html ├── basic.html ├── datasets.html ├── genindex.html ├── index.html ├── installation.html ├── license.html ├── modules.html ├── modules.rst ├── objects.inv ├── py-modindex.html ├── search.html ├── searchindex.js ├── source │ ├── _static │ │ ├── 
custom.css │ │ ├── cv_nested.png │ │ ├── dataframe_pipe.png │ │ ├── favicon.ico │ │ ├── forum.png │ │ ├── logo.png │ │ ├── logo_header.png │ │ ├── logo_lowres.png │ │ ├── matbench_pie_charts.png │ │ ├── matminer_examples.png │ │ ├── minus.png │ │ ├── nature.css │ │ ├── nature.css-e │ │ └── pipe.png │ ├── _templates │ │ ├── class.rst │ │ ├── function.rst │ │ └── layout.html │ ├── advanced.rst │ ├── automatminer.automl.config.rst │ ├── automatminer.automl.rst │ ├── automatminer.automl.tests.rst │ ├── automatminer.featurization.rst │ ├── automatminer.featurization.tests.rst │ ├── automatminer.preprocessing.rst │ ├── automatminer.preprocessing.tests.rst │ ├── automatminer.rst │ ├── automatminer.tests.rst │ ├── automatminer.utils.rst │ ├── automatminer.utils.tests.rst │ ├── basic.rst │ ├── conf.py │ ├── datasets.rst │ ├── index.rst │ ├── installation.rst │ ├── license.rst │ ├── modules.rst │ └── tutorials.rst ├── tutorials.html └── using.html ├── pyproject.toml ├── requirements.txt ├── requirements_dev.txt ├── requirements_web.txt ├── setup.cfg ├── setup.py ├── setup_dev.py ├── setup_web.py └── tasks.py /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | jobs: 4 | 5 | py372: 6 | working_directory: ~/py372_automatminer 7 | docker: 8 | - image: circleci/python:3.7.2 9 | environment: 10 | SKIP_INTENSIVE: 1 11 | steps: 12 | - checkout 13 | 14 | # Download and cache dependencies 15 | # Try commenting this out if there are ContextualVersionConflict errors 16 | # - restore_cache: 17 | # keys: 18 | # - v1-dependencies-{{ checksum "requirements.txt" }} 19 | # fallback to using the latest cache if no exact match is found 20 | # - v1-dependencies- 21 | 22 | - run: 23 | name: setup env and run tests 24 | command: | 25 | 26 | source dev_scripts/setup_env.sh 27 | source dev_scripts/run_tests.sh 28 | 29 | no_output_timeout: 10m 30 | 31 | - save_cache: 32 | paths: 33 | - ./test_env 34 | key: v1-dependencies-{{ 
checksum "requirements.txt" }} 35 | 36 | 37 | py367: 38 | working_directory: ~/py367_automatminer 39 | docker: 40 | - image: circleci/python:3.6.7 41 | environment: 42 | SKIP_INTENSIVE: 1 43 | steps: 44 | - checkout 45 | 46 | # Download and cache dependencies 47 | # Try commenting this out if there are ContextualVersionConflict errors 48 | # - restore_cache: 49 | # keys: 50 | # - v1-dependencies-{{ checksum "requirements.txt" }} 51 | # fallback to using the latest cache if no exact match is found 52 | # - v1-dependencies- 53 | 54 | - run: 55 | name: setup env and run tests 56 | command: | 57 | 58 | source dev_scripts/setup_env.sh 59 | source dev_scripts/run_tests.sh 60 | 61 | no_output_timeout: 10m 62 | 63 | - save_cache: 64 | paths: 65 | - ./test_env 66 | key: v1-dependencies-{{ checksum "requirements.txt" }} 67 | 68 | 69 | code_style: 70 | working_directory: ~/py372_automatminer 71 | docker: 72 | - image: circleci/python:3.7.2 73 | steps: 74 | - checkout 75 | 76 | # Download and cache dependencies 77 | - restore_cache: 78 | keys: 79 | - v1-dependencies-{{ checksum "requirements.txt" }} 80 | # fallback to using the latest cache if no exact match is found 81 | - v1-dependencies- 82 | 83 | - run: 84 | name: setup env and run tests 85 | command: | 86 | source dev_scripts/setup_env_dev.sh 87 | source dev_scripts/run_code_style_check.sh 88 | 89 | no_output_timeout: 10m 90 | 91 | - save_cache: 92 | paths: 93 | - ./test_env 94 | key: v1-dependencies-{{ checksum "requirements.txt" }} 95 | 96 | 97 | workflows: 98 | version: 2 99 | run_tests_on_commit: 100 | jobs: 101 | - py372 102 | - py367 103 | - code_style 104 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source=automatminer 3 | 4 | [report] 5 | exclude_lines = 6 | # Ignore coverage of code that requires the module to be executed. 
7 | if __name__ == .__main__.: 8 | ignore_errors=True 9 | omit = 10 | *dev_scripts* 11 | *examples* 12 | *.cicleci* 13 | *site-packages* 14 | *tests* 15 | *versioneer.py 16 | *_version.py 17 | *.md 18 | *.gitignore 19 | *.txt 20 | *setup.py 21 | *__init__* 22 | *.egg-info* 23 | *docs* 24 | *docs_rst* 25 | tasks.py 26 | setup.py 27 | *.git* 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | For submitting help issues, please make a new topic on the Discourse forum: 2 | https://matsci.org/c/matminer/ 3 | 4 | The Github issues is no longer used except for internal development purposes. 5 | If you are unable to use the Discourse forum, you may submit an issue here. 6 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Summary 2 | 3 | 5 | 6 | * Feature 1 7 | * Feature 2 8 | * Fix 1 9 | * Fix 2 10 | 11 | ## TODO (if any) 12 | 13 | 15 | 16 | * Feature 1 supports a, but not b. 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # doc builds 4 | docs_old/_build/* 5 | docs_old/_build/*/* 6 | docs_old/_build/*/*/* 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Packages 12 | *.egg 13 | *.egg-info 14 | dist 15 | build 16 | eggs 17 | .eggs/* 18 | parts 19 | bin 20 | var 21 | sdist 22 | develop-eggs 23 | .installed.cfg 24 | lib 25 | lib64 26 | 27 | # Installer logs 28 | pip-log.txt 29 | 30 | # Unit test / coverage reports 31 | .coverage 32 | .tox 33 | nosetests.xml 34 | 35 | # Translations 36 | *.mo 37 | 38 | # Mr Developer 39 | .mr.developer.cfg 40 | .project 41 | .pydevproject 42 | 43 | # IPython checkpoints 44 | *-checkpoint.ipynb 45 | 46 | # Pycharm 47 | .idea/* 48 | 49 | # log files 50 | *.log* 51 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/mirrors-isort 3 | rev: v4.3.21 4 | hooks: 5 | - id: isort 6 | language_version: python3.7 7 | - repo: https://github.com/ambv/black 8 | rev: stable 9 | hooks: 10 | - id: black 11 | language_version: python3.7 12 | - repo: https://github.com/pre-commit/pre-commit-hooks 13 | rev: v2.4.0 14 | hooks: 15 | - id: flake8 16 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to automatminer 2 | 3 | We love your input! 
We want to make contributing to automatminer as easy and transparent as possible, whether it's: 4 | 5 | - Reporting a bug 6 | - Discussing the current state of the code 7 | - Submitting a fix 8 | - Proposing or implementing new features 9 | - Becoming a maintainer 10 | 11 | ## Reporting bugs, getting help, and discussion 12 | 13 | At any time, feel free to start a thread on the automatminer [Discourse forum](https://discuss.matsci.org/c/matminer). 14 | 15 | If you are making a bug report, incorporate as many elements of the following as possible to ensure a timely response and avoid the need for followups: 16 | 17 | - A quick summary and/or background 18 | - Steps to reproduce - be specific! **Provide sample code.** 19 | - What you expected would happen, compared to what actually happens 20 | - The full stack trace of any errors you encounter 21 | - Notes (possibly including why you think this might be happening, or steps you tried that didn't work) 22 | 23 | We love thorough bug reports as this means the development team can make quick and meaningful fixes. When we confirm your bug report, we'll move it to GitHub issues, where its progress can be further tracked. 24 | 25 | ## Contributing code modifications or additions through GitHub 26 | 27 | We use GitHub to host code, to track issues and feature requests, and to accept pull requests. 28 | 29 | Pull requests are the best way to propose changes to the codebase. Follow the [GitHub flow](https://www.atlassian.com/git/tutorials/comparing-workflows/forking-workflow) for more information on this procedure. 30 | 31 | The basic procedure for making a PR is: 32 | 33 | - Fork the repo on GitHub and clone it to your machine. 34 | 35 | ```sh 36 | git clone https://github.com/YOUR_USERNAME/automatminer && cd automatminer 37 | ``` 38 | 39 | - Install both regular and development dependencies and set up the `git` pre-commit hook.
40 | 41 | ```sh 42 | pip install -r requirements.txt -r requirements_dev.txt && pre-commit install 43 | ``` 44 | 45 | This step is important as your changes may otherwise contain style violations that will throw errors when running our CI on your pull request. 46 | 47 | - Test your changes by running our full test suite 48 | 49 | ```sh 50 | python -m unittest 51 | ``` 52 | 53 | - Commit your improvements and push to your GitHub fork. 54 | 55 | - When you're finished, go to your fork and make a pull request. It will automatically update if you need to make further changes. 56 | 57 | ### How to Make a **Great** Pull Request 58 | 59 | We have a few tips for writing good PRs that are accepted into the main repo: 60 | 61 | - Use the Google Code style for all of your code. Find an example [here](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html). 62 | - Indent your code with four (4) spaces instead of tabs. 63 | - If needed, update the documentation. 64 | - **Write tests** for new features! Good tests are 100%, absolutely necessary for good code. We use the [Python `unittest` framework](https://docs.python.org/3/library/unittest) -- see some of the other tests in this repo for examples, or review the [Hitchhiker's guide to python](https://docs.python-guide.org/writing/tests) for some good resources on writing good tests. 65 | - Understand your contributions will fall under the same license as this repo. 66 | 67 | When you submit your PR, our CI service will automatically run your tests. 68 | We welcome good discussion on the best ways to write your code, and the comments on your PR are an excellent area for discussion. 69 | 70 | #### References 71 | 72 | This document was adapted from the open-source contribution guidelines for Facebook's Draft, as well as briandk's [contribution template](https://gist.github.com/briandk/3d2e8b3ec8daf5a27a62).
73 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | automatminer Copyright (c) 2018, The Regents of the University of 2 | California, through Lawrence Berkeley National Laboratory (subject 3 | to receipt of any required approvals from the U.S. Dept. of Energy). 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions 8 | are met: 9 | 10 | (1) Redistributions of source code must retain the above copyright 11 | notice, this list of conditions and the following disclaimer. 12 | 13 | (2) Redistributions in binary form must reproduce the above 14 | copyright notice, this list of conditions and the following 15 | disclaimer in the documentation and/or other materials provided with 16 | the distribution. 17 | 18 | (3) Neither the name of the University of California, Lawrence 19 | Berkeley National Laboratory, U.S. Dept. of Energy nor the names of 20 | its contributors may be used to endorse or promote products derived 21 | from this software without specific prior written permission. 22 | 23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 26 | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 27 | COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 28 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 31 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 33 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 | POSSIBILITY OF SUCH DAMAGE. 35 | 36 | You are under no obligation whatsoever to provide any bug fixes, 37 | patches, or upgrades to the features, functionality or performance 38 | of the source code ("Enhancements") to anyone; however, if you 39 | choose to make your Enhancements available either publicly, or 40 | directly to Lawrence Berkeley National Laboratory or its 41 | contributors, without imposing a separate written license agreement 42 | for such Enhancements, then you hereby grant the following license: 43 | a non-exclusive, royalty-free perpetual license to install, use, 44 | modify, prepare derivative works, incorporate into other computer 45 | software, distribute, and sublicense such enhancements or derivative 46 | works thereof, in binary and source code form. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include CHANGELOG.md 3 | include CONTRIBUTING.md 4 | recursive-include automatminer *.txt *.py *.yaml *.json *.csv *.p *.pickle 5 | recursive-exclude benchdev * -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | logo 2 | 3 | automatminer is an automatic prediction engine for materials properties. 
4 | 5 | 6 | | Tests | Code Coverage | Codacy | Release | 7 | |:----------:|:-------------:|:------:|:------:| 8 | | [![CircleCI](https://img.shields.io/circleci/project/github/hackingmaterials/automatminer/master.svg)](https://circleci.com/gh/hackingmaterials/automatminer) | [![Codacy Badge](https://img.shields.io/codacy/coverage/aa63dd7aa85e480bbe0e924a02ad1540.svg?colorB=brightgreen)](https://www.codacy.com/app/ardunn/automatminer) | [![Codacy Badge](https://img.shields.io/codacy/grade/aa63dd7aa85e480bbe0e924a02ad1540.svg)](https://www.codacy.com/app/ardunn/automatminer) | [![PyPI version](https://img.shields.io/pypi/v/automatminer.svg?colorB=blue)](https://pypi.org/project/automatminer/) | 9 | 10 | - [**Website (including documentation)**](https://hackingmaterials.lbl.gov/automatminer/) 11 | - [**Help/Support**](https://discuss.matsci.org/c/matminer) 12 | - [**Source**](https://github.com/hackingmaterials/automatminer) 13 | 14 | If you're interested in the benchmarking datasets, see our dedicated package [Matbench](https://github.com/hackingmaterials/matbench): 15 | - [**Leaderboard and Docs**](https://matbench.materialsproject.org) 16 | - [**Source**](https://github.com/materialsproject/matbench) 17 | 18 | You may also be interested in the parent code of automatminer, matminer: 19 | - [**Matminer**](https://github.com/hackingmaterials/matminer) 20 | 21 | If you find `automatminer` or Matbench useful, please consider citing [our paper](https://doi.org/10.1038/s41524-020-00406-3): 22 | 23 | ``` 24 | Dunn, A., Wang, Q., Ganose, A., Dopp, D., Jain, A. Benchmarking Materials Property 25 | Prediction Methods: The Matbench Test Set and Automatminer Reference Algorithm. npj 26 | Computational Materials 6, 138 (2020). https://doi.org/10.1038/s41524-020-00406-3 27 | ``` 28 | 29 | `automatminer` is `pip` installable. Please use version `1.0.0` or later.
30 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | from automatminer_web.index import app 2 | 3 | server = app.server 4 | 5 | if __name__ == '__main__': 6 | print("starting...") 7 | app.run_server(debug=True) 8 | -------------------------------------------------------------------------------- /automatminer/README.md: -------------------------------------------------------------------------------- 1 | # Automatminer core code 2 | 3 | This is the automatminer core code. 4 | -------------------------------------------------------------------------------- /automatminer/__init__.py: -------------------------------------------------------------------------------- 1 | from automatminer.automl import SinglePipelineAdaptor, TPOTAdaptor # noqa 2 | from automatminer.featurization import AutoFeaturizer # noqa 3 | from automatminer.pipeline import MatPipe # noqa 4 | from automatminer.preprocessing import DataCleaner, FeatureReducer # noqa 5 | from automatminer.presets import get_preset_config # noqa 6 | 7 | __author__ = "Alex Dunn, Qi Wang, Alex Ganose, Alireza Faghaninia, Anubhav Jain" 8 | __author_email__ = "ardunn@lbl.gov" 9 | __license__ = "Modified BSD" 10 | 11 | # Version is MAJOR.MINOR.PATCH.YYYYMMDD 12 | __version__ = "1.0.3.20200727" 13 | -------------------------------------------------------------------------------- /automatminer/automl/__init__.py: -------------------------------------------------------------------------------- 1 | from .adaptors import TPOTAdaptor, SinglePipelineAdaptor # noqa 2 | -------------------------------------------------------------------------------- /automatminer/automl/base.py: -------------------------------------------------------------------------------- 1 | """ 2 | Base classes for automl. 
3 | """ 4 | 5 | import abc 6 | import logging 7 | from typing import List 8 | 9 | import numpy as np 10 | import pandas as pd 11 | from automatminer.base import DFTransformer 12 | from automatminer.utils.log import AMM_LOG_PREDICT_STR, log_progress 13 | from automatminer.utils.pkg import AutomatminerError, check_fitted 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | class DFMLAdaptor(DFTransformer): 19 | """ 20 | A base class to adapt from an AutoML backend to a sklearn-style fit/predict 21 | scheme and add a few extensions for pandas dataframes. 22 | 23 | When implementing a base class adaptor, make sure to use @check_fitted 24 | and @set_fitted if necessary! 25 | """ 26 | 27 | @property 28 | @abc.abstractmethod 29 | def fitted_target(self) -> str: 30 | """ 31 | The target (a string) on which the adaptor was fit on. 32 | Returns: 33 | (str): The fitted target label. 34 | """ 35 | pass 36 | 37 | @property 38 | @abc.abstractmethod 39 | def features(self) -> (List, np.ndarray): 40 | """ 41 | The features being used for machine learning. 42 | 43 | Returns: 44 | ([str]): The feature labels 45 | """ 46 | pass 47 | 48 | @property 49 | @abc.abstractmethod 50 | def backend(self): 51 | """ 52 | The AutoML backend object. Does not need to implement any methods for 53 | compatibility with higher level classes. If no AutoML backend is present 54 | e.g., SinglePipelineAdaptor, backend = None. 55 | 56 | Does not need to be serializable, as matpipe.save will not save 57 | backends. 58 | """ 59 | pass 60 | 61 | @property 62 | @abc.abstractmethod 63 | def best_pipeline(self): 64 | """ 65 | The best ML pipeline found by the backend. Can be any type though 66 | BaseEstimator is preferred. 67 | 68 | 1. MUST implement a .predict method unless DFMLAdaptor.predict is 69 | overridden! 70 | 71 | 2. MUST be serializable! 
72 | 73 | Should be as close to the algorithm as possible - i.e., instead of 74 | calling TPOTClassifier.fit, calls TPOTClassifier.fitted_pipeline_, so 75 | that examining the true form of models is more straightforward. 76 | """ 77 | pass 78 | 79 | @check_fitted 80 | def serialize(self) -> None: 81 | """ 82 | Assign the adaptor components to be serializable. 83 | 84 | For example, TPOTBase-based backends are not serializable themselves. 85 | The adaptor attributes need to be reassigned in order to serialize the 86 | entire pipeline as pickle. 87 | 88 | If the backend serializes without extra effort, there is no need to 89 | override this method. 90 | 91 | Returns: 92 | None 93 | """ 94 | return None 95 | 96 | @check_fitted 97 | def deserialize(self) -> None: 98 | """ 99 | Invert the operations in serialize, if necessary. Useful if you are 100 | going to keep using this pipeline after saving it and want to retain 101 | the full functionality before the main python process ends. 102 | 103 | If the backend serializes without extra effort, there is no need to 104 | override this method. 105 | 106 | Returns: 107 | None 108 | """ 109 | return None 110 | 111 | @check_fitted 112 | @log_progress(logger, AMM_LOG_PREDICT_STR) 113 | def predict( 114 | self, df: pd.DataFrame, target: str, output_col=None 115 | ) -> pd.DataFrame: 116 | """ 117 | Predict the target property of materials given a df of features. This 118 | base method is widely applicanble across different AutoML backends. 119 | 120 | The predictions are appended to the dataframe in a column named according 121 | to output_col. Default value is "{target_name} predicted" 122 | 123 | Args: 124 | df (pandas.DataFrame): Contains all features needed for ML (i.e., 125 | all features contained in the training dataframe. 126 | target (str): The property to be predicted. Should match the target 127 | used for fitting. May or may not be present in the argument 128 | dataframe. 
129 | 130 | Returns: 131 | (pandas.DataFrame): The argument dataframe plus a column containing 132 | the predictions of the target. 133 | 134 | """ 135 | if target != self.fitted_target: 136 | raise AutomatminerError( 137 | "Argument dataframe target ({}) is different from the fitted " 138 | "dataframe target! ({})".format(target, self.fitted_target) 139 | ) 140 | elif not all([f in df.columns for f in self.features]): 141 | not_in_model = [f for f in self.features if f not in df.columns] 142 | not_in_df = [f for f in df.columns if f not in self.features] 143 | raise AutomatminerError( 144 | "Features used to build model are different from df columns! " 145 | "Features located in model not located in df: \n{} \n " 146 | "Features located in df not in model: \n{}" 147 | "".format(not_in_df, not_in_model) 148 | ) 149 | else: 150 | X = df[self.features].values # rectify feature order 151 | y_pred = self.best_pipeline.predict(X) 152 | df[output_col or (target + " predicted")] = y_pred 153 | 154 | log_msg = "Prediction finished successfully." 
155 | try: 156 | logger.info(self._log_prefix + log_msg) 157 | except AttributeError: 158 | pass 159 | return df 160 | 161 | def transform(self, df: pd.DataFrame, target: str) -> pd.DataFrame: 162 | return self.predict(df, target) 163 | -------------------------------------------------------------------------------- /automatminer/automl/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/automatminer/automl/config/__init__.py -------------------------------------------------------------------------------- /automatminer/automl/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/automatminer/automl/tests/__init__.py -------------------------------------------------------------------------------- /automatminer/automl/tests/test_base.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for base classes for automl. 3 | """ 4 | 5 | import unittest 6 | 7 | import pandas as pd 8 | from sklearn.exceptions import NotFittedError 9 | 10 | from automatminer.automl.base import DFMLAdaptor 11 | from automatminer.utils.pkg import check_fitted, set_fitted 12 | 13 | 14 | class TestAdaptorBad(DFMLAdaptor): 15 | """ 16 | A test adaptor for automl backends, implemented incorrectly. 17 | """ 18 | 19 | def __init__(self): 20 | pass 21 | 22 | 23 | class TestAdaptorGood(DFMLAdaptor): 24 | """ 25 | A test adaptor for automl backends, implemented correctly. 
26 | """ 27 | 28 | def __init__(self, config_attr): 29 | self.config_attr = config_attr 30 | self.target = None 31 | self._ml_data = None 32 | self._best_pipeline = None 33 | self._backend = None 34 | self._features = None 35 | self._fitted_target = None 36 | super(DFMLAdaptor, self).__init__() 37 | 38 | @set_fitted 39 | def fit(self, df, target): 40 | """ 41 | Determine the target of the dataframe. 42 | 43 | Args: 44 | df (pandas.DataFrame): The dataframe to be transformed. 45 | target (str): The fit target 46 | 47 | Returns: 48 | TestTransformer 49 | """ 50 | if target in df.columns: 51 | self.target = target 52 | else: 53 | raise ValueError("Target {} not in dataframe.".format(target)) 54 | 55 | self._fitted_target = target 56 | self._best_pipeline = "pipeline1" 57 | self._ml_data = {"y": df[target], "X": df.drop(columns=[target])} 58 | self._backend = "mybackend" 59 | self._features = self._ml_data["X"].columns.tolist() 60 | return self 61 | 62 | @check_fitted 63 | def predict(self, df, target): 64 | """ 65 | Drop the target set during fitting. 66 | 67 | Args: 68 | df (pandas.DataFrame): The dataframe to be transformed. 69 | target (str): The transform target (not the same as fit target 70 | necessarily) 71 | 72 | Returns: 73 | df (pandas.DataFrame): The transformed dataframe. 
74 | """ 75 | df = df.drop(columns=self.target) 76 | return df 77 | 78 | @property 79 | def backend(self): 80 | return self._backend 81 | 82 | @property 83 | def features(self): 84 | return self._features 85 | 86 | @property 87 | def best_pipeline(self): 88 | return self._best_pipeline 89 | 90 | @property 91 | def fitted_target(self): 92 | return self._fitted_target 93 | 94 | 95 | class TestBaseAutoMLTransformers(unittest.TestCase): 96 | def setUp(self): 97 | self.df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) 98 | 99 | def test_DFMLAdaptor(self): 100 | tag = TestAdaptorGood(config_attr=5) 101 | 102 | with self.assertRaises(NotFittedError): 103 | tag.transform(self.df, "a") 104 | 105 | with self.assertRaises(NotFittedError): 106 | tag.predict(self.df, "a") 107 | 108 | tag.fit(self.df, "a") 109 | self.assertTrue(hasattr(tag, "features")) 110 | self.assertTrue(hasattr(tag, "best_pipeline")) 111 | self.assertTrue(hasattr(tag, "backend")) 112 | self.assertTrue(hasattr(tag, "fitted_target")) 113 | self.assertTrue(tag.is_fit) 114 | self.assertTrue(tag.best_pipeline == "pipeline1") 115 | self.assertTrue(tag.backend == "mybackend") 116 | self.assertTrue(tag.features[0] == "b") 117 | 118 | predicted = tag.predict(self.df, "b") 119 | self.assertTrue("b" in predicted) 120 | self.assertTrue("c" in predicted) 121 | self.assertTrue("a" not in predicted) 122 | 123 | predicted2 = tag.fit_transform(self.df, "c") 124 | self.assertTrue("b" in predicted2) 125 | self.assertTrue("a" in predicted2) 126 | self.assertTrue("c" not in predicted2) 127 | 128 | with self.assertRaises(TypeError): 129 | TestAdaptorBad() 130 | 131 | 132 | if __name__ == "__main__": 133 | unittest.main() 134 | -------------------------------------------------------------------------------- /automatminer/base.py: -------------------------------------------------------------------------------- 1 | """ 2 | Base classes, mixins, and other inheritables. 
3 | """ 4 | import abc 5 | from sklearn.base import BaseEstimator 6 | 7 | __authors__ = ["Alex Dunn ", "Alex Ganose "] 8 | 9 | 10 | class DFTransformer(abc.ABC, BaseEstimator): 11 | """A base class to allow easy transformation in the same way as 12 | TransformerMixin and BaseEstimator in sklearn, but for pandas dataframes. 13 | 14 | When implementing a base class adaptor, make sure to use @check_fitted 15 | and @set_fitted if necessary! 16 | """ 17 | 18 | def __init__(self): 19 | self.is_fit = False 20 | 21 | @abc.abstractmethod 22 | def fit(self, df, target, **fit_kwargs): 23 | """ 24 | Fits the transformer to a dataframe, given a target. 25 | 26 | Args: 27 | df (pandas.DataFrame): The pandas dataframe to be fit. 28 | target (str): the target string specifying the ML target. 29 | fit_kwargs: Keyword parameters for fitting 30 | 31 | Returns: 32 | (DFTransformer) This object (self) 33 | 34 | """ 35 | pass 36 | 37 | @abc.abstractmethod 38 | def transform(self, df, target, **transform_kwargs): 39 | """ 40 | Transforms a dataframe. 41 | 42 | Args: 43 | df (pandas.DataFrame): The pandas dataframe to be transformed. 44 | target (str): the target string specifying the ML target. 45 | transform_kwargs: Keyword parameters for transforming 46 | 47 | Returns: 48 | (pandas.DataFrame): The transformed dataframe. 49 | 50 | """ 51 | pass 52 | 53 | def fit_transform(self, df, target): 54 | """ 55 | Combines the fitting and transformation of a dataframe. 56 | 57 | Args: 58 | df (pandas.DataFrame): The pandas dataframe to be fit and transformed. 59 | target (str): the target string specifying the ML target. 60 | 61 | Returns: 62 | (pandas.DataFrame): The transformed dataframe. 63 | 64 | """ 65 | return self.fit(df, target).transform(df, target) 66 | 67 | @property 68 | def _log_prefix(self): 69 | """ 70 | The class's log prefix. 71 | 72 | Without log_prefix: 73 | 2019.10.15 WARNING Some log message. 74 | 75 | with log prefix: 76 | 2019.10.15 WARNING DataCleaner: Some log message.
class FeaturizerSet(abc.ABC):
    """Abstract class for defining sets of featurizers.

    All FeaturizerSets should implement at least four sets of featurizers:

        - express - The "go-to" set of featurizers
        - heavy - A more expensive and complete (though not necessarily
            better) version of express.
        - all - All featurizers available for the intended featurization type(s)
        - debug - An ultra-minimal set of featurizers for debugging purposes.

    Each set returned is a list of matminer featurizer objects. The choice of
    featurizers for a given set is at the discretion of the implementor.

    Args:
        exclude (list of str, optional): A list of featurizer class names that
            will be excluded from the set of featurizers returned. A single
            class name passed as a bare string is treated as a one-element
            list.
    """

    def __init__(self, exclude=None):
        # A bare string would otherwise turn the membership test in
        # _get_featurizers into a substring test (e.g. "SO" in "SOAP"),
        # silently excluding unrelated featurizers.
        if isinstance(exclude, str):
            exclude = [exclude]
        self.exclude = exclude if exclude else []

    def __call__(self, *args, **kwargs):
        """Calling the set returns the ``all`` featurizers; arguments are ignored."""
        return self.all

    @property
    @abc.abstractmethod
    def express(self) -> List:
        """A focused set of featurizers which should:

        * be reasonably fast to featurize
        * be not prone to errors/nans
        * provide informative learning features
        * do not include many irrelevant features making ML expensive
        * have each featurizer return a vector
        * allow the recognized type (structure, composition, etc.) as input.
        """
        pass

    @property
    @abc.abstractmethod
    def heavy(self) -> List:
        """A more expensive and complete (though not necessarily better)
        version of express.

        Similar to express, all featurizers selected should return useful
        learning features. However the selected featurizers may now:

        * generate many (thousands+) features
        * be expensive to featurize (1s+ per item)
        * be prone to NaNs on certain datasets
        """
        pass

    @property
    @abc.abstractmethod
    def all(self) -> List:
        """All featurizers available for this featurization type. These
        featurizers are allowed to:

        * have multiple, highly similar versions of the same featurizer,
        * not work on standard versions of the input types (e.g., SiteDOS works
            on the DOS for a single site, not structure)
        * return non-vectorized outputs (e.g., matrices, other data types).
        """
        pass

    @property
    @abc.abstractmethod
    def debug(self) -> List:
        """An ultra-minimal set of featurizers for debugging."""
        pass

    def _get_featurizers(self, featurizers: List) -> List:
        """Return the featurizers whose class name is not in ``self.exclude``.

        Args:
            featurizers (list): Featurizer objects to filter.

        Returns:
            (list): A new list, in the original order, without excluded items.
        """
        return [f for f in featurizers if f.__class__.__name__ not in self.exclude]
class TestAutoFeaturizer(unittest.TestCase):
    def test_fsets(self):
        """Test the base behavior of ABC FeaturizerSet."""

        # Implements only one of the four abstract properties, so the ABC
        # machinery must reject instantiation.
        class BadFeaturizerSet(FeaturizerSet):
            def __init__(self, exclude=None):
                super().__init__(exclude=exclude)
                self._fast = [cf.ElementProperty.from_preset("matminer")]

            @property
            def express(self):
                return self._get_featurizers()

        self.assertRaises(TypeError, BadFeaturizerSet)

        # Implements all four abstract properties; construction must succeed.
        class GoodFeaturizerSet(FeaturizerSet):
            def __init__(self, exclude=None):
                super().__init__(exclude=exclude)
                self._express = [cf.ElementProperty.from_preset("matminer")]
                # Each assignment reads the express property separately.
                self._debug = self.express
                self._all = self.express
                self._heavy = self.express

            @property
            def express(self):
                return self._get_featurizers(self._express)

            @property
            def debug(self):
                return self._get_featurizers(self._debug)

            @property
            def all(self):
                return self._get_featurizers(self._all)

            @property
            def heavy(self):
                return self._get_featurizers(self._heavy)

        GoodFeaturizerSet()
class TestAllFeaturizers(unittest.TestCase):
    """
    Ensures every featurizer defined in the matminer featurizer modules is
    also listed in the AllFeaturizers class. This catches the case where a new
    featurizer is defined but not listed inside AllFeaturizers (e.g. by
    mistake).
    """

    def setUp(self):
        self.allfs = AllFeaturizers()

    @staticmethod
    def get_featurizers(module, ignore=None):
        """Collect the names of the featurizer classes defined in a module.

        Args:
            module (module): A python module.
            ignore (`list` of `str`, optional): Class names to ignore.

        Returns:
            (`list` of `str`): List of all featurizer class names.
        """
        skipped = ignore if ignore else []
        names = []
        # getmembers yields (class_name, class_object) pairs.
        for name, klass in inspect.getmembers(module, inspect.isclass):
            if not issubclass(klass, BaseFeaturizer) or klass == BaseFeaturizer:
                continue
            # Only classes defined in this module, not ones imported into it.
            if not klass.__module__ == module.__name__:
                continue
            if name not in skipped:
                names.append(name)
        return names

    def _test_features_implemented(self, test_feats, true_feats):
        """Assert every name in `true_feats` has an object in `test_feats`.

        Note that `test_feats` is a list of objects and `true_feats` is a
        list of class names as strings. Only this one direction is checked.
        """
        implemented = [obj.__class__.__name__ for obj in test_feats]
        template = "{} matminer featurizer not in implemented in " "automatminer"

        for featurizer_name in true_feats:
            self.assertTrue(
                featurizer_name in implemented, template.format(featurizer_name)
            )

    def test_composition_featurizers(self):
        expected = TestAllFeaturizers.get_featurizers(cf)
        self._test_features_implemented(self.allfs.composition, expected)

    def test_structure_featurizers(self):
        ignore = ["StructureComposition", "CGCNNFeaturizer"]
        # SOAP is only required when the optional dscribe import succeeded.
        if not dscribe:
            ignore.append("SOAP")
        expected = self.get_featurizers(sf, ignore)
        self._test_features_implemented(self.allfs.structure, expected)

    def test_dos_featurizers(self):
        expected = self.get_featurizers(dosf)
        self._test_features_implemented(self.allfs.dos, expected)

    def test_bandstructure_featurizers(self):
        expected = self.get_featurizers(bf)
        self._test_features_implemented(self.allfs.bandstructure, expected)


class TestFeaturizerSets(unittest.TestCase):
    """Checks that no required featurizer set is empty."""

    def setUp(self):
        self.required_attrs = ["express", "heavy", "debug", "all"]
        self.c = CompositionFeaturizers()
        self.s = StructureFeaturizers()
        self.b = BSFeaturizers()
        self.d = DOSFeaturizers()

    def test_sets_not_empty(self):
        featurizer_sets = [self.c, self.s, self.b, self.d]
        for attr in self.required_attrs:
            for fset in featurizer_sets:
                print(fset.__class__.__name__, attr)
                self.assertNotEqual(getattr(fset, attr), [])
def get_preset_config(preset: str = "express", **powerups) -> dict:
    """
    Build one of the preset configurations for MatPipe.

    USER:
        "express" - Good for quick benchmarks with moderate accuracy.
        "express_single" - Same as express but uses a single random forest
            model instead of automl TPOT. Good for even more express results.
        "production": Used for making production predictions and benchmarks.
            Balances accuracy and timeliness.
        "heavy" - When high accuracy is required, and you have access to
            (very) powerful computing resources. May be buggier and more
            difficult to run than production.

    DEBUG:
        "debug" - Debugging with automl enabled.
        "debug_single" - Debugging with a single model.

    Args:
        preset (str): The name of the preset config you'd like to use.
        **powerups: Various modifications as kwargs.
            cache_src (str): A file path. If specified, AutoFeaturizer will
                use feature caching with a file stored at this location. See
                AutoFeaturizer's cache_src argument for more information.
            n_jobs (int): The number of parallel processes to use when
                running. Passed to the AutoFeaturizer and to the learner.
                Defaults to os.cpu_count(), except for the "debug" preset
                where it defaults to 2.

    Returns:
        (dict) The desired preset config, with the keys "learner", "reducer",
            "autofeaturizer" and "cleaner".

    Raises:
        ValueError: If the preset name is not an available preset.
    """
    cache_src = powerups.get("cache_src", None)
    n_jobs = powerups.get("n_jobs", os.cpu_count())

    if preset not in get_available_presets():
        raise ValueError("{} unknown preset.".format(preset))

    # Only the automl debug preset lowers the default parallelism.
    if preset == "debug" and "n_jobs" not in powerups:
        n_jobs = 2

    def featurizer(featurizer_preset):
        return AutoFeaturizer(
            preset=featurizer_preset, cache_src=cache_src, n_jobs=n_jobs
        )

    if preset == "production":
        return {
            "learner": TPOTAdaptor(
                max_time_mins=1440, max_eval_time_mins=20, n_jobs=n_jobs
            ),
            "reducer": FeatureReducer(
                reducers=("corr", "tree"), tree_importance_percentile=0.99
            ),
            "autofeaturizer": featurizer("express"),
            "cleaner": DataCleaner(),
        }

    if preset == "heavy":
        return {
            "learner": TPOTAdaptor(max_time_mins=2880, n_jobs=n_jobs),
            "reducer": FeatureReducer(reducers=("corr", "rebate")),
            "autofeaturizer": featurizer("heavy"),
            "cleaner": DataCleaner(),
        }

    if preset == "express":
        return {
            "learner": TPOTAdaptor(
                max_time_mins=60, population_size=20, n_jobs=n_jobs
            ),
            "reducer": FeatureReducer(
                reducers=("corr", "tree"), tree_importance_percentile=0.99
            ),
            "autofeaturizer": featurizer("express"),
            "cleaner": DataCleaner(),
        }

    if preset == "express_single":
        forest_kwargs = {"n_estimators": 500, "max_depth": 5, "n_jobs": n_jobs}
        return {
            "learner": SinglePipelineAdaptor(
                regressor=RandomForestRegressor(**forest_kwargs),
                classifier=RandomForestClassifier(**forest_kwargs),
            ),
            "reducer": FeatureReducer(reducers=("corr",)),
            "autofeaturizer": featurizer("express"),
            "cleaner": DataCleaner(),
        }

    if preset == "debug":
        return {
            "learner": TPOTAdaptor(
                max_time_mins=1,
                max_eval_time_mins=1,
                population_size=10,
                n_jobs=n_jobs,
            ),
            "reducer": FeatureReducer(reducers=("corr", "tree")),
            "autofeaturizer": featurizer("debug"),
            "cleaner": DataCleaner(),
        }

    # Only "debug_single" remains after the validation above.
    forest_kwargs = {"n_estimators": 10, "n_jobs": n_jobs}
    return {
        "learner": SinglePipelineAdaptor(
            classifier=RandomForestClassifier(**forest_kwargs),
            regressor=RandomForestRegressor(**forest_kwargs),
        ),
        "reducer": FeatureReducer(reducers=("corr",)),
        "autofeaturizer": featurizer("debug"),
        "cleaner": DataCleaner(),
    }


def get_available_presets():
    """
    List the preset names accepted by get_preset_config.

    Returns:
        ([str]): A list of preset names.
    """
    user_presets = ["production", "heavy", "express", "express_single"]
    debug_presets = ["debug", "debug_single"]
    return user_presets + debug_presets
class TestTransformerBad(DFTransformer):
    """
    A test transformer, implemented incorrectly: it defines neither fit nor
    transform, so instantiating it is expected to raise TypeError.
    """

    def __init__(self):
        pass


class TestBaseTransformers(unittest.TestCase):
    """Tests for the DFTransformer base class via the test transformers."""

    def setUp(self):
        self.df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})

    def test_DFTransformer(self):
        ttg = TestTransformerGood(5)
        self.assertTrue(hasattr(ttg, "config_attr"))
        # assertEqual, not assertTrue: assertTrue(x, 5) treats 5 as the failure
        # message and passes for any truthy x.
        self.assertEqual(ttg.config_attr, 5)
        with self.assertRaises(NotFittedError):
            ttg.transform(self.df, "b")

        ttg.fit(self.df, "a")
        # Fitting must not alter the configuration attribute.
        self.assertEqual(ttg.config_attr, 5)

        # The column dropped is the fit target ("a"), not the transform target.
        test = ttg.transform(self.df, "b")
        self.assertTrue("b" in test.columns)
        self.assertTrue("c" in test.columns)
        self.assertTrue("a" not in test.columns)

        test = ttg.fit_transform(self.df, "c")
        self.assertTrue("c" not in test.columns)
        self.assertTrue("a" in test.columns)
        self.assertTrue("b" in test.columns)

        with self.assertRaises(TypeError):
            TestTransformerBad()

    def test_DFTransformer_BaseEstimator_behavior(self):
        # ttg is nested inside ttg_nested as its config_attr, so parameter
        # changes on either object must be visible through both.
        ttg = TestTransformerGood(5)
        ttg_nested = TestTransformerGood(ttg)

        self.assertEqual(ttg.get_params()["config_attr"], 5)
        self.assertEqual(ttg_nested.get_params()["config_attr__config_attr"], 5)

        ttg.set_params(config_attr=6)
        self.assertEqual(ttg.get_params()["config_attr"], 6)
        self.assertEqual(ttg_nested.get_params()["config_attr__config_attr"], 6)

        ttg_nested.set_params(config_attr__config_attr=7)
        self.assertEqual(ttg.get_params()["config_attr"], 7)
        self.assertEqual(ttg_nested.get_params()["config_attr__config_attr"], 7)
114 | unittest.main() 115 | -------------------------------------------------------------------------------- /automatminer/tests/test_presets.py: -------------------------------------------------------------------------------- 1 | """ 2 | Testing the preset configurations for MatPipe. 3 | 4 | Mainly ensuring all args can be passed to matpipe constituent parts correctly. 5 | """ 6 | import unittest 7 | 8 | from automatminer import MatPipe 9 | from automatminer.presets import get_preset_config 10 | 11 | AF_KEY = "autofeaturizer" 12 | DC_KEY = "cleaner" 13 | FR_KEY = "reducer" 14 | ML_KEY = "learner" 15 | KEYSET = [AF_KEY, DC_KEY, FR_KEY, ML_KEY] 16 | 17 | 18 | class TestMatPipePresets(unittest.TestCase): 19 | def test_production(self): 20 | prod = get_preset_config("production") 21 | for k in KEYSET: 22 | self.assertTrue(k in prod.keys()) 23 | MatPipe(**prod) 24 | 25 | def test_debug(self): 26 | debug = get_preset_config("debug") 27 | for k in KEYSET: 28 | self.assertTrue(k in debug.keys()) 29 | MatPipe(**debug) 30 | 31 | def test_debug_single(self): 32 | debug_single = get_preset_config("debug_single") 33 | for k in KEYSET: 34 | self.assertTrue(k in debug_single.keys()) 35 | MatPipe(**debug_single) 36 | 37 | def test_express(self): 38 | express = get_preset_config("express") 39 | for k in KEYSET: 40 | self.assertTrue(k in express.keys()) 41 | MatPipe(**express) 42 | 43 | def test_express_single(self): 44 | express_single = get_preset_config("express_single") 45 | for k in KEYSET: 46 | self.assertTrue(k in express_single.keys()) 47 | MatPipe(**express_single) 48 | 49 | def test_heavy(self): 50 | heavy = get_preset_config("heavy") 51 | for k in KEYSET: 52 | self.assertTrue(k in heavy.keys()) 53 | MatPipe(**heavy) 54 | 55 | def test_caching_powerup(self): 56 | cache_src = "./somefile.json" 57 | prod = get_preset_config("production", cache_src=cache_src) 58 | self.assertEqual(prod[AF_KEY].cache_src, cache_src) 59 | MatPipe(**prod) 60 | 61 | def 
def initialize_logger(logger_name, log_dir=".", level=None) -> logging.Logger:
    """Initialize the default logger with stdout and file handlers.

    Any handlers already attached to the named logger are removed and closed
    first, so re-initializing does not leak the previous log file handle.

    Args:
        logger_name (str): The package name.
        log_dir (str): Path to the folder where the log file will be written.
        level (int): The log level. For example logging.DEBUG. Defaults to
            logging.INFO when not given.

    Returns:
        (Logger): A logging instance with customized formatter and handlers.
    """
    level = level or logging.INFO

    logger = logging.getLogger(logger_name)
    # Reset logging handlers if they already exist. Close each one as well:
    # dropping a FileHandler without close() leaves its file open.
    for old_handler in list(logger.handlers):
        logger.removeHandler(old_handler)
        old_handler.close()

    formatter = logging.Formatter(
        fmt="%(asctime)s %(levelname)-8s %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
    )

    # Never overwrite an existing log: fall back to a timestamped file name.
    logpath = os.path.join(log_dir, logger_name)
    if os.path.exists(logpath + ".log"):
        logpath += "_" + datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    logpath += ".log"

    handler = logging.FileHandler(logpath, mode="w")
    handler.setFormatter(formatter)
    screen_handler = logging.StreamHandler(stream=sys.stdout)
    screen_handler.setFormatter(formatter)

    logger.setLevel(level)
    logger.addHandler(screen_handler)
    logger.addHandler(handler)
    return logger


def initialize_null_logger(name) -> logging.Logger:
    """Initialize a dummy logger which will swallow all logging commands.

    Args:
        name (str): The package name; the logger is registered under
            ``name + "_null"``.

    Returns:
        (Logger): A dummy logging instance with no output.
    """
    logger = logging.getLogger(name + "_null")
    # Without this, records still travel up to ancestor loggers and are
    # emitted by whatever handlers those have.
    logger.propagate = False
    # getLogger returns the same object per name; avoid stacking a new
    # NullHandler on every call.
    if not any(isinstance(h, logging.NullHandler) for h in logger.handlers):
        logger.addHandler(logging.NullHandler())
    return logger


def log_progress(logger, operation):
    """
    Decorator to auto-log progress before and after executing a method, such
    as fit and transform. Should only be applied to methods of objects that
    provide a ``_log_prefix`` attribute (e.g. DataFrameTransformers).

    For example, with a log prefix of "AutoFeaturizer: " and operation
    "fitting":

        INFO: AutoFeaturizer: Starting fitting.
        ... autofeaturizer logs ...
        INFO: AutoFeaturizer: Finished fitting.

    Args:
        logger (logging.Logger): A logger object to help log progress.
        operation (str): Some info about the operation you want to log.

    Returns:
        A decorator which wraps the input method.
    """
    import functools  # local import keeps this function self-contained

    def decorator_wrapper(meth):
        # wraps() preserves the method's __name__ and __doc__ on the wrapper.
        @functools.wraps(meth)
        def wrapper(*args, **kwargs):
            # The decorated callable is a method, so args[0] is the instance.
            self = args[0]
            logger.info("{}Starting {}.".format(self._log_prefix, operation))
            result = meth(*args, **kwargs)
            logger.info("{}Finished {}.".format(self._log_prefix, operation))
            return result

        return wrapper

    return decorator_wrapper
def regression_or_classification(series) -> str:
    """
    Decide whether a target column poses a regression or a classification
    problem.

    A boolean column, or a column with exactly two unique values that are
    both 0 or 1, is classification. Otherwise the column is regression if
    pandas can convert it to numeric, and classification if it cannot.

    Args:
        series (pandas.Series): The target column.

    Returns:
        (str): "regression" or "classification"
    """
    if series.dtypes.name == "bool":
        return AMM_CLF_NAME

    # Binary 0/1 targets count as classification even though they are numeric.
    distinct = series.unique().tolist()
    is_binary = len(distinct) == 2 and all(value in [0, 1] for value in distinct)
    if is_binary:
        return AMM_CLF_NAME

    try:
        pd.to_numeric(series, errors="raise")
    except (ValueError, TypeError):
        return AMM_CLF_NAME
    return AMM_REG_NAME
where the script was called 23 | # from and not the location of this test file 24 | log_file = os.path.join(run_dir, logger_base_name + ".log") 25 | self.assertTrue(os.path.isfile(log_file)) 26 | 27 | with open(log_file, "r") as f: 28 | lines = f.readlines() 29 | 30 | self.assertTrue("logging" in lines[0]) 31 | self.assertTrue("debug" in lines[1]) 32 | self.assertTrue("warning" in lines[2]) 33 | 34 | null = initialize_null_logger("matbench_null") 35 | null.info("Test null log 1.") 36 | null.debug("Test null log 2.") 37 | null.warning("Test null log 3.") 38 | 39 | null_log_file = os.path.join(run_dir, logger_base_name + "_null.log") 40 | self.assertFalse(os.path.isfile(null_log_file)) 41 | 42 | def tearDown(self): 43 | logfile = os.path.join(run_dir, "TESTING.log") 44 | os.remove(logfile) 45 | 46 | 47 | if __name__ == "__main__": 48 | unittest.main() 49 | -------------------------------------------------------------------------------- /automatminer/utils/tests/test_ml.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for machine learning related utils. 
class TestMLTools(unittest.TestCase):
    """Tests for the scoring-direction and problem-type helpers."""

    def test_is_greater_better(self):
        for metric in ("accuracy", "r2_score", "neg_mean_squared_error"):
            self.assertTrue(is_greater_better(metric))
        self.assertFalse(is_greater_better("mean_squared_error"))

    def test_regression_or_classification(self):
        # (column data, expected problem type), checked in this order.
        cases = [
            (["4", "5", "6"], AMM_REG_NAME),
            ([1, 2, 3], AMM_REG_NAME),
            (["a", "b", "c"], AMM_CLF_NAME),
            (["a1", "b", "c"], AMM_CLF_NAME),
            # binary classification
            ([0, 1, 0, 0, 1], AMM_CLF_NAME),
            ([0.0, 1.0, 0.0, 0.0, 1.0], AMM_CLF_NAME),
            ([0, 1, 0, 0, 2], AMM_REG_NAME),
        ]
        for data, expected in cases:
            column = pd.Series(data=data)
            self.assertEqual(regression_or_classification(column), expected)
class MyTransformer(DFTransformer):
    """Minimal transformer whose fit and transform return the dataframe
    unchanged; used to exercise the set_fitted and check_fitted decorators."""

    def __init__(self):
        super(MyTransformer, self).__init__()

    @set_fitted
    def fit(self, df, target):
        return df

    @check_fitted
    def transform(self, df, target):
        return df


class TestPackageTools(unittest.TestCase):
    """Tests for the assorted package utilities."""

    def setUp(self) -> None:
        # Files written by test_save_dict_to_file go next to this test file
        # and are removed again in tearDown.
        self.remant_base_path = os.path.dirname(__file__)
        self.remant_file_prefix = "saved"

    def test_compare_columns(self):
        df1 = pd.DataFrame({"a": [1, 2], "b": [2, 3]})
        df2 = pd.DataFrame({"b": [3, 4], "c": [4, 5]})
        comparison = compare_columns(df1, df2)
        self.assertTrue(comparison["mismatch"])
        self.assertListEqual(comparison["df1_not_in_df2"], ["a"])
        self.assertListEqual(comparison["df2_not_in_df1"], ["c"])

        comparison2 = compare_columns(df1, df1)
        self.assertFalse(comparison2["mismatch"])

        # Ignoring "c" hides only that column; "a" still causes a mismatch.
        comparison3 = compare_columns(df1, df2, ignore=["c"])
        self.assertTrue(comparison3["mismatch"])
        self.assertListEqual(comparison3["df1_not_in_df2"], ["a"])
        self.assertListEqual(comparison3["df2_not_in_df1"], [])

    def test_fitting_decorations(self):
        df = pd.DataFrame({"a": [1, 2], "b": [2, 3]})
        mt = MyTransformer()

        self.assertFalse(mt.is_fit)
        mt.fit(df, "")
        self.assertTrue(mt.is_fit)
        df = mt.transform(df, "")

        mt2 = MyTransformer()
        # assertRaises forwards the extra positional arguments to the
        # callable, so df and target are passed separately, not as one list.
        self.assertRaises(NotFittedError, mt2.transform, df, "")

    def test_save_dict_to_file(self):
        test_dict = {"a": "A", "b": 1, "c": [1, "q"], "d": {"m": [3, 4]}}
        for ext in AMM_SUPPORTED_EXTS:
            filename = self._get_remnant_path(ext)
            save_dict_to_file(test_dict, filename=filename)
            self.assertTrue(os.path.isfile(filename))

    def test_get_version(self):
        v = get_version()
        self.assertEqual(v, __version__)

    def tearDown(self) -> None:
        # Remove whichever saved files exist; not every test creates them.
        remnants = [self._get_remnant_path(ext) for ext in AMM_SUPPORTED_EXTS]
        for remnant in remnants:
            if os.path.exists(remnant):
                os.remove(remnant)

    def _get_remnant_path(self, ext):
        """Return the path of the saved test file for the extension `ext`."""
        relative_fname = self.remant_file_prefix + ext
        filename = os.path.join(self.remant_base_path, relative_fname)
        return filename
19 | 20 | The available workflows include: 21 | * nested CV benchmarks parallelized across folds (by node) 22 | * multiple nested CV benchmarks (i.e., on many different data sets) 23 | * plain ol' fitting operations (i.e., fitting a model for production) 24 | 25 | Also, this `automatminer_dev` folder is dependent upon specific 26 | environment variables, file locations, and variables which are 27 | documented throughout the code. If you want to run your own version of 28 | these dev tools, you'll need to set your own versions of these variables 29 | specific to your computing platform, most of which you can do through 30 | `config.py`. 31 | 32 | Finally, the results of these workflows are saved to a private database. 33 | If you want to use your own database, you'll need to substitute the code 34 | from the private database with your own (should only be a couple of 35 | lines from `config.py` if you're familiar with pymongo and MongoDB). 36 | The general setup of this database is as follows (by collection): 37 | 38 | #### Automatminer results collections 39 | - `automatminer_pipes`: individual MatPipes; most typically, folds 40 | - `automatminer_benchmarks`: a collection of pipes, making up an entire benchmark (one complete ML result on one dataset by nested CV) 41 | - `automatminer_builds`: a collection of benchmarks, making up an entire build (a collection of ML results on many datasets by nested CV) 42 | 43 | #### Fireworks specific collections 44 | - `fireworks`: the FireWorks collection containing individual fireworks (jobs) 45 | - `workflows`: the FireWorks collection containing workflows of several jobs 46 | - `launches`: the FireWorks collection containing info from different runs of jobs 47 | - `fw_*`: FireWorks operations you probably don't need to worry about 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /automatminer_dev/__init__.py:
-------------------------------------------------------------------------------- 1 | __version__ = "0.0.1" 2 | -------------------------------------------------------------------------------- /automatminer_dev/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | The environment variables you need for this all to work are: 3 | 4 | - AMM_BENCH_DIR: where to store benchmarks 5 | - AMM_DATASET_DIR: where to store datasets 6 | - AMM_CODE_DIR: where to run tests 7 | - AMM_SINGLE_FIT_DIR: where to store models (single fits on datasets) 8 | """ 9 | from automatminer.utils.ml import AMM_CLF_NAME, AMM_REG_NAME 10 | 11 | from hmte.db import get_connection 12 | 13 | # Private production 14 | LP = get_connection("hackingmaterials", write=True, connection_type="launchpad") 15 | 16 | # Debugging locally 17 | # LP = LaunchPad(name="automatminer") 18 | 19 | # Constants for running benchmarks and builds 20 | KFOLD_DEFAULT = {"shuffle": True, "random_state": 18012019, "n_splits": 5} 21 | RUN_TESTS_CMD = "cd $AMM_CODE_DIR && coverage run setup.py test" 22 | EXPORT_COV_CMD = "coverage xml && python-codacy-coverage -r coverage.xml" 23 | 24 | # Real benchmark sets 25 | 26 | LOG_KVRH = { 27 | "name": "log_kvrh", 28 | "data_file": "matbench_log_kvrh.json.gz", 29 | "target": "log10(K_VRH)", 30 | "problem_type": AMM_REG_NAME, 31 | "clf_pos_label": None, 32 | } 33 | 34 | LOG_GVRH = { 35 | "name": "log_gvrh", 36 | "data_file": "matbench_log_gvrh.json.gz", 37 | "target": "log10(G_VRH)", 38 | "problem_type": AMM_REG_NAME, 39 | "clf_pos_label": None, 40 | } 41 | 42 | DIELECTRIC = { 43 | "name": "dielectric", 44 | "data_file": "matbench_dielectric.json.gz", 45 | "target": "n", 46 | "problem_type": AMM_REG_NAME, 47 | "clf_pos_label": None, 48 | } 49 | 50 | JDFT2D = { 51 | "name": "jdft2d", 52 | "data_file": "matbench_jdft2d.json.gz", 53 | "target": "exfoliation_en", 54 | "problem_type": AMM_REG_NAME, 55 | "clf_pos_label": None, 56 | } 57 | 58 | 
MP_GAP = { 59 | "name": "mp_gap", 60 | "data_file": "matbench_mp_gap.json.gz", 61 | "target": "gap pbe", 62 | "problem_type": AMM_REG_NAME, 63 | "clf_pos_label": None, 64 | } 65 | 66 | MP_IS_METAL = { 67 | "name": "mp_is_metal", 68 | "data_file": "matbench_mp_is_metal.json.gz", 69 | "target": "is_metal", 70 | "problem_type": AMM_CLF_NAME, 71 | "clf_pos_label": True, 72 | } 73 | 74 | MP_E_FORM = { 75 | "name": "mp_e_form", 76 | "data_file": "matbench_mp_e_form.json.gz", 77 | "target": "e_form", 78 | "problem_type": AMM_REG_NAME, 79 | "clf_pos_label": None, 80 | } 81 | 82 | PEROVSKITES = { 83 | "name": "perovskites", 84 | "data_file": "matbench_perovskites.json.gz", 85 | "target": "e_form", 86 | "problem_type": AMM_REG_NAME, 87 | "clf_pos_label": None, 88 | } 89 | 90 | GLASS = { 91 | "name": "glass", 92 | "data_file": "matbench_glass.json.gz", 93 | "target": "gfa", 94 | "problem_type": AMM_CLF_NAME, 95 | "clf_pos_label": True, 96 | } 97 | 98 | EXPT_IS_METAL = { 99 | "name": "expt_is_metal", 100 | "data_file": "matbench_expt_is_metal.json.gz", 101 | "target": "is_metal", 102 | "problem_type": AMM_CLF_NAME, 103 | "clf_pos_label": True, 104 | } 105 | 106 | EXPT_GAP = { 107 | "name": "expt_gap", 108 | "data_file": "matbench_expt_gap.json.gz", 109 | "target": "gap expt", 110 | "problem_type": AMM_REG_NAME, 111 | "clf_pos_label": None, 112 | } 113 | 114 | PHONONS = { 115 | "name": "phonons", 116 | "data_file": "matbench_phonons.json.gz", 117 | "target": "last phdos peak", 118 | "problem_type": AMM_REG_NAME, 119 | "clf_pos_label": None, 120 | } 121 | 122 | STEELS = { 123 | "name": "steels", 124 | "data_file": "matbench_steels.json.gz", 125 | "target": "yield strength", 126 | "problem_type": AMM_REG_NAME, 127 | "clf_pos_label": None, 128 | } 129 | 130 | BENCHMARK_DEBUG_SET = [JDFT2D, PHONONS, EXPT_IS_METAL, STEELS] 131 | BENCHMARK_FULL_SET = [ 132 | LOG_KVRH, 133 | LOG_GVRH, 134 | DIELECTRIC, 135 | JDFT2D, 136 | MP_GAP, 137 | MP_IS_METAL, 138 | MP_E_FORM, 139 | PEROVSKITES, 
# Score dummy (no-skill) estimators on benchmark datasets with 5-fold CV and
# print the mean score per dataset: MAE for regression, ROC-AUC for
# classification.
benchmark_dir = os.environ["AMM_DATASET_DIR"]

# One seed for both the CV splits and the stratified dummy classifier, so
# the printed baselines are reproducible from run to run.
RANDOM_STATE = 18012019
N_SPLITS = 5

# Only this subset is scored; use BENCHMARK_FULL_SET to score every benchmark.
bmarks = [GLASS, EXPT_GAP, EXPT_IS_METAL]

for p in bmarks:
    pname = p["name"]
    print("Loading {}".format(pname))
    df = load_dataframe_from_json(os.path.join(benchmark_dir, p["data_file"]))
    target = p["target"]
    ltype = p["problem_type"]
    if ltype == AMM_REG_NAME:
        kf = KFold(n_splits=N_SPLITS, random_state=RANDOM_STATE, shuffle=True)
        estimator = DummyRegressor(strategy="mean")
        scoring = "neg_mean_absolute_error"
        # sklearn reports the negated error; flip it back to a plain MAE.
        multiplier = -1
    elif ltype == AMM_CLF_NAME:
        kf = StratifiedKFold(
            n_splits=N_SPLITS, random_state=RANDOM_STATE, shuffle=True
        )
        # "stratified" draws random labels, so it must be seeded as well.
        estimator = DummyClassifier(
            strategy="stratified", random_state=RANDOM_STATE
        )
        multiplier = 1
        scoring = "roc_auc"
    else:
        raise ValueError("problem type {} is not known.".format(ltype))

    cvs = cross_val_score(
        estimator, df.drop(columns=[target]), y=df[target], scoring=scoring, cv=kf
    )

    cvs = multiplier * cvs
    mean_cvs = np.mean(cvs)
    print(pname, mean_cvs)
"""
This file makes the following benchmarking datasets:
    - dielectric

Regenerating from the newest Materials Project calculations
"""

from matminer.datasets.dataset_retrieval import load_dataset
from matminer.data_retrieval.retrieve_MP import MPDataRetrieval
from pymatgen import Element

import pandas as pd
import numpy as np

# pd.set_option('display.height', 1000)
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)

mpdr = MPDataRetrieval()


# df = load_dataset("dielectric_constant")

df = mpdr.get_dataframe(
    criteria={"has": "diel"},
    properties=[
        "material_id",
        "diel.n",
        "formation_energy_per_atom",
        "e_above_hull",
        "structure",
    ],
    index_mpid=False,
)
# Keep entries below 0.150 in both e_above_hull and formation energy.
df = df[(df["e_above_hull"] < 0.150) & (df["formation_energy_per_atom"] < 0.150)]
df = df.rename(columns={"diel.n": "n"})
# Drop entries with n below 1.
df = df[(df["n"] >= 1)]
df = df.dropna()

df = df[["structure", "n"]]

# See if there is anything wrong with the Lu containing entries.
numLu = 0
for s, n in zip(df["structure"], df["n"]):
    if Element("Lu") in s.composition.elements:
        print(s.composition.formula, n)
        numLu += 1
print(numLu)

df = df.reset_index(drop=True)

print(df)
print(df.describe())

# Flag and then remove every structure containing a noble gas element.
df["is_noble"] = [
    any(e.is_noble_gas for e in s.composition.elements) for s in df["structure"]
]
dfnoble = df[df["is_noble"]]
print("Size of noble gas containing:", dfnoble.shape)

df = df[~df["is_noble"]]
df = df.drop(columns=["is_noble"])
df = df.reset_index(drop=True)
print(df)
df.to_pickle("dielectric.pickle.gz")
from matminer.datasets.dataset_retrieval import load_dataset
from matminer.utils.io import store_dataframe_as_json
from matminer.featurizers.conversions import StrToComposition
from tqdm import tqdm
import numpy as np


import pandas as pd

pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)
# Use the fully qualified key: the bare "precision" pattern is ambiguous in
# pandas versions that also define "styler.format.precision".
pd.set_option("display.precision", 8)

# Compositions whose reported gaps span more than this (in eV) are discarded.
MAX_GAP_RANGE = 0.1

df = load_dataset("expt_gap")
df = df.rename(columns={"formula": "composition"})

# Spot checks from the raw data: ZrW2 should be 0.00 and ZrSe2 should be 2.00.

# Prevent differences in order of formula symbols from corrupting the actual
# number of unique compositions
df = StrToComposition(target_col_id="composition_obj").featurize_dataframe(
    df, "composition"
)
df["composition"] = [c.reduced_formula for c in df["composition_obj"]]
df = df.drop(columns=["composition_obj"])

# One pass over the data instead of re-filtering the frame per composition.
gaps_by_composition = df.groupby("composition", sort=False)["gap expt"]
print("Number of unique compositions:", gaps_by_composition.ngroups)

excluded_compositions = []
new_df_dict = {"composition": [], "gap expt": []}
for c, per_comp_gaps in tqdm(gaps_by_composition, total=gaps_by_composition.ngroups):
    measurement_range = per_comp_gaps.max() - per_comp_gaps.min()
    if measurement_range > MAX_GAP_RANGE:
        excluded_compositions.append(c)
        continue

    # Keep the reported value closest to the mean. The distance must be
    # absolute: the smallest signed difference is just the lowest gap.
    gap_values = per_comp_gaps.to_numpy()
    closest = int(np.argmin(np.abs(gap_values - gap_values.mean())))
    new_df_dict["composition"].append(c)
    new_df_dict["gap expt"].append(float(gap_values[closest]))


df_new = pd.DataFrame(new_df_dict)
df_new = df_new.sort_values(by="composition")
df_new = df_new.reset_index(drop=True)


store_dataframe_as_json(df_new, "expt_gap.json.gz", compression="gz")

print(df_new)
from matminer.datasets.dataset_retrieval import load_dataset
from matminer.utils.io import store_dataframe_as_json
from matminer.featurizers.conversions import StrToComposition
from tqdm import tqdm


import pandas as pd

pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)

df = load_dataset("expt_gap")
df = df.rename(columns={"formula": "composition"})
print(df)
# A composition is labelled metallic when its reported gap is exactly zero.
df["is_metal"] = df["gap expt"] == 0
df = df.drop(columns=["gap expt"])

# Spot checks from the raw data: ZrSe3 should end up False, ZrW2 True.

# Normalize formulas so differently ordered symbols count as one composition.
df = StrToComposition(target_col_id="composition_obj").featurize_dataframe(
    df, "composition"
)
df["composition"] = [c.reduced_formula for c in df["composition_obj"]]
df = df.drop(columns=["composition_obj"])

unique = df["composition"].unique()
print("Number of unique compositions:", len(unique))

problem_compositions = []
new_df_dict = {"composition": [], "is_metal": []}
for c in tqdm(unique):
    df_per_comp_is_metal = df[df["composition"] == c]
    per_comp_is_metal = df_per_comp_is_metal["is_metal"]
    any_metals = any(per_comp_is_metal)
    # Must be all(), not any(): otherwise conflicting reports can never be
    # detected and mixed compositions are silently labelled as metals.
    all_metals = all(per_comp_is_metal)

    if any_metals and not all_metals:
        # Reports disagree; leave the composition out of the dataset.
        print(f"Problem composition {c}: {df_per_comp_is_metal}\n")
        problem_compositions.append(c)
        continue

    # Every composition has at least one row, so the reports are unanimous
    # here and all_metals is the label.
    if all_metals:
        print(f"All metals: {c}")
    else:
        print(f"No metals: {c}")

    new_df_dict["composition"].append(c)
    new_df_dict["is_metal"].append(all_metals)

df_new = pd.DataFrame(new_df_dict)
df_new = df_new.sort_values(by="composition")
df_new = df_new.reset_index(drop=True)

# Make sure the label column is boolean.
df_new["is_metal"] = df_new["is_metal"].astype(bool)

store_dataframe_as_json(df_new, "expt_is_metal.json.gz", compression="gz")

print(df_new)
print(df_new["is_metal"].value_counts())
print(f"Problem compositions: {problem_compositions}")
from matminer.datasets.dataset_retrieval import load_dataset
from matminer.utils.io import store_dataframe_as_json
from matminer.featurizers.conversions import StrToComposition
from tqdm import tqdm

import pandas as pd

# pd.set_option('display.height', 1000)
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)


df = load_dataset("glass_ternary_landolt")

df = df.rename(columns={"formula": "composition"})
df = df[["composition", "gfa"]]

# Normalize formulas so differently ordered symbols count as one composition.
df = StrToComposition(target_col_id="composition_obj").featurize_dataframe(
    df, "composition"
)
df["composition"] = [c.reduced_formula for c in df["composition_obj"]]
df = df.drop(columns=["composition_obj"])

# Spot checks from the raw data: ZrTi9 should end up False, ZrVCo8 True.

unique = df["composition"].unique()
print(len(df))
print(len(unique))

problem_compositions = []
new_df_dict = {"composition": [], "gfa": []}
for c in tqdm(unique):
    df_per_comp_gfa = df[df["composition"] == c]
    per_comp_gfa = df_per_comp_gfa["gfa"]
    any_gfa = any(per_comp_gfa)
    # Must be all(), not any(): otherwise conflicting reports can never be
    # detected and mixed compositions are silently labelled as glass formers.
    all_gfa = all(per_comp_gfa)

    if any_gfa and not all_gfa:
        # Reports disagree; leave the composition out of the dataset.
        print(f"Problem composition {c}: {df_per_comp_gfa}\n")
        problem_compositions.append(c)
        continue

    # Every composition has at least one row, so the reports are unanimous
    # here and all_gfa is the label.
    if all_gfa:
        print(f"All gfa: {c}")
    else:
        print(f"No gfa: {c}")

    new_df_dict["composition"].append(c)
    new_df_dict["gfa"].append(all_gfa)

df_new = pd.DataFrame(new_df_dict)
df_new = df_new.sort_values(by="composition")
df_new = df_new.reset_index(drop=True)

# Make sure the label column is boolean.
df_new["gfa"] = df_new["gfa"].astype(bool)


print(df_new)
print(df_new["gfa"].value_counts())
print(f"Problem compositions: {problem_compositions}")

store_dataframe_as_json(df_new, "glass.json.gz", compression="gz")
def chunks(l, n):
    """Yield successive n-sized chunks from l.

    Args:
        l: A sequence supporting ``len()`` and slicing (list, range, ...).
        n: Chunk size; must be a positive integer.

    Yields:
        Consecutive slices ``l[i : i + n]``; the last one may be shorter.

    Raises:
        ValueError: If ``n`` is smaller than 1. Raised when iteration
            starts, since this is a generator.
    """
    # A negative step would silently produce no chunks at all, so reject it
    # the same way range() already rejects a step of zero.
    if n < 1:
        raise ValueError("chunk size n must be a positive integer")
    for i in range(0, len(l), n):
        yield l[i : i + n]
criteria={"material_id": {"$in": mpids}}, 49 | properties=["structure", "material_id", "formation_energy_per_atom"], 50 | index_mpid=False, 51 | ) 52 | structures = pd.concat([stchunk, structures]) 53 | 54 | 55 | df = pd.merge(structures, df) 56 | df = df.dropna() 57 | 58 | # df.to_pickle("mp.pickle") 59 | 60 | 61 | df = df.rename(columns={"formation_energy_per_atom": "e_form"}) 62 | df = df[["structure", "e_form"]] 63 | df = df.reset_index(drop=True) 64 | # df.to_pickle("mp_e_form.pickle.gz") 65 | # 66 | # print(df) 67 | 68 | # df = pd.read_pickle("mp_e_form.pickle.gz") 69 | # print(df.shape) 70 | df["is_noble"] = [ 71 | any([e.is_noble_gas for e in s.composition.elements]) for s in df["structure"] 72 | ] 73 | dfnoble = df[df["is_noble"]] 74 | print("Size of noble gas containing:", dfnoble.shape) 75 | 76 | df = df[~df["is_noble"]] 77 | df = df.drop(columns=["is_noble"]) 78 | df = df.reset_index(drop=True) 79 | print(df) 80 | df.to_pickle("mp_e_form.pickle.gz") 81 | -------------------------------------------------------------------------------- /automatminer_dev/matbench/mp_elasticity.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file makes the following benchmarking datasets: 3 | - elasticity_K_VRH 4 | - elasticity_log10(K_VRH) 5 | - elasticity_G_VRH 6 | - elasticity_log10(G_VRH) 7 | 8 | From matminer's dataset library. 
9 | """ 10 | 11 | from matminer.datasets.dataset_retrieval import load_dataset 12 | from matminer.data_retrieval.retrieve_MP import MPDataRetrieval 13 | import pandas as pd 14 | import numpy as np 15 | 16 | pd.set_option("display.max_rows", 500) 17 | pd.set_option("display.max_columns", 500) 18 | pd.set_option("display.width", 1000) 19 | 20 | mpdr = MPDataRetrieval() 21 | 22 | df = mpdr.get_dataframe( 23 | criteria={ 24 | "e_above_hull": {"$lt": 0.150}, 25 | "formation_energy_per_atom": {"$lt": 0.150}, 26 | "elasticity": {"$exists": 1, "$ne": None}, 27 | }, 28 | # "elements": }, 29 | properties=[ 30 | "material_id", 31 | "structure", 32 | "elasticity.K_VRH", 33 | "elasticity.G_VRH", 34 | "elasticity.G_Voigt", 35 | "elasticity.K_Voigt", 36 | "elasticity.G_Reuss", 37 | "elasticity.K_Reuss", 38 | "warnings", 39 | ], 40 | index_mpid=False, 41 | ) 42 | 43 | df = df.rename( 44 | columns={ 45 | "elasticity.K_VRH": "K_VRH", 46 | "elasticity.G_VRH": "G_VRH", 47 | "elasticity.G_Voigt": "G_Voigt", 48 | "elasticity.K_Voigt": "K_Voigt", 49 | "elasticity.G_Reuss": "G_Reuss", 50 | "elasticity.K_Reuss": "K_Reuss", 51 | } 52 | ) 53 | 54 | df = df[ 55 | (df["K_VRH"] > 0.0) 56 | & (df["G_VRH"] > 0.0) 57 | & (df["G_Voigt"] > 0.0) 58 | & (df["K_Voigt"] > 0.0) 59 | & (df["K_Reuss"] > 0.0) 60 | & (df["G_Reuss"] > 0.0) 61 | ] 62 | df = df[ 63 | (df["K_Reuss"] <= df["K_VRH"]) 64 | & (df["K_VRH"] <= df["K_Voigt"]) 65 | & (df["G_Reuss"] <= df["G_VRH"]) 66 | & (df["G_VRH"] <= df["G_Voigt"]) 67 | ] 68 | 69 | print(df["warnings"].astype(str).value_counts()) 70 | 71 | df["log10(K_VRH)"] = np.log10(df["K_VRH"]) 72 | df["log10(G_VRH)"] = np.log10(df["G_VRH"]) 73 | 74 | df = df.reset_index(drop=True) 75 | 76 | for target in ["K_VRH", "G_VRH", "log10(K_VRH)", "log10(G_VRH)"]: 77 | dftemp = df[["structure", target]] 78 | dftemp.to_pickle("elasticity_{}.pickle.gz".format(target)) 79 | 80 | for s in df["structure"]: 81 | if any([e.is_noble_gas for e in s.composition.elements]): 82 | 
def chunks(l, n):
    """Yield successive n-sized chunks from l.

    Args:
        l: A sequence supporting ``len()`` and slicing (list, range, ...).
        n: Chunk size; must be a positive integer.

    Yields:
        Consecutive slices ``l[i : i + n]``; the last one may be shorter.

    Raises:
        ValueError: If ``n`` is smaller than 1. Raised when iteration
            starts, since this is a generator.
    """
    # A negative step would silently produce no chunks at all, so reject it
    # the same way range() already rejects a step of zero.
    if n < 1:
        raise ValueError("chunk size n must be a positive integer")
    for i in range(0, len(l), n):
        yield l[i : i + n]
df.rename(columns={"band_gap": "gap pbe"}) 65 | df["is_metal"] = df["gap pbe"] == 0 66 | df = df.reset_index(drop=True) 67 | 68 | 69 | # df = pd.read_pickle("mp_is_metal.pickle.gz") 70 | # print(df.shape) 71 | df["is_noble"] = [ 72 | any([e.is_noble_gas for e in s.composition.elements]) for s in df["structure"] 73 | ] 74 | dfnoble = df[df["is_noble"]] 75 | print("Size of noble gas containing:", dfnoble.shape) 76 | 77 | df = df[~df["is_noble"]] 78 | # df = df.drop(columns=["is_noble"]) 79 | df = df.reset_index(drop=True) 80 | print(df) 81 | # df.to_pickle("mp_is_metal.pickle.gz") 82 | for target in ["gap pbe", "is_metal"]: 83 | dftemp = df[["structure", target]] 84 | dftemp.to_pickle("mp_{}.pickle.gz".format(target)) 85 | -------------------------------------------------------------------------------- /automatminer_dev/matbench/phonons.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file makes the following benchmarking datasets: 3 | - phonons 4 | 5 | From matminer's dataset library. 
6 | """ 7 | 8 | from matminer.datasets.dataset_retrieval import load_dataset 9 | from matminer.data_retrieval.retrieve_MP import MPDataRetrieval 10 | 11 | 12 | import pandas as pd 13 | 14 | # pd.set_option('display.height', 1000) 15 | pd.set_option("display.max_rows", 500) 16 | pd.set_option("display.max_columns", 500) 17 | pd.set_option("display.width", 1000) 18 | 19 | mpdr = MPDataRetrieval() 20 | 21 | 22 | df = load_dataset("phonon_dielectric_mp") 23 | 24 | print(df) 25 | 26 | mpids = df["mpid"].tolist() 27 | dfe = mpdr.get_dataframe( 28 | criteria={"material_id": {"$in": mpids}}, 29 | properties=["e_above_hull", "formation_energy_per_atom", "material_id"], 30 | index_mpid=False, 31 | ) 32 | dfe = dfe.rename(columns={"material_id": "mpid"}) 33 | 34 | df = pd.merge(df, dfe, how="inner") 35 | 36 | 37 | df = df[(df["e_above_hull"] < 0.150) & (df["formation_energy_per_atom"] < 0.150)] 38 | df = df[["structure", "last phdos peak"]] 39 | df = df.reset_index(drop=True) 40 | 41 | print(df) 42 | 43 | df.to_pickle("phonons.pickle.gz") 44 | 45 | 46 | df = pd.read_pickle("phonons.pickle.gz") 47 | for s in df["structure"]: 48 | if any([e.is_noble_gas for e in s.composition.elements]): 49 | print(s.composition) 50 | 51 | print(df) 52 | -------------------------------------------------------------------------------- /automatminer_dev/matbench/steels.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file makes the following benchmarking datasets: 3 | - steels 4 | 5 | From matminer's dataset library. 
6 | """ 7 | 8 | from matminer.datasets.dataset_retrieval import load_dataset 9 | 10 | 11 | if __name__ == "__main__": 12 | df = load_dataset("steel_strength") 13 | df = df[["formula", "yield strength"]] 14 | df = df.rename(columns={"formula": "composition"}) 15 | print(df) 16 | df.to_pickle("steels.pickle.gz") 17 | -------------------------------------------------------------------------------- /automatminer_dev/submit.py: -------------------------------------------------------------------------------- 1 | from automatminer_dev.config import ( 2 | LP, 3 | KFOLD_DEFAULT, 4 | RUN_TESTS_CMD, 5 | BENCHMARK_DEBUG_SET, 6 | BENCHMARK_FULL_SET, 7 | ) 8 | from automatminer_dev.workflows.bench import wf_evaluate_build, wf_benchmark 9 | from automatminer_dev.workflows.single import wf_single_fit, wf_run_test 10 | 11 | """ 12 | Running benchmarks 13 | """ 14 | 15 | 16 | if __name__ == "__main__": 17 | 18 | N_JOBS = 10 19 | 20 | pipe_config = { 21 | "learner_name": "TPOTAdaptor", 22 | # "learner_kwargs": {"generations": 100, "population_size": 100, "memory": "auto", "n_jobs": 10, "max_eval_time_mins": 5}, 23 | # "learner_kwargs": {"max_time_mins": 1440, "max_eval_time_mins": 20, "population_size": 100, "memory": "auto", "n_jobs": 10}, 24 | "learner_kwargs": { 25 | "max_time_mins": 1440, 26 | "max_eval_time_mins": 20, 27 | "population_size": 200, 28 | "memory": "auto", 29 | "n_jobs": N_JOBS, 30 | }, 31 | # "reducer_kwargs": {"reducers": ("corr",)}, 32 | "reducer_kwargs": { 33 | "reducers": ("corr", "tree"), 34 | "tree_importance_percentile": 0.99, 35 | }, 36 | # "reducer_kwargs": {"reducers": ("corr", "tree",), "tree_importance_percentile": 0.85}, 37 | # "reducer_kwargs": {"reducers": ("pca",), "n_pca_features": 0.3}, 38 | # "reducer_kwargs": {"reducers": ("rebate",), "n_rebate_features": 0.3}, 39 | # "reducer_kwargs": {"reducers": ()}, 40 | "autofeaturizer_kwargs": {"preset": "heavy", "n_jobs": N_JOBS, "do_precheck": False}, 41 | # "autofeaturizer_kwargs": {"preset": "heavy", 
"n_jobs": 20}, 42 | # "cleaner_kwargs": {"max_na_frac": 0.01, "feature_na_method": "mean", "na_method_fit": "drop", "na_method_transform": "mean"}, 43 | "cleaner_kwargs": { 44 | "max_na_frac": 0.25, 45 | "feature_na_method": "drop", 46 | "na_method_fit": "mean", 47 | "na_method_transform": "mean", 48 | }, 49 | } 50 | 51 | pipe_config_debug = { 52 | "autofeaturizer_kwargs": {"preset": "debug", "n_jobs": N_JOBS}, 53 | "reducer_kwargs": {"reducers": ()}, 54 | "learner_name": "rf", 55 | "learner_kwargs": {"n_estimators": 500}, 56 | "cleaner_kwargs": { 57 | "max_na_frac": 0.01, 58 | "feature_na_method": "drop", 59 | "na_method_fit": "mean", 60 | "na_method_transform": "mean", 61 | }, 62 | } 63 | 64 | tags = [ 65 | # "data_full", 66 | # "drop_mean", 67 | # "af_best", 68 | # "af_debug", 69 | # "rf", 70 | # "debug", 71 | # "no_reduction" 72 | # "tpot_limited_mem" 73 | # "corr_only", 74 | # "drop_mean", 75 | # "af_fast", 76 | # "tpot_generations", 77 | "debug" 78 | ] 79 | 80 | from automatminer_dev.config import EXPT_IS_METAL, EXPT_GAP, MP_E_FORM, GLASS 81 | worker = "lrc" 82 | # wf = wf_benchmark(worker, pipe_config, **EXPT_IS_METAL, cache=True, tags=tags, prepend_name="heavy featurization") 83 | # wf = wf_benchmark(worker, pipe_config, **EXPT_GAP, cache=True, tags=tags, prepend_name="heavy featurization") 84 | # wf = wf_benchmark(worker, pipe_config, **GLASS, cache=True, tags=tags, prepend_name="heavy featurization") 85 | 86 | 87 | 88 | # wf = wf_benchmark(worker, pipe_config, **EXPT_GAP, cache=True, tags=tags) 89 | # wf = wf_benchmark(worker, pipe_config, **EXPT_IS_METAL, cache=True, tags=tags) 90 | wf = wf_benchmark(worker, pipe_config_debug, **GLASS, cache=True, tags=tags, prepend_name="glass rf") 91 | 92 | 93 | 94 | # wf = wf_evaluate_build( 95 | # "cori", 96 | # "24 hr tpot express 99% reducing with all mean cleaning samples", 97 | # BENCHMARK_FULL_SET, 98 | # pipe_config, 99 | # include_tests=False, 100 | # cache=True, 101 | # tags=tags, 102 | # ) 103 | 104 | # wf = 
@explicit_serialize
class RunSingleFit(FireTaskBase):
    """
    FireTask which fits a single MatPipe on one dataset and saves the result.

    Keys read from fw_spec:
        pipe_config (dict): Must contain "learner_name" (either "TPOTAdaptor"
            or "rf"), "learner_kwargs", "reducer_kwargs", "cleaner_kwargs"
            and "autofeaturizer_kwargs".
        target: Forwarded to ``MatPipe.fit`` as the prediction target.
        data_file (str): Name of the json dataframe file, relative to the
            directory given by the AMM_DATASET_DIR environment variable.
        base_save_dir (str): Output directory, relative to the directory
            given by the AMM_SINGLE_FIT_DIR environment variable.
    """

    _fw_name = "RunSinglePipe"

    def run_task(self, fw_spec):
        """
        Build the pipeline described by fw_spec, fit it and save it.

        Args:
            fw_spec (dict): The firework spec (see class docstring for keys).

        Raises:
            KeyError: If a required spec key or environment variable is
                missing.
            ValueError: If "learner_name" is not a supported learner.
        """
        # Read data from fw_spec
        pipe_config_dict = fw_spec["pipe_config"]
        target = fw_spec["target"]
        data_file = fw_spec["data_file"]
        learner_name = pipe_config_dict["learner_name"]
        learner_kwargs = pipe_config_dict["learner_kwargs"]
        reducer_kwargs = pipe_config_dict["reducer_kwargs"]
        cleaner_kwargs = pipe_config_dict["cleaner_kwargs"]
        autofeaturizer_kwargs = pipe_config_dict["autofeaturizer_kwargs"]

        # Modify data_file based on computing resource
        data_dir = os.environ["AMM_DATASET_DIR"]
        data_file = os.path.join(data_dir, data_file)

        # Modify save_dir based on computing resource
        bench_dir = os.environ["AMM_SINGLE_FIT_DIR"]
        base_save_dir = os.path.join(bench_dir, fw_spec["base_save_dir"])

        # exist_ok avoids the check-then-create race when several workers
        # start at the same time.
        os.makedirs(base_save_dir, exist_ok=True)

        # Set up pipeline config
        if learner_name == "TPOTAdaptor":
            learner = TPOTAdaptor(**learner_kwargs)
        elif learner_name == "rf":
            # The same kwargs are handed to both the regressor and classifier.
            warnings.warn(
                "Learner kwargs passed into RF regressor/classifiers bc. rf being used."
            )
            learner = SinglePipelineAdaptor(
                regressor=RandomForestRegressor(**learner_kwargs),
                classifier=RandomForestClassifier(**learner_kwargs),
            )
        else:
            raise ValueError("{} not supported yet!".format(learner_name))

        pipe = MatPipe(
            learner=learner,
            reducer=FeatureReducer(**reducer_kwargs),
            cleaner=DataCleaner(**cleaner_kwargs),
            autofeaturizer=AutoFeaturizer(**autofeaturizer_kwargs),
        )

        # Set up dataset
        # Dataset should already be set up correctly as json beforehand.
        # this includes targets being converted to classification, removing
        # extra columns, having the names of featurization cols set to the
        # same as the matpipe config, etc.
        df = load_dataframe_from_json(data_file)

        pipe.fit(df, target)
        pipe.save(os.path.join(base_save_dir, "pipe.p"))
def transfer_data(df, worker, now):
    """
    Dump a dataframe to a local json file and copy it to a remote worker.

    The dataframe is written to ``user_dfs/user_df_<now>.json`` next to this
    module. For any worker other than "local" the file is then copied with
    SCP to the remote machine.

    Args:
        df: The dataframe to store (passed to ``store_dataframe_as_json``).
        worker (str): One of "local", "cori" or "lrc".
        now (str): Time string used to build the file name.

    Raises:
        ValueError: If ``worker`` is not valid, or if the password command
            printed no non-empty line.
        KeyError: If the username environment variable for the worker
            (CORI_USER / LRC_USER) is not set.
    """
    # worker -> (login host, env var with the username, shell function which
    # prints the password as its last line of output)
    # NOTE(review): "cori" points at the LRC login host, exactly as before;
    # this looks like a copy-paste slip - confirm the intended host.
    remote_workers = {
        "cori": ("lrc-login.lbl.gov", "CORI_USER", "cori_get_password"),
        "lrc": ("lrc-login.lbl.gov", "LRC_USER", "lrc_get_password"),
    }
    # Fail before touching the filesystem if the worker is unknown.
    if worker != "local" and worker not in remote_workers:
        raise ValueError(f"Worker {worker} not valid!")

    this_dir = os.path.dirname(os.path.abspath(__file__))
    user_folder = os.path.join(this_dir, "user_dfs")
    os.makedirs(user_folder, exist_ok=True)
    filename = "user_df_" + now + ".json"
    filepath = os.path.join(user_folder, filename)
    store_dataframe_as_json(df, filepath)

    if worker == "local":
        return

    host, user_env, password_cmd = remote_workers[worker]
    o = subprocess.check_output(
        ['bash', '-c', '. ~/.bash_profile; ' + password_cmd]
    )
    user = os.environ[user_env]

    # The password is the last non-empty line; sourcing the profile may print
    # other lines (or several blank ones) before it.
    o_all = [line for line in o.decode("utf-8").splitlines() if line]
    if not o_all:
        raise ValueError(f"No password returned by {password_cmd}!")
    password = o_all[-1]

    ssh = SSHClient()
    try:
        ssh.load_system_host_keys()
        ssh.connect(host, username=user, password=password, look_for_keys=False)

        with SCPClient(ssh.get_transport()) as scp:
            # NOTE(review): remote path is hard-coded to one user's home.
            scp.put(filepath, recursive=True,
                    remote_path="/global/home/users/ardunn")
    finally:
        # Always release the connection, even if connect/put fails.
        ssh.close()
/* Dark theme container: white text on a near-black background. */
.ammw-dark-bg {
    color: white;
    background-color: #101010;
}
@app.callback(
    Output('app_container', 'children'),
    [Input('url', 'pathname')]
)
def display_page(path):
    """
    Produce the children of ``app_container`` when the url pathname changes.

    Currently a placeholder: "/", "/search" and an empty or missing path all
    render a centered "404" message; any other path returns None.

    Args:
        path: The ``pathname`` property of the ``url`` location component.
    """
    placeholder_routes = ("/", "/search")
    is_placeholder_route = str(path).strip() in placeholder_routes
    if is_placeholder_route or not path:
        return html.Div("404", className="has-text-centered")
    return None
12 | >&2 echo "$formatting_errors" 13 | exit 1 14 | fi 15 | -------------------------------------------------------------------------------- /dev_scripts/run_intensive_circleci.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | # Submit and run an intensive test to circleci. 5 | 6 | curl \ 7 | --user ${CIRCLE_CI_TOKEN}: \ 8 | --header "Content-Type: application/json" \ 9 | --data "{\"build_parameters\": {\"CIRCLE_JOB\": \"py372\", \"SKIP_INTENSIVE\": \"0\"}}" \ 10 | --request POST "https://circleci.com/api/v1.1/project/github/hackingmaterials/automatminer/tree/master" -------------------------------------------------------------------------------- /dev_scripts/run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | coverage run setup.py test 4 | coverage xml 5 | if [[ ! -z "$CODACY_PROJECT_TOKEN" ]]; then 6 | python-codacy-coverage -r coverage.xml 7 | else 8 | echo "No Codacy Project Token Defined, skipping..." 9 | fi -------------------------------------------------------------------------------- /dev_scripts/setup_env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "SKIPPING INTENSIVE TESTS? $SKIP_INTENSIVE" 4 | python3 -m venv test_env 5 | . test_env/bin/activate 6 | pip install -q --upgrade pip 7 | pip install -q wheel 8 | 9 | pip install -e . 10 | pip install -q coverage 11 | pip install -q codacy-coverage -------------------------------------------------------------------------------- /dev_scripts/setup_env_dev.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python3 -m venv test_env 4 | . 
test_env/bin/activate 5 | pip install --quiet -r requirements_dev.txt 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/.nojekyll -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = automatminer 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/_images/cv_nested.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/_images/cv_nested.png -------------------------------------------------------------------------------- /docs/_images/dataframe_pipe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/_images/dataframe_pipe.png -------------------------------------------------------------------------------- /docs/_images/forum.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/_images/forum.png -------------------------------------------------------------------------------- /docs/_images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/_images/logo.png -------------------------------------------------------------------------------- /docs/_images/matbench_pie_charts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/_images/matbench_pie_charts.png -------------------------------------------------------------------------------- /docs/_images/matminer_examples.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/_images/matminer_examples.png -------------------------------------------------------------------------------- /docs/_images/pipe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/_images/pipe.png -------------------------------------------------------------------------------- /docs/_sources/automatminer.automl.config.rst.txt: -------------------------------------------------------------------------------- 1 | automatminer.automl.config package 2 | ================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | automatminer.automl.config.tpot\_configs module 8 | ----------------------------------------------- 9 | 10 | .. automodule:: automatminer.automl.config.tpot_configs 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: automatminer.automl.config 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/_sources/automatminer.automl.rst.txt: -------------------------------------------------------------------------------- 1 | automatminer.automl package 2 | =========================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | automatminer.automl.config 10 | automatminer.automl.tests 11 | 12 | Submodules 13 | ---------- 14 | 15 | automatminer.automl.adaptors module 16 | ----------------------------------- 17 | 18 | .. automodule:: automatminer.automl.adaptors 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | automatminer.automl.base module 24 | ------------------------------- 25 | 26 | .. 
automodule:: automatminer.automl.base 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | 32 | Module contents 33 | --------------- 34 | 35 | .. automodule:: automatminer.automl 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /docs/_sources/automatminer.automl.tests.rst.txt: -------------------------------------------------------------------------------- 1 | automatminer.automl.tests package 2 | ================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | automatminer.automl.tests.test\_adaptors module 8 | ----------------------------------------------- 9 | 10 | .. automodule:: automatminer.automl.tests.test_adaptors 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | automatminer.automl.tests.test\_base module 16 | ------------------------------------------- 17 | 18 | .. automodule:: automatminer.automl.tests.test_base 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: automatminer.automl.tests 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/_sources/automatminer.featurization.rst.txt: -------------------------------------------------------------------------------- 1 | automatminer.featurization package 2 | ================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | automatminer.featurization.tests 10 | 11 | Submodules 12 | ---------- 13 | 14 | automatminer.featurization.base module 15 | -------------------------------------- 16 | 17 | .. automodule:: automatminer.featurization.base 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | automatminer.featurization.core module 23 | -------------------------------------- 24 | 25 | .. 
automodule:: automatminer.featurization.core 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | automatminer.featurization.sets module 31 | -------------------------------------- 32 | 33 | .. automodule:: automatminer.featurization.sets 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | 39 | Module contents 40 | --------------- 41 | 42 | .. automodule:: automatminer.featurization 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | -------------------------------------------------------------------------------- /docs/_sources/automatminer.featurization.tests.rst.txt: -------------------------------------------------------------------------------- 1 | automatminer.featurization.tests package 2 | ======================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | automatminer.featurization.tests.test\_base module 8 | -------------------------------------------------- 9 | 10 | .. automodule:: automatminer.featurization.tests.test_base 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | automatminer.featurization.tests.test\_core module 16 | -------------------------------------------------- 17 | 18 | .. automodule:: automatminer.featurization.tests.test_core 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | automatminer.featurization.tests.test\_sets module 24 | -------------------------------------------------- 25 | 26 | .. automodule:: automatminer.featurization.tests.test_sets 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | 32 | Module contents 33 | --------------- 34 | 35 | .. 
automodule:: automatminer.featurization.tests 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /docs/_sources/automatminer.preprocessing.rst.txt: -------------------------------------------------------------------------------- 1 | automatminer.preprocessing package 2 | ================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | automatminer.preprocessing.tests 10 | 11 | Submodules 12 | ---------- 13 | 14 | automatminer.preprocessing.core module 15 | -------------------------------------- 16 | 17 | .. automodule:: automatminer.preprocessing.core 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | automatminer.preprocessing.feature\_selection module 23 | ---------------------------------------------------- 24 | 25 | .. automodule:: automatminer.preprocessing.feature_selection 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: automatminer.preprocessing 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /docs/_sources/automatminer.preprocessing.tests.rst.txt: -------------------------------------------------------------------------------- 1 | automatminer.preprocessing.tests package 2 | ======================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | automatminer.preprocessing.tests.test\_core module 8 | -------------------------------------------------- 9 | 10 | .. automodule:: automatminer.preprocessing.tests.test_core 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. 
automodule:: automatminer.preprocessing.tests 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/_sources/automatminer.rst.txt: -------------------------------------------------------------------------------- 1 | automatminer package 2 | ==================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | automatminer.automl 10 | automatminer.featurization 11 | automatminer.preprocessing 12 | automatminer.tests 13 | automatminer.utils 14 | 15 | Submodules 16 | ---------- 17 | 18 | automatminer.base module 19 | ------------------------ 20 | 21 | .. automodule:: automatminer.base 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | 26 | automatminer.pipeline module 27 | ---------------------------- 28 | 29 | .. automodule:: automatminer.pipeline 30 | :members: 31 | :undoc-members: 32 | :show-inheritance: 33 | 34 | automatminer.presets module 35 | --------------------------- 36 | 37 | .. automodule:: automatminer.presets 38 | :members: 39 | :undoc-members: 40 | :show-inheritance: 41 | 42 | 43 | Module contents 44 | --------------- 45 | 46 | .. automodule:: automatminer 47 | :members: 48 | :undoc-members: 49 | :show-inheritance: 50 | -------------------------------------------------------------------------------- /docs/_sources/automatminer.tests.rst.txt: -------------------------------------------------------------------------------- 1 | automatminer.tests package 2 | ========================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | automatminer.tests.test\_base module 8 | ------------------------------------ 9 | 10 | .. automodule:: automatminer.tests.test_base 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | automatminer.tests.test\_pipeline module 16 | ---------------------------------------- 17 | 18 | .. 
automodule:: automatminer.tests.test_pipeline 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | automatminer.tests.test\_presets module 24 | --------------------------------------- 25 | 26 | .. automodule:: automatminer.tests.test_presets 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | 32 | Module contents 33 | --------------- 34 | 35 | .. automodule:: automatminer.tests 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /docs/_sources/automatminer.utils.rst.txt: -------------------------------------------------------------------------------- 1 | automatminer.utils package 2 | ========================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | automatminer.utils.tests 10 | 11 | Submodules 12 | ---------- 13 | 14 | automatminer.utils.log module 15 | ----------------------------- 16 | 17 | .. automodule:: automatminer.utils.log 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | automatminer.utils.ml module 23 | ---------------------------- 24 | 25 | .. automodule:: automatminer.utils.ml 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | automatminer.utils.pkg module 31 | ----------------------------- 32 | 33 | .. automodule:: automatminer.utils.pkg 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | 39 | Module contents 40 | --------------- 41 | 42 | .. 
automodule:: automatminer.utils 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | -------------------------------------------------------------------------------- /docs/_sources/automatminer.utils.tests.rst.txt: -------------------------------------------------------------------------------- 1 | automatminer.utils.tests package 2 | ================================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | automatminer.utils.tests.test\_log module 8 | ----------------------------------------- 9 | 10 | .. automodule:: automatminer.utils.tests.test_log 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | automatminer.utils.tests.test\_ml module 16 | ---------------------------------------- 17 | 18 | .. automodule:: automatminer.utils.tests.test_ml 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | automatminer.utils.tests.test\_pkg module 24 | ----------------------------------------- 25 | 26 | .. automodule:: automatminer.utils.tests.test_pkg 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | 32 | Module contents 33 | --------------- 34 | 35 | .. automodule:: automatminer.utils.tests 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /docs/_sources/index.rst.txt: -------------------------------------------------------------------------------- 1 | .. raw:: html 2 | 3 | 25 | 26 | .. title:: automatminer documentation 27 | 28 | .. image:: _static/logo.png 29 | :alt: server 30 | :align: center 31 | :width: 600px 32 | 33 | 34 | `Automatminer `_ is a tool for 35 | *automatically* creating **complete** machine 36 | learning pipelines for materials science, including automatic featurization 37 | with `matminer `_, feature 38 | reduction, and an AutoML backend. Put in a materials dataset, get out a machine 39 | that predicts materials properties. 
40 | 41 | How it works 42 | ------------ 43 | 44 | Automatminer automatically decorates a dataset using hundreds of descriptor 45 | techniques from matminer's descriptor library, picks the most useful 46 | features for learning, and runs a separate AutoML pipeline. 47 | Once a pipeline has been fit, it can be summarized in a text file, saved to 48 | disk, or used to make predictions on new materials. 49 | 50 | .. image:: _static/pipe.png 51 | :alt: server 52 | :align: center 53 | 54 | Automatminer uses `pandas `_ dataframes for all of 55 | its working objects. Put dataframes in, get dataframes out. 56 | 57 | 58 | .. image:: _static/dataframe_pipe.png 59 | :alt: server 60 | :align: center 61 | :width: 800px 62 | 63 | 64 | Here's an example of training on known data, and extending the model to out of 65 | sample data. 66 | 67 | .. code-block:: python 68 | 69 | from automatminer.pipeline import MatPipe 70 | 71 | # Fit a pipeline to training data to predict band gap 72 | pipe = MatPipe() 73 | pipe.fit(train_df, "band gap") 74 | 75 | # Predict bandgap of some unknown materials 76 | predicted_df = pipe.predict(unknown_df) 77 | 78 | Overview 79 | -------- 80 | 81 | **Automatminer can work with many kinds of data:** 82 | 83 | - both computational and experimental data 84 | - small (~100 samples) to moderate (~100k samples) sized datasets 85 | - crystalline datasets 86 | - composition-only (i.e., unknown phases) datasets 87 | - datasets containing electronic bandstructures or density of states 88 | 89 | **Many kinds of target properties:** 90 | 91 | - electronic 92 | - mechanical 93 | - thermodynamic 94 | - any other kind of property 95 | 96 | **And many featurization (descriptor) techniques:** 97 | 98 | See `matminer's Table of Featurizers `_ 99 | for a full (and growing) list. 
100 | 101 | 102 | **Automatminer is designed to be easy to use and reproducible** 103 | 104 | - Save pipelines which are portable across machines 105 | - Fit a complete pipeline with 1 line of code 106 | - Predict on new samples with 1 line of code 107 | - Presets for easy setup 108 | 109 | **Automatminer is automatic and accurate** 110 | 111 | - No hand tuning required 112 | - Comparable in accuracy to hand-tuned models in benchmark tests 113 | 114 | 115 | User manual 116 | -------------- 117 | 118 | .. toctree:: 119 | :maxdepth: 2 120 | 121 | installation.rst 122 | basic.rst 123 | advanced.rst 124 | datasets.rst 125 | tutorials.rst 126 | license.rst 127 | 128 | 129 | .. toctree:: 130 | :hidden: 131 | :maxdepth: 2 132 | 133 | Python API 134 | 135 | What's new? 136 | ----------- 137 | 138 | Track changes to automatminer through the `changelog 139 | `_. 140 | 141 | Contributing / Contact / Support 142 | -------------------------------- 143 | 144 | Want to see something added or changed? Some ways to get involved are: 145 | 146 | - Help us improve the documentation – tell us where you got stuck and improve 147 | the install process for everyone. 148 | - Let us know if you'd like to see certain features. 149 | - Point us to areas of the code that are difficult to understand or use. 150 | - Contribute code! You can do this by forking 151 | `Automatminer on Github `_ 152 | and submitting a pull request. 153 | - Post to our `support forum `_. Don't be shy, we look forward to feedback! 154 | 155 | See our `contribution guidelines 156 | `_ 157 | for more details. For a list of contributors, see our 158 | `GitHub page `_ 159 | 160 | Citing Automatminer or MatBench 161 | -------------------------------- 162 | If you find Automatminer or the MatBench benchmarks helpful in your research, 163 | please consider citing our `publication in npj Computational Materials `_: 164 | 165 | 166 | .. code-block:: text 167 | 168 | Dunn, A., Wang, Q., Ganose, A., Dopp, D., Jain, A. 
Benchmarking Materials Property Prediction 169 | Methods: The Matbench Test Set and Automatminer Reference Algorithm. npj Computational Materials 170 | 6, 138 (2020). https://doi.org/10.1038/s41524-020-00406-3 171 | 172 | 173 | API documentation 174 | ------------------ 175 | 176 | Autogenerated API documentation. Beware! Only for the brave. 177 | 178 | - :ref:`modindex` 179 | - :ref:`genindex` 180 | - :ref:`search` 181 | 182 | 183 | 184 | 185 | 186 | 187 | -------------------------------------------------------------------------------- /docs/_sources/installation.rst.txt: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | Automatminer supports Python 3.6.7 and Python 3.7.1+ on MacOS and Linux. Windows 5 | users may be able to install Automatminer (and we will try to help you as much 6 | as possible on the `forum `_), but it is not officially 7 | supported. 8 | 9 | 10 | From PyPi (using pip) 11 | --------------------- 12 | 13 | You can install the latest released version of automatminer through pip 14 | 15 | .. code-block:: bash 16 | 17 | pip install automatminer 18 | 19 | 20 | From source 21 | ----------- 22 | 23 | To install Automatminer from source, first clone the repository from GitHub, 24 | then use pip to install: 25 | 26 | .. code-block:: bash 27 | 28 | git clone https://github.com/hackingmaterials/automatminer.git 29 | cd automatminer 30 | pip install . 31 | 32 | If not installing from inside a virtual environment or conda environment, you 33 | may need to specify to install as a *user* via: 34 | 35 | .. code-block:: bash 36 | 37 | pip install . 
--user 38 | 39 | -------------------------------------------------------------------------------- /docs/_sources/license.rst.txt: -------------------------------------------------------------------------------- 1 | License 2 | ======= 3 | 4 | Automatminer is developed under a BSD-style License (an open-source license), 5 | reproduced below: 6 | 7 | .. include:: ../../LICENSE 8 | :literal: 9 | -------------------------------------------------------------------------------- /docs/_sources/modules.rst.txt: -------------------------------------------------------------------------------- 1 | automatminer 2 | ============ 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | automatminer 8 | -------------------------------------------------------------------------------- /docs/_sources/tutorials.rst.txt: -------------------------------------------------------------------------------- 1 | Tutorials and Support 2 | ===================== 3 | 4 | Tutorials 5 | --------- 6 | 7 | There are jupyter notebook tutorials for Automatminer hosted on the 8 | `matminer examples `_ 9 | repository on Github. 10 | 11 | .. image:: _static/matminer_examples.png 12 | :alt: server 13 | :align: center 14 | :width: 600px 15 | 16 | 17 | Support 18 | ------- 19 | 20 | Online support for all of our codes can be found on our 21 | `support forum `_. This is the place 22 | to troubleshoot any problems you might have with the software developers. 23 | 24 | .. 
image:: _static/forum.png 25 | :alt: server 26 | :align: center 27 | :width: 600px -------------------------------------------------------------------------------- /docs/_sources/using.rst.txt: -------------------------------------------------------------------------------- 1 | Using Automatminer 2 | ================== 3 | 4 | 5 | Training a pipeline 6 | --------------------- 7 | 8 | 9 | Making predictions 10 | ------------------- 11 | 12 | 13 | 14 | Running a benchmark 15 | ------------------- 16 | 17 | 18 | 19 | Customizing a pipeline 20 | ---------------------- 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /docs/_static/custom.css: -------------------------------------------------------------------------------- 1 | /* Avoid white space above the logo */ 2 | div.document { 3 | margin-top: 20px 4 | } 5 | div.body { 6 | padding-top: 30px; 7 | } 8 | 9 | div.sphinxsidebar { 10 | width: 230px; 11 | 12 | } 13 | 14 | div.body { 15 | min-width: 450px; 16 | max-width: 800px; 17 | } 18 | 19 | 20 | /* Main page title */ 21 | div.body h1 { 22 | text-align: center; 23 | font-size: 250%; 24 | color: #47567F; 25 | } 26 | 27 | /* Secondary sections title */ 28 | div.body h2 { 29 | color: #47567F; 30 | } 31 | 32 | /* Python examples code block */ 33 | div.highlight pre { 34 | background-color: #f5f5f5; 35 | font-size: 80%; 36 | padding-left: 10px; 37 | padding-right: 10px; 38 | } 39 | 40 | /* restyle table */ 41 | div.body table.docutils tr{ 42 | background: #ccc; /* fallback if nth-child is not supported */ 43 | } 44 | div.body table.docutils tr:nth-child(odd){ 45 | background: #f8f4ee; 46 | } 47 | div.body table.docutils tr:nth-child(even){ 48 | background: #fff; 49 | } 50 | div.body table.docutils td{ 51 | border: none; 52 | } 53 | 54 | /* Move the ads down in read the docs */ 55 | div.sphinxsidebarwrapper ul:last-of-type { 56 | margin-top: -10px; 57 | margin-bottom: 100px; 58 | } 59 | 60 | /* But don't apply 
this to nested ul */ 61 | div.sphinxsidebarwrapper ul ul:last-of-type { 62 | margin-top: 0px; 63 | margin-bottom: 20px; 64 | } 65 | 66 | a.reference { 67 | border-bottom: 1px dotted #576AA0; 68 | } 69 | 70 | a { 71 | color: #576AA0; 72 | 73 | } -------------------------------------------------------------------------------- /docs/_static/cv_nested.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/_static/cv_nested.png -------------------------------------------------------------------------------- /docs/_static/dataframe_pipe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/_static/dataframe_pipe.png -------------------------------------------------------------------------------- /docs/_static/documentation_options.js: -------------------------------------------------------------------------------- 1 | var DOCUMENTATION_OPTIONS = { 2 | URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), 3 | VERSION: '1.0.3.20200727', 4 | LANGUAGE: 'None', 5 | COLLAPSE_INDEX: false, 6 | FILE_SUFFIX: '.html', 7 | HAS_SOURCE: true, 8 | SOURCELINK_SUFFIX: '.txt', 9 | NAVIGATION_WITH_KEYS: false 10 | }; -------------------------------------------------------------------------------- /docs/_static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/_static/favicon.ico -------------------------------------------------------------------------------- /docs/_static/file.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/_static/file.png -------------------------------------------------------------------------------- /docs/_static/forum.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/_static/forum.png -------------------------------------------------------------------------------- /docs/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/_static/logo.png -------------------------------------------------------------------------------- /docs/_static/logo_header.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/_static/logo_header.png -------------------------------------------------------------------------------- /docs/_static/logo_lowres.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/_static/logo_lowres.png -------------------------------------------------------------------------------- /docs/_static/matbench_pie_charts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/_static/matbench_pie_charts.png -------------------------------------------------------------------------------- /docs/_static/matminer_examples.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/_static/matminer_examples.png -------------------------------------------------------------------------------- /docs/_static/minus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/_static/minus.png -------------------------------------------------------------------------------- /docs/_static/nature.css: -------------------------------------------------------------------------------- 1 | /* 2 | * nature.css_t 3 | * ~~~~~~~~~~~~ 4 | * 5 | * Sphinx stylesheet -- nature theme. 6 | * 7 | * :copyright: Copyright 2007-2011 by the Sphinx team, see AUTHORS. 8 | * :license: BSD, see LICENSE for details. 9 | * 10 | */ 11 | 12 | @import url("basic.css"); 13 | /* -- page layout ----------------------------------------------------------- */ 14 | 15 | body { 16 | font-family: "Lato", sans-serif; 17 | font-size: 100%; 18 | background-color: #fff; 19 | color: #ffffff; 20 | margin: 0; 21 | padding: 0; 22 | } 23 | 24 | div.documentwrapper { 25 | float: left; 26 | width: 100%; 27 | } 28 | 29 | div.bodywrapper { 30 | margin: 0 0 0 230px; 31 | } 32 | 33 | hr { 34 | border: 1px solid #B1B4B6; 35 | } 36 | 37 | div.document { 38 | /* the light gray behind Show Source and Quick search box*/ 39 | background-color: #eee; 40 | } 41 | 42 | div.body { 43 | /* The background of the main document*/ 44 | background-color: #ffffff; 45 | 46 | /* Color of text in main document*/ 47 | color: #000000; 48 | padding: 0 30px 30px 30px; 49 | font-size: 0.95em; 50 | max-width: 1200px 51 | 52 | } 53 | 54 | div.footer { 55 | background: none repeat scroll 0 0 #eeeeee; 56 | color: #555; 57 | width: 100%; 58 | padding: 13px 0; 59 | text-align: center; 60 | font-size: 75%; 61 | } 62 | 63 | div.footer a { 64 | /* Color of Sphinx logo in footer*/ 65 | color: 
#F86709; 66 | /*text-decoration: underline;*/ 67 | } 68 | 69 | div.related { 70 | background-color: #60a483; 71 | line-height: 32px; 72 | color: #fff; 73 | text-shadow: 0px 1px 0 #444; 74 | font-size: 0.9em; 75 | } 76 | 77 | div.related a { 78 | color: #fff; 79 | } 80 | 81 | div.sphinxsidebar { 82 | font-size: 0.9em; 83 | line-height: 1.5em; 84 | max-width: 400px; 85 | 86 | } 87 | 88 | div.sphinxsidebarwrapper{ 89 | padding: 20px 0; 90 | max-width: 400px; 91 | } 92 | 93 | div.sphinxsidebar h3, 94 | div.sphinxsidebar h4 { 95 | font-family: Arial, sans-serif; 96 | color: #222; 97 | font-size: 1.2em; 98 | font-weight: normal; 99 | margin: 0; 100 | padding: 5px 10px; 101 | background-color: #ddd; 102 | text-shadow: 1px 1px 0 white 103 | } 104 | 105 | div.sphinxsidebar h4{ 106 | font-size: 1.1em; 107 | } 108 | 109 | div.sphinxsidebar h3 a { 110 | color: #444; 111 | } 112 | 113 | 114 | div.sphinxsidebar p { 115 | color: #888; 116 | padding: 5px 20px; 117 | } 118 | 119 | div.sphinxsidebar p.topless { 120 | } 121 | 122 | div.sphinxsidebar ul { 123 | margin: 10px 20px; 124 | padding: 0; 125 | color: #000; 126 | } 127 | 128 | div.sphinxsidebar a { 129 | color: #444; 130 | } 131 | 132 | div.sphinxsidebar input { 133 | border: 1px solid #ccc; 134 | font-family: sans-serif; 135 | font-size: 1em; 136 | } 137 | 138 | div.sphinxsidebar input[type=text]{ 139 | margin-left: 20px; 140 | } 141 | 142 | /* -- body styles ----------------------------------------------------------- */ 143 | 144 | a { 145 | color: #60a483; 146 | text-decoration: none; 147 | } 148 | 149 | a:hover { 150 | color: #E32E00; 151 | text-decoration: underline; 152 | } 153 | 154 | div.body h1, 155 | div.body h4, 156 | div.body h5, 157 | div.body h6 { 158 | font-family: "Lato", sans-serif; 159 | background-color: #60a483; 160 | font-weight: normal; 161 | color: #fff; 162 | margin: 30px 0px 10px 0px; 163 | padding: 5px 0 5px 10px; 164 | text-shadow: 0px 0px 0 black 165 | } 166 | 167 | div.body h1 { border-top: 20px ; 
margin-top: 0; font-size: 185%; ; font-family: "Helvetica", serif} 168 | div.body h2 { font-size: 130%; color: #60a483; font-family: "Helvetica", serif} 169 | div.body h3 { font-size: 130%; color: #446; font-family: "Palatino", serif} 170 | div.body h4 { font-size: 110%; background-color: #60a483; } 171 | div.body h5 { font-size: 100%; background-color: #60a483; } 172 | div.body h6 { font-size: 100%; background-color: #60a483; } 173 | 174 | a.headerlink { 175 | color: #fff; 176 | font-size: 0.8em; 177 | padding: 0 4px 0 4px; 178 | text-decoration: none; 179 | } 180 | 181 | a.headerlink:hover { 182 | background-color: #60a483; 183 | color: white; 184 | } 185 | 186 | div.body p, div.body dd, div.body li { 187 | line-height: 1.5em; 188 | } 189 | 190 | div.admonition p.admonition-title + p { 191 | display: inline; 192 | } 193 | 194 | div.highlight{ 195 | background-color: white; 196 | } 197 | 198 | div.note { 199 | background-color: #eee; 200 | border: 1px solid #ccc; 201 | } 202 | 203 | div.seealso { 204 | background-color: #ffc; 205 | border: 1px solid #ff6; 206 | } 207 | 208 | div.topic { 209 | background-color: #eee; 210 | } 211 | 212 | div.warning { 213 | background-color: #ffe4e4; 214 | border: 1px solid #f66; 215 | } 216 | 217 | p.admonition-title { 218 | display: inline; 219 | } 220 | 221 | p.admonition-title:after { 222 | content: ":"; 223 | } 224 | 225 | pre { 226 | padding: 10px; 227 | background-color: White; 228 | color: #222; 229 | line-height: 1.2em; 230 | border: 1px solid #C6C9CB; 231 | font-size: 1.1em; 232 | margin: 1.5em 0 1.5em 0; 233 | -webkit-box-shadow: 1px 1px 1px #d8d8d8; 234 | -moz-box-shadow: 1px 1px 1px #d8d8d8; 235 | } 236 | 237 | tt { 238 | background-color: #ecf0f3; 239 | color: #222; 240 | /* padding: 1px 2px; */ 241 | font-size: 1.1em; 242 | font-family: monospace; 243 | } 244 | 245 | h2 tt { 246 | color: #fff 247 | } 248 | 249 | .viewcode-back { 250 | font-family: Arial, sans-serif; 251 | } 252 | 253 | div.viewcode-block:target { 254 
| background-color: #f4debf; 255 | border-top: 1px solid #ac9; 256 | border-bottom: 1px solid #ac9; 257 | } 258 | 259 | p.caption { 260 | font-family: Times, serif; 261 | font-weight: bold; 262 | } 263 | 264 | .pull-quote { 265 | color: #555; 266 | font-family: "Palatino", serif; 267 | font-size: 90%; 268 | background-color: #eee; 269 | width: 100%; 270 | padding: 10px 10px; 271 | } 272 | -------------------------------------------------------------------------------- /docs/_static/nature.css-e: -------------------------------------------------------------------------------- 1 | /* 2 | * nature.css_t 3 | * ~~~~~~~~~~~~ 4 | * 5 | * Sphinx stylesheet -- nature theme. 6 | * 7 | * :copyright: Copyright 2007-2011 by the Sphinx team, see AUTHORS. 8 | * :license: BSD, see LICENSE for details. 9 | * 10 | */ 11 | 12 | @import url("basic.css"); 13 | /* -- page layout ----------------------------------------------------------- */ 14 | 15 | body { 16 | font-family: "Lato", sans-serif; 17 | font-size: 100%; 18 | background-color: #eee; 19 | color: #555; 20 | margin: 0; 21 | padding: 0; 22 | } 23 | 24 | div.documentwrapper { 25 | float: left; 26 | width: 100%; 27 | } 28 | 29 | div.bodywrapper { 30 | margin: 0 0 0 230px; 31 | } 32 | 33 | hr { 34 | border: 1px solid #B1B4B6; 35 | } 36 | 37 | div.document { 38 | /* the light gray behind Show Source and Quick search box*/ 39 | background-color: #eee; 40 | } 41 | 42 | div.body { 43 | /* The background of the main document*/ 44 | background-color: #ffffff; 45 | 46 | /* Color of text in main document*/ 47 | color: #000000; 48 | padding: 0 30px 30px 30px; 49 | font-size: 0.95em; 50 | max-width: 1200px 51 | 52 | } 53 | 54 | div.footer { 55 | background: none repeat scroll 0 0 #eeeeee; 56 | color: #555; 57 | width: 100%; 58 | padding: 13px 0; 59 | text-align: center; 60 | font-size: 75%; 61 | } 62 | 63 | div.footer a { 64 | /* Color of Sphinx logo in footer*/ 65 | color: #F86709; 66 | /*text-decoration: underline;*/ 67 | } 68 | 69 | 
div.related { 70 | background-color: #E54C00; 71 | line-height: 32px; 72 | color: #fff; 73 | text-shadow: 0px 1px 0 #444; 74 | font-size: 0.9em; 75 | } 76 | 77 | div.related a { 78 | color: #fff; 79 | } 80 | 81 | div.sphinxsidebar { 82 | font-size: 0.9em; 83 | line-height: 1.5em; 84 | max-width: 400px; 85 | 86 | } 87 | 88 | div.sphinxsidebarwrapper{ 89 | padding: 20px 0; 90 | max-width: 400px; 91 | } 92 | 93 | div.sphinxsidebar h3, 94 | div.sphinxsidebar h4 { 95 | font-family: Arial, sans-serif; 96 | color: #222; 97 | font-size: 1.2em; 98 | font-weight: normal; 99 | margin: 0; 100 | padding: 5px 10px; 101 | background-color: #ddd; 102 | text-shadow: 1px 1px 0 white 103 | } 104 | 105 | div.sphinxsidebar h4{ 106 | font-size: 1.1em; 107 | } 108 | 109 | div.sphinxsidebar h3 a { 110 | color: #444; 111 | } 112 | 113 | 114 | div.sphinxsidebar p { 115 | color: #888; 116 | padding: 5px 20px; 117 | } 118 | 119 | div.sphinxsidebar p.topless { 120 | } 121 | 122 | div.sphinxsidebar ul { 123 | margin: 10px 20px; 124 | padding: 0; 125 | color: #000; 126 | } 127 | 128 | div.sphinxsidebar a { 129 | color: #444; 130 | } 131 | 132 | div.sphinxsidebar input { 133 | border: 1px solid #ccc; 134 | font-family: sans-serif; 135 | font-size: 1em; 136 | } 137 | 138 | div.sphinxsidebar input[type=text]{ 139 | margin-left: 20px; 140 | } 141 | 142 | /* -- body styles ----------------------------------------------------------- */ 143 | 144 | a { 145 | color: #E54C00; 146 | text-decoration: none; 147 | } 148 | 149 | a:hover { 150 | color: #E32E00; 151 | text-decoration: underline; 152 | } 153 | 154 | div.body h1, 155 | div.body h4, 156 | div.body h5, 157 | div.body h6 { 158 | font-family: "Lato", sans-serif; 159 | background-color: #E54C00; 160 | font-weight: normal; 161 | color: #fff; 162 | margin: 30px 0px 10px 0px; 163 | padding: 5px 0 5px 10px; 164 | text-shadow: 0px 0px 0 black 165 | } 166 | 167 | div.body h1 { border-top: 20px ; margin-top: 0; font-size: 185%; ; font-family: "Helvetica", 
serif} 168 | div.body h2 { font-size: 130%; color: #E54C00; font-family: "Helvetica", serif} 169 | div.body h3 { font-size: 130%; color: #446; font-family: "Palatino", serif} 170 | div.body h4 { font-size: 110%; background-color: #E54C00; } 171 | div.body h5 { font-size: 100%; background-color: #E54C00; } 172 | div.body h6 { font-size: 100%; background-color: #E54C00; } 173 | 174 | a.headerlink { 175 | color: #fff; 176 | font-size: 0.8em; 177 | padding: 0 4px 0 4px; 178 | text-decoration: none; 179 | } 180 | 181 | a.headerlink:hover { 182 | background-color: #E54C00; 183 | color: white; 184 | } 185 | 186 | div.body p, div.body dd, div.body li { 187 | line-height: 1.5em; 188 | } 189 | 190 | div.admonition p.admonition-title + p { 191 | display: inline; 192 | } 193 | 194 | div.highlight{ 195 | background-color: white; 196 | } 197 | 198 | div.note { 199 | background-color: #eee; 200 | border: 1px solid #ccc; 201 | } 202 | 203 | div.seealso { 204 | background-color: #ffc; 205 | border: 1px solid #ff6; 206 | } 207 | 208 | div.topic { 209 | background-color: #eee; 210 | } 211 | 212 | div.warning { 213 | background-color: #ffe4e4; 214 | border: 1px solid #f66; 215 | } 216 | 217 | p.admonition-title { 218 | display: inline; 219 | } 220 | 221 | p.admonition-title:after { 222 | content: ":"; 223 | } 224 | 225 | pre { 226 | padding: 10px; 227 | background-color: White; 228 | color: #222; 229 | line-height: 1.2em; 230 | border: 1px solid #C6C9CB; 231 | font-size: 1.1em; 232 | margin: 1.5em 0 1.5em 0; 233 | -webkit-box-shadow: 1px 1px 1px #d8d8d8; 234 | -moz-box-shadow: 1px 1px 1px #d8d8d8; 235 | } 236 | 237 | tt { 238 | background-color: #ecf0f3; 239 | color: #222; 240 | /* padding: 1px 2px; */ 241 | font-size: 1.1em; 242 | font-family: monospace; 243 | } 244 | 245 | h2 tt { 246 | color: #fff 247 | } 248 | 249 | .viewcode-back { 250 | font-family: Arial, sans-serif; 251 | } 252 | 253 | div.viewcode-block:target { 254 | background-color: #f4debf; 255 | border-top: 1px solid 
#ac9; 256 | border-bottom: 1px solid #ac9; 257 | } 258 | 259 | p.caption { 260 | font-family: Times, serif; 261 | font-weight: bold; 262 | } 263 | 264 | .pull-quote { 265 | color: #555; 266 | font-family: "Palatino", serif; 267 | font-size: 90%; 268 | background-color: #eee; 269 | width: 100%; 270 | padding: 10px 10px; 271 | } 272 | -------------------------------------------------------------------------------- /docs/_static/pipe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/_static/pipe.png -------------------------------------------------------------------------------- /docs/_static/plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/_static/plus.png -------------------------------------------------------------------------------- /docs/_static/pygments.css: -------------------------------------------------------------------------------- 1 | .highlight .hll { background-color: #ffffcc } 2 | .highlight { background: #eeffcc; } 3 | .highlight .c { color: #408090; font-style: italic } /* Comment */ 4 | .highlight .err { border: 1px solid #FF0000 } /* Error */ 5 | .highlight .k { color: #007020; font-weight: bold } /* Keyword */ 6 | .highlight .o { color: #666666 } /* Operator */ 7 | .highlight .ch { color: #408090; font-style: italic } /* Comment.Hashbang */ 8 | .highlight .cm { color: #408090; font-style: italic } /* Comment.Multiline */ 9 | .highlight .cp { color: #007020 } /* Comment.Preproc */ 10 | .highlight .cpf { color: #408090; font-style: italic } /* Comment.PreprocFile */ 11 | .highlight .c1 { color: #408090; font-style: italic } /* Comment.Single */ 12 | .highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */ 13 | .highlight .gd { 
color: #A00000 } /* Generic.Deleted */ 14 | .highlight .ge { font-style: italic } /* Generic.Emph */ 15 | .highlight .gr { color: #FF0000 } /* Generic.Error */ 16 | .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ 17 | .highlight .gi { color: #00A000 } /* Generic.Inserted */ 18 | .highlight .go { color: #333333 } /* Generic.Output */ 19 | .highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */ 20 | .highlight .gs { font-weight: bold } /* Generic.Strong */ 21 | .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ 22 | .highlight .gt { color: #0044DD } /* Generic.Traceback */ 23 | .highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */ 24 | .highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */ 25 | .highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */ 26 | .highlight .kp { color: #007020 } /* Keyword.Pseudo */ 27 | .highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */ 28 | .highlight .kt { color: #902000 } /* Keyword.Type */ 29 | .highlight .m { color: #208050 } /* Literal.Number */ 30 | .highlight .s { color: #4070a0 } /* Literal.String */ 31 | .highlight .na { color: #4070a0 } /* Name.Attribute */ 32 | .highlight .nb { color: #007020 } /* Name.Builtin */ 33 | .highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */ 34 | .highlight .no { color: #60add5 } /* Name.Constant */ 35 | .highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */ 36 | .highlight .ni { color: #d55537; font-weight: bold } /* Name.Entity */ 37 | .highlight .ne { color: #007020 } /* Name.Exception */ 38 | .highlight .nf { color: #06287e } /* Name.Function */ 39 | .highlight .nl { color: #002070; font-weight: bold } /* Name.Label */ 40 | .highlight .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */ 41 | .highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */ 42 | .highlight .nv { color: 
#bb60d5 } /* Name.Variable */ 43 | .highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */ 44 | .highlight .w { color: #bbbbbb } /* Text.Whitespace */ 45 | .highlight .mb { color: #208050 } /* Literal.Number.Bin */ 46 | .highlight .mf { color: #208050 } /* Literal.Number.Float */ 47 | .highlight .mh { color: #208050 } /* Literal.Number.Hex */ 48 | .highlight .mi { color: #208050 } /* Literal.Number.Integer */ 49 | .highlight .mo { color: #208050 } /* Literal.Number.Oct */ 50 | .highlight .sa { color: #4070a0 } /* Literal.String.Affix */ 51 | .highlight .sb { color: #4070a0 } /* Literal.String.Backtick */ 52 | .highlight .sc { color: #4070a0 } /* Literal.String.Char */ 53 | .highlight .dl { color: #4070a0 } /* Literal.String.Delimiter */ 54 | .highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */ 55 | .highlight .s2 { color: #4070a0 } /* Literal.String.Double */ 56 | .highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */ 57 | .highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */ 58 | .highlight .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */ 59 | .highlight .sx { color: #c65d09 } /* Literal.String.Other */ 60 | .highlight .sr { color: #235388 } /* Literal.String.Regex */ 61 | .highlight .s1 { color: #4070a0 } /* Literal.String.Single */ 62 | .highlight .ss { color: #517918 } /* Literal.String.Symbol */ 63 | .highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */ 64 | .highlight .fm { color: #06287e } /* Name.Function.Magic */ 65 | .highlight .vc { color: #bb60d5 } /* Name.Variable.Class */ 66 | .highlight .vg { color: #bb60d5 } /* Name.Variable.Global */ 67 | .highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */ 68 | .highlight .vm { color: #bb60d5 } /* Name.Variable.Magic */ 69 | .highlight .il { color: #208050 } /* Literal.Number.Integer.Long */ -------------------------------------------------------------------------------- 
/docs/automatminer.automl.config.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | automatminer.automl.config package — Automatminer 1.0.3.20200727 documentation 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 |
26 | 27 | 64 |
65 |
66 | 67 | 68 |
69 | 70 |
71 |

automatminer.automl.config package

72 |
73 |

Submodules

74 |
75 |
76 |

automatminer.automl.config.tpot_configs module

77 |

This file is modified from the default config files of the TPOT library. 78 | It contains a customized dict of operators that we want to optimize using a 79 | genetic algorithm.

80 |

We can add/remove Regressors/Preprocessors/Selectors-related operators 81 | to customize the operators to be optimized by tpot in the future. For instance, 82 | the Preprocessors/Selectors-related procedures are currently taken care of 83 | by the Preprocess class in automatminer, so we may consider commenting out the 84 | related operators in the config_dicts (or use tpot instead of Preprocess 85 | to optimize some procedures).

86 |

Check the TPOT documentation for information on the structure of config_dicts

87 |
88 |
89 |

Module contents

90 |
91 |
92 | 93 | 94 |
95 | 96 |
97 |
98 |
99 |
100 | 107 | 108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /docs/installation.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Installation — Automatminer 1.0.3.20200727 documentation 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 |
26 | 27 | 65 |
66 |
67 | 68 | 69 |
70 | 71 |
72 |

Installation

73 |

Automatminer supports Python 3.6.7 and Python 3.7.1+ on MacOS and Linux. Windows 74 | users may be able to install Automatminer (and we will try to help you as much 75 | as possible on the forum), but it is not officially 76 | supported.

77 |
78 |

From PyPi (using pip)

79 |

You can install the latest released version of automatminer through pip

80 |
pip install automatminer
 81 | 
82 |
83 |
84 |
85 |

From source

86 |

To install Automatminer from source, first clone the repository from GitHub, 87 | then use pip to install:

88 |
git clone https://github.com/hackingmaterials/automatminer.git
 89 | cd automatminer
 90 | pip install .
 91 | 
92 |
93 |

If not installing from inside a virtual environment or conda environment, you 94 | may need to install as a user via:

95 |
pip install . --user
 96 | 
97 |
98 |
99 |
100 | 101 | 102 |
103 | 104 |
105 |
106 |
107 |
108 | 115 | 116 | 117 | 118 | 119 | 120 | -------------------------------------------------------------------------------- /docs/modules.rst: -------------------------------------------------------------------------------- 1 | automatminer 2 | ============ 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | automatminer 8 | -------------------------------------------------------------------------------- /docs/objects.inv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/objects.inv -------------------------------------------------------------------------------- /docs/search.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Search — Automatminer 1.0.3.20200727 documentation 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 64 |
65 |
66 | 67 | 68 |
69 | 70 |

Search

71 |
72 | 73 |

74 | Please activate JavaScript to enable the search 75 | functionality. 76 |

77 |
78 |

79 | From here you can search these documents. Enter your search 80 | words into the box below and click "search". Note that the search 81 | function will automatically search for all of the words. Pages 82 | containing fewer words won't appear in the result list. 83 |

84 |
85 | 86 | 87 | 88 |
89 | 90 |
91 | 92 |
93 | 94 |
95 | 96 |
97 |
98 |
99 |
100 | 104 | 105 | 106 | 107 | 108 | 109 | -------------------------------------------------------------------------------- /docs/source/_static/custom.css: -------------------------------------------------------------------------------- 1 | /* Avoid white space above the logo */ 2 | div.document { 3 | margin-top: 20px 4 | } 5 | div.body { 6 | padding-top: 30px; 7 | } 8 | 9 | div.sphinxsidebar { 10 | width: 230px; 11 | 12 | } 13 | 14 | div.body { 15 | min-width: 450px; 16 | max-width: 800px; 17 | } 18 | 19 | 20 | /* Main page title */ 21 | div.body h1 { 22 | text-align: center; 23 | font-size: 250%; 24 | color: #47567F; 25 | } 26 | 27 | /* Secondary sections title */ 28 | div.body h2 { 29 | color: #47567F; 30 | } 31 | 32 | /* Python examples code block */ 33 | div.highlight pre { 34 | background-color: #f5f5f5; 35 | font-size: 80%; 36 | padding-left: 10px; 37 | padding-right: 10px; 38 | } 39 | 40 | /* restyle table */ 41 | div.body table.docutils tr{ 42 | background: #ccc; /* fallback if nth-child is not supported */ 43 | } 44 | div.body table.docutils tr:nth-child(odd){ 45 | background: #f8f4ee; 46 | } 47 | div.body table.docutils tr:nth-child(even){ 48 | background: #fff; 49 | } 50 | div.body table.docutils td{ 51 | border: none; 52 | } 53 | 54 | /* Move the ads down in read the docs */ 55 | div.sphinxsidebarwrapper ul:last-of-type { 56 | margin-top: -10px; 57 | margin-bottom: 100px; 58 | } 59 | 60 | /* But don't apply this to nested ul */ 61 | div.sphinxsidebarwrapper ul ul:last-of-type { 62 | margin-top: 0px; 63 | margin-bottom: 20px; 64 | } 65 | 66 | a.reference { 67 | border-bottom: 1px dotted #576AA0; 68 | } 69 | 70 | a { 71 | color: #576AA0; 72 | 73 | } -------------------------------------------------------------------------------- /docs/source/_static/cv_nested.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/source/_static/cv_nested.png -------------------------------------------------------------------------------- /docs/source/_static/dataframe_pipe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/source/_static/dataframe_pipe.png -------------------------------------------------------------------------------- /docs/source/_static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/source/_static/favicon.ico -------------------------------------------------------------------------------- /docs/source/_static/forum.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/source/_static/forum.png -------------------------------------------------------------------------------- /docs/source/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/source/_static/logo.png -------------------------------------------------------------------------------- /docs/source/_static/logo_header.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/source/_static/logo_header.png -------------------------------------------------------------------------------- /docs/source/_static/logo_lowres.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/source/_static/logo_lowres.png -------------------------------------------------------------------------------- /docs/source/_static/matbench_pie_charts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/source/_static/matbench_pie_charts.png -------------------------------------------------------------------------------- /docs/source/_static/matminer_examples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/source/_static/matminer_examples.png -------------------------------------------------------------------------------- /docs/source/_static/minus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/source/_static/minus.png -------------------------------------------------------------------------------- /docs/source/_static/pipe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hackingmaterials/automatminer/860a19ef4078029f0063a4d8d7a9d69b75455b24/docs/source/_static/pipe.png -------------------------------------------------------------------------------- /docs/source/_templates/class.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | {% block methods %} 9 | .. 
automethod:: __init__ 10 | 11 | {% if methods %} 12 | .. rubric:: Methods 13 | 14 | .. autosummary:: 15 | {% for item in methods %} 16 | ~{{ name }}.{{ item }} 17 | {%- endfor %} 18 | {% endif %} 19 | {% endblock %} 20 | 21 | .. include:: {{module}}.{{objname}}.examples 22 | 23 | .. raw:: html 24 | 25 |
26 | -------------------------------------------------------------------------------- /docs/source/_templates/function.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}==================== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autofunction:: {{ objname }} 7 | 8 | .. include:: {{module}}.{{objname}}.examples 9 | 10 | .. raw:: html 11 | 12 |
-------------------------------------------------------------------------------- /docs/source/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends '!layout.html' %} 2 | 3 | 4 | {% block extrahead %} 5 | 6 | 7 | 8 | {% endblock %} 9 | 10 | {%- block sidebarsourcelink %} 11 | {% endblock %} 12 | 13 | {%- block sidebarsearch %} 14 |
15 | {{ super() }} 16 |
17 |
18 |

Support

19 | Automatminer forum 20 |

21 | Send an email to subscribe

22 | 23 |
24 |
25 | {% endblock %} 26 | 27 | -------------------------------------------------------------------------------- /docs/source/automatminer.automl.config.rst: -------------------------------------------------------------------------------- 1 | automatminer.automl.config package 2 | ================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | automatminer.automl.config.tpot\_configs module 8 | ----------------------------------------------- 9 | 10 | .. automodule:: automatminer.automl.config.tpot_configs 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: automatminer.automl.config 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/source/automatminer.automl.rst: -------------------------------------------------------------------------------- 1 | automatminer.automl package 2 | =========================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | automatminer.automl.config 10 | automatminer.automl.tests 11 | 12 | Submodules 13 | ---------- 14 | 15 | automatminer.automl.adaptors module 16 | ----------------------------------- 17 | 18 | .. automodule:: automatminer.automl.adaptors 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | automatminer.automl.base module 24 | ------------------------------- 25 | 26 | .. automodule:: automatminer.automl.base 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | 32 | Module contents 33 | --------------- 34 | 35 | .. 
automodule:: automatminer.automl 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /docs/source/automatminer.automl.tests.rst: -------------------------------------------------------------------------------- 1 | automatminer.automl.tests package 2 | ================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | automatminer.automl.tests.test\_adaptors module 8 | ----------------------------------------------- 9 | 10 | .. automodule:: automatminer.automl.tests.test_adaptors 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | automatminer.automl.tests.test\_base module 16 | ------------------------------------------- 17 | 18 | .. automodule:: automatminer.automl.tests.test_base 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: automatminer.automl.tests 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/source/automatminer.featurization.rst: -------------------------------------------------------------------------------- 1 | automatminer.featurization package 2 | ================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | automatminer.featurization.tests 10 | 11 | Submodules 12 | ---------- 13 | 14 | automatminer.featurization.base module 15 | -------------------------------------- 16 | 17 | .. automodule:: automatminer.featurization.base 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | automatminer.featurization.core module 23 | -------------------------------------- 24 | 25 | .. automodule:: automatminer.featurization.core 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | automatminer.featurization.sets module 31 | -------------------------------------- 32 | 33 | .. 
automodule:: automatminer.featurization.sets 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | 39 | Module contents 40 | --------------- 41 | 42 | .. automodule:: automatminer.featurization 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | -------------------------------------------------------------------------------- /docs/source/automatminer.featurization.tests.rst: -------------------------------------------------------------------------------- 1 | automatminer.featurization.tests package 2 | ======================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | automatminer.featurization.tests.test\_base module 8 | -------------------------------------------------- 9 | 10 | .. automodule:: automatminer.featurization.tests.test_base 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | automatminer.featurization.tests.test\_core module 16 | -------------------------------------------------- 17 | 18 | .. automodule:: automatminer.featurization.tests.test_core 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | automatminer.featurization.tests.test\_sets module 24 | -------------------------------------------------- 25 | 26 | .. automodule:: automatminer.featurization.tests.test_sets 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | 32 | Module contents 33 | --------------- 34 | 35 | .. automodule:: automatminer.featurization.tests 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /docs/source/automatminer.preprocessing.rst: -------------------------------------------------------------------------------- 1 | automatminer.preprocessing package 2 | ================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. 
toctree:: 8 | 9 | automatminer.preprocessing.tests 10 | 11 | Submodules 12 | ---------- 13 | 14 | automatminer.preprocessing.core module 15 | -------------------------------------- 16 | 17 | .. automodule:: automatminer.preprocessing.core 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | automatminer.preprocessing.feature\_selection module 23 | ---------------------------------------------------- 24 | 25 | .. automodule:: automatminer.preprocessing.feature_selection 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: automatminer.preprocessing 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /docs/source/automatminer.preprocessing.tests.rst: -------------------------------------------------------------------------------- 1 | automatminer.preprocessing.tests package 2 | ======================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | automatminer.preprocessing.tests.test\_core module 8 | -------------------------------------------------- 9 | 10 | .. automodule:: automatminer.preprocessing.tests.test_core 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: automatminer.preprocessing.tests 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/source/automatminer.rst: -------------------------------------------------------------------------------- 1 | automatminer package 2 | ==================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. 
toctree:: 8 | 9 | automatminer.automl 10 | automatminer.featurization 11 | automatminer.preprocessing 12 | automatminer.tests 13 | automatminer.utils 14 | 15 | Submodules 16 | ---------- 17 | 18 | automatminer.base module 19 | ------------------------ 20 | 21 | .. automodule:: automatminer.base 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | 26 | automatminer.pipeline module 27 | ---------------------------- 28 | 29 | .. automodule:: automatminer.pipeline 30 | :members: 31 | :undoc-members: 32 | :show-inheritance: 33 | 34 | automatminer.presets module 35 | --------------------------- 36 | 37 | .. automodule:: automatminer.presets 38 | :members: 39 | :undoc-members: 40 | :show-inheritance: 41 | 42 | 43 | Module contents 44 | --------------- 45 | 46 | .. automodule:: automatminer 47 | :members: 48 | :undoc-members: 49 | :show-inheritance: 50 | -------------------------------------------------------------------------------- /docs/source/automatminer.tests.rst: -------------------------------------------------------------------------------- 1 | automatminer.tests package 2 | ========================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | automatminer.tests.test\_base module 8 | ------------------------------------ 9 | 10 | .. automodule:: automatminer.tests.test_base 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | automatminer.tests.test\_pipeline module 16 | ---------------------------------------- 17 | 18 | .. automodule:: automatminer.tests.test_pipeline 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | automatminer.tests.test\_presets module 24 | --------------------------------------- 25 | 26 | .. automodule:: automatminer.tests.test_presets 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | 32 | Module contents 33 | --------------- 34 | 35 | .. 
automodule:: automatminer.tests 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /docs/source/automatminer.utils.rst: -------------------------------------------------------------------------------- 1 | automatminer.utils package 2 | ========================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | automatminer.utils.tests 10 | 11 | Submodules 12 | ---------- 13 | 14 | automatminer.utils.log module 15 | ----------------------------- 16 | 17 | .. automodule:: automatminer.utils.log 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | automatminer.utils.ml module 23 | ---------------------------- 24 | 25 | .. automodule:: automatminer.utils.ml 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | automatminer.utils.pkg module 31 | ----------------------------- 32 | 33 | .. automodule:: automatminer.utils.pkg 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | 39 | Module contents 40 | --------------- 41 | 42 | .. automodule:: automatminer.utils 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | -------------------------------------------------------------------------------- /docs/source/automatminer.utils.tests.rst: -------------------------------------------------------------------------------- 1 | automatminer.utils.tests package 2 | ================================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | automatminer.utils.tests.test\_log module 8 | ----------------------------------------- 9 | 10 | .. automodule:: automatminer.utils.tests.test_log 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | automatminer.utils.tests.test\_ml module 16 | ---------------------------------------- 17 | 18 | .. 
automodule:: automatminer.utils.tests.test_ml 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | automatminer.utils.tests.test\_pkg module 24 | ----------------------------------------- 25 | 26 | .. automodule:: automatminer.utils.tests.test_pkg 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | 32 | Module contents 33 | --------------- 34 | 35 | .. automodule:: automatminer.utils.tests 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. raw:: html 2 | 3 | 25 | 26 | .. title:: automatminer documentation 27 | 28 | .. image:: _static/logo.png 29 | :alt: server 30 | :align: center 31 | :width: 600px 32 | 33 | 34 | `Automatminer `_ is a tool for 35 | *automatically* creating **complete** machine 36 | learning pipelines for materials science, including automatic featurization 37 | with `matminer `_, feature 38 | reduction, and an AutoML backend. Put in a materials dataset, get out a machine 39 | that predicts materials properties. 40 | 41 | How it works 42 | ------------ 43 | 44 | Automatminer automatically decorates a dataset using hundreds of descriptor 45 | techniques from matminer's descriptor library, picks the most useful 46 | features for learning, and runs a separate AutoML pipeline. 47 | Once a pipeline has been fit, it can be summarized in a text file, saved to 48 | disk, or used to make predictions on new materials. 49 | 50 | .. image:: _static/pipe.png 51 | :alt: server 52 | :align: center 53 | 54 | Automatminer uses `pandas `_ dataframes for all of 55 | its working objects. Put dataframes in, get dataframes out. 56 | 57 | 58 | .. 
image:: _static/dataframe_pipe.png 59 | :alt: server 60 | :align: center 61 | :width: 800px 62 | 63 | 64 | Here's an example of training on known data, and extending the model to out of 65 | sample data. 66 | 67 | .. code-block:: python 68 | 69 | from automatminer.pipeline import MatPipe 70 | 71 | # Fit a pipeline to training data to predict band gap 72 | pipe = MatPipe() 73 | pipe.fit(train_df, "band gap") 74 | 75 | # Predict bandgap of some unknown materials 76 | predicted_df = pipe.predict(unknown_df) 77 | 78 | Overview 79 | -------- 80 | 81 | **Automatminer can work with many kinds of data:** 82 | 83 | - both computational and experimental data 84 | - small (~100 samples) to moderate (~100k samples) sized datasets 85 | - crystalline datasets 86 | - composition-only (i.e., unknown phases) datasets 87 | - datasets containing electronic bandstructures or density of states 88 | 89 | **Many kinds of target properties:** 90 | 91 | - electronic 92 | - mechanical 93 | - thermodynamic 94 | - any other kind of property 95 | 96 | **And many featurization (descriptor) techniques:** 97 | 98 | See `matminer's Table of Featurizers `_ 99 | for a full (and growing) list. 100 | 101 | 102 | **Automatminer is designed to be easy to use and reproducible** 103 | 104 | - Save pipelines which are portable across machines 105 | - Fit a complete pipeline with 1 line of code 106 | - Predict on new samples with 1 line of code 107 | - Presets for easy setup 108 | 109 | **Automatminer is automatic and accurate** 110 | 111 | - No hand tuning required 112 | - Comparable in accuracy to hand-tuned models in benchmark tests 113 | 114 | 115 | User manual 116 | -------------- 117 | 118 | .. toctree:: 119 | :maxdepth: 2 120 | 121 | installation.rst 122 | basic.rst 123 | advanced.rst 124 | datasets.rst 125 | tutorials.rst 126 | license.rst 127 | 128 | 129 | .. toctree:: 130 | :hidden: 131 | :maxdepth: 2 132 | 133 | Python API 134 | 135 | What's new? 
136 | ----------- 137 | 138 | Track changes to automatminer through the `changelog 139 | `_. 140 | 141 | Contributing / Contact / Support 142 | -------------------------------- 143 | 144 | Want to see something added or changed? Some ways to get involved are: 145 | 146 | - Help us improve the documentation – tell us where you got stuck and improve 147 | the install process for everyone. 148 | - Let us know if you'd like to see certain features. 149 | - Point us to areas of the code that are difficult to understand or use. 150 | - Contribute code! You can do this by forking 151 | `Automatminer on GitHub `_ 152 | and submitting a pull request. 153 | - Post to our `support forum `_. Don't be shy, we look forward to feedback! 154 | 155 | See our `contribution guidelines 156 | `_ 157 | for more information. For a list of contributors, see our 158 | `GitHub page `_. 159 | 160 | Citing Automatminer or MatBench 161 | -------------------------------- 162 | If you find Automatminer or the MatBench benchmarks helpful in your research, 163 | please consider citing our `publication in npj Computational Materials `_: 164 | 165 | 166 | .. code-block:: text 167 | 168 | Dunn, A., Wang, Q., Ganose, A., Dopp, D., Jain, A. Benchmarking Materials Property Prediction 169 | Methods: The Matbench Test Set and Automatminer Reference Algorithm. npj Computational Materials 170 | 6, 138 (2020). https://doi.org/10.1038/s41524-020-00406-3 171 | 172 | 173 | API documentation 174 | ------------------ 175 | 176 | Autogenerated API documentation. Beware! Only for the brave. 177 | 178 | - :ref:`modindex` 179 | - :ref:`genindex` 180 | - :ref:`search` 181 | 182 | 183 | 184 | 185 | 186 | 187 | -------------------------------------------------------------------------------- /docs/source/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | Automatminer supports Python 3.6.7 and Python 3.7.1+ on MacOS and Linux. 
Windows 5 | users may be able to install Automatminer (and we will try to help you as much 6 | as possible on the `forum `_), but it is not officially 7 | supported. 8 | 9 | 10 | From PyPI (using pip) 11 | --------------------- 12 | 13 | You can install the latest released version of automatminer through pip 14 | 15 | .. code-block:: bash 16 | 17 | pip install automatminer 18 | 19 | 20 | From source 21 | ----------- 22 | 23 | To install Automatminer from source, first clone the repository from GitHub, 24 | then use pip to install: 25 | 26 | .. code-block:: bash 27 | 28 | git clone https://github.com/hackingmaterials/automatminer.git 29 | cd automatminer 30 | pip install . 31 | 32 | If not installing from inside a virtual environment or conda environment, you 33 | may need to install as a *user* via: 34 | 35 | .. code-block:: bash 36 | 37 | pip install . --user 38 | 39 | -------------------------------------------------------------------------------- /docs/source/license.rst: -------------------------------------------------------------------------------- 1 | License 2 | ======= 3 | 4 | Automatminer is developed under a BSD-style License (an open-source license), 5 | reproduced below: 6 | 7 | .. include:: ../../LICENSE 8 | :literal: 9 | -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | automatminer 2 | ============ 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | automatminer 8 | -------------------------------------------------------------------------------- /docs/source/tutorials.rst: -------------------------------------------------------------------------------- 1 | Tutorials and Support 2 | ===================== 3 | 4 | Tutorials 5 | --------- 6 | 7 | There are Jupyter notebook tutorials for Automatminer hosted on the 8 | `matminer examples `_ 9 | repository on GitHub. 10 | 11 | .. 
image:: _static/matminer_examples.png 12 | :alt: server 13 | :align: center 14 | :width: 600px 15 | 16 | 17 | Support 18 | ------- 19 | 20 | Online support for all of our codes can be found on our 21 | `support forum `_. This is the place 22 | to work with the software developers to troubleshoot any problems you might have. 23 | 24 | .. image:: _static/forum.png 25 | :alt: server 26 | :align: center 27 | :width: 600px 28 | -------------------------------------------------------------------------------- /docs/tutorials.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Tutorials and Support — Automatminer 1.0.3.20200727 documentation 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 |
26 | 27 | 65 |
66 |
67 | 68 | 69 |
70 | 71 |
72 |

Tutorials and Support

73 |
74 |

Tutorials

75 |

There are Jupyter notebook tutorials for Automatminer hosted on the 76 | matminer examples 77 | repository on GitHub.

78 | server 79 |
80 |
81 |

Support

82 |

Online support for all of our codes can be found on our 83 | support forum. This is the place 84 | to work with the software developers to troubleshoot any problems you might have.

85 | server 86 |
87 |
88 | 89 | 90 |
91 | 92 |
93 |
94 |
95 |
96 | 103 | 104 | 105 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /docs/using.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Using Automatminer — Automatminer 2019.05.14_beta0 documentation 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 |
24 | 25 | 59 |
60 |
61 | 62 | 63 |
64 | 65 |
66 |

Using Automatminer

67 |
68 |

Training a pipeline

69 |
70 |
71 |

Making predictions

72 |
73 |
74 |

Running a benchmark

75 |
76 |
77 |

Customizing a pipeline

78 |
79 |
80 | 81 | 82 |
83 | 84 |
85 |
86 |
87 |
88 | 95 | 96 | 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | # Ensure this is the same value as max-line-length 3 | # under [flake8] in setup.cfg. 4 | line-length = 85 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Direct requirements of this project alone 2 | matminer==0.6.2 3 | pymatgen==2020.01.28 4 | tpot==0.11.0 5 | skrebate==0.6 6 | pyyaml==5.1.2 7 | scikit-learn==0.22.2 8 | 9 | # Also requirements of matminer 10 | # numpy 11 | # pandas 12 | # scikit_learn 13 | # pymatgen -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | # Requirements of this dev 2 | FireWorks==1.8.7 3 | GitPython==2.1.11 4 | black==19.3b0 5 | flake8==3.7.8 6 | isort==4.3.21 7 | pre-commit==1.18.3 8 | paramiko==2.6.0 9 | scp==0.13.2 10 | -------------------------------------------------------------------------------- /requirements_web.txt: -------------------------------------------------------------------------------- 1 | dash==1.4.1 2 | pandas -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | # Ensure this is the same value as line-length under [tool.black] in pyproject.toml. 3 | max-line-length = 85 4 | max-complexity = 12 5 | exclude = .git 6 | # E731: do not assign a lambda expression, use a def 7 | # W503: line break before binary operator 8 | # C901: function is too complex 9 | ignore = E731, W503, C901 10 | 11 | # Make isort play nicely with black's import formatting. 
"""Packaging script for automatminer (the regular package)."""
import os

from setuptools import find_packages, setup

module_dir = os.path.dirname(os.path.abspath(__file__))

# Read the requirements with a context manager so the file handle is closed
# deterministically instead of being left to the garbage collector.
with open(os.path.join(module_dir, "requirements.txt")) as f:
    reqs_raw = f.read()

# The version pins are passed through unchanged. Blank lines and "#" comment
# lines carry no requirement, so they are dropped rather than handed to
# setuptools as install_requires entries.
reqs_list = [
    line.strip()
    for line in reqs_raw.splitlines()
    if line.strip() and not line.strip().startswith("#")
]

# Version is MAJOR.MINOR.PATCH.YYYYMMDD
version = "1.0.3.20200727"

if __name__ == "__main__":
    setup(
        name='automatminer',
        version=version,
        description='automated machine learning for materials science',
        long_description="Automated machine learning for materials science. "
                         "https://github.com/hackingmaterials/automatminer",
        url='https://github.com/hackingmaterials/automatminer',
        # The author metadata field is a single string, not a list.
        author='Alex Dunn, Alex Ganose, Alireza Faghaninia, Qi Wang, '
               'Anubhav Jain',
        author_email='ardunn@lbl.gov',
        license='modified BSD',
        # NOTE(review): no "benchdev" package is visible in the repository
        # tree, while automatminer_dev and automatminer_web are not excluded
        # here -- confirm whether they should be.
        packages=find_packages(where=".", exclude=("benchdev", "benchdev.*")),
        package_data={},
        zip_safe=False,
        install_requires=reqs_list,
        extras_require={},
        classifiers=['Programming Language :: Python :: 3.6',
                     'Development Status :: 4 - Beta',
                     'Intended Audience :: Science/Research',
                     'Intended Audience :: System Administrators',
                     'Intended Audience :: Information Technology',
                     'Operating System :: OS Independent',
                     'Topic :: Other/Nonlisted Topic',
                     'Topic :: Scientific/Engineering'],
        test_suite='automatminer',
        # NOTE(review): this declares a test-time dependency on a
        # distribution literally named "tests" -- confirm this is intended.
        tests_require='tests',
        include_package_data=True
    )
"""Bench-dev (HT-benchmarking) package. If you're looking to install
automatminer (the regular package), just use setup.py."""

import os

from setuptools import find_packages, setup

from automatminer_dev import __version__

module_dir = os.path.dirname(os.path.abspath(__file__))

# Read the requirements with a context manager so the file handle is closed
# deterministically.
with open(os.path.join(module_dir, "requirements_dev.txt")) as f:
    reqs_raw = f.read()

# Relax exact pins ("==") to minimum versions (">="). Blank lines and "#"
# comment lines carry no requirement and are dropped.
reqs_list = [
    line.strip().replace("==", ">=")
    for line in reqs_raw.splitlines()
    if line.strip() and not line.strip().startswith("#")
]

if __name__ == "__main__":
    setup(
        name='automatminer_dev',
        version=__version__,
        description='benchmarking infrastructure for automatminer',
        long_description="",
        url='https://github.com/hackingmaterials/automatminer',
        # The author metadata field is a single string, not a list.
        author='Alex Dunn',
        author_email='ardunn@lbl.gov',
        license='modified BSD',
        # ``include`` takes an iterable of dotted package-name patterns. A
        # bare path-like string such as "./automatminer_dev" is unpacked into
        # single-character patterns that match no package, so nothing would
        # be packaged. Name the package and its subpackages explicitly.
        packages=find_packages(
            include=["automatminer_dev", "automatminer_dev.*"]
        ),
        package_data={},
        zip_safe=False,
        install_requires=reqs_list,
        extras_require={},
        classifiers=[])
"""Automatminer web edition. If you're looking to install
automatminer (the regular package), just use setup.py."""

import os

from setuptools import find_packages, setup

from automatminer_web import __version__

module_dir = os.path.dirname(os.path.abspath(__file__))

# Read the requirements with a context manager so the file handle is closed
# deterministically.
with open(os.path.join(module_dir, "requirements_web.txt")) as f:
    reqs_raw = f.read()

# Relax exact pins ("==") to minimum versions (">="). Blank lines and "#"
# comment lines carry no requirement and are dropped.
reqs_list = [
    line.strip().replace("==", ">=")
    for line in reqs_raw.splitlines()
    if line.strip() and not line.strip().startswith("#")
]

if __name__ == "__main__":
    setup(
        name='automatminer_web',
        description='a web app for automatminer',
        long_description="",
        url='https://github.com/hackingmaterials/automatminer',
        # The author metadata field is a single string, not a list.
        author='Alex Dunn',
        author_email='ardunn@lbl.gov',
        license='modified BSD',
        # ``include`` takes an iterable of dotted package-name patterns. A
        # bare path-like string such as "./automatminer_web" is unpacked into
        # single-character patterns that match no package, so nothing would
        # be packaged. Name the package and its subpackages explicitly.
        packages=find_packages(
            include=["automatminer_web", "automatminer_web.*"]
        ),
        package_data={},
        version=__version__,
        zip_safe=False,
        install_requires=reqs_list,
        extras_require={},
        classifiers=[]
    )
"""Deployment file to facilitate releases.
"""
import os
import json
import webbrowser
import datetime
import requests
from invoke import task
from automatminer import __version__
from monty.os import cd

__author__ = ["Alex Dunn", "Shyue Ping Ong", "Anubhav Jain"]


# Making and updating documentation
@task
def make_doc(ctx):
    """Regenerate the API stubs, build the HTML docs and copy them into docs/.

    Args:
        ctx: The invoke context used to run the shell commands.
    """
    with cd("docs"):
        ctx.run("sphinx-apidoc -o ./source -f ../automatminer")
        ctx.run("make html")
        # Move the built pages up into docs/ itself, then drop the build tree.
        ctx.run("cp -r build/html/* .")
        ctx.run("rm -r build")
        ctx.run("touch .nojekyll")


@task
def open_doc(ctx):
    """Open the locally built docs/index.html in the default web browser.

    Args:
        ctx: The invoke context (unused).
    """
    pth = os.path.abspath("docs/index.html")
    webbrowser.open("file://" + pth)


@task
def version_check(ctx):
    """Check that setup.py and the package agree on a version dated today.

    Args:
        ctx: The invoke context (unused).

    Raises:
        IOError: If no ``version = ...`` line is found in setup.py.
        ValueError: If the setup.py version differs from
            ``automatminer.__version__``, or if today's date (YYYYMMDD) is
            not contained in the version.
    """
    setup_version = None
    with open("setup.py", "r") as f:
        for line in f:
            if "version = " in line:
                # The version is the last space-separated token on the line;
                # strip the surrounding quotes and the line ending.
                setup_version = line.split(" ")[-1].replace('"', "").strip()

    if setup_version is None:
        raise IOError("Could not parse setup.py for version.")

    if __version__ != setup_version:
        raise ValueError(f"There is a mismatch in the version between the "
                         f"automatminer __init__ and the setup. Please "
                         f"make sure they are the same."
                         f"\n DIFF: {__version__}, {setup_version}")

    print("Setup and init versions match eachother.")
    today = datetime.date.today().strftime("%Y%m%d")
    if today not in __version__:
        raise ValueError(f"The version {__version__} does not match "
                         f"the date format {today}!")
    print("Date is contained within the version.")


@task
def update_changelog(ctx):
    """Regenerate CHANGELOG.md and commit it.

    Args:
        ctx: The invoke context used to run the shell commands.
    """
    ctx.run('github_changelog_generator --user hackingmaterials --project automatminer')
    ctx.run("git add CHANGELOG.md")
    ctx.run("git commit CHANGELOG.md -m 'update changelog [skip ci]'")


@task
def full_tests_circleci(ctx):
    """Run the intensive CircleCI test script from dev_scripts/.

    Args:
        ctx: The invoke context used to run the shell command.
    """
    # The script shipped in dev_scripts/ is run_intensive_circleci.sh; there
    # is no run_intensive.sh in the repository tree.
    ctx.run("./dev_scripts/run_intensive_circleci.sh")


@task
def release(ctx):
    """Create a GitHub release tagged ``v<__version__>`` on master.

    Reads the API token from the ``GITHUB_RELEASES_TOKEN`` environment
    variable.

    Args:
        ctx: The invoke context, passed on to ``version_check``.

    Raises:
        KeyError: If ``GITHUB_RELEASES_TOKEN`` is not set.
        requests.HTTPError: If GitHub answers with an error status.
    """
    version_check(ctx)
    payload = {
        "tag_name": "v" + __version__,
        "target_commitish": "master",
        "name": "v" + __version__,
        "body": "",
        "draft": False,
        "prerelease": False
    }
    response = requests.post(
        "https://api.github.com/repos/hackingmaterials/automatminer/releases",
        data=json.dumps(payload),
        headers={
            "Authorization": "token " + os.environ["GITHUB_RELEASES_TOKEN"]},
        # Without a timeout a stalled connection would hang the task forever.
        timeout=30)
    # Print the body first so the API's error details stay visible, then fail
    # the task if the release was rejected.
    print(response.text)
    response.raise_for_status()


@task
def publish(ctx):
    """Build the sdist and wheel and upload them with twine.

    Args:
        ctx: The invoke context used to run the shell commands.
    """
    version_check(ctx)
    # warn=True: missing dist/ or build/ directories are not an error.
    ctx.run("rm -r dist build", warn=True)
    ctx.run("python3 setup.py sdist bdist_wheel")
    ctx.run("twine upload dist/* --verbose")