├── LICENSE.md ├── README.md ├── database.py ├── elasticsearch_config.yaml ├── environments ├── environment-linux.yaml └── environment-osx.yaml ├── experiment-templates ├── dataset_metadata.yaml ├── hyperopt_config.yaml └── task_template.yaml ├── experiment_driver.py ├── globals.py ├── lbt ├── __init__.py ├── build_def_files.py ├── datasets │ ├── __init__.py │ ├── base_dataset.py │ ├── toy-datasets │ │ ├── fever.csv │ │ ├── goemotions.csv │ │ └── toy_agnews.csv │ └── toy_datasets.py ├── experiments.py ├── metrics │ ├── __init__.py │ ├── base_metric.py │ ├── instance_prices.json │ ├── lbt_metrics.py │ └── utils.py ├── tools │ ├── __init__.py │ ├── robustnessgym │ │ ├── __init__.py │ │ ├── base_subpopulation.py │ │ ├── lbt_subpopulations.py │ │ └── robustnessgym.py │ ├── textattack │ │ ├── __init__.py │ │ └── textattack.py │ └── utils.py ├── utils │ ├── __pycache__ │ │ ├── experiment_utils.cpython-36.pyc │ │ ├── experiment_utils.cpython-37.pyc │ │ ├── experiment_utils.cpython-38.pyc │ │ ├── metadata_utils.cpython-36.pyc │ │ └── metadata_utils.cpython-37.pyc │ ├── experiment_utils.py │ ├── metadata_utils.py │ └── test_utils.py └── visualizations │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-37.pyc │ └── visualize.cpython-37.pyc │ └── visualize.py ├── model-configs ├── bert_hyperopt.yaml ├── distilbert_hyperopt.yaml ├── electra_hyperopt.yaml ├── resnet_hyperopt.yaml ├── rnn_hyperopt.yaml ├── roberta_hyperopt.yaml ├── stackedcnn_hyperopt.yaml ├── stackedparallelcnn_hyperopt.yaml └── t5_hyperopt.yaml └── upload_to_db.py /LICENSE.md: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2021 Stanford Hazy Research 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ludwig Benchmarking Toolkit 2 | The Ludwig Benchmarking Toolkit is a personalized benchmarking toolkit for running end-to-end benchmark studies across an extensible set of tasks, deep learning models, standard datasets and evaluation metrics. 3 | 4 | # Getting set-up 5 | To get started, use the following commands to set-up your conda environment. 
6 | ```
7 | git clone https://github.com/HazyResearch/ludwig-benchmarking-toolkit.git
8 | cd ludwig-benchmarking-toolkit
9 | conda env create -f environments/{environment-osx.yaml, environment-linux.yaml}
10 | conda activate lbt
11 | ```
12 | 
13 | # Relevant files and directories
14 | `experiment-templates/task_template.yaml`: Every task (e.g. text classification) has its own task template. The template specifies the model architecture (encoder and decoder structure), training parameters, and a hyperopt configuration for the task at hand. Most of the template's values are populated from the hyperopt_config.yaml and dataset_metadata.yaml files at training time. The sample task template located in `experiment-templates/task_template.yaml` is for text classification. See `sample-task-templates/` for other examples.
15 | 
16 | `experiment-templates/hyperopt_config.yaml`: provides ranges of values for training parameters and hyperopt parameters that will populate the hyperopt configuration in the model template.
17 | 
18 | `experiment-templates/dataset_metadata.yaml`: contains a list of all available datasets (and associated metadata) that hyperparameter optimization can be performed over.
19 | 
20 | `model-configs/`: contains all encoder-specific YAML files. Each file specifies possible values for the relevant encoder parameters that will be optimized over. Each file in this directory adheres to the naming convention `{encoder_name}_hyperopt.yaml`.
21 | 
22 | `hyperopt-experiment-configs/`: houses all experiment configs built from the templates specified above (note: this folder is populated at run-time) and used when the hyperopt experiment is called. At a high level, each config file specifies the training and hyperopt information for a (task, dataset, architecture) combination. An example might be (text classification, SST2, BERT).
23 | 
24 | `elasticsearch_config.yaml`: an optional file to define if experiment data will be saved to an Elasticsearch database.
25 | 
26 | 
27 | # USAGE
28 | ### **Command-Line Usage**
29 | 
30 | ### *Running your first TOY experiment*:
31 | 
32 | For testing/setup purposes, we have included a toy dataset called toy_agnews. This dataset contains a small set of training, test and validation samples from the original agnews dataset.
33 | 
34 | Before running a full-scale experiment, we recommend running an experiment locally on the toy dataset:
35 | ```
36 | python experiment_driver.py --run_environment local --datasets toy_agnews --custom_model_list rnn
37 | ```
38 | 
39 | ### *Running your first REAL experiment*:
40 | 
41 | Steps for configuring + running an experiment:
42 | 1. Declare and configure the search space of all non-model-specific training and preprocessing hyperparameters in the `experiment-templates/hyperopt_config.yaml` file. The parameters specified in this file will be used across all model experiments.
43 | 2. Declare and configure the search space of model-specific hyperparameters in the `{encoder}_hyperopt.yaml` files in `./model-configs`
44 | 
45 | **NOTE**: 
46 | * for both (1) and (2) see the [Ludwig Hyperparameter Optimization guide](https://ludwig-ai.github.io/ludwig-docs/user_guide/#hyper-parameter-optimization) for the training, preprocessing, and input/output feature parameters
47 | that can be used in the hyperopt search
48 | * if the executor type is `Ray`, the available search spaces and input format differ slightly from the built-in Ludwig types. Please see the [Ray Tune search space docs](https://docs.ray.io/en/master/tune/api_docs/search_space.html) for more information.
49 | 
50 | 3. Run the following command specifying the datasets, encoders, path to elastic DB index config file, run environment and more:
51 | 
52 | ```
53 | python experiment_driver.py \
54 |     --experiment_output_dir \
55 |     --run_environment {local, gcp} \
56 |     --elasticsearch_config \
57 |     --dataset_cache_dir \
58 |     --custom_model_list \
59 |     --datasets \
60 |     --resume_existing_exp bool
61 | 
62 | ```
63 | 
64 | **NOTE:** Please use `python experiment_driver.py -h` to see the list of available datasets, encoders and args.
65 | 
66 | ### **API Usage**
67 | It is also possible to run and customize experiments using LBT's APIs. In the following section,
68 | we describe the three flavors of APIs included in LBT.
69 | 
70 | ### `experiment` API
71 | This API provides an alternative method for running experiments. Note that running experiments via the API still requires populating the aforementioned configuration files.
72 | 
73 | ```python
74 | from lbt.experiments import experiment
75 | 
76 | experiment(
77 |     models=['rnn', 'bert'],
78 |     datasets=['agnews'],
79 |     run_environment="local",
80 |     elastic_search_config=None,
81 |     resume_existing_exp=False,
82 | )
83 | ```
84 | 
85 | ### `tools` API
86 | This API provides access to two tooling integrations (TextAttack and Robustness Gym (RG)). The TextAttack API can be used to generate adversarial attacks. Moreover, users can use the TextAttack interface to augment data files. The RG API empowers users to inspect model performance on a set of generic, pre-built slices and to add more slices for their specific datasets and use cases.
87 | 
88 | ```python
89 | from lbt.tools.robustnessgym import RG
90 | from lbt.tools.textattack import attack, augment
91 | 
92 | # Robustness Gym API Usage
93 | RG(dataset_name="AGNews",
94 |    models=["bert", "rnn"],
95 |    path_to_dataset="agnews.csv",
96 |    subpopulations=["entities", "positive_words", "negative_words"])
97 | 
98 | # TextAttack API Usage
99 | attack(dataset_name="AGNews", path_to_model="agnews/model/rnn_model",
100 |     path_to_dataset="agnews.csv", attack_recipe=["CharSwapAugmenter"])
101 | 
102 | augment(dataset_name="AGNews", transformations_per_example=1,
103 |     path_to_dataset="agnews.csv", augmenter=["WordNetAugmenter"])
104 | ```
105 | 
106 | ### `visualizations` API
107 | This API provides out-of-the-box visualizations of learning behavior, model performance, and hyperparameter optimization, using the training and evaluation statistics generated during model training.
108 | 
109 | ```python
110 | from lbt.visualizations import compare_performance_viz, learning_curves_viz, hyperopt_viz
111 | 
112 | # compare model performance
113 | compare_performance_viz(
114 |     dataset_name="toy_agnews",
115 |     model_name="rnn",
116 |     output_feature_name="class_index",
117 | )
118 | 
119 | # compare training and validation trajectory
120 | learning_curves_viz(
121 |     dataset_name="toy_agnews",
122 |     model_name="rnn",
123 |     output_feature_name="class_index",
124 | )
125 | 
126 | # visualize hyperparameter optimization search
127 | hyperopt_viz(
128 |     dataset_name="toy_agnews",
129 |     model_name="rnn",
130 |     output_dir=".",
131 | )
132 | ```
133 | 
134 | # EXPERIMENT EXTENSIBILITY
135 | ### **Adding new custom datasets**
136 | 
137 | Adding a custom dataset requires creating a new `LBTDataset` class and adding it
138 | to the dataset registry. Creating an `LBTDataset` object requires implementing
139 | three class methods: download, process and load. Please see the [`ToyAGNews`](lbt/datasets/toy_datasets.py) dataset as an example.
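For illustration, here is a minimal sketch of what such a class might look like. It is not a drop-in implementation: the `my_csv_dataset` name, the `csv_path` argument, and the local-CSV layout are hypothetical and only meant to show the required methods and the registration decorator; a real dataset would typically fetch its raw data in `download` and do more substantial cleaning in `process`.

```python
import os

import pandas as pd

from lbt.datasets import register_dataset
from lbt.datasets.base_dataset import LBTDataset


@register_dataset("my_csv_dataset")  # hypothetical registry name
class MyCSVDataset(LBTDataset):
    """Toy example backed by a local CSV file that already has the columns Ludwig expects."""

    def __init__(self, cache_dir, csv_path="my_dataset.csv"):
        super().__init__(
            dataset_name="my_csv_dataset",
            processed_file_name="my_csv_dataset_processed.csv",
            cache_dir=cache_dir,
        )
        self.csv_path = csv_path  # hypothetical local file

    def download(self) -> None:
        # Nothing to fetch for a local file; a real dataset would download
        # its raw data into self.cache_dir here.
        os.makedirs(self.cache_dir, exist_ok=True)

    def process(self) -> None:
        # Convert the raw data into the columnar CSV format used for training
        # and write it to the processed dataset path.
        self.download()
        df = pd.read_csv(self.csv_path)
        df.to_csv(self.processed_dataset_path, index=False)

    def load(self) -> pd.DataFrame:
        # Return the processed data as a Pandas DataFrame.
        if not os.path.exists(self.processed_dataset_path):
            self.process()
        return pd.read_csv(self.processed_dataset_path)

    @property
    def processed_dataset_path(self) -> str:
        return os.path.join(self.cache_dir, self.config["csv_filename"])
```

Once registered, such a dataset can be built through the registry like the pre-built ones, e.g. `lbt.datasets.build_dataset("my_csv_dataset", cache_dir=...)`.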
140 | 
141 | ### **Adding new metrics**
142 | 
143 | Adding custom evaluation metrics requires creating a new `LBTMetric` class and adding it
144 | to the metrics registry. Creating an `LBTMetric` object requires implementing
145 | the `run` class method, which takes as potential inputs a path to a model directory, a path to a dataset, the training batch size, and training statistics. Please see the [`pre-built LBT metrics`](lbt/metrics/lbt_metrics.py) for examples.
146 | 
147 | # ELASTICSEARCH RESEARCH DATABASE
148 | To get credentials to upload experiments to the shared Elasticsearch research database, please fill out this [form](https://forms.gle/rSQqQ3gAtTAURsxKA).
149 | 
150 | 
151 | 
--------------------------------------------------------------------------------
/database.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import json
3 | import logging
4 | import os
5 | import ray
6 | import socket
7 | from elasticsearch import Elasticsearch
8 | 
9 | from lbt.utils.experiment_utils import (
10 |     format_fields_float,
11 |     get_model_ckpt_paths,
12 |     hash_dict,
13 |     substitute_dict_parameters,
14 | )
15 | 
16 | # from utils.metadata_utils import append_experiment_metadata
17 | from lbt.metrics import get_experiment_metadata
18 | 
19 | hostname = socket.gethostbyname(socket.gethostname())
20 | 
21 | 
22 | # TODO: ASN --> DECOUPLE BUILDING ES DOCUMENT W/SAVING
23 | @ray.remote(num_cpus=0, resources={f"node:{hostname}": 0.001})
24 | def save_results_to_es(
25 |     experiment_attr: dict,
26 |     hyperopt_results: list,
27 |     tune_executor: str,
28 |     top_n_trials: int = None,
29 |     reupload=False,
30 |     num_gpus=0,
31 | ):
32 |     elastic_config = experiment_attr["elastic_config"]
33 | 
34 |     es_db = Database(
35 |         elastic_config["host"],
36 |         (elastic_config["username"], elastic_config["password"]),
37 |         elastic_config["username"],
38 |         elastic_config["index"],
39 |     )
40 |     # save top_n model configs to elastic
41 |     if top_n_trials is not None and len(hyperopt_results) > top_n_trials:
42 |         hyperopt_results = hyperopt_results[0:top_n_trials]
43 | 
44 |     hyperopt_run_data = get_model_ckpt_paths(
45 |         hyperopt_results, experiment_attr["output_dir"], executor=tune_executor
46 |     )
47 | 
48 |     sampled_params = {}
49 | 
50 |     # ensures that all numerical values are of type float
51 |     format_fields_float(hyperopt_results)
52 |     for run in hyperopt_run_data:
53 |         new_config = substitute_dict_parameters(
54 |             copy.deepcopy(experiment_attr["model_config"]),
55 |             parameters=run["hyperopt_results"]["parameters"],
56 |         )
57 |         del new_config["hyperopt"]
58 | 
59 |         # do some accounting of duplicate hyperparam configs (this count will
60 |         # be added to the dict which will be hashed for the elastic document
61 |         # id)
62 |         param_hash = hash_dict(run["hyperopt_results"]["parameters"])
63 |         if param_hash in sampled_params:
64 |             sampled_params[param_hash] += 1
65 |         else:
66 |             sampled_params[param_hash] = 1
67 | 
68 |         document = {
69 |             "hyperopt_results": run["hyperopt_results"],
70 |             "model_path": run["model_path"],
71 |         }
72 | 
73 |         try:
74 |             get_experiment_metadata(
75 |                 document,
76 |                 model_path=run["model_path"],
77 |                 data_path=experiment_attr["dataset_path"],
78 |                 run_stats=run,
79 |                 num_gpus=num_gpus,
80 |             )
81 |         except:
82 |             pass
83 | 
84 |         formatted_document = es_db.format_document(
85 |             document,
86 |             encoder=experiment_attr["encoder"],
87 |             dataset=experiment_attr["dataset"],
88 | 
config=experiment_attr["model_config"], 89 | ) 90 | 91 | formatted_document["sampled_run_config"] = new_config 92 | ds = experiment_attr["dataset"] 93 | enc = experiment_attr["encoder"] 94 | # doc_key = run["hyperopt_results"]["eval_stats"] 95 | 96 | trial_count = sampled_params[param_hash] 97 | 98 | doc_key = copy.deepcopy(new_config) 99 | doc_key["trial"] = trial_count 100 | try: 101 | es_db.upload_document(hash_dict(doc_key), formatted_document) 102 | logging.info(f"{ds} x {enc}" f"uploaded to elastic.") 103 | except: 104 | logging.warning( 105 | f"error uploading" f"{ds} x {enc}" f"to elastic..." 106 | ) 107 | return 1 108 | 109 | 110 | class Database: 111 | def __init__(self, host, http_auth, user_id, index): 112 | self.host = host 113 | self.http_auth = http_auth 114 | self.user_id = user_id 115 | self.index = index 116 | self._initialize_db() 117 | self._create_index(self.index) 118 | 119 | def _initialize_db(self): 120 | self.es_connection = Elasticsearch( 121 | [self.host], http_auth=self.http_auth 122 | ) 123 | 124 | def _create_index(self, index_name: str): 125 | mapping = { 126 | "mappings": { 127 | "_doc": { 128 | "properties": {"sampled_run_config": {"type": "nested"}} 129 | } 130 | } 131 | } 132 | self.es_connection.indices.create( 133 | index=index_name, body=mapping, include_type_name=True, ignore=400 134 | ) 135 | 136 | def upload_document(self, id, document): 137 | self.es_connection.index(index=self.index, id=id, body=document) 138 | 139 | def remove_document(self, id): 140 | self.es_connection.delete(index=self.index, id=id) 141 | 142 | def document_exists(self, id): 143 | return self.es_connection.exists(index=self.index, id=id) 144 | 145 | def search(self, query, size=1000): 146 | return self.es_connection.search( 147 | index=self.index, body=query, size=size 148 | ) 149 | 150 | def upload_document_from_outputdir( 151 | self, 152 | dir_path, 153 | encoder, 154 | dataset, 155 | ): 156 | hyperopt_stats = json.load( 157 | open(os.path.join(dir_path, "hyperopt_statistics.json"), "rb"), 158 | parse_int=float, 159 | ) 160 | 161 | formatted_document = self.format_document( 162 | hyperopt_stats, encoder, dataset 163 | ) 164 | 165 | self.es_connection.index( 166 | index=self.index, 167 | id=hash_dict(hyperopt_stats["hyperopt_config"]), 168 | body=formatted_document, 169 | ) 170 | 171 | def format_document(self, document, encoder, dataset, config=None): 172 | formatted_document = { 173 | "user_id": self.user_id, 174 | "encoder": encoder, 175 | "dataset": dataset, 176 | } 177 | formatted_document.update(document) 178 | if config is not None: 179 | formatted_document.update({"hyperopt_exp_config": config}) 180 | 181 | return formatted_document 182 | -------------------------------------------------------------------------------- /elasticsearch_config.yaml: -------------------------------------------------------------------------------- 1 | host : "" 2 | username : "" 3 | password : "" 4 | index : "" 5 | -------------------------------------------------------------------------------- /environments/environment-linux.yaml: -------------------------------------------------------------------------------- 1 | name: lbt 2 | channels: 3 | - defaults 4 | dependencies: 5 | - ca-certificates=2021.5.25 6 | - certifi=2020.5.30 7 | - libffi=3.3 8 | - ncurses=6.2 9 | - openssl=1.1.1k 10 | - pip=21.1.1 11 | - python=3.8.10 12 | - readline=8.1 13 | - setuptools=52.0.0 14 | - sqlite=3.35.4 15 | - tk=8.6.10 16 | - wheel=0.36.2 17 | - xz=5.2.5 18 | - zlib=1.2.11 19 | - pip: 20 | - absl-py==0.12.0 
21 | - aiohttp==3.7.4.post0 22 | - aiohttp-cors==0.7.0 23 | - aioredis==1.3.1 24 | - astunparse==1.6.3 25 | - async-timeout==3.0.1 26 | - attrs==20.3.0 27 | - bayesmark==0.0.8 28 | - blessings==1.7 29 | - cachetools==4.2.1 30 | - cffi==1.14.5 31 | - chardet==4.0.0 32 | - click==7.1.2 33 | - cloudpickle==1.6.0 34 | - colorama==0.4.4 35 | - colorful==0.5.4 36 | - configspace==0.4.18 37 | - cython==0.29.22 38 | - dill==0.3.3 39 | - docker==4.4.4 40 | - elasticsearch==7.11.0 41 | - et-xmlfile==1.0.1 42 | - fiber==0.2.1 43 | - filelock==3.0.12 44 | - flatbuffers==1.12 45 | - gast==0.3.3 46 | - gitdb==4.0.5 47 | - gitpython==3.1.14 48 | - google-api-core==1.26.1 49 | - google-auth==1.27.1 50 | - google-auth-oauthlib==0.4.3 51 | - google-pasta==0.2.0 52 | - googleapis-common-protos==1.53.0 53 | - gpustat==0.6.0 54 | - gputil==1.4.0 55 | - grpcio==1.32.0 56 | - h5py==2.10.0 57 | - hiredis==1.1.0 58 | - idna==2.10 59 | - importlib-metadata==3.7.2 60 | - iniconfig==1.1.1 61 | - joblib==1.0.1 62 | - jsonschema==3.2.0 63 | - keras-preprocessing==1.1.2 64 | - kubernetes==12.0.1 65 | - git+https://github.com/ANarayan/ludwig.git@09dfe62a389226e9a125f2a66bb6eb6569f25130 66 | - git+https://github.com/Breakend/experiment-impact-tracker.git 67 | - git+https://github.com/robustness-gym/robustness-gym.git@8be2b1124e1a4fecdad15d73da073b9115f0f289 68 | - lxml==4.6.2 69 | - markdown==3.3.4 70 | - msgpack==1.0.2 71 | - multidict==5.1.0 72 | - nnpy-bundle==1.4.2.post1 73 | - numexpr==2.7.3 74 | - numpy>=1.18.0 75 | - nvidia-ml-py3==7.352.0 76 | - oauthlib==3.1.0 77 | - opencensus==0.7.12 78 | - opencensus-context==0.1.2 79 | - openpyxl==3.0.7 80 | - opt-einsum==3.3.0 81 | - packaging==20.9 82 | - pandas==1.1.4 83 | - pathvalidate==2.3.2 84 | - pluggy==0.13.1 85 | - poap==0.1.26 86 | - prometheus-client==0.9.0 87 | - protobuf==3.15.6 88 | - psutil==5.8.0 89 | - py==1.10.0 90 | - py-spy==0.3.4 91 | - pyaml==20.4.0 92 | - pyarrow==3.0.0 93 | - pyasn1==0.4.8 94 | - pyasn1-modules==0.2.8 95 | - pycparser==2.20 96 | - pydoe2==1.3.0 97 | - pyparsing==2.4.7 98 | - pyrsistent==0.17.3 99 | - pysot==0.3.3 100 | - pytest==6.2.2 101 | - python-dateutil==2.8.1 102 | - pytz==2021.1 103 | - pyyaml==5.4.1 104 | - https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-2.0.0.dev0-cp38-cp38-manylinux2014_x86_64.whl 105 | - redis==3.5.3 106 | - requests==2.25.1 107 | - requests-oauthlib==1.3.0 108 | - rsa==4.7.2 109 | - scikit-learn==0.24.1 110 | - scikit-optimize==0.8.1 111 | - scipy==1.4.1 112 | - six==1.15.0 113 | - smmap==3.0.5 114 | - tables==3.6.1 115 | - tabulate==0.8.9 116 | - tensorboard==2.4.1 117 | - tensorboard-plugin-wit==1.8.0 118 | - tensorboardx==2.1 119 | - tensorflow==2.3.1 120 | - tensorflow-estimator<2.4.0 121 | - termcolor==1.1.0 122 | - textattack==0.2.15 123 | - tfa-nightly==0.12.0.dev20201215223743 124 | - threadpoolctl==2.1.0 125 | - toml==0.10.2 126 | - tqdm>=4.27.0 127 | - transformers==4.2.1 128 | - typeguard==2.11.1 129 | - typing-extensions==3.7.4.3 130 | - urllib3==1.24.3 131 | - websocket-client==0.58.0 132 | - werkzeug==1.0.1 133 | - wget==3.2 134 | - wrapt==1.12.1 135 | - xarray==0.17.0 136 | - xlrd==2.0.1 137 | - xlwt==1.3.0 138 | - yarl==1.6.3 139 | - zipp==3.4.1 140 | -------------------------------------------------------------------------------- /environments/environment-osx.yaml: -------------------------------------------------------------------------------- 1 | name: lbt 2 | channels: 3 | - defaults 4 | dependencies: 5 | - ca-certificates=2021.5.25 6 | - certifi=2021.5.30 7 | - libcxx=10.0.0 
8 | - libffi=3.3 9 | - ncurses=6.2 10 | - openssl=1.1.1k 11 | - pip=21.1.1 12 | - python=3.8.10 13 | - readline=8.1 14 | - setuptools=52.0.0 15 | - sqlite=3.35.4 16 | - tk=8.6.10 17 | - wheel=0.36.2 18 | - xz=5.2.5 19 | - zlib=1.2.11 20 | - pip: 21 | - absl-py==0.12.0 22 | - aiohttp==3.7.4.post0 23 | - aiohttp-cors==0.7.0 24 | - aioredis==1.3.1 25 | - astunparse==1.6.3 26 | - async-timeout==3.0.1 27 | - attrs==20.3.0 28 | - bayesmark==0.0.8 29 | - blessings==1.7 30 | - cachetools==4.2.1 31 | - cffi==1.14.5 32 | - chardet==4.0.0 33 | - click==7.1.2 34 | - cloudpickle==1.6.0 35 | - colorama==0.4.4 36 | - colorful==0.5.4 37 | - configspace==0.4.18 38 | - cython==0.29.22 39 | - dill==0.3.3 40 | - docker==4.4.4 41 | - elasticsearch==7.11.0 42 | - et-xmlfile==1.0.1 43 | - fiber==0.2.1 44 | - filelock==3.0.12 45 | - flatbuffers==1.12 46 | - gast==0.3.3 47 | - gitdb==4.0.5 48 | - gitpython==3.1.14 49 | - google-api-core==1.26.1 50 | - google-auth==1.27.1 51 | - google-auth-oauthlib==0.4.3 52 | - google-pasta==0.2.0 53 | - googleapis-common-protos==1.53.0 54 | - gpustat==0.6.0 55 | - gputil==1.4.0 56 | - grpcio==1.32.0 57 | - h5py==2.10.0 58 | - hiredis==1.1.0 59 | - idna==2.10 60 | - importlib-metadata==3.7.2 61 | - iniconfig==1.1.1 62 | - joblib==1.0.1 63 | - jsonschema==3.2.0 64 | - keras-preprocessing==1.1.2 65 | - kubernetes==12.0.1 66 | - git+https://github.com/ANarayan/ludwig.git@09dfe62a389226e9a125f2a66bb6eb6569f25130 67 | - git+https://github.com/Breakend/experiment-impact-tracker.git 68 | - git+https://github.com/robustness-gym/robustness-gym.git@8be2b1124e1a4fecdad15d73da073b9115f0f289 69 | - lxml==4.6.2 70 | - markdown==3.3.4 71 | - msgpack==1.0.2 72 | - multidict==5.1.0 73 | - nnpy-bundle==1.4.2.post1 74 | - numexpr==2.7.3 75 | - numpy>=1.18.0 76 | - nvidia-ml-py3==7.352.0 77 | - oauthlib==3.1.0 78 | - opencensus==0.7.12 79 | - opencensus-context==0.1.2 80 | - openpyxl==3.0.7 81 | - opt-einsum==3.3.0 82 | - packaging==20.9 83 | - pandas==1.1.4 84 | - pathvalidate==2.3.2 85 | - pluggy==0.13.1 86 | - poap==0.1.26 87 | - prometheus-client==0.9.0 88 | - protobuf==3.15.6 89 | - psutil==5.8.0 90 | - py==1.10.0 91 | - py-spy==0.3.4 92 | - pyaml==20.4.0 93 | - pyarrow==3.0.0 94 | - pyasn1==0.4.8 95 | - pyasn1-modules==0.2.8 96 | - pycparser==2.20 97 | - pydoe2==1.3.0 98 | - pyparsing==2.4.7 99 | - pyrsistent==0.17.3 100 | - pysot==0.3.3 101 | - pytest==6.2.2 102 | - python-dateutil==2.8.1 103 | - pytz==2021.1 104 | - pyyaml==5.4.1 105 | - https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-2.0.0.dev0-cp38-cp38-macosx_10_13_x86_64.whl 106 | - redis==3.5.3 107 | - requests==2.25.1 108 | - requests-oauthlib==1.3.0 109 | - rsa==4.7.2 110 | - scikit-learn==0.24.1 111 | - scikit-optimize==0.8.1 112 | - scipy==1.4.1 113 | - six==1.15.0 114 | - smmap==3.0.5 115 | - tables==3.6.1 116 | - tabulate==0.8.9 117 | - tensorboard==2.4.1 118 | - tensorboard-plugin-wit==1.8.0 119 | - tensorboardx==2.1 120 | - tensorflow==2.3.1 121 | - textattack==0.2.15 122 | - tensorflow-estimator<2.4.0 123 | - termcolor==1.1.0 124 | - tfa-nightly==0.12.0.dev20201215223743 125 | - threadpoolctl==2.1.0 126 | - toml==0.10.2 127 | - tqdm>=4.27.0 128 | - transformers==4.2.1 129 | - typeguard==2.11.1 130 | - typing-extensions==3.7.4.3 131 | - urllib3==1.24.3 132 | - websocket-client==0.58.0 133 | - werkzeug==1.0.1 134 | - wget==3.2 135 | - wrapt==1.12.1 136 | - xarray==0.17.0 137 | - xlrd==2.0.1 138 | - xlwt==1.3.0 139 | - yarl==1.6.3 140 | - zipp==3.4.1 141 | 
-------------------------------------------------------------------------------- /experiment-templates/dataset_metadata.yaml: -------------------------------------------------------------------------------- 1 | AGNews: 2 | data_class: AGNews 3 | input_features: 4 | - name: description 5 | type: text 6 | output_features: 7 | - name: class_index 8 | type: category 9 | 10 | AmazonPolarity: 11 | data_class: AmazonPolarity 12 | input_features: 13 | - name: review_text 14 | type: text 15 | output_features: 16 | - name: label 17 | type: category 18 | 19 | AmazonReviews: 20 | data_class: AmazonReviews 21 | input_features: 22 | - name: review_text 23 | type: text 24 | output_features: 25 | - name: label 26 | type: category 27 | 28 | DBPedia: 29 | data_class: DBPedia 30 | input_features: 31 | - name: content 32 | type: text 33 | output_features: 34 | - name: label 35 | type: category 36 | 37 | EthosBinary: 38 | data_class: EthosBinary 39 | input_features: 40 | - name: comment 41 | type: text 42 | output_features: isHate 43 | type: category 44 | 45 | GoEmotions: 46 | data_class: GoEmotions 47 | input_features: 48 | - name: text 49 | type: text 50 | output_features: 51 | - name: emotion_ids 52 | type: set 53 | 54 | Irony: 55 | data_class: Irony 56 | input_features: 57 | - name: comment_text 58 | type: text 59 | output_features: 60 | - name: label 61 | type: category 62 | 63 | SST2: 64 | data_class: SST2 65 | input_features: 66 | - name: sentence 67 | type: text 68 | output_features: 69 | - name: label 70 | type: category 71 | 72 | SST5: 73 | data_class: SST5 74 | input_features: 75 | - name: sentence 76 | type: text 77 | output_features: 78 | - name: label 79 | type: category 80 | 81 | YahooAnswers: 82 | data_class: YahooAnswers 83 | input_features: 84 | - name: question 85 | type: text 86 | output_features: 87 | - name: label 88 | type: category 89 | 90 | YelpPolarity: 91 | data_class: YelpPolarity 92 | input_features: 93 | - name: text 94 | type: text 95 | 96 | output_features: 97 | - name: label 98 | type: category 99 | 100 | YelpReviews: 101 | data_class: YelpReviews 102 | input_features: 103 | - name: text 104 | type: text 105 | output_features: 106 | - name: label 107 | type: category 108 | 109 | HateSpeech: 110 | data_class: HateSpeech 111 | input_features: 112 | - name: tweet 113 | type: text 114 | output_features: 115 | - name: class 116 | type: category 117 | 118 | SocialBiasFrames: 119 | data_class: SocialBiasFrames 120 | input_features: 121 | - name: post 122 | type: text 123 | output_features: 124 | - name: sexYN 125 | type: category 126 | - name: offensiveYN 127 | type: category 128 | - name: intentYN 129 | type: category 130 | - name: speakerMinorityYN 131 | type: category 132 | type: category 133 | 134 | MDGenderBias: 135 | data_class: MDGenderBias 136 | input_features: 137 | - name: text 138 | type: text 139 | output_features: 140 | - name: gender 141 | type: category 142 | type: category 143 | 144 | CIFAR10: 145 | data_class: CIFAR10 146 | input_features: 147 | - name: image_path 148 | type: image 149 | output_features: 150 | - name: label 151 | type: category 152 | type: category 153 | 154 | Mnist: 155 | data_class: Mnist 156 | input_features: 157 | - name: image_path 158 | type: image 159 | output_features: 160 | - name: label 161 | type: category 162 | type: category 163 | 164 | toy_agnews: 165 | data_class: toy_agnews 166 | input_features: 167 | - name: description 168 | type: text 169 | output_features: 170 | - name: class_index 171 | type: category 172 | 
-------------------------------------------------------------------------------- /experiment-templates/hyperopt_config.yaml: -------------------------------------------------------------------------------- 1 | goal: maximize 2 | metric: accuracy 3 | parameters: 4 | training.learning_rate: 5 | type: float 6 | lower: 0.00002 7 | upper: 0.01 8 | space: loguniform 9 | sampler: 10 | type: ray 11 | search_alg: 12 | type: skopt 13 | max_concurrent: 1 14 | num_samples: 1 15 | executor: 16 | type: ray 17 | cpu_resources_per_trial: 1 18 | gpu_resources_per_trial: 0 19 | #kubernetes_namespace: "ray" 20 | -------------------------------------------------------------------------------- /experiment-templates/task_template.yaml: -------------------------------------------------------------------------------- 1 | input_features: 2 | - name: ~ 3 | type: text 4 | level: word 5 | encoder: rnn 6 | preprocessing: 7 | word_tokenizer: space 8 | pretrained_model_name_or_path: None 9 | 10 | output_features: 11 | - name: ~ 12 | type: category 13 | 14 | training: 15 | learning_rate: 0.01 16 | batch_size: 16 17 | eval_batch_size: 64 18 | early_stop: 3 19 | epochs: 25 20 | validation_metric: accuracy 21 | 22 | hyperopt: 23 | output_feature: 24 | metric: ~ 25 | strategy: 26 | type: ~ 27 | num_samples: ~ 28 | parameters: 29 | training.learning_rate: 30 | min: ~ 31 | max: ~ 32 | scale: ~ 33 | -------------------------------------------------------------------------------- /experiment_driver.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import datetime 3 | import logging 4 | 5 | import ray 6 | import globals 7 | 8 | from lbt.utils.experiment_utils import set_globals, load_yaml 9 | from lbt.experiments import ( 10 | run_experiments, 11 | reproduce_experiment, 12 | download_data, 13 | ) 14 | from lbt.datasets import DATASET_REGISTRY 15 | from lbt.experiments import ( 16 | run_experiments, 17 | reproduce_experiment, 18 | download_data, 19 | ) 20 | import lbt.build_def_files 21 | from lbt.build_def_files import build_config_files 22 | 23 | logging.basicConfig( 24 | format=logging.basicConfig( 25 | format="[\N{books} LUDWIG-BENCHMARKING-TOOLKIT \N{books}] => %(levelname)s::%(message)s", 26 | level=logging.DEBUG, 27 | ), 28 | level=logging.DEBUG, 29 | ) 30 | 31 | 32 | def main(): 33 | parser = argparse.ArgumentParser( 34 | description="Ludwig Benchmarking Toolkit experiment driver script", 35 | ) 36 | 37 | parser.add_argument( 38 | "-hcd", 39 | "--hyperopt_config_dir", 40 | help="directory to save all model config", 41 | type=str, 42 | default=globals.EXPERIMENT_CONFIGS_DIR, 43 | ) 44 | 45 | parser.add_argument( 46 | "--resume_existing_exp", 47 | help="resume a previously stopped experiment", 48 | type=bool, 49 | default=False, 50 | ) 51 | 52 | parser.add_argument( 53 | "-eod", 54 | "--experiment_output_dir", 55 | help="directory to save hyperopt runs", 56 | type=str, 57 | default=globals.EXPERIMENT_OUTPUT_DIR, 58 | ) 59 | 60 | parser.add_argument( 61 | "--datasets", 62 | help="list of datasets to run experiemnts on", 63 | nargs="+", 64 | choices=list(DATASET_REGISTRY.keys()), 65 | default=None, 66 | required=True, 67 | ) 68 | parser.add_argument( 69 | "-re", 70 | "--run_environment", 71 | help="environment in which experiment will be run", 72 | choices=["local", "gcp"], 73 | default="local", 74 | ) 75 | parser.add_argument( 76 | "-esc", 77 | "--elasticsearch_config", 78 | help="path to elastic db config file", 79 | type=str, 80 | default=None, 81 | ) 82 | 83 | 
parser.add_argument( 84 | "-dcd", 85 | "--dataset_cache_dir", 86 | help="path to cache downloaded datasets", 87 | type=str, 88 | default=globals.DATASET_CACHE_DIR, 89 | ) 90 | 91 | # list of encoders to run hyperopt search over : 92 | # default is 23 ludwig encoders 93 | parser.add_argument( 94 | "-mel", 95 | "--custom_model_list", 96 | help="list of encoders to run hyperopt experiments on. \ 97 | The default setting is to use all 23 Ludwig encoders", 98 | nargs="+", 99 | choices=[ 100 | "all", 101 | "bert", 102 | "rnn", 103 | "stacked_parallel_cnn", 104 | "roberta", 105 | "distilbert", 106 | "electra", 107 | "resnet", 108 | "stacked_cnn", 109 | "t5", 110 | ], 111 | default="all", 112 | ) 113 | 114 | parser.add_argument( 115 | "-topn", 116 | "--top_n_trials", 117 | help="top n trials to save model performance for.", 118 | type=int, 119 | default=None, 120 | ) 121 | 122 | parser.add_argument( 123 | "-reproduce", 124 | "--experiment_to_reproduce", 125 | help="path to LBT experiment config to reproduce and experiment", 126 | type=str, 127 | default=None, 128 | ) 129 | 130 | args = parser.parse_args() 131 | set_globals(args) 132 | 133 | data_file_paths = download_data(args.dataset_cache_dir, args.datasets) 134 | logging.info("Datasets succesfully downloaded...") 135 | 136 | config_files = build_config_files() 137 | logging.info("Experiment configuration files built...") 138 | 139 | elastic_config = None 140 | if args.elasticsearch_config is not None: 141 | elastic_config = load_yaml(args.elasticsearch_config) 142 | 143 | experiment_config = None 144 | if args.experiment_to_reproduce is not None: 145 | experiment_config = load_yaml(args.experiment_to_reproduce) 146 | 147 | if args.run_environment == "gcp": 148 | ray.init(address="auto") 149 | 150 | if experiment_config: 151 | reproduce_experiment( 152 | model=args.custom_model_list[0], 153 | dataset=args.datasets[0], 154 | data_file_paths=data_file_paths, 155 | experiment_to_replicate=args.experiment_to_reproduce, 156 | run_environment=args.run_environment, 157 | ) 158 | else: 159 | run_experiments( 160 | data_file_paths, 161 | config_files, 162 | top_n_trials=args.top_n_trials, 163 | elastic_config=elastic_config, 164 | run_environment=args.run_environment, 165 | resume_existing_exp=args.resume_existing_exp, 166 | ) 167 | 168 | 169 | if __name__ == "__main__": 170 | main() 171 | -------------------------------------------------------------------------------- /globals.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | PATH_HERE = os.path.abspath(os.path.dirname(__file__)) 4 | ENCODER_CONFIG_DIR = os.path.join(PATH_HERE, "model-configs") 5 | # EXPERIMENT_CONFIGS_DIR = '/experiments/ludwig-bench-textclassification/experiment-configs' 6 | EXPERIMENT_CONFIGS_DIR = os.path.join(PATH_HERE, "hyperopt-experiment-configs") 7 | DATASET_CACHE_DIR = os.path.join(PATH_HERE,"datasets") 8 | ENERGY_LOGGING_DIR = os.path.join(PATH_HERE, "energy_logging") 9 | 10 | ENCODER_HYPEROPT_FILENAMES = { 11 | "bert": "bert_hyperopt.yaml", 12 | "rnn": "rnn_hyperopt.yaml", 13 | "distilbert": "distilbert_hyperopt.yaml", 14 | "electra": "electra_hyperopt.yaml", 15 | "roberta": "roberta_hyperopt.yaml", 16 | "stacked_parallel_cnn": "stackedparallelcnn_hyperopt.yaml", 17 | "t5": "t5_hyperopt.yaml", 18 | "resnet" : "resnet_hyperopt.yaml", 19 | "stacked_cnn" : "stackedcnn_hyperopt.yaml" 20 | } 21 | 22 | ENCODER_FILE_LIST = ENCODER_HYPEROPT_FILENAMES.values() 23 | DATASETS_LIST = None 24 | 25 | CONFIG_TEMPLATE_FILE = 
"./experiment-templates/task_template.yaml" 26 | DATASET_METADATA_FILE = "./experiment-templates/dataset_metadata.yaml" 27 | HYPEROPT_CONFIG_FILE = "./experiment-templates/hyperopt_config.yaml" 28 | EXPERIMENT_OUTPUT_DIR = "./experiment-outputs" 29 | 30 | PATH_TO_PRETRAINED_EMBEDDINGS = None 31 | 32 | RUNTIME_ENV = "local" 33 | -------------------------------------------------------------------------------- /lbt/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.3.0.post1" 2 | -------------------------------------------------------------------------------- /lbt/build_def_files.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pdb 4 | 5 | from copy import deepcopy 6 | 7 | import yaml 8 | 9 | import globals 10 | from globals import * 11 | from lbt.utils.experiment_utils import load_yaml 12 | 13 | template = load_yaml(CONFIG_TEMPLATE_FILE) 14 | dataset_metadata = load_yaml(DATASET_METADATA_FILE) 15 | hyperopt_config = load_yaml(HYPEROPT_CONFIG_FILE) 16 | 17 | 18 | def insert_global_vars(config): 19 | """ replace global variable placeholders with respective values """ 20 | for key, value in config.items(): 21 | if type(value) != dict and value in vars(globals): 22 | config[key] = getattr(globals, value) 23 | 24 | 25 | def build_config_files(): 26 | config_fps = {} 27 | config = deepcopy(template) 28 | 29 | encoder_hyperopt_vals = [] 30 | # select relevant encoders 31 | for encoder_filename in globals.ENCODER_FILE_LIST: 32 | with open(os.path.join(ENCODER_CONFIG_DIR, encoder_filename)) as f: 33 | encoder_hyperopt_params = yaml.load(f, Loader=yaml.SafeLoader) 34 | encoder_hyperopt_vals.append(encoder_hyperopt_params) 35 | 36 | # select relevant datasets 37 | selected_datasets = {} 38 | for dataset_name in globals.DATASETS_LIST: 39 | if dataset_name in dataset_metadata.keys(): 40 | selected_datasets[dataset_name] = dataset_metadata[dataset_name] 41 | else: 42 | raise ValueError( 43 | "The dataset you provided is not available." 
44 | "Please see list of available datasets here: " 45 | "python experiment_drivery.py --h" 46 | ) 47 | 48 | config["hyperopt"].update(hyperopt_config) 49 | 50 | for dataset, metadata in selected_datasets.items(): 51 | # each dataset will have a model specific config file 52 | config_fps[dataset] = [] 53 | 54 | for idx, input_feature_name in enumerate(metadata["input_features"]): 55 | ipt_feat = deepcopy(config["input_features"][0]) 56 | ipt_feat["name"] = input_feature_name["name"] 57 | ipt_feat["type"] = input_feature_name["type"] 58 | if idx == 0: 59 | config["input_features"] = [ipt_feat] 60 | else: 61 | config["input_features"].append(ipt_feat) 62 | for idx, output_feature_info in enumerate(metadata["output_features"]): 63 | out_feat = deepcopy(config["output_features"][0]) 64 | out_feat["name"] = output_feature_info["name"] 65 | out_feat["type"] = output_feature_info["type"] 66 | if idx == 0: 67 | config["output_features"] = [out_feat] 68 | else: 69 | config["output_features"].append(out_feat) 70 | 71 | if len(metadata["output_features"]) > 1: 72 | config["hyperopt"]["output_feature"] = "combined" 73 | else: 74 | config["hyperopt"]["output_feature"] = metadata["output_features"][ 75 | 0 76 | ]["name"] 77 | 78 | input_feature_names = metadata["input_features"] 79 | output_feature_names = metadata["output_features"] 80 | 81 | for encoder_hyperopt_params in encoder_hyperopt_vals: 82 | curr_config = deepcopy(config) 83 | encoder_name = encoder_hyperopt_params["parameters"][ 84 | "input_features.name.encoder" 85 | ] 86 | 87 | # update input and output parameters (not preprocessing) 88 | for idx in range(len(curr_config["input_features"])): 89 | curr_config["input_features"][idx].update( 90 | encoder_hyperopt_params["input_features"][idx] 91 | ) 92 | insert_global_vars(curr_config["input_features"][idx]) 93 | 94 | for idx in range(len(curr_config["output_features"])): 95 | if "output_features" in encoder_hyperopt_params.keys(): 96 | curr_config["output_features"][idx].update( 97 | encoder_hyperopt_params["output_features"][idx] 98 | ) 99 | insert_global_vars(curr_config["output_features"][idx]) 100 | 101 | # handle encoder specific preprocessing 102 | for idx in range(len(curr_config["input_features"])): 103 | try: 104 | preprocessing = curr_config["input_features"][idx][ 105 | "preprocessing" 106 | ] 107 | for key, _ in preprocessing.items(): 108 | preprocessing[key] = encoder_hyperopt_params[ 109 | "input_features" 110 | ][idx]["preprocessing"][key] 111 | 112 | except: 113 | pass #no preprocessing param 114 | # handle encoder specific training params 115 | if "training" in encoder_hyperopt_params.keys(): 116 | curr_config["training"].update( 117 | encoder_hyperopt_params["training"] 118 | ) 119 | 120 | def input_or_output_feature(param_key): 121 | if param_key.split(".")[0] == "input_features": 122 | return True 123 | return False 124 | 125 | # handle encoder specific hyperopt 126 | input_encoder_hyperopt_params = { 127 | "parameters": { 128 | input_feat["name"] + "." + key.split(".")[-1]: value 129 | for input_feat in input_feature_names 130 | for key, value in encoder_hyperopt_params[ 131 | "parameters" 132 | ].items() 133 | if key.split(".")[-1] != "encoder" 134 | and input_or_output_feature(key) 135 | } 136 | } 137 | 138 | # handle encoder specific hyperopt 139 | output_encoder_hyperopt_params = { 140 | "parameters": { 141 | output_feat["name"] + "." 
+ key.split(".")[-1]: value 142 | for output_feat in output_feature_names 143 | for key, value in encoder_hyperopt_params[ 144 | "parameters" 145 | ].items() 146 | if key.split(".")[-1] != "encoder" 147 | and not input_or_output_feature(key) 148 | } 149 | } 150 | 151 | ds_encoder_hyperopt_params = { 152 | "parameters": { 153 | **output_encoder_hyperopt_params["parameters"], 154 | **input_encoder_hyperopt_params["parameters"], 155 | } 156 | } 157 | curr_config["input_features"][0]["encoder"] = encoder_name 158 | 159 | # populate hyperopt parameters w/encoder specific settings 160 | curr_config["hyperopt"].update( 161 | { 162 | "parameters": { 163 | **ds_encoder_hyperopt_params["parameters"], 164 | **hyperopt_config["parameters"], 165 | } 166 | } 167 | ) 168 | 169 | config_fp = os.path.join( 170 | EXPERIMENT_CONFIGS_DIR, f"config_{dataset}_{encoder_name}.yaml" 171 | ) 172 | with open(config_fp, "w") as f: 173 | yaml.dump(curr_config, f) 174 | 175 | config_fps[dataset].append(config_fp) 176 | 177 | return config_fps 178 | -------------------------------------------------------------------------------- /lbt/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import inspect 3 | 4 | from lbt.datasets.base_dataset import LBTDataset 5 | from ludwig.datasets.base_dataset import BaseDataset 6 | 7 | DATASET_REGISTRY = {} 8 | 9 | 10 | def register_dataset(name): 11 | """ 12 | New dataset types can be added to LBT with the `register_dataset` 13 | function decorator. 14 | : 15 | @register_dataset('personal_dataset') 16 | class PersonalDataset(): 17 | (...) 18 | Args: 19 | name (str): the name of the dataset 20 | """ 21 | 22 | def register_dataset_cls(cls): 23 | if not issubclass(cls, LBTDataset): 24 | raise ValueError( 25 | "Dataset ({}: {}) must extend lbt.base_datast.LBTDataset".format( 26 | name, cls.__name__ 27 | ) 28 | ) 29 | DATASET_REGISTRY[name] = cls 30 | return cls 31 | 32 | return register_dataset_cls 33 | 34 | 35 | def build_dataset(dataset_name: str, cache_dir: str, **kwargs): 36 | if dataset_name not in DATASET_REGISTRY: 37 | if dataset_name in PRE_BUILT_DATASETS: 38 | importlib.import_module(PRE_BUILT_DATASETS[dataset_name]) 39 | else: 40 | raise ValueError( 41 | "Dataset ({}) is not supported by LBT".format(dataset_name) 42 | ) 43 | exit(1) 44 | 45 | dataset = DATASET_REGISTRY[dataset_name](cache_dir=cache_dir, **kwargs) 46 | dataset.load() 47 | return dataset 48 | 49 | 50 | PRE_BUILT_DATASETS = { 51 | "AGNews": "ludwig.datasets.agnews", 52 | "SST5": "ludwig.datasets.sst5", 53 | "GoEmotions": "ludwig.datasets.goemotions", 54 | "Fever": "ludwig.datasets.fever", 55 | "SST2": "ludwig.datasets.sst2", 56 | "EthosBinary": "ludwig.datasets.ethos_binary", 57 | "YelpPolarity": "ludwig.datasets.yelp_review_polarity", 58 | "DBPedia": "ludwig.datasets.dbpedia", 59 | "Irony": "ludwig.datasets.irony", 60 | "YelpReviews": "ludwig.datasets.yelp_reviews", 61 | "YahooAnswers": "ludwig.datasets.yahoo_answers", 62 | "AmazonPolarity": "ludwig.datasets.amazon_review_polarity", 63 | "AmazonReviews": "ludwig.datasets.amazon_reviews", 64 | "HateSpeech": "ludwig.datasets.hate_speech", 65 | "MDGenderBias": "ludwig.datasets.md_gender_bias", 66 | "toyAGNews": "lbt.datasets.toy_datasets", 67 | "Mnist" : "ludwig.datasets.mnist", 68 | "CIFAR10" : "ludwig.datasets.cifar10", 69 | } 70 | 71 | # TODO: ASN -> CHECK PLACEMENT 72 | for dataset_name, module_path in PRE_BUILT_DATASETS.items(): 73 | module = importlib.import_module(module_path) 74 | for 
obj in dir(module): 75 | if obj != "BaseDataset" and inspect.isclass(getattr(module, obj)): 76 | if issubclass(getattr(module, obj), BaseDataset): 77 | DATASET_REGISTRY[dataset_name] = getattr(module, obj) 78 | -------------------------------------------------------------------------------- /lbt/datasets/base_dataset.py: -------------------------------------------------------------------------------- 1 | from ludwig.datasets.base_dataset import BaseDataset, DEFAULT_CACHE_LOCATION 2 | import abc 3 | import pandas as pd 4 | 5 | 6 | class LBTDataset(BaseDataset): 7 | """Base LBT Dataset -- subclass wrapper around Ludwig data class""" 8 | 9 | def __init__(self, dataset_name, processed_file_name, cache_dir): 10 | self.name = dataset_name 11 | self.config = {"csv_filename": processed_file_name} 12 | self.cache_dir = cache_dir 13 | 14 | @abc.abstractmethod 15 | def download(self) -> None: 16 | """ Download the file from config url that represents the raw unprocessed training data.""" 17 | raise NotImplementedError() 18 | 19 | @abc.abstractmethod 20 | def process(self) -> None: 21 | """Process the dataset to get it ready to be plugged into a dataframe. 22 | Converts into a format to be used by the ludwig training API. To do this we create 23 | a new dictionary that contains the KV pairs in the format that we need. 24 | """ 25 | raise NotImplementedError() 26 | 27 | @abc.abstractmethod 28 | def load(self) -> pd.DataFrame: 29 | """ Load the processed data into a Pandas DataFrame """ 30 | raise NotImplementedError() 31 | 32 | @property 33 | def processed_dataset_path(self) -> str: 34 | """ Return path of the processed dataset """ 35 | raise NotImplementedError() 36 | 37 | def __repr__(self): 38 | return "{}()".format(self.name) 39 | -------------------------------------------------------------------------------- /lbt/datasets/toy-datasets/fever.csv: -------------------------------------------------------------------------------- 1 | ,id,verifiable,label,claim,evidence,split 2 | 155448,113501,NOT VERIFIABLE,NOT ENOUGH INFO,Grease had bad reviews.,"[[[133128, None, None, None]]]",1 3 | 155449,163803,VERIFIABLE,SUPPORTS,Ukrainian Soviet Socialist Republic was a founding participant of the UN.,"[[[296950, 288668, 'Ukrainian_Soviet_Socialist_Republic', 7]], [[298602, 290067, 'Ukrainian_Soviet_Socialist_Republic', 7], [298602, 290067, 'United_Nations', 0]], [[300696, 291816, 'Ukrainian_Soviet_Socialist_Republic', 7]], [[344347, 327887, 'Ukrainian_Soviet_Socialist_Republic', 7]], [[344994, 328433, 'Ukrainian_Soviet_Socialist_Republic', 7]], [[344997, 328435, 'Ukrainian_Soviet_Socialist_Republic', 7]]]",1 4 | 155450,70041,VERIFIABLE,SUPPORTS,2 Hearts is a musical composition by Minogue.,"[[[225394, 230056, '2_Hearts_-LRB-Kylie_Minogue_song-RRB-', 0]], [[317953, 306972, '2_Hearts_-LRB-Kylie_Minogue_song-RRB-', 0]], [[319638, 308345, '2_Hearts_-LRB-Kylie_Minogue_song-RRB-', 0]], [[319643, 308348, '2_Hearts_-LRB-Kylie_Minogue_song-RRB-', 0]]]",1 5 | 155451,202314,VERIFIABLE,REFUTES,The New Jersey Turnpike has zero shoulders.,"[[[238335, 240393, 'New_Jersey_Turnpike', 15]]]",1 6 | 155452,57085,NOT VERIFIABLE,NOT ENOUGH INFO,Legendary Entertainment is the owner of Wanda Cinemas.,"[[[178035, None, None, None], [182093, None, None, None], [314120, None, None, None], [314126, None, None, None], [314131, None, None, None]]]",1 7 | 155453,6032,VERIFIABLE,REFUTES,Aruba is the only ABC Island.,"[[[22769, 28071, 'ABC_islands_-LRB-Lesser_Antilles-RRB-', 0]], [[22769, 28072, 'ABC_islands_-LRB-Lesser_Antilles-RRB-', 1]]]",1 
8 | 155454,176630,NOT VERIFIABLE,NOT ENOUGH INFO,Great white sharks do not prefer dolphins as prey.,"[[[204612, None, None, None]]]",1 9 | 155455,130048,VERIFIABLE,REFUTES,"Burbank, California has always been completely void of industry.","[[[152264, 167060, 'Burbank,_California', 7]]]",1 10 | 155456,100046,NOT VERIFIABLE,NOT ENOUGH INFO,The Guthrie Theater's second building began operating in 1963.,"[[[117690, None, None, None]]]",1 11 | 155457,204575,VERIFIABLE,REFUTES,Commodore is ranked above a rear admiral.,"[[[241594, 243126, 'Commodore_-LRB-rank-RRB-', 0]], [[241594, 243127, 'Commodore_-LRB-rank-RRB-', 9], [241594, 243127, 'Rear_admiral', 0]]]",1 12 | 0,75397,VERIFIABLE,SUPPORTS,Nikolaj Coster-Waldau worked with the Fox Broadcasting Company.,"[[[92206, 104971, 'Nikolaj_Coster-Waldau', 7], [92206, 104971, 'Fox_Broadcasting_Company', 0]]]",0 13 | 1,150448,VERIFIABLE,SUPPORTS,Roman Atwood is a content creator.,"[[[174271, 187498, 'Roman_Atwood', 1]], [[174271, 187499, 'Roman_Atwood', 3]]]",0 14 | 2,214861,VERIFIABLE,SUPPORTS,"History of art includes architecture, dance, sculpture, music, painting, poetry literature, theatre, narrative, film, photography and graphic arts.","[[[255136, 254645, 'History_of_art', 2]]]",0 15 | 3,156709,VERIFIABLE,REFUTES,Adrienne Bailon is an accountant.,"[[[180804, 193183, 'Adrienne_Bailon', 0]]]",0 16 | 4,83235,NOT VERIFIABLE,NOT ENOUGH INFO,System of a Down briefly disbanded in limbo.,"[[[100277, None, None, None]]]",0 17 | 5,129629,VERIFIABLE,SUPPORTS,Homeland is an American television spy thriller based on the Israeli television series Prisoners of War.,"[[[151831, 166598, 'Homeland_-LRB-TV_series-RRB-', 0], [151831, 166598, 'Prisoners_of_War_-LRB-TV_series-RRB-', 0]]]",0 18 | 6,149579,NOT VERIFIABLE,NOT ENOUGH INFO,Beautiful reached number two on the Billboard Hot 100 in 2003.,"[[[173384, None, None, None]]]",0 19 | 7,229289,NOT VERIFIABLE,NOT ENOUGH INFO,Neal Schon was named in 1954.,"[[[273626, None, None, None]]]",0 20 | 8,33078,VERIFIABLE,SUPPORTS,The Boston Celtics play their home games at TD Garden.,"[[[49158, 58489, 'Boston_Celtics', 3]], [[49159, 58490, 'Boston_Celtics', 3]]]",0 21 | 9,6744,VERIFIABLE,SUPPORTS,The Ten Commandments is an epic film.,"[[[23513, 28977, 'The_Ten_Commandments_-LRB-1956_film-RRB-', 0]], [[23513, 28978, 'The_Ten_Commandments_-LRB-1956_film-RRB-', 20]]]",0 22 | 145449,91198,NOT VERIFIABLE,NOT ENOUGH INFO,Colin Kaepernick became a starting quarterback during the 49ers 63rd season in the National Football League.,"[[[108548, None, None, None]]]",2 23 | 145450,194462,NOT VERIFIABLE,NOT ENOUGH INFO,Tilda Swinton is a vegan.,"[[[227768, None, None, None]]]",2 24 | 145451,137334,VERIFIABLE,SUPPORTS,Fox 2000 Pictures released the film Soul Food.,"[[[289914, 283015, 'Soul_Food_-LRB-film-RRB-', 0]], [[291259, 284217, 'Soul_Food_-LRB-film-RRB-', 0]], [[293412, 285960, 'Soul_Food_-LRB-film-RRB-', 0]], [[337212, 322620, 'Soul_Food_-LRB-film-RRB-', 0]], [[337214, 322622, 'Soul_Food_-LRB-film-RRB-', 0]]]",2 25 | 145452,166626,NOT VERIFIABLE,NOT ENOUGH INFO,Anne Rice was born in New Jersey.,"[[[191656, None, None, None], [191657, None, None, None]]]",2 26 | 145453,111897,VERIFIABLE,REFUTES,Telemundo is a English-language television network.,"[[[131371, 146144, 'Telemundo', 0]], [[131371, 146148, 'Telemundo', 1]], [[131371, 146150, 'Telemundo', 4], [131371, 146150, 'Hispanic_and_Latino_Americans', 0]], [[131371, 146151, 'Telemundo', 5]]]",2 27 | 145454,89891,VERIFIABLE,REFUTES,Damon Albarn's debut album was released in 
2011.,"[[[107201, 120581, 'Damon_Albarn', 17]]]",2 28 | 145455,181634,VERIFIABLE,SUPPORTS,There is a capital called Mogadishu.,"[[[210946, 218608, 'Mogadishu', 0]]]",2 29 | 145456,219028,VERIFIABLE,REFUTES,Savages was exclusively a German film.,"[[[260471, 258880, 'Savages_-LRB-2012_film-RRB-', 3]], [[260473, 258882, 'Savages_-LRB-2012_film-RRB-', 3]]]",2 30 | 145457,194372,NOT VERIFIABLE,NOT ENOUGH INFO,Happiness in Slavery is a gospel song by Nine Inch Nails.,"[[[227658, None, None, None]]]",2 31 | 145458,108281,VERIFIABLE,REFUTES,Andrew Kevin Walker is only Chinese.,"[[[127089, 141573, 'Andrew_Kevin_Walker', 0]]]",2 32 | -------------------------------------------------------------------------------- /lbt/datasets/toy-datasets/goemotions.csv: -------------------------------------------------------------------------------- 1 | ,text,emotion_ids,comment_id,split 2 | 48837,Is this in New Orleans?? I really feel like this is New Orleans.,27,edgurhb,1 3 | 48838,"You know the answer man, you are programmed to capture those codes they send you, don’t avoid them!",4 27,ee84bjg,1 4 | 48839,I've never been this sad in my life!,25,edcu99z,1 5 | 48840,"The economy is heavily controlled and subsidized by the government. In any case, I was poking at the lack of nuance in US politics today",4 27,edc32e2,1 6 | 48841,He could have easily taken a real camera from a legitimate source and change the price in Word/Photoshop and then print it out.,20,eepig6r,1 7 | 48842,"Thank you for your vote of confidence, but we statistically can't get to 10 wins.",15,eczm50f,1 8 | 48843,"Wah Mum other people call me on my bullshit and I can't ban them , Go out side son.",2,ed4yr9r,1 9 | 48844,There it is!,27,ede4v0m,1 10 | 48845,At least now [NAME] has more time to gain his confidence,20,eekez9p,1 11 | 48846,Good. We don't want more thrash liberal offspring in this world.,10,ee0fxpu,1 12 | 0,My favourite food is anything I didn't have to cook myself.,27,eebbqej,0 13 | 1,"Now if he does off himself, everyone will think hes having a laugh screwing with people instead of actually dead",27,ed00q6i,0 14 | 2,WHY THE FUCK IS BAYLESS ISOING,2,eezlygj,0 15 | 3,To make her feel threatened,14,ed7ypvh,0 16 | 4,Dirty Southern Wankers,3,ed0bdzj,0 17 | 5,OmG pEyToN iSn'T gOoD eNoUgH tO hElP uS iN tHe PlAyOfFs! Dumbass Broncos fans circa December 2015.,26,edvnz26,0 18 | 6,Yes I heard abt the f bombs! That has to be why. Thanks for your reply:) until then hubby and I will anxiously wait 😝,15,ee3b6wu,0 19 | 7,We need more boards and to create a bit more space for [NAME]. Then we’ll be good.,8 20,ef4qmod,0 20 | 8,Damn youtube and outrage drama is super lucrative for reddit,0,ed8wbdn,0 21 | 9,It might be linked to the trust factor of your friend.,27,eczgv1o,0 22 | 43410,"I’m really sorry about your situation :( Although I love the names Sapphira, Cirilla, and Scarlett!",25,eecwqtt,2 23 | 43411,It's wonderful because it's awful. At not with.,0,ed5f85d,2 24 | 43412,"Kings fan here, good luck to you guys! Will be an interesting game to watch! ",13,een27c3,2 25 | 43413,"I didn't know that, thank you for teaching me something today!",15,eelgwd1,2 26 | 43414,They got bored from haunting earth for thousands of years and ultimately moved on to the afterlife.,27,eem5uti,2 27 | 43415,Thank you for asking questions and recognizing that there may be things that you don’t know or understand about police tactics. Seriously. Thank you.,15,ef2nq7i,2 28 | 43416,You’re welcome,15,efdbh17,2 29 | 43417,100%! 
Congrats on your job too!,15,ef0ec3b,2 30 | 43418,I’m sorry to hear that friend :(. It’s for the best most likely if she didn’t accept you for who you are,24,ee8utmi,2 31 | 43419,"Girlfriend weak as well, that jump was pathetic.",25,eeni74k,2 32 | -------------------------------------------------------------------------------- /lbt/datasets/toy-datasets/toy_agnews.csv: -------------------------------------------------------------------------------- 1 | ,class_index,title,description,split 2 | 80453,1,Guarding of Ukraine central election commission relieved,"KIEV, November 3 (Itar-Tass) - Water cannons and armoured personnel carriers were removed from the territory of the Ukrainian central election commission on Wednesday.",0 3 | 59399,4,FDA Approves Use of Chip in Patients (AP),"AP - The Food and Drug Administration on Wednesday approved an implantable computer chip that can pass a patient's medical details to doctors, speeding care.",0 4 | 97947,3,Chiefs agree on oil prices,THE Group of 20 finance chiefs agreed yesterday that quot;abrupt changes quot; in foreign exchange rates and oil prices were unwelcome but showed no appetite for intervention to strengthen the US currency.,0 5 | 53073,4,Love DRM or my family starves: why Steve Ballmer doesn #39;t Get It,"Last Sunday Microsoft CEO Steve Ballmer kicked off this week #39;s European tour by sitting down with a small group* of British journalists and dispensing pearls of wisdom, notably on the future of Apple in home networking (it has none, natch, says Steve).",0 6 | 16329,4,Video Game Sales Seen Pausing After Record 2003,"Video game sales soared to a record \$18.2 billion last year, but the days of strong growth are on pause as players await a new generation of consoles.",0 7 | 85504,3,Google Blazes Lonely IPO Trail,"Few experts who tried to predict how Google's much-antipated IPO would perform managed to get it right, and no firms have opted to follow its auction approach. By Joanna Glasner.",1 8 | 86595,1,"Confusion Over Arafat, Palestinians Prepare Burial (Reuters)","Reuters - Palestinians looked to their leadership\to lift confusion over Yasser Arafat's fate in a French\hospital on Wednesday as preparations gathered pace to bury the\icon of their fight for statehood. Arafat, 75, suffered a brain\hemorrhage on Tuesday at the hospital where he was flown from\the West Bank on Oct. 29 and had lain in a coma. Officials\insisted in public that he was alive, though aides said\privately that he was dead.",0 9 | 104152,4,InfoWorld Announces 2005 Technology of the Year Awards,Apple Xserve G5 won InfoWorld #146;s #147;Best Server Hardware #148; award and Mac OS X v10.3 Panther won the #147;Best Operating System #148; award. Jan 03,1 10 | 17522,4,Retailer to follow RFID test with full rollout,Germany's Metro Group tested radio tag inventory control for a year and found it good enough to deploy--but not perfect.,0 11 | 104930,3,Asian Stocks Rise for Fourth Day in US Trading; Sony Climbs,Asian stocks rose in US trading. Sony Corp. gained after the world #39;s No. 2 consumer-electronics maker agreed to resolve a dispute over patents related to digital cameras.,1 12 | 20654,4,Toxic waste ship sinks off Turkey,A ship containing toxic power station waste has sunk after being moored in a Turkish harbour for four years. 
The Ulla had sat in the port of Iskenderun in south-eastern Turkey amid confusion over the ship #39;s future.,1 13 | 66666,1,"Cabinet set to agree troop movement, but delay likely",The cabinet is expected to agree in principle today to a request from George Bush #39;s military commanders for British troops to be redeployed to the so-called quot;triangle of death quot; near Baghdad.,1 14 | 58164,4,Virgin to Unveil Portable Music Player,"The consumer electronics arm of the Virgin Group is introducing a new 5-gigabyte hard-disk portable music player, bringing a powerful brand name in music to the increasingly crowded product space. <FONT face=""verdana,MS Sans Serif,arial,helvetica"" size=""-2""\ color=""#666666""><B>-The Associated Press</B></FONT>",1 15 | 41068,4,AT T Wireless unveils messaging-only handheld,"The company says the device, dubbed the Ogo, won't be bogged down by features that are ""hardly used.""",0 16 | 84690,4,Online spammer free on bond,"LEESBURG, Va. A North Carolina man convicted of illegally sending (m) millions of pieces of junk e-mail to America Online subscribers was granted bond today while he awaits sentencing.",0 17 | 79395,4,"Intel pushes Pentium 4 bus to 1,066-MHz for gamers","SAN JOSE, Calif. - Intel Corp. (Santa Clara, Calif.) on Monday (Nov. 1) rolled out a new Pentium 4 processor and chip set for PC gamers, based on a 1,066-MHz front-side bus technology.",0 18 | 12514,4,Intel #39;s #39;BTX #39; Motherboard Debuts on Gateway #39;s PC,"Intel #39;s (Quote, Chart) next-generation motherboard design has made its debut in a new Gateway (Quote, Chart) desktop, the company said today.",0 19 | 47284,1,Bush and Kerry Follow Debate With Sharp Jabs,"President Bush attacked John Kerry on Iraq, taking a more aggressive approach than he had during the debate.",0 20 | 164,4,Insecurity: (Or Why Americans Aren't Feeling The Recovery),"The New Republic's website is currently carrying an interesting piece which tries to explain the anomaly that although the US economy is growing, a lot of its citizens are still feeling worse off. The article explains the results of a 40 year panel study which has shown that although mean incomes have increased, income variability has increased massively, causing many Americans to feel less well off, despite the growing economy. ",0 21 | 96209,3,Stocks Fall on Greenspan Deficit Warning,Stocks fell modestly Friday as Federal Reserve Chairman Alan Greenspan sounded a warning over the nation #39;s spiraling trade deficit.,0 22 | 45653,1,House Defeats Gay Marriage Ban Amendment,"WASHINGTON - The Republican-controlled House emphatically defeated a constitutional amendment banning gay marriage Thursday, the latest in a string of conservative pet causes pushed to a vote by GOP leaders in the run-up to Election Day. 
The vote was 227-186, far short of the two-thirds needed for approval on a measure that President Bush backed but the Senate had previously rejected...",0 23 | 94095,2,"Garcia, Jimenez Favorites at Home for Cup (AP)","AP - Chomping on a cigar, his unruly ponytail flowing from his cap, Miguel Angel Jimenez smiled as he said, ""Buenos dias"" to the clubhouse guard at the Real Club de Golf.",0 24 | 102756,3,FCC is watching SBC #39;s VoIP charge,WASHINGTON--The Federal Communications Commission is keeping and eye on SBC Communications #39; new connection charge for calls made over the Internet.,0 25 | 4200,4,Martian hill shows signs of ancient water,"LOS ANGELES - NASA #39;s Spirit rover has found more evidence of past water on the hills of Mars, while its twin, Opportunity, has observed a field of dunes inside a crater. ",0 26 | 88362,2,Baseball GMs Split on Instant Replay (AP),"AP - Upon further review, baseball will hold off on taking a look at instant replay. After watching umpires reverse almost every missed call in the postseason, major league general managers split 15-15 Thursday on whether to keep exploring the subject.",0 27 | 68015,3,Briefly: EU #39;s Mercosur talks extended,"The European Union and the Mercosur group of South American economies agreed Thursday to let negotiations on opening their markets extend into next year, missing a month-end deadline to build the world #39;s largest trade area.",0 28 | 97817,2,Malice at the Palace - the last thing the NBA needed for their <b>...</b>,"Take your pick of culprits in the stunning melee Friday night at The Palace of Auburn Hills, in a heated game between Central Division rivals: the defending NBA champion Detroit Pistons and Indiana Pacers.",0 29 | 34312,2,Soccer Legend Maradona Arrives in Cuba (AP),AP - Former soccer great Diego Maradona returned to Cuba on Monday to resume treatment for cocaine addiction after a relapse confined him to a psychiatric hospital in his native Argentina and sparked unsuccessful attempts by his family to keep him at home.,0 30 | 55160,2,No. 24 LSU at No. 12 Florida,Coaches: Ron Zook is 19-11 in his third year at UF; Nick Saban is 42-15 in his fifth year at LSU and 85-41-1 in his 11th year overall.,0 31 | 35819,4,"Hello, halitosis","ZDNet #39;s survey of IT professionals in August kept Wired amp; Wireless on top for the 18th month in a row. 
Siemens Mobile is developing the first mobile phone that will alert people when their breath stinks, the company said Tuesday.",0 32 | 93263,4,"More Than 15,000 Species Said to Be Facing Extinction (Reuters)","Reuters - More than 15,000 species, from sharks\to frogs to fir trees, are facing extinction and the total is\rising faster than ever before, conservationists and scientists\said Wednesday.",0 33 | 17850,4,Archaeologists Discover Tomb Near Egypt's Pyramids (Reuters),"Reuters - Archaeologists have found a\2,500-year-old tomb near Egypt's ancient pyramids in Giza, the\head of the excavation team told Reuters on Thursday.",0 34 | 37562,4,Sony to support MP3 in future players,Sony #39;s apparently gotten the message: Some of its new MP3 player models will feature direct support for MP3 in addition to its proprietary ATRAC format.,0 35 | 70546,3,US Airways #39; pilots vote for 18 salary reduction,US Airways #39; pilots voted to approve a new labor agreement yesterday that will reduce their salaries by 18 percent but will save the airline \$300 million a year.,0 36 | 29634,4,Triumphant return of the big <cite>Reg</cite> logo t-shirt,"<strong>Cash'n'Carrion</strong> Cue trumpets, etc",0 37 | 9206,3,HHG buoyed by return to profits,British insurer and fund manager HHG was back in the black at the half-year stage today as it recovered from losses of 902m (1.,0 38 | 125556,4,Outsourcing to Arkansas,A new kid on the block promises to give offshore outsourcing a run for its money--by routing technology work to rural America. Outsourcing Blog,2 39 | 122189,4,Noah's Ark Quest Dead in Water -- Was It a Stunt?,"In April a Christian activist announced a summer 2004 expedition to search for Noah's ark. The quest didn't happen, and now critics are questioning the project's credibility.",2 40 | -------------------------------------------------------------------------------- /lbt/datasets/toy_datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pdb 3 | import pandas as pd 4 | from lbt.datasets import register_dataset 5 | from lbt.datasets.base_dataset import LBTDataset 6 | 7 | 8 | @register_dataset("toy_agnews") 9 | class ToyAGNews(LBTDataset): 10 | def __init__( 11 | self, 12 | dataset_name="toy_agnews", 13 | processed_file_name="toy_agnews.csv", 14 | cache_dir=os.path.join(os.getcwd(), "lbt/datasets/toy-datasets"), 15 | ): 16 | super().__init__( 17 | dataset_name=dataset_name, 18 | processed_file_name=processed_file_name, 19 | cache_dir=os.path.join(os.getcwd(), "lbt/datasets/toy-datasets"), 20 | ) 21 | 22 | def download(self) -> None: 23 | pass 24 | 25 | def process(self) -> None: 26 | pass 27 | 28 | def load(self) -> pd.DataFrame: 29 | toy_agnews_ds = pd.read_csv( 30 | os.path.join(self.cache_dir, self.config["csv_filename"]) 31 | ) 32 | return toy_agnews_ds 33 | 34 | @property 35 | def processed_dataset_path(self): 36 | return self.cache_dir 37 | -------------------------------------------------------------------------------- /lbt/experiments.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import datetime 3 | import logging 4 | import os 5 | import pickle 6 | import socket 7 | from typing import Union 8 | from collections import defaultdict 9 | 10 | import numpy as np 11 | import ray 12 | 13 | import globals 14 | from .build_def_files import * 15 | from database import save_results_to_es 16 | from ludwig.hyperopt.run import hyperopt 17 | from lbt.utils.experiment_utils import 
*
18 | from lbt.datasets import DATASET_REGISTRY
19 | 
20 | hostname = socket.gethostbyname(socket.gethostname())
21 | 
22 | 
23 | def download_data(cache_dir=None, datasets: list = None):
24 |     """ Returns file paths for all datasets """
25 |     data_file_paths = {}
26 |     for dataset in datasets:
27 |         # if dataset in dataset_metadata.keys():
28 |         if dataset in list(DATASET_REGISTRY.keys()):
29 |             data_class = dataset_metadata[dataset]["data_class"]
30 |             data_path = download_dataset(data_class, cache_dir)
31 |             process_dataset(data_path)
32 |             data_file_paths[dataset] = data_path
33 |         else:
34 |             raise ValueError(
35 |                 f"{dataset} is not a valid dataset. "
36 |                 "For a list of valid datasets see: "
37 |                 "python experiment_driver.py -h"
38 |             )
39 |     return data_file_paths
40 | 
41 | 
42 | def resume_training(model_config: dict, output_dir):
43 |     results, metrics, params = collect_completed_trial_results(output_dir)
44 |     original_num_samples = model_config["hyperopt"]["sampler"]["num_samples"]
45 |     new_num_samples = max(original_num_samples - len(metrics), 0)
46 |     model_config["hyperopt"]["sampler"]["search_alg"][
47 |         "points_to_evaluate"
48 |     ] = params
49 |     model_config["hyperopt"]["sampler"]["search_alg"][
50 |         "evaluated_rewards"
51 |     ] = metrics
52 |     model_config["hyperopt"]["sampler"]["num_samples"] = new_num_samples
53 |     return model_config, results
54 | 
55 | 
56 | def run_hyperopt_exp(
57 |     experiment_attr: dict,
58 |     is_resume_training: bool = False,
59 |     runtime_env: str = "local",
60 | ) -> int:
61 | 
62 |     dataset = experiment_attr["dataset"]
63 |     encoder = experiment_attr["encoder"]
64 |     model_config = experiment_attr["model_config"]
65 | 
66 |     # the following are temp solutions for issues in Ray
67 |     if runtime_env == "local":
68 |         # temp solution to ray problems
69 |         os.environ["TUNE_PLACEMENT_GROUP_AUTO_DISABLED"] = "1"
70 |         os.environ["TUNE_PLACEMENT_GROUP_CLEANUP_DISABLED"] = "1"
71 | 
72 |     try:
73 |         start = datetime.datetime.now()
74 | 
75 |         tune_executor = model_config["hyperopt"]["executor"]["type"]
76 | 
77 |         num_gpus = 0
78 |         try:
79 |             num_gpus = model_config["hyperopt"]["executor"][
80 |                 "gpu_resources_per_trial"
81 |             ]
82 |         except:
83 |             pass
84 | 
85 |         if tune_executor == "ray" and runtime_env == "gcp":
86 | 
87 |             if (
88 |                 "kubernetes_namespace"
89 |                 not in model_config["hyperopt"]["executor"].keys()
90 |             ):
91 |                 raise ValueError(
92 |                     "Please specify the kubernetes namespace of the Ray cluster"
93 |                 )
94 | 
95 |         if tune_executor == "ray" and runtime_env == "local":
96 |             if (
97 |                 "kubernetes_namespace"
98 |                 in model_config["hyperopt"]["executor"].keys()
99 |             ):
100 |                 raise ValueError(
101 |                     "You are running locally. 
" 102 | "Please remove the kubernetes_namespace param in hyperopt_config.yaml" 103 | ) 104 | 105 | gpu_list = None 106 | if tune_executor != "ray": 107 | gpu_list = get_gpu_list() 108 | if len(gpu_list) > 0: 109 | num_gpus = 1 110 | 111 | new_model_config = copy.deepcopy(experiment_attr["model_config"]) 112 | existing_results = None 113 | if is_resume_training: 114 | new_model_config, existing_results = resume_training( 115 | new_model_config, experiment_attr["output_dir"] 116 | ) 117 | 118 | hyperopt_results = hyperopt( 119 | new_model_config, 120 | dataset=experiment_attr["dataset_path"], 121 | model_name=experiment_attr["model_name"], 122 | gpus=gpu_list, 123 | output_directory=experiment_attr["output_dir"], 124 | ) 125 | 126 | if existing_results is not None: 127 | hyperopt_results.extend(existing_results) 128 | hyperopt_results.sort(key=lambda result: result["metric_score"]) 129 | 130 | logging.info( 131 | "time to complete: {}".format(datetime.datetime.now() - start) 132 | ) 133 | 134 | # Save output locally 135 | try: 136 | pickle.dump( 137 | hyperopt_results, 138 | open( 139 | os.path.join( 140 | experiment_attr["output_dir"], 141 | f"{dataset}_{encoder}_hyperopt_results.pkl", 142 | ), 143 | "wb", 144 | ), 145 | ) 146 | except: 147 | pass 148 | 149 | # save lbt output w/additional metrics computed locall 150 | results_w_additional_metrics = compute_additional_metadata( 151 | experiment_attr, hyperopt_results, tune_executor 152 | ) 153 | try: 154 | pickle.dump( 155 | results_w_additional_metrics, 156 | open( 157 | os.path.join( 158 | experiment_attr["output_dir"], 159 | f"{dataset}_{encoder}_hyperopt_results_w_lbt_metrics.pkl", 160 | ), 161 | "wb", 162 | ), 163 | ) 164 | except: 165 | pass 166 | 167 | # create .completed file to indicate that experiment is completed 168 | _ = open( 169 | os.path.join(experiment_attr["output_dir"], ".completed"), "wb" 170 | ) 171 | 172 | logging.info( 173 | "time to complete: {}".format(datetime.datetime.now() - start) 174 | ) 175 | 176 | # save output to db 177 | if experiment_attr["elastic_config"]: 178 | try: 179 | save_results_to_es( 180 | experiment_attr, 181 | hyperopt_results, 182 | tune_executor=tune_executor, 183 | top_n_trials=experiment_attr["top_n_trials"], 184 | runtime_env="local", 185 | num_gpus=num_gpus, 186 | ) 187 | except: 188 | logging.warning("Not all files were uploaded to elastic db!") 189 | return 1 190 | except: 191 | logging.warning("Error running experiment...not completed") 192 | return 0 193 | 194 | 195 | def run_experiments( 196 | data_file_paths: dict, 197 | config_files: dict, 198 | top_n_trials: int, 199 | elastic_config=None, 200 | run_environment: str = "local", 201 | resume_existing_exp: bool = False, 202 | ): 203 | logging.info("Running hyperopt experiments...") 204 | # check if overall experiment has already been run 205 | if os.path.exists( 206 | os.path.join(globals.EXPERIMENT_OUTPUT_DIR, ".completed") 207 | ): 208 | logging.info("Experiment is already completed!") 209 | return 210 | 211 | completed_runs, experiment_queue = [], [] 212 | for dataset_name, file_path in data_file_paths.items(): 213 | logging.info("Dataset: {}".format(dataset_name)) 214 | 215 | for model_config_path in config_files[dataset_name]: 216 | config_name = model_config_path.split("/")[-1].split(".")[0] 217 | dataset = config_name.split("_")[1] 218 | encoder = "_".join(config_name.split("_")[2:]) 219 | experiment_name = dataset + "_" + encoder 220 | 221 | logging.info("Experiment: {}".format(experiment_name)) 222 | 223 | output_dir = 
os.path.join( 224 | globals.EXPERIMENT_OUTPUT_DIR, experiment_name 225 | ) 226 | 227 | if not os.path.isdir(output_dir): 228 | os.mkdir(output_dir) 229 | 230 | output_dir = os.path.join( 231 | globals.EXPERIMENT_OUTPUT_DIR, experiment_name 232 | ) 233 | 234 | if not os.path.exists(os.path.join(output_dir, ".completed")): 235 | 236 | model_config = load_yaml(model_config_path) 237 | experiment_attr = defaultdict() 238 | experiment_attr = { 239 | "model_config": copy.deepcopy(model_config), 240 | "dataset_path": file_path, 241 | "top_n_trials": top_n_trials, 242 | "model_name": config_name, 243 | "output_dir": output_dir, 244 | "encoder": encoder, 245 | "dataset": dataset, 246 | "elastic_config": elastic_config, 247 | } 248 | if run_environment == "local": 249 | completed_runs.append( 250 | run_hyperopt_exp( 251 | experiment_attr, 252 | resume_existing_exp, 253 | run_environment, 254 | ) 255 | ) 256 | 257 | experiment_queue.append(experiment_attr) 258 | else: 259 | logging.info( 260 | f"The {dataset} x {encoder} exp. has already completed!" 261 | ) 262 | 263 | if run_environment != "local": 264 | completed_runs = ray.get( 265 | [ 266 | ray.remote(num_cpus=0, resources={f"node:{hostname}": 0.001})( 267 | run_hyperopt_exp 268 | ).remote(exp, resume_existing_exp, run_environment) 269 | for exp in experiment_queue 270 | ] 271 | ) 272 | 273 | if len(completed_runs) == len(experiment_queue): 274 | # create .completed file to indicate that entire hyperopt experiment 275 | # is completed 276 | _ = open( 277 | os.path.join(globals.EXPERIMENT_OUTPUT_DIR, ".completed"), "wb" 278 | ) 279 | else: 280 | logging.warning("Not all experiments completed!") 281 | 282 | 283 | def reproduce_experiment( 284 | model, 285 | dataset, 286 | data_file_paths, 287 | elastic_config=None, 288 | experiment_to_replicate=None, 289 | run_environment: str = "local", 290 | ): 291 | experiment_config = load_yaml(experiment_to_replicate) 292 | experiment_name = dataset + "_" + model 293 | for dataset_name, file_path in data_file_paths.items(): 294 | 295 | output_dir = os.path.join( 296 | globals.EXPERIMENT_OUTPUT_DIR, experiment_name 297 | ) 298 | 299 | if not os.path.isdir(output_dir): 300 | os.mkdir(output_dir) 301 | 302 | output_dir = os.path.join( 303 | globals.EXPERIMENT_OUTPUT_DIR, experiment_name 304 | ) 305 | 306 | experiment_attr = defaultdict() 307 | experiment_attr = { 308 | "model_config": experiment_config, 309 | "dataset_path": file_path, 310 | "model_name": model, 311 | "output_dir": output_dir, 312 | "encoder": model, 313 | "dataset": dataset, 314 | "elastic_config": elastic_config, 315 | } 316 | run_hyperopt_exp( 317 | experiment_attr, 318 | False, 319 | run_environment, 320 | ) 321 | 322 | 323 | def experiment( 324 | models: Union[str, list], 325 | datasets: Union[str, list], 326 | experiment_configs_dir: str = globals.EXPERIMENT_CONFIGS_DIR, 327 | experiment_output_dir: str = globals.EXPERIMENT_OUTPUT_DIR, 328 | datasets_cache_dir: str = globals.DATASET_CACHE_DIR, 329 | run_environment: str = "local", 330 | elastic_search_config: str = None, 331 | resume_existing_exp: bool = False, 332 | ): 333 | if isinstance(datasets, str): 334 | datasets = [datasets] 335 | data_file_paths = download_data(datasets_cache_dir, datasets) 336 | 337 | config_files = build_config_files() 338 | elastic_config = None 339 | if elastic_search_config is not None: 340 | elastic_config = load_yaml(elastic_search_config) 341 | 342 | if run_environment == "gcp": 343 | ray.init(address="auto") 344 | 345 | run_experiments( 346 | 
data_file_paths,
347 |         config_files,
348 |         top_n_trials=None,
349 |         elastic_config=elastic_config,
350 |         run_environment=run_environment,
351 |         resume_existing_exp=resume_existing_exp,
352 |     )
353 | 
-------------------------------------------------------------------------------- /lbt/metrics/__init__.py: --------------------------------------------------------------------------------
1 | from lbt.metrics.base_metric import LBTMetric
2 | import ray
3 | import importlib
4 | import sys
5 | import json
6 | import os
7 | 
8 | LOCATION = os.path.abspath(os.path.dirname(__file__))
9 | INSTANCE_PRICES_FILEPATH = os.path.join(LOCATION, "instance_prices.json")
10 | METRIC_REGISTERY = {}
11 | INSTANCE_PRICES = {}
12 | 
13 | 
14 | def register_metric(name):
15 |     """
16 |     New metrics can be added to LBT with the `register_metric`
17 |     function decorator.
18 |     For example:
19 |         @register_metric('personal_metric')
20 |         class PersonalMetric():
21 |             (...)
22 |     Args:
23 |         name (str): the name of the metric
24 |     """
25 | 
26 |     def register_metric_cls(cls):
27 |         if not issubclass(cls, LBTMetric):
28 |             raise ValueError(
29 |                 "Metric ({}: {}) must extend lbt.metrics.base_metric".format(
30 |                     name, cls.__name__
31 |                 )
32 |             )
33 |         METRIC_REGISTERY[name] = cls
34 |         return cls
35 | 
36 |     return register_metric_cls
37 | 
38 | 
39 | def get_experiment_metadata(
40 |     document: dict,
41 |     model_path: str,
42 |     data_path: str,
43 |     run_stats: dict,
44 |     train_batch_size: int = 16,
45 |     num_gpus=0,
46 | ):
47 |     for key, metrics_class in METRIC_REGISTERY.items():
48 |         try:
49 |             remote_class = ray.remote(num_cpus=1, num_gpus=num_gpus)(
50 |                 metrics_class
51 |             ).remote()
52 |             output = remote_class.run.remote(
53 |                 model_path=model_path,
54 |                 dataset_path=data_path,
55 |                 train_batch_size=train_batch_size,
56 |                 run_stats=run_stats,
57 |             )
58 |             document.update({key: ray.get(output)})
59 |         except:
60 |             print(f"FAILURE PROCESSING: {key}")
61 | 
62 | 
63 | INSTANCE_PRICES = json.load(open(INSTANCE_PRICES_FILEPATH, "rb"))
64 | 
65 | PRE_BUILT_METRICS = {
66 |     "lbt_metrics": "lbt.metrics.lbt_metrics",
67 | }
68 | 
69 | for name, module in PRE_BUILT_METRICS.items():
70 |     if module not in sys.modules:
71 |         importlib.import_module(module)
72 | 
-------------------------------------------------------------------------------- /lbt/metrics/base_metric.py: --------------------------------------------------------------------------------
1 | import abc
2 | from abc import ABC, ABCMeta, abstractmethod
3 | from typing import Tuple, Union
4 | 
5 | import pandas as pd
6 | from ludwig.api import LudwigModel
7 | 
8 | 
9 | class LBTMetric(ABC):
10 |     def __init__(self):
11 |         super().__init__()
12 | 
13 |     @classmethod
14 |     def run(cls, model_path, dataset_path, train_batch_size, run_stats):
15 |         pass
16 | 
17 |     def load_model(self, model_path: str) -> LudwigModel:
18 |         return LudwigModel.load(model_path)
19 | 
20 |     def evaluate(
21 |         self,
22 |         model: LudwigModel,
23 |         dataset: Union[str, dict, pd.DataFrame] = None,
24 |         **kwargs
25 |     ) -> Tuple[dict, Union[dict, pd.DataFrame], str]:
26 |         return model.evaluate(dataset, **kwargs)
27 | 
28 |     def predict(
29 |         self,
30 |         model: LudwigModel,
31 |         dataset: Union[str, dict, pd.DataFrame] = None,
32 |         **kwargs
33 |     ) -> Tuple[Union[dict, pd.DataFrame], str]:
34 |         return model.predict(dataset, **kwargs)
35 | 
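The two files above define LBT's metric extension point: `register_metric` stores a class in `METRIC_REGISTERY`, and `LBTMetric` provides the model-loading and evaluation helpers that `get_experiment_metadata` later drives through Ray. The sketch below shows how a custom metric might be registered; the name `dataset_row_count` and its row-counting behavior are purely illustrative and are not part of the toolkit.

import pandas as pd

from lbt.metrics import register_metric
from lbt.metrics.base_metric import LBTMetric


@register_metric("dataset_row_count")  # hypothetical metric name, for illustration only
class DatasetRowCount(LBTMetric):
    num_gpus = 0

    def run(cls, dataset_path, **kwargs):
        # Mirrors the run(...) convention used by the built-in metrics below:
        # get_experiment_metadata passes model_path, dataset_path,
        # train_batch_size and run_stats as keyword arguments, and any the
        # metric does not need are absorbed by **kwargs.
        return len(pd.read_csv(dataset_path))

As with the built-in metrics, the module defining such a class would have to be imported (for example via an entry like the ones in `PRE_BUILT_METRICS`) before `get_experiment_metadata` iterates over `METRIC_REGISTERY`.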
"A100": 2.93, 3 | "Tesla T4": 0.35, 4 | "Tesla P4": 0.60, 5 | "Tesla V100": 2.48, 6 | "Tesla P100": 1.46, 7 | "Tesla K80": 0.45 8 | } -------------------------------------------------------------------------------- /lbt/metrics/lbt_metrics.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import shutil 4 | import tempfile 5 | 6 | import GPUtil 7 | import ludwig 8 | import numpy as np 9 | import pandas as pd 10 | import psutil 11 | import ray 12 | from experiment_impact_tracker.compute_tracker import ImpactTracker 13 | from experiment_impact_tracker.data_interface import DataInterface 14 | from globals import ENERGY_LOGGING_DIR 15 | from lbt.metrics import register_metric 16 | from lbt.metrics import INSTANCE_PRICES 17 | from lbt.metrics.base_metric import LBTMetric 18 | from lbt.metrics.utils import scale_bytes 19 | from ludwig.api import LudwigModel 20 | from ludwig.collect import collect_weights 21 | 22 | 23 | @register_metric("ludwig_version") 24 | class LudwigVersion(LBTMetric): 25 | def __init__(self): 26 | pass 27 | 28 | def run(cls, **kwargs): 29 | return ludwig.__version__ 30 | 31 | 32 | @register_metric("hardware_metadata") 33 | class HardwareMetadata(LBTMetric): 34 | num_gpus = 0 35 | 36 | def run(cls, **kwargs): 37 | machine_info = {} 38 | # GPU 39 | gpus = GPUtil.getGPUs() 40 | if len(gpus) != 0: 41 | machine_info["total_gpus"] = len(gpus) 42 | gpu_type = {} 43 | for gpu_id, gpu in enumerate(gpus): 44 | gpu_type[gpu_id] = gpu.name 45 | machine_info["gpu_info"] = gpu_type 46 | else: 47 | machine_info["total_gpus"] = 0 48 | # CPU 49 | total_cores = psutil.cpu_count(logical=True) 50 | machine_info["total_cores"] = total_cores 51 | # RAM 52 | svmem = psutil.virtual_memory() 53 | total_RAM = scale_bytes(svmem.total) 54 | machine_info["RAM"] = total_RAM 55 | return machine_info 56 | 57 | 58 | @register_metric("inference_latency") 59 | class InferenceLatencyMetric(LBTMetric): 60 | num_samples = 25 61 | num_gpus = 0 62 | 63 | def run(cls, model_path, dataset_path, **kwargs): 64 | """ 65 | Returns avg. time to perform inference on 1 sample 66 | 67 | # Inputs 68 | :param model_path: (str) filepath to pre-trained model (directory that 69 | contains the model_hyperparameters.json). 70 | :param dataset_path: (str) filepath to dataset 71 | :param dataset_path: (int) number of dev samples to randomly sample 72 | 73 | # Return 74 | :return: (str) avg. 
time per inference step 75 | """ 76 | # Create smaller datasets w/10 samples from original dev set 77 | full_dataset = pd.read_csv(dataset_path) 78 | # Note: split == 1 indicates the dev set 79 | if "split" in full_dataset: 80 | if len(full_dataset[full_dataset["split"] == 1]) > 0: 81 | sampled_dataset = full_dataset[ 82 | full_dataset["split"] == 1 83 | ].sample(n=cls.num_samples) 84 | elif len(full_dataset[full_dataset["split"] == 2]) > 0: 85 | sampled_dataset = full_dataset[ 86 | full_dataset["split"] == 2 87 | ].sample(n=cls.num_samples) 88 | else: 89 | sampled_dataset = full_dataset[ 90 | full_dataset["split"] == 0 91 | ].sample(n=cls.num_samples) 92 | else: 93 | sampled_dataset = full_dataset.sample(n=cls.num_samples) 94 | ludwig_model = LudwigModel.load(model_path) 95 | start = datetime.datetime.now() 96 | _, _ = ludwig_model.predict( 97 | dataset=sampled_dataset, 98 | batch_size=1, 99 | ) 100 | total_time = datetime.datetime.now() - start 101 | avg_time_per_sample = total_time / cls.num_samples 102 | formatted_time = "{:0>8}".format(str(avg_time_per_sample)) 103 | return formatted_time 104 | 105 | 106 | @register_metric("training_cost") 107 | class TrainingCost(LBTMetric): 108 | default_gpu_cost_per_hr = 0.35 # GCP cost for Tesla T4 109 | 110 | def run(cls, run_stats: dict, **kwargs) -> float: 111 | """ 112 | Return total cost to train model using GCP compute resource 113 | """ 114 | get_GPUS = GPUtil.getGPUs() 115 | instance_cost = None 116 | if len(get_GPUS) > 0: 117 | gpu_type = get_GPUS[0].name 118 | if gpu_type in INSTANCE_PRICES.keys(): 119 | instance_cost = INSTANCE_PRICES[gpu_type] 120 | if instance_cost is None: 121 | instance_cost = cls.default_gpu_cost_per_hr 122 | 123 | total_time_s = int(run_stats["hyperopt_results"]["time_total_s"]) 124 | total_time_hr = total_time_s / 3600 125 | return float(total_time_hr * instance_cost) 126 | 127 | 128 | @register_metric("training_speed") 129 | class TrainingSpeed(LBTMetric): 130 | num_gpus = 0 131 | 132 | def run( 133 | cls, 134 | dataset_path: str, 135 | train_batch_size: int, 136 | run_stats: dict, 137 | **kwargs, 138 | ) -> str: 139 | """ 140 | Returns avg. time per training step 141 | 142 | # Inputs 143 | :param model_path: (str) filepath to pre-trained model (directory that 144 | contains the model_hyperparameters.json). 145 | :param dataset_path: (str) filepath to dataset 146 | 147 | # Return 148 | :return: (str) avg. time per training step 149 | """ 150 | 151 | train_split_size = 0.7 152 | full_dataset = pd.read_csv(dataset_path) 153 | if "split" in full_dataset: 154 | total_samples = len(full_dataset[full_dataset["split"] == 0]) 155 | else: 156 | total_samples = int(train_split_size * len(full_dataset)) 157 | total_training_steps = int(total_samples / train_batch_size) 158 | time_per_batch = ( 159 | int(run_stats["hyperopt_results"]["time_this_iter_s"]) 160 | / total_training_steps 161 | ) 162 | formatted_time = "{:0>8}".format( 163 | str(datetime.timedelta(seconds=time_per_batch)) 164 | ) 165 | return formatted_time 166 | 167 | 168 | @register_metric("model_size") 169 | class ModelSize(LBTMetric): 170 | num_gpus = 0 171 | 172 | def run(cls, model_path: str, **kwargs): 173 | """ 174 | Computes minimum bytes required to store model to memory 175 | 176 | # Inputs 177 | :param model_path: (str) filepath to pre-trained model. 
178 | 
179 |         # Return
180 |         :return: (int) total bytes
181 |         :return: (str) total bytes scaled in string format
182 |         """
183 |         tensor_filepaths = collect_weights(
184 |             model_path=model_path,
185 |             tensors=None,
186 |             output_directory=".model_tensors",
187 |         )
188 |         total_size = 0
189 |         for fp in tensor_filepaths:
190 |             weight_tensor = np.load(fp)
191 |             total_size += weight_tensor.size
192 |         total_bytes = total_size * 32
193 |         scaled_bytes = scale_bytes(total_bytes)
194 |         model_size = {"total_bytes": total_bytes, "scaled_bytes": scaled_bytes}
195 |         return model_size
196 | 
197 | 
198 | @register_metric("carbon_footprint")
199 | class Energy(LBTMetric):
200 |     num_gpus = 0
201 | 
202 |     def run(cls, model_path: str, dataset_path, train_batch_size, run_stats):
203 |         """
204 |         Computes energy metrics for one training epoch
205 | 
206 |         # Inputs
207 |         :param model_path: (str) filepath to pre-trained model.
208 | 
209 |         # Return
210 |         :return: (dict) energy metrics for one pass of online training:
211 |             kg_carbon, total_power, PUE, and duration_of_train_step
212 |         """
213 |         # First copy model_path to temp directory
214 |         logging_path = os.path.join(
215 |             ENERGY_LOGGING_DIR, run_stats["hyperopt_results"]["experiment_id"]
216 |         )
217 |         tempdir = os.path.join(logging_path, "temp_model")
218 |         shutil.copytree(model_path, tempdir)
219 |         model = LudwigModel.load(tempdir)
220 | 
221 |         with ImpactTracker(logging_path):
222 |             model.train_online(dataset=dataset_path)
223 | 
224 |         data_interface = DataInterface([logging_path])
225 |         carbon_output = {
226 |             "kg_carbon": data_interface.kg_carbon,
227 |             "total_power": data_interface.total_power,
228 |             "PUE": data_interface.PUE,
229 |             "duration_of_train_step": data_interface.exp_len_hours,
230 |         }
231 | 
232 |         shutil.rmtree(tempdir)
233 | 
234 |         return carbon_output
235 | 
-------------------------------------------------------------------------------- /lbt/metrics/utils.py: --------------------------------------------------------------------------------
1 | def scale_bytes(bytes: int, suffix: str = "B") -> str:
2 |     factor = 1024
3 |     for unit in ["", "K", "M", "G", "T", "P"]:
4 |         if bytes < factor:
5 |             return f"{bytes:.2f}{unit}{suffix}"
6 |         bytes /= factor
7 | 
-------------------------------------------------------------------------------- /lbt/tools/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/HazyResearch/ludwig-benchmarking-toolkit/5cae39dba91f83717c467e1f3db982bb0fabaf70/lbt/tools/__init__.py
-------------------------------------------------------------------------------- /lbt/tools/robustnessgym/__init__.py: --------------------------------------------------------------------------------
1 | RGSUBPOPULATION_REGISTRY = {}
2 | 
3 | import importlib
4 | import sys
5 | import inspect
6 | 
7 | from .base_subpopulation import BaseSubpopulation
8 | from .robustnessgym import RG
9 | from robustnessgym.slicebuilders.subpopulation import Subpopulation
10 | 
11 | # from lbt.tools.robustnessgym import RG
12 | 
13 | 
14 | def register_lbtsubpop(name):
15 |     def register_subpop_cls(cls):
16 |         if not issubclass(cls, BaseSubpopulation):
17 |             raise ValueError(
18 |                 "Subpopulation ({}: {}) must extend lbt.tools.robustnessgym.base_subpopulation".format(
19 |                     name, cls.__name__
20 |                 )
21 |             )
22 |         RGSUBPOPULATION_REGISTRY[name] = cls
23 |         return cls
24 | 
25 |     return register_subpop_cls
26 | 
27 | 
28 | LBT_SUBPOPULATIONS = {
29 |     "lbt_subpops": "lbt.tools.robustnessgym.lbt_subpopulations",
30 | }
31 | 
32 | RG_SUBPOPULATIONS = {
33 |     "hans": 
"robustnessgym.slicebuilders.subpopulations.hans", 34 | "phrase": "robustnessgym.slicebuilders.subpopulations.phrase", 35 | } 36 | 37 | for name, module_name in LBT_SUBPOPULATIONS.items(): 38 | if module_name not in sys.modules: 39 | importlib.import_module(module_name) 40 | 41 | for name, module_name in RG_SUBPOPULATIONS.items(): 42 | for name, obj in inspect.getmembers(sys.modules[module_name]): 43 | if inspect.isclass(obj): 44 | if issubclass(obj, Subpopulation): 45 | RGSUBPOPULATION_REGISTRY[name] = obj 46 | -------------------------------------------------------------------------------- /lbt/tools/robustnessgym/base_subpopulation.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from abc import ABC 3 | import pandas as pd 4 | 5 | 6 | class BaseSubpopulation(ABC): 7 | def __init__(self, name): 8 | self.name = name 9 | 10 | @abc.abstractmethod 11 | def score_fn(self): 12 | """ scores a sample based on subpopulation the sample is a part of """ 13 | raise NotImplementedError() 14 | 15 | @abc.abstractmethod 16 | def get_subpops(self): 17 | raise NotImplementedError() 18 | 19 | @property 20 | def slice_name(self): 21 | return self.name -------------------------------------------------------------------------------- /lbt/tools/robustnessgym/lbt_subpopulations.py: -------------------------------------------------------------------------------- 1 | from lbt.tools.robustnessgym.base_subpopulation import BaseSubpopulation 2 | from lbt.tools.robustnessgym import register_lbtsubpop 3 | from robustnessgym import ( 4 | LengthSubpopulation, 5 | HasPhrase, 6 | HasAnyPhrase, 7 | ) 8 | 9 | import requests 10 | 11 | from robustnessgym import Spacy 12 | from robustnessgym import ScoreSubpopulation, Identifier 13 | import pandas as pd 14 | import itertools 15 | from functools import partial 16 | 17 | # TODO: ASN --> Identity Phrases, Emoji, 18 | 19 | 20 | @register_lbtsubpop("entities") 21 | class EntitySubpopulation(BaseSubpopulation): 22 | def __init__(self): 23 | self.name = "entities" 24 | self.entity_types = [ 25 | "PERSON", 26 | "NORP", 27 | "FAC", 28 | "ORG", 29 | "GPE", 30 | "LOC", 31 | "PRODUCT", 32 | "EVENT", 33 | "WORK_OF_ART", 34 | "LAW", 35 | "LANGUAGE", 36 | "DATE", 37 | "TIME", 38 | "PERCENT", 39 | "MONEY", 40 | "QUANTITY", 41 | "ORDINAL", 42 | "CARDINAL", 43 | ] 44 | 45 | def score_fn(self, batch, columns, entity, spacy): 46 | try: 47 | entites_list = Spacy.retrieve( 48 | batch, columns, proc_fns=Spacy.entities 49 | ) 50 | except ValueError: 51 | spacy_op = spacy(batch, columns) 52 | entites_list = Spacy.retrieve( 53 | spacy_op, columns, proc_fns=Spacy.entities 54 | ) 55 | overall_batch_score = [] 56 | for entities in entites_list: 57 | ents = set(entity["label"] for entity in entities) 58 | if entity in ents: 59 | overall_batch_score.append(1) 60 | else: 61 | overall_batch_score.append(0) 62 | return overall_batch_score 63 | 64 | def get_subpops(self, spacy): 65 | EntitiesSubpopulation = lambda entity, score_fn: ScoreSubpopulation( 66 | identifiers=[Identifier(f"{entity}")], 67 | intervals=[(1, 1)], 68 | score_fn=score_fn, 69 | ) 70 | 71 | entity_subpops = [] 72 | for entity in self.entity_types: 73 | entity_subpops.append( 74 | EntitiesSubpopulation( 75 | entity, partial(self.score_fn, entity=entity, spacy=spacy) 76 | ) 77 | ) 78 | return entity_subpops 79 | 80 | 81 | @register_lbtsubpop("pos") 82 | class POSSubpopulation(BaseSubpopulation): 83 | def __init__(self): 84 | self.name = "POS" 85 | self.universalpos = [ 86 | "ADJ", 87 | 
"ADP", 88 | "ADV", 89 | "AUX", 90 | "CONJ", 91 | "CCONJ", 92 | "DET", 93 | "INTJ", 94 | "NOUN", 95 | "NUM", 96 | "PART", 97 | "PRON", 98 | "PROPN", 99 | "PUNCT", 100 | "SCONJ", 101 | "SYM", 102 | "VERB", 103 | "X", 104 | "EOL", 105 | "SPACE", 106 | ] 107 | 108 | def score_fn(self, batch, columns, pos, spacy): 109 | try: 110 | spacy_annotations = Spacy.retrieve(batch, columns) 111 | except ValueError: 112 | spacy_op = spacy(batch, columns) 113 | spacy_annotations = Spacy.retrieve(spacy_op, columns) 114 | 115 | overall_batch_score = [] 116 | for sample_annotation in spacy_annotations: 117 | pos_in_sample = set( 118 | token["pos"] for token in sample_annotation["tokens"] 119 | ) 120 | if pos in pos_in_sample: 121 | overall_batch_score.append(1) 122 | else: 123 | overall_batch_score.append(0) 124 | 125 | return overall_batch_score 126 | 127 | def get_subpops(self, spacy): 128 | POSSubpopulation = lambda pos, score_fn: ScoreSubpopulation( 129 | identifiers=[Identifier(f"{pos}")], 130 | intervals=[(1, 1)], 131 | score_fn=score_fn, 132 | ) 133 | 134 | pos_subpops = [] 135 | for pos in self.universalpos: 136 | pos_subpops.append( 137 | POSSubpopulation( 138 | pos, partial(self.score_fn, pos=pos, spacy=spacy) 139 | ) 140 | ) 141 | return pos_subpops 142 | 143 | 144 | @register_lbtsubpop("gender_bias") 145 | class GenderBiasSubpopulation(BaseSubpopulation): 146 | def __init__(self): 147 | """ 148 | Measures performance on gender co-occurence pairs 149 | """ 150 | self.name = "gender_bias" 151 | self.female_identity = [ 152 | "she", 153 | "her", 154 | "herself", 155 | "girl", 156 | "woman", 157 | "women", 158 | "females", 159 | "female", 160 | "girls", 161 | "feminine", 162 | ] 163 | self.male_identity = [ 164 | "he", 165 | "him", 166 | "himself", 167 | "boy", 168 | "man", 169 | "men", 170 | "males", 171 | "male", 172 | "boys", 173 | "masculine", 174 | ] 175 | self.non_binary_identity = [ 176 | "they", 177 | "them", 178 | "theirs", 179 | "their", 180 | "themself", 181 | ] 182 | self.gender_categories = { 183 | "female": self.female_identity, 184 | "male": self.male_identity, 185 | "non_binary": self.non_binary_identity, 186 | } 187 | 188 | self.career_words = [ 189 | "executive", 190 | "professional", 191 | "corporation", 192 | "salary", 193 | "office", 194 | "business", 195 | "career", 196 | ] 197 | self.family_words = [ 198 | "home", 199 | "parents", 200 | "children", 201 | "family", 202 | "cousin", 203 | "marriage", 204 | "wedding", 205 | "relatives", 206 | ] 207 | self.math_words = [ 208 | "math", 209 | "algebra", 210 | "geometry", 211 | "calculus", 212 | "equation", 213 | "compute", 214 | "numbers", 215 | "addition", 216 | ] 217 | self.arts_words = [ 218 | "poetry", 219 | "art", 220 | "dance", 221 | "literature", 222 | "novel", 223 | "symphony", 224 | "drama", 225 | ] 226 | self.science_words = [ 227 | "science", 228 | "technology", 229 | "physics", 230 | "chemistry", 231 | "Einstein", 232 | "NASA", 233 | "experiment", 234 | "astronomy", 235 | ] 236 | 237 | self.domains = { 238 | "career": self.career_words, 239 | "family": self.family_words, 240 | "math": self.math_words, 241 | "arts": self.arts_words, 242 | "science": self.science_words, 243 | } 244 | 245 | def score_fn(self, batch, columns, pair): 246 | overall_batch_score = [] 247 | for text in batch[columns[0]]: 248 | if pair[0] in text and pair[1] in text: 249 | overall_batch_score.append(1) 250 | else: 251 | overall_batch_score.append(0) 252 | return overall_batch_score 253 | 254 | def build_cooccurence_pairs(self, gender_categories: 
dict, domains: dict):
255 |         bias_pairs = []
256 |         for _, gender_list in gender_categories.items():
257 |             for _, phrase_list in domains.items():
258 |                 bias_pairs.extend(
259 |                     [
260 |                         pair
261 |                         for pair in itertools.product(gender_list, phrase_list)
262 |                     ]
263 |                 )
264 |         return bias_pairs
265 | 
266 |     def get_subpops(self, spacy):
267 |         bias_pairs = self.build_cooccurence_pairs(
268 |             self.gender_categories, self.domains
269 |         )
270 |         BiasCooccurenceSubpopulation = (
271 |             lambda pair, score_fn: ScoreSubpopulation(
272 |                 identifiers=[Identifier(f"{pair[0]}_{pair[1]}")],
273 |                 intervals=[(1, 1)],
274 |                 score_fn=score_fn,
275 |             )
276 |         )
277 | 
278 |         bias_subpops = []
279 |         for pair in bias_pairs:
280 |             bias_subpops.append(
281 |                 BiasCooccurenceSubpopulation(
282 |                     pair, partial(self.score_fn, pair=pair)
283 |                 )
284 |             )
285 |         return bias_subpops
286 | 
287 | 
288 | @register_lbtsubpop("positive_sentiment")
289 | class PositiveSentimentSubpopulation(BaseSubpopulation):
290 |     def __init__(self):
291 |         """
292 |         Slice of dataset which contains positive sentiment carrying words
293 |         """
294 |         self.name = "positive_sentiment"
295 |         self.positive_words_list = "https://gist.githubusercontent.com/mkulakowski2/4289437/raw/1bb4d7f9ee82150f339f09b5b1a0e6823d633958/positive-words.txt"
296 | 
297 |     def score_fn(self, batch, columns):
298 |         pass
299 | 
300 |     def get_positive_words(self):
301 |         response = requests.get(self.positive_words_list)
302 |         _, words = (
303 |             response.text.split("\n\n")[0],
304 |             response.text.split("\n\n")[1],
305 |         )
306 |         word_list = words.split("\n")
307 |         return word_list
308 | 
309 |     def get_subpops(self, spacy):
310 |         return [
311 |             HasAnyPhrase(
312 |                 phrase_groups=[self.get_positive_words()],
313 |                 identifiers=[Identifier("Positive Sentiment Words")],
314 |             )
315 |         ]
316 | 
317 | 
318 | @register_lbtsubpop("negative_sentiment")
319 | class NegativeSentimentSubpopulation(BaseSubpopulation):
320 |     def __init__(self):
321 |         """
322 |         Slice of dataset which contains negative sentiment carrying words
323 |         """
324 |         self.name = "negative_sentiment"
325 |         self.negative_words_list = "https://gist.githubusercontent.com/mkulakowski2/4289441/raw/dad8b64b307cd6df8068a379079becbb3f91101a/negative-words.txt"
326 | 
327 |     def score_fn(self, batch, columns):
328 |         pass
329 | 
330 |     def get_negative_words(self):
331 |         response = requests.get(self.negative_words_list)
332 |         _, words = (
333 |             response.text.split("\n\n")[0],
334 |             response.text.split("\n\n")[1],
335 |         )
336 |         word_list = words.split("\n")
337 |         return word_list
338 | 
339 |     def get_subpops(self, spacy):
340 |         return [
341 |             HasAnyPhrase(
342 |                 phrase_groups=[self.get_negative_words()],
343 |                 identifiers=[Identifier("Negative Sentiment Words")],
344 |             )
345 |         ]
346 | 
347 | 
348 | @register_lbtsubpop("naughty_and_obscene")
349 | class NaughtyObsceneSubpopulation(BaseSubpopulation):
350 |     def __init__(self):
351 |         """
352 |         Slice of dataset which contains naughty + obscene words
353 |         """
354 |         self.name = "naughty_and_obscene"
355 |         self.word_list = "https://raw.githubusercontent.com/LDNOOBW/List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words/master/en"
356 | 
357 |     def score_fn(self, batch, columns):
358 |         pass
359 | 
360 |     def get_naughty_obscene_word_list(self):
361 |         response = requests.get(self.word_list)
362 |         return response.text.split("\n")
363 | 
364 |     def get_subpops(self, spacy):
365 |         return [
366 |             HasAnyPhrase(
367 |                 phrase_groups=[self.get_naughty_obscene_word_list()],
368 |                 identifiers=[Identifier("Naughty and Obscene 
Words")], 369 | ) 370 | ] 371 | 372 | 373 | @register_lbtsubpop("sentence_length") 374 | class SentenceLengthSubpopulation(BaseSubpopulation): 375 | def __init__(self): 376 | """ 377 | Sentence length based slices 378 | """ 379 | self.name = "sentence_length" 380 | 381 | def score_fn(self, batch, columns): 382 | pass 383 | 384 | def get_subpops(self, spacy): 385 | return [ 386 | LengthSubpopulation( 387 | intervals=[ 388 | (0, 20), 389 | (20, 40), 390 | (40, 60), 391 | (60, 80), 392 | (80, 100), 393 | (100, 120), 394 | (120, 140), 395 | ] 396 | ) 397 | ] 398 | -------------------------------------------------------------------------------- /lbt/tools/robustnessgym/robustnessgym.py: -------------------------------------------------------------------------------- 1 | import os 2 | from functools import partial 3 | from typing import Union 4 | 5 | import numpy as np 6 | import pandas as pd 7 | from lbt.datasets import DATASET_REGISTRY 8 | from lbt.tools.robustnessgym import RGSUBPOPULATION_REGISTRY 9 | from ludwig.api import LudwigModel 10 | from lbt.tools.utils import get_dataset_features 11 | 12 | from robustnessgym import Dataset, Identifier, Spacy 13 | from robustnessgym.core.testbench import DevBench 14 | 15 | from .base_subpopulation import BaseSubpopulation 16 | 17 | OUTPUT_FEATURES = None 18 | 19 | 20 | def get_dataset_with_predictions( 21 | dataset: pd.DataFrame, 22 | models: dict, 23 | output_features: list, 24 | ): 25 | for model_name, path_to_model in models.items(): 26 | model = LudwigModel.load(model_dir=path_to_model) 27 | (predictions, output_directory) = model.predict(dataset) 28 | for output_feat in output_features: 29 | dataset[f"{model_name}_{output_feat}_pred"] = ( 30 | predictions[f"{output_feat}_predictions"] 31 | .astype(float) 32 | .tolist() 33 | ) 34 | dataset.rename( 35 | {output_feat: f"{output_feat}_label"}, axis=1, inplace=True 36 | ) 37 | return dataset 38 | 39 | 40 | def accuracy_eval_fn(model, dataset): 41 | global OUTPUT_FEATURES 42 | output_feat_accuracy = [] 43 | # aggregate accuracy over all output features 44 | for output_feat in OUTPUT_FEATURES: 45 | accuracy = np.mean( 46 | np.array(dataset[f"{model}_{output_feat}_pred"]) 47 | == (np.array(dataset[f"{output_feat}_label"])) 48 | ) 49 | output_feat_accuracy.append(accuracy) 50 | return np.mean(output_feat_accuracy) 51 | 52 | 53 | def RG( 54 | dataset_name: str, 55 | models: dict, 56 | path_to_dataset: str, 57 | subpopulations: list, 58 | output_directory: str, 59 | input_features: Union[str, list] = None, 60 | output_features: Union[str, list] = None, 61 | output_report_name: str = "rg_report.png", 62 | ): 63 | """ 64 | Runs RG evaluation on dataset across specified models 65 | 66 | # Inputs 67 | :param dataset_name: (str) name of dataset 68 | :param models: (dict) mapping between model name and saved model directory 69 | :param path_to_dataset: (str) location of dataset 70 | :param input_features: (list or str) names of input feature 71 | :param output_features: (list or str) names of output feature 72 | :param subpopulations: (list) subpopulations to evaluate model performance 73 | :param output_directory: (str) location to save all outputs of RG analysis 74 | :param output_report_name: (str) name of generated file 75 | 76 | 77 | # Return 78 | :return: (pd.DataFrame) performance metrics from RG analysis 79 | """ 80 | 81 | # first check if slices are valid 82 | for subpop in subpopulations: 83 | if subpop not in RGSUBPOPULATION_REGISTRY.keys(): 84 | raise ValueError( 85 | f"{subpop} is not in the list of 
supported RG Subpopulations\n"
86 |                 f"Please see lbt.tools.robustnessgym.RGSUBPOPULATION_REGISTRY for available subpopulations"
87 |             )
88 | 
89 |     # if user has not provided input/output feature info, collect it manually
90 |     if input_features is None or output_features is None:
91 |         (input_features, output_features) = get_dataset_features(dataset_name)
92 | 
93 |     else:
94 |         if isinstance(input_features, str):
95 |             input_features = [input_features]
96 |         if isinstance(output_features, str):
97 |             output_features = [output_features]
98 | 
99 |     global OUTPUT_FEATURES
100 |     OUTPUT_FEATURES = output_features
101 | 
102 |     # load data
103 |     # TODO (ASN): fix logic for extracting eval set
104 |     dataset = pd.read_csv(path_to_dataset)
105 | 
106 |     # get preds
107 |     dataset = get_dataset_with_predictions(dataset, models, output_features)
108 |     # cast as RG Dataset
109 |     dataset = Dataset.from_pandas(dataset, Identifier(dataset_name))
110 | 
111 |     # initialize spacy
112 |     spacy = Spacy()
113 |     dataset = spacy(dataset, input_features)
114 | 
115 |     # for each subpopulation, get subpopulation functions
116 |     selected_subpopulations = []
117 |     for subpop in subpopulations:
118 |         if issubclass(RGSUBPOPULATION_REGISTRY[subpop], BaseSubpopulation):
119 |             subpops = RGSUBPOPULATION_REGISTRY[subpop]().get_subpops(spacy)
120 |         else:
121 |             subpops = RGSUBPOPULATION_REGISTRY[subpop]()
122 |         if not isinstance(subpops, list):
123 |             subpops = [subpops]
124 |         selected_subpopulations.extend(subpops)
125 | 
126 |     # for each subpopulation, get slices
127 |     slices = []
128 |     for subpop in selected_subpopulations:
129 |         slices.extend(subpop(dataset, input_features)[0])
130 | 
131 |     # build test bench
132 |     dataset_db = DevBench(dataset)
133 |     # add slices to test bench
134 |     dataset_db.add_slices(slices)
135 | 
136 |     dataset_db.add_aggregators(
137 |         {
138 |             model: {"accuracy": partial(accuracy_eval_fn, model)}
139 |             for model in models.keys()
140 |         }
141 |     )
142 |     # compute metrics
143 |     metrics = dataset_db.metrics
144 | 
145 |     # save metrics dataframe
146 |     metrics_df = pd.DataFrame(metrics)
147 |     metrics_df.to_csv(os.path.join(output_directory, f"{dataset_name}_rg.csv"))
148 | 
149 |     # create report
150 |     dataset_db.create_report().figure().write_image(
151 |         output_report_name, engine="kaleido"
152 |     )
153 |     return metrics
154 | 
-------------------------------------------------------------------------------- /lbt/tools/textattack/__init__.py: --------------------------------------------------------------------------------
1 | from .textattack import (
2 |     attack,
3 |     augment,
4 |     ATTACKRECIPE_REGISTRY,
5 |     AUGMENTATIONRECIPE_REGISTRY,
6 | )
7 | 
-------------------------------------------------------------------------------- /lbt/tools/textattack/textattack.py: --------------------------------------------------------------------------------
1 | import inspect
2 | import sys
3 | import os
4 | import pandas as pd
5 | from pandas.core.common import SettingWithCopyWarning
6 | 
7 | import warnings
8 | 
9 | warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
10 | 
11 | 
12 | from ludwig.api import LudwigModel
13 | 
14 | from textattack.attack_recipes import AttackRecipe
15 | from textattack.attack_results import (
16 |     MaximizedAttackResult,
17 |     SuccessfulAttackResult,
18 | )
19 | from textattack.augmentation import Augmenter
20 | from textattack.models.wrappers import ModelWrapper
21 | 
22 | from lbt.tools.utils import get_dataset_features
23 | 
24 | 
25 | ATTACKRECIPE_REGISTRY = {}
26 | AUGMENTATIONRECIPE_REGISTRY = {}
27 | 
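# Note (illustrative): the two loops below fill these registries by introspecting
# the textattack package. Every AttackRecipe subclass exposed by
# textattack.attack_recipes (except the AttackRecipe base class) and every
# Augmenter subclass exposed by textattack.augmentation (except Augmenter itself)
# is stored under its class name. Assuming textattack exposes the recipes used as
# defaults later in this module, lookups after import would resemble:
#   ATTACKRECIPE_REGISTRY["DeepWordBugGao2018"]       # an attack recipe class
#   AUGMENTATIONRECIPE_REGISTRY["CharSwapAugmenter"]  # an augmenter class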
28 | for key, obj in inspect.getmembers(sys.modules["textattack.attack_recipes"]): 29 | if inspect.isclass(obj): 30 | if issubclass(obj, AttackRecipe) and key != "AttackRecipe": 31 | ATTACKRECIPE_REGISTRY[key] = obj 32 | 33 | 34 | for key, obj in inspect.getmembers(sys.modules["textattack.augmentation"]): 35 | if inspect.isclass(obj): 36 | if issubclass(obj, Augmenter) and key != "Augmenter": 37 | AUGMENTATIONRECIPE_REGISTRY[key] = obj 38 | 39 | 40 | class CustomLudwigModelWrapper(ModelWrapper): 41 | def __init__( 42 | self, 43 | path_to_model: str, 44 | input_feature_name: str, 45 | output_feature_name: str, 46 | ): 47 | self.model = LudwigModel.load(path_to_model) 48 | self.input_feature_name = input_feature_name 49 | self.output_feature_name = output_feature_name 50 | 51 | def __call__(self, text_list): 52 | input_text_df = pd.DataFrame( 53 | text_list, columns=[self.input_feature_name] 54 | ) 55 | model_outputs = self.model.predict(input_text_df) 56 | pred_outputs = model_outputs[0] 57 | columns = [ 58 | col 59 | for col in pred_outputs.columns 60 | if self.output_feature_name in col 61 | ] 62 | preds = pred_outputs[columns].iloc[:, 1:-1].to_numpy() 63 | return preds 64 | 65 | 66 | def load_dataset( 67 | path_to_dataset: str, input_feature_name: str, output_feature_name: str 68 | ): 69 | dataset = pd.read_csv(path_to_dataset) 70 | dataset = dataset[0:10] 71 | if "split" not in dataset.columns: 72 | warnings.warn( 73 | "Dataset doesn't contain split column. Attacking entire dataset" 74 | ) 75 | test_split = dataset[[input_feature_name, output_feature_name]] 76 | else: 77 | test_split = dataset[dataset["split"] == 2][ 78 | [input_feature_name, output_feature_name] 79 | ] 80 | return test_split 81 | 82 | 83 | def build_custom_ta_dataset( 84 | path_to_dataset: str, input_feature_name: str, output_feature_name: str 85 | ): 86 | dataset = load_dataset( 87 | path_to_dataset, input_feature_name, output_feature_name 88 | ) 89 | dataset[output_feature_name] = ( 90 | dataset[output_feature_name].astype(int).tolist() 91 | ) 92 | tupelize = dataset.to_records(index=False) 93 | return list(tupelize) 94 | 95 | 96 | def attack( 97 | dataset_name: str, 98 | path_to_dataset: str, 99 | path_to_model: str, 100 | input_feature_name: str = None, 101 | output_feature_name: str = None, 102 | attack_recipe: str = "DeepWordBugGao2018", 103 | output_directory: str = "./", 104 | ): 105 | if input_feature_name is None or output_feature_name is None: 106 | (input_features, output_features) = get_dataset_features(dataset_name) 107 | input_feature_name = input_features[0] 108 | output_feature_name = output_features[0] 109 | 110 | custom_model = CustomLudwigModelWrapper( 111 | path_to_model=path_to_model, 112 | input_feature_name=input_feature_name, 113 | output_feature_name=output_feature_name, 114 | ) 115 | 116 | custom_datset = build_custom_ta_dataset( 117 | path_to_dataset=path_to_dataset, 118 | input_feature_name=input_feature_name, 119 | output_feature_name=output_feature_name, 120 | ) 121 | 122 | if attack_recipe not in ATTACKRECIPE_REGISTRY.keys(): 123 | raise ValueError( 124 | f"{attack_recipe} not valid.\n" 125 | f"Please check ATTACKRECIPE_REGISTRY to see valid recipes" 126 | ) 127 | attack = ATTACKRECIPE_REGISTRY[attack_recipe].build(custom_model) 128 | results_iterable = attack.attack_dataset(custom_datset) 129 | 130 | results = { 131 | "original_text": [], 132 | "perturbed_text": [], 133 | "original_result": [], 134 | "original_confidence_score": [], 135 | "perturbed_result": [], 136 | 
"perturbed_confidence_score": [], 137 | "success": [], 138 | } 139 | 140 | for result in results_iterable: 141 | results["original_text"].append(result.original_text()) 142 | results["perturbed_text"].append(result.perturbed_text()) 143 | results["original_result"].append( 144 | result.original_result.raw_output.argmax().item() 145 | ) 146 | results["original_confidence_score"].append( 147 | result.original_result.raw_output[ 148 | result.original_result.raw_output.argmax() 149 | ].item() 150 | ) 151 | results["perturbed_result"].append( 152 | result.perturbed_result.raw_output.argmax().item() 153 | ) 154 | results["perturbed_confidence_score"].append( 155 | result.perturbed_result.raw_output[ 156 | result.perturbed_result.raw_output.argmax() 157 | ].item() 158 | ) 159 | if type(result) in [SuccessfulAttackResult, MaximizedAttackResult]: 160 | results["success"].append(1) 161 | else: 162 | results["success"].append(0) 163 | 164 | results_df = pd.DataFrame.from_dict(results) 165 | output_path = os.path.join( 166 | output_directory, f"{dataset_name}_{attack_recipe}.csv" 167 | ) 168 | results_df.to_csv(output_path) 169 | return results_df 170 | 171 | 172 | def augment( 173 | dataset_name: str, 174 | path_to_dataset: str, 175 | input_feature_name: str = None, 176 | output_feature_name: str = None, 177 | augmenter_name: str = "CharSwapAugmenter", 178 | pct_words_to_swap: float = 0.1, 179 | transformations_per_example: int = 1, 180 | save_path: str = "augmented_ds.csv", 181 | save=True, 182 | ): 183 | if input_feature_name is None or output_feature_name is None: 184 | (input_features, output_features) = get_dataset_features(dataset_name) 185 | input_feature_name = input_features[0] 186 | output_feature_name = output_features[0] 187 | 188 | dataset = load_dataset( 189 | path_to_dataset, input_feature_name, output_feature_name 190 | ) 191 | 192 | if augmenter_name not in AUGMENTATIONRECIPE_REGISTRY.keys(): 193 | raise ValueError( 194 | f"{augmenter_name} not valid.\n" 195 | f"Please check AUGMENTATIONRECIPE_REGISTRY to see valid recipes" 196 | ) 197 | 198 | augmenter = AUGMENTATIONRECIPE_REGISTRY[augmenter_name]( 199 | pct_words_to_swap=pct_words_to_swap, 200 | transformations_per_example=transformations_per_example, 201 | ) 202 | 203 | text_df = ( 204 | dataset[[input_feature_name]] 205 | .applymap(augmenter.augment) 206 | .applymap(lambda sent: sent[0]) 207 | ) 208 | 209 | augmented_ds = dataset 210 | augmented_ds.loc[:, input_feature_name] = text_df[input_feature_name] 211 | 212 | if save: 213 | augmented_ds.to_csv(save_path) 214 | 215 | return augmented_ds 216 | -------------------------------------------------------------------------------- /lbt/tools/utils.py: -------------------------------------------------------------------------------- 1 | from lbt.utils.experiment_utils import load_yaml 2 | from globals import DATASET_METADATA_FILE 3 | from lbt.datasets import DATASET_REGISTRY 4 | 5 | 6 | def get_dataset_features(dataset_name): 7 | 8 | if dataset_name not in DATASET_REGISTRY: 9 | raise ValueError( 10 | f"{dataset_name} not found in dataset registry\n" 11 | f"Please check that it has been properly added" 12 | ) 13 | 14 | dataset_metadata = load_yaml(DATASET_METADATA_FILE) 15 | for ds, ds_metadata in dataset_metadata.items(): 16 | if dataset_name == ds_metadata["data_class"]: 17 | input_features = [ 18 | input_feat["name"] 19 | for input_feat in ds_metadata["input_features"] 20 | ] 21 | output_features = [ 22 | output_feat["name"] 23 | for output_feat in ds_metadata["output_features"] 24 
| ] 25 | return (input_features, output_features) 26 | 27 | raise ValueError( 28 | f"{dataset_name} not found in {DATASET_METADATA_FILE}\n" 29 | f"Please check that it has been properly added" 30 | ) 31 | -------------------------------------------------------------------------------- /lbt/utils/__pycache__/experiment_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/ludwig-benchmarking-toolkit/5cae39dba91f83717c467e1f3db982bb0fabaf70/lbt/utils/__pycache__/experiment_utils.cpython-36.pyc -------------------------------------------------------------------------------- /lbt/utils/__pycache__/experiment_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/ludwig-benchmarking-toolkit/5cae39dba91f83717c467e1f3db982bb0fabaf70/lbt/utils/__pycache__/experiment_utils.cpython-37.pyc -------------------------------------------------------------------------------- /lbt/utils/__pycache__/experiment_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/ludwig-benchmarking-toolkit/5cae39dba91f83717c467e1f3db982bb0fabaf70/lbt/utils/__pycache__/experiment_utils.cpython-38.pyc -------------------------------------------------------------------------------- /lbt/utils/__pycache__/metadata_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/ludwig-benchmarking-toolkit/5cae39dba91f83717c467e1f3db982bb0fabaf70/lbt/utils/__pycache__/metadata_utils.cpython-36.pyc -------------------------------------------------------------------------------- /lbt/utils/__pycache__/metadata_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/ludwig-benchmarking-toolkit/5cae39dba91f83717c467e1f3db982bb0fabaf70/lbt/utils/__pycache__/metadata_utils.cpython-37.pyc -------------------------------------------------------------------------------- /lbt/utils/experiment_utils.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import copy 3 | import hashlib 4 | import json 5 | import logging 6 | import math 7 | import os 8 | from typing import Union 9 | from lbt.datasets import build_dataset 10 | from lbt.metrics import get_experiment_metadata 11 | 12 | import globals 13 | import pandas as pd 14 | import yaml 15 | 16 | 17 | def get_gpu_list(): 18 | try: 19 | return os.environ["CUDA_VISIBLE_DEVICES"] 20 | except KeyError: 21 | return None 22 | 23 | 24 | def compute_additional_metadata( 25 | experiment_attr: dict, 26 | hyperopt_results: list, 27 | tune_executor: str, 28 | ): 29 | hyperopt_run_data = get_model_ckpt_paths( 30 | hyperopt_results, experiment_attr["output_dir"], executor=tune_executor 31 | ) 32 | sampled_params = {} 33 | all_experiment_results = [] 34 | # ensures that all numerical values are of type float 35 | format_fields_float(hyperopt_results) 36 | for run in hyperopt_run_data: 37 | new_config = substitute_dict_parameters( 38 | copy.deepcopy(experiment_attr["model_config"]), 39 | parameters=run["hyperopt_results"]["parameters"], 40 | ) 41 | del new_config["hyperopt"] 42 | 43 | # do some accounting of duplicate hyperparam configs (this count will 44 | # be added to the dict which will be 
hashed for the elastic document 45 | # id 46 | param_hash = hash_dict(run["hyperopt_results"]["parameters"]) 47 | if param_hash in sampled_params: 48 | sampled_params[param_hash] += 1 49 | else: 50 | sampled_params[param_hash] = 1 51 | 52 | document = { 53 | "hyperopt_results": run["hyperopt_results"], 54 | "model_path": run["model_path"], 55 | } 56 | 57 | num_gpus = len(GPUtil.getGPUs()) 58 | 59 | get_experiment_metadata( 60 | document, 61 | model_path=run["model_path"], 62 | data_path=experiment_attr["dataset_path"], 63 | run_stats=run, 64 | num_gpus=num_gpus 65 | ) 66 | 67 | formatted_document = { 68 | "encoder": experiment_attr["encoder"], 69 | "dataset": experiment_attr["dataset"], 70 | } 71 | formatted_document.update(document) 72 | formatted_document.update( 73 | {"hyperopt_exp_config": experiment_attr["model_config"]} 74 | ) 75 | 76 | formatted_document["sampled_run_config"] = new_config 77 | all_experiment_results.append(formatted_document) 78 | return all_experiment_results 79 | 80 | 81 | def download_dataset(dataset_class: str, cache_dir: str) -> str: 82 | data = build_dataset(dataset_name=dataset_class, cache_dir=cache_dir) 83 | if dataset_class == "SST2": 84 | data = build_dataset( 85 | dataset_name=dataset_class, 86 | cache_dir=cache_dir, 87 | include_subtrees=True, 88 | remove_duplicates=True, 89 | ) 90 | elif dataset_class == "SST5": 91 | data = build_dataset( 92 | dataset_name=dataset_class, 93 | cache_dir=cache_dir, 94 | include_subtrees=True, 95 | ) 96 | elif dataset_class == "MDGenderBias": 97 | data = build_dataset( 98 | dataset_name=dataset_class, 99 | cache_dir=cache_dir, 100 | task="wizard", 101 | ) 102 | 103 | return os.path.join( 104 | data.processed_dataset_path, data.config["csv_filename"] 105 | ) 106 | 107 | 108 | def process_dataset(dataset_path: str): 109 | dataset = pd.read_csv(dataset_path) 110 | if "split" in dataset.columns: 111 | train_df = dataset[dataset["split"] == 0] 112 | val_df = dataset[dataset["split"] == 1] 113 | test_df = dataset[dataset["split"] == 2] 114 | 115 | # no validation set provided, sample 10% of train set 116 | if len(val_df) == 0: 117 | val_df = train_df.sample(frac=0.1, replace=False) 118 | train_df = train_df.drop(val_df.index) 119 | 120 | val_df.split = 1 121 | 122 | concat_df = pd.concat([train_df, val_df, test_df], ignore_index=True) 123 | concat_df.to_csv(dataset_path, index=False) 124 | return 125 | 126 | 127 | def hash_dict(d: dict, max_length: Union[int, None] = 6) -> bytes: 128 | s = json.dumps(d, sort_keys=True, ensure_ascii=True) 129 | h = hashlib.md5(s.encode()) 130 | d = h.digest() 131 | b = base64.b64encode(d) 132 | return b[:max_length] 133 | 134 | 135 | def load_yaml(filename: str) -> dict: 136 | with open(filename) as f: 137 | file_contents = yaml.load(f, Loader=yaml.SafeLoader) 138 | return file_contents 139 | 140 | 141 | def set_globals(args): 142 | """ set global vars based on command line args """ 143 | globals.EXPERIMENT_CONFIGS_DIR = args.hyperopt_config_dir 144 | logging.info(f"EXPERIMENT_CONFIG_DIR set to {args.hyperopt_config_dir}") 145 | globals.EXPERIMENT_OUTPUT_DIR = args.experiment_output_dir 146 | logging.info(f"EXPERIMENT_OUTPUT_DIR set to {args.experiment_output_dir}") 147 | globals.RUNTIME_ENV = args.run_environment 148 | logging.info(f"RUNTIME_ENV set to {args.run_environment}") 149 | globals.DATASET_CACHE_DIR = args.dataset_cache_dir 150 | logging.info(f"DATASET_CACHE_DIR set to {args.dataset_cache_dir}") 151 | 152 | if args.datasets is None: 153 | raise ValueError( 154 | "Please specify a 
dataset or list of dataset." 155 | "Use python experiment_driver.py --h to see: list of available datasets." 156 | ) 157 | else: 158 | if "smoke" in args.datasets: 159 | globals.DATASET_LIST = list(globals.SMOKE_DATASETS.keys()) 160 | logging.info("Setting global datasets list to smoke datasets...") 161 | else: 162 | globals.DATASETS_LIST = args.datasets 163 | logging.info(f"Setting global datasets list to {args.datasets}") 164 | 165 | if "all" not in args.custom_model_list: 166 | encoders_list = [] 167 | for enc_name in args.custom_model_list: 168 | if enc_name in globals.ENCODER_HYPEROPT_FILENAMES.keys(): 169 | encoders_list.append( 170 | globals.ENCODER_HYPEROPT_FILENAMES[enc_name] 171 | ) 172 | globals.ENCODER_FILE_LIST = encoders_list 173 | 174 | # create experiment output directories (if they don't already exist) 175 | for exp_dir in [ 176 | globals.EXPERIMENT_CONFIGS_DIR, 177 | globals.EXPERIMENT_OUTPUT_DIR, 178 | globals.DATASET_CACHE_DIR, 179 | globals.ENERGY_LOGGING_DIR, 180 | ]: 181 | if not os.path.isdir(exp_dir): 182 | os.mkdir(exp_dir) 183 | 184 | 185 | def format_fields_float(field_list: list) -> list: 186 | """ formats fields in elastic db entries """ 187 | 188 | def replace_ints(d): 189 | for k, v in d.items(): 190 | if isinstance(v, dict): 191 | replace_ints(v) 192 | else: 193 | if type(v) == int: 194 | v = float(v) 195 | if type(v) == list and type(v[0]) not in [list, dict]: 196 | new_v = [] 197 | for x in v: 198 | if isinstance(x, (int, float)) and math.isnan(x): 199 | new_v.append(0.0) 200 | else: 201 | new_v.append(x) 202 | v = new_v 203 | if isinstance(v, (int, float)) and math.isnan(v): 204 | v = 0.0 205 | d.update({k: v}) 206 | return d 207 | 208 | formatted_out = [replace_ints(d) for d in field_list] 209 | return formatted_out 210 | 211 | 212 | def decode_str_dicts(d: str) -> dict: 213 | json_acceptable_string = d.replace("'", '"') 214 | dct = json.loads(json_acceptable_string) 215 | return dct 216 | 217 | 218 | def substitute_dict_parameters(original_dict: dict, parameters: dict) -> dict: 219 | """ Fills in original ludwig config w/actual sampled hyperopt values """ 220 | 221 | def subsitute_param(dct: dict, path: list, val): 222 | if len(path) == 1: 223 | dct[path[0]] = val 224 | return dct 225 | else: 226 | key = path.pop(0) 227 | subsitute_param(dct[key], path, val) 228 | 229 | # in some cases the dict is encoded as a str 230 | if type(parameters) == str: 231 | parameters = decode_str_dicts(parameters) 232 | 233 | for key, value in parameters.items(): 234 | path = key.split(".") 235 | # Check for input/output parameter edge cases 236 | if path[0] not in original_dict.keys(): 237 | # check if param is associate with output feature 238 | for idx, out_feature in enumerate( 239 | original_dict["output_features"] 240 | ): 241 | if out_feature["name"] == path[0]: 242 | original_dict["output_features"][idx][path[1]] = value 243 | break 244 | 245 | for idx, out_feature in enumerate(original_dict["input_features"]): 246 | if out_feature["name"] == path[0]: 247 | original_dict["input_features"][idx][path[1]] = value 248 | break 249 | else: 250 | subsitute_param(original_dict, path, value) 251 | return original_dict 252 | 253 | 254 | def compare_json_enc_configs(cf_non_encoded, cf_json_encoded): 255 | """ compars to json encoded dicts """ 256 | for key, value in cf_non_encoded.items(): 257 | value_other = cf_json_encoded[key] 258 | if type(value) == list: 259 | value_other = json.loads(value_other) 260 | if type(value) == str: 261 | value_other = json.loads(value_other) 
262 | if type(value) == int: 263 | value_other = int(value_other) 264 | if value_other != value: 265 | return False 266 | else: 267 | return True 268 | 269 | 270 | def decode_json_enc_dict(encoded_dict, json_enc_params: list): 271 | for key, value in encoded_dict.items(): 272 | if key in json_enc_params and type(value) == str: 273 | encoded_dict[key] = json.loads(value) 274 | return encoded_dict 275 | 276 | 277 | def get_ray_tune_trial_dirs(base_dir: str, trial_dirs): 278 | """ returns all output directories of individual ray.tune trials """ 279 | if "params.json" in os.listdir(base_dir): 280 | trial_dirs.append(base_dir) 281 | else: 282 | for d in os.scandir(base_dir): 283 | if os.path.isdir(d): 284 | get_ray_tune_trial_dirs(d, trial_dirs) 285 | return trial_dirs 286 | 287 | 288 | def get_lastest_checkpoint(trial_dir: str, idx: int = -1): 289 | checkpoints = [ 290 | ckpt_dir 291 | for ckpt_dir in os.scandir(trial_dir) 292 | if os.path.isdir(ckpt_dir) and "checkpoint" in ckpt_dir.path 293 | ] 294 | sorted_cps = sorted(checkpoints, key=lambda d: d.path) 295 | if idx >= len(sorted_cps): 296 | idx = -1 297 | return sorted_cps[idx] 298 | 299 | 300 | def get_model_ckpt_paths( 301 | hyperopt_training_stats: list, output_dir: str, executor: str = "ray" 302 | ): 303 | """ 304 | maps output of individual tial run statistics to associated 305 | output directories. Necessary for accessing model checkpoints 306 | """ 307 | if executor == "ray": # folder construction is different 308 | hyperopt_run_metadata = [] 309 | # populate paths 310 | trial_dirs = [] 311 | for path in os.scandir(output_dir): 312 | if os.path.isdir(path): 313 | trial_dirs.extend(get_ray_tune_trial_dirs(path, [])) 314 | for hyperopt_run in hyperopt_training_stats: 315 | hyperopt_run_metadata.append( 316 | { 317 | "hyperopt_results": decode_json_enc_dict( 318 | hyperopt_run, 319 | ["parameters", "training_stats", "eval_stats"], 320 | ), 321 | "model_path": None, 322 | } 323 | ) 324 | for path in trial_dirs: 325 | if os.path.getsize(os.path.join(path, "progress.csv")) > 0: 326 | training_progress = pd.read_csv( 327 | os.path.join(path, "progress.csv") 328 | ) 329 | out_parameters = json.loads( 330 | training_progress.iloc[-1]["parameters"] 331 | ) 332 | out_eval_stats = json.loads( 333 | training_progress.iloc[-1]["eval_stats"] 334 | ) 335 | # compare total time, metric score, and parameters 336 | output_total_time = training_progress.iloc[-1]["time_total_s"] 337 | output_metric_score = training_progress.iloc[-1][ 338 | "metric_score" 339 | ] 340 | for hyperopt_run in hyperopt_run_metadata: 341 | run_total_time = hyperopt_run["hyperopt_results"][ 342 | "time_total_s" 343 | ] 344 | run_metric_score = hyperopt_run["hyperopt_results"][ 345 | "metric_score" 346 | ] 347 | run_params = hyperopt_run["hyperopt_results"]["parameters"] 348 | run_eval_stats = hyperopt_run["hyperopt_results"][ 349 | "eval_stats" 350 | ] 351 | if hash_dict(run_eval_stats) == hash_dict(out_eval_stats): 352 | best_ckpt_idx = training_progress[ 353 | abs( 354 | training_progress["metric_score"] 355 | - hyperopt_run["hyperopt_results"][ 356 | "metric_score" 357 | ] 358 | ) 359 | < 1e-04 360 | ].iloc[0]["training_iteration"] 361 | best_ckpt_idx -= 1 362 | model_path = get_lastest_checkpoint( 363 | path, best_ckpt_idx 364 | ) 365 | if hyperopt_run["model_path"] is None: 366 | hyperopt_run["model_path"] = os.path.join( 367 | model_path, "model" 368 | ) 369 | break 370 | 371 | else: 372 | hyperopt_run_metadata = [] 373 | for run_dir in os.scandir(output_dir): 374 | if 
os.path.isdir(run_dir): 375 | sample_training_stats = json.load( 376 | open( 377 | os.path.join(run_dir.path, "training_statistics.json"), 378 | "rb", 379 | ) 380 | ) 381 | for hyperopt_run in hyperopt_training_stats: 382 | if hyperopt_run["training_stats"] == sample_training_stats: 383 | hyperopt_run_metadata.append( 384 | { 385 | "hyperopt_results": hyperopt_run, 386 | "model_path": os.path.join( 387 | run_dir.path, "model" 388 | ), 389 | } 390 | ) 391 | 392 | return hyperopt_run_metadata 393 | 394 | 395 | def collect_completed_trial_results(output_dir: str): 396 | results, metrics, params = [], [], [] 397 | trial_dirs = get_ray_tune_trial_dirs(output_dir, []) 398 | for trial_dir in trial_dirs: 399 | for f in os.scandir(trial_dir): 400 | if "progress" in f.name: 401 | try: 402 | progress = pd.read_csv(f) 403 | last_iter = len(progress) - 1 404 | last_iter_eval_stats = json.loads( 405 | progress.iloc[last_iter]["eval_stats"] 406 | ) 407 | if ( 408 | "overall_stats" 409 | in last_iter_eval_stats[ 410 | list(last_iter_eval_stats.keys())[0] 411 | ].keys() 412 | ): 413 | trial_results = decode_json_enc_dict( 414 | progress.iloc[last_iter].to_dict(), 415 | ["parameters", "training_stats", "eval_stats"], 416 | ) 417 | trial_results["done"] = True 418 | metrics.append( 419 | progress.iloc[last_iter]["metric_score"] 420 | ) 421 | curr_path = f.path 422 | params_path = curr_path.replace( 423 | "progress.csv", "params.json" 424 | ) 425 | trial_params = json.load(open(params_path, "rb")) 426 | params.append(trial_params) 427 | for key, value in trial_params.items(): 428 | config_key = "config" + "." + key 429 | trial_results[config_key] = value 430 | results.append(trial_results) 431 | except: 432 | pass 433 | return results, metrics, params 434 | 435 | 436 | def conditional_decorator(decorator, condition, *args): 437 | def wrapper(function): 438 | if condition(*args): 439 | return decorator(function) 440 | else: 441 | return function 442 | 443 | return wrapper 444 | -------------------------------------------------------------------------------- /lbt/utils/metadata_utils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import platform 4 | 5 | import GPUtil 6 | import ludwig 7 | import numpy as np 8 | import pandas as pd 9 | import psutil 10 | import ray 11 | import tensorflow as tf 12 | from ludwig.api import LudwigModel 13 | from ludwig.collect import collect_weights 14 | 15 | 16 | @ray.remote 17 | def get_ludwig_version(**kwargs): 18 | return ludwig.__version__ 19 | 20 | 21 | def scale_bytes(bytes: int, suffix: str = "B") -> str: 22 | factor = 1024 23 | for unit in ["", "K", "M", "G", "T", "P"]: 24 | if bytes < factor: 25 | return f"{bytes:.2f}{unit}{suffix}" 26 | bytes /= factor 27 | 28 | 29 | @ray.remote(num_gpus=1, num_returns=1) 30 | def get_hardware_metadata(**kwargs) -> dict: 31 | """Returns GPU, CPU and RAM information""" 32 | 33 | machine_info = {} 34 | # GPU 35 | gpus = GPUtil.getGPUs() 36 | if len(gpus) != 0: 37 | machine_info["total_gpus"] = len(gpus) 38 | gpu_type = {} 39 | for gpu_id, gpu in enumerate(gpus): 40 | gpu_type[gpu_id] = gpu.name 41 | machine_info["gpu_info"] = gpu_type 42 | else: 43 | machine_info["total_gpus"] = 0 44 | # CPU 45 | total_cores = psutil.cpu_count(logical=True) 46 | machine_info["total_cores"] = total_cores 47 | # RAM 48 | svmem = psutil.virtual_memory() 49 | total_RAM = scale_bytes(svmem.total) 50 | machine_info["RAM"] = total_RAM 51 | return machine_info 52 | 53 | 54 | 
@ray.remote(num_gpus=1, num_returns=1, max_calls=1) 55 | def get_inference_latency( 56 | model_path: str, dataset_path: str, num_samples: int = 20, **kwargs 57 | ) -> str: 58 | """ 59 | Returns avg. time to perform inference on 1 sample 60 | 61 | # Inputs 62 | :param model_path: (str) filepath to pre-trained model (directory that 63 | contains the model_hyperparameters.json). 64 | :param dataset_path: (str) filepath to dataset 65 | :param dataset_path: (int) number of dev samples to randomly sample 66 | 67 | # Return 68 | :return: (str) avg. time per training step 69 | """ 70 | 71 | # Create smaller datasets w/10 samples from original dev set 72 | full_dataset = pd.read_csv(dataset_path) 73 | # Note: split == 1 indicates the dev set 74 | if "split" in full_dataset: 75 | if len(full_dataset[full_dataset["split"] == 1]) > 0: 76 | sampled_dataset = full_dataset[full_dataset["split"] == 1].sample( 77 | n=num_samples 78 | ) 79 | elif len(full_dataset[full_dataset["split"] == 2]) > 0: 80 | sampled_dataset = full_dataset[full_dataset["split"] == 2].sample( 81 | n=num_samples 82 | ) 83 | else: 84 | sampled_dataset = full_dataset[full_dataset["split"] == 0].sample( 85 | n=num_samples 86 | ) 87 | else: 88 | sampled_dataset = full_dataset.sample(n=num_samples) 89 | ludwig_model = LudwigModel.load(model_path) 90 | start = datetime.datetime.now() 91 | _, _ = ludwig_model.predict( 92 | dataset=sampled_dataset, 93 | batch_size=1, 94 | ) 95 | total_time = datetime.datetime.now() - start 96 | avg_time_per_sample = total_time / num_samples 97 | formatted_time = "{:0>8}".format(str(avg_time_per_sample)) 98 | return formatted_time 99 | 100 | 101 | @ray.remote(num_returns=1) 102 | def get_training_cost( 103 | model_path: str, 104 | dataset_path: str, 105 | train_batch_size: int, 106 | run_stats: dict, 107 | gpu_cost_per_hr: float = 0.35, # GCP cost for Tesla T4 108 | ) -> float: 109 | """ 110 | Return total cost to train model using GCP compute resource 111 | """ 112 | total_time_s = int(run_stats["hyperopt_results"]["time_total_s"]) 113 | total_time_hr = total_time_s / 3600 114 | return float(total_time_hr * gpu_cost_per_hr) 115 | 116 | 117 | @ray.remote(num_gpus=1, num_returns=1, max_calls=1) 118 | def get_train_speed( 119 | model_path: str, 120 | dataset_path: str, 121 | train_batch_size: int, 122 | run_stats: dict, 123 | **kwargs, 124 | ) -> str: 125 | """ 126 | Returns avg. time per training step 127 | 128 | # Inputs 129 | :param model_path: (str) filepath to pre-trained model (directory that 130 | contains the model_hyperparameters.json). 131 | :param dataset_path: (str) filepath to dataset 132 | 133 | # Return 134 | :return: (str) avg. time per training step 135 | """ 136 | 137 | train_split_size = 0.7 138 | full_dataset = pd.read_csv(dataset_path) 139 | if "split" in full_dataset: 140 | total_samples = len(full_dataset[full_dataset["split"] == 0]) 141 | else: 142 | total_samples = int(train_split_size * len(full_dataset)) 143 | total_training_steps = int(total_samples / train_batch_size) 144 | time_per_batch = ( 145 | int(run_stats["hyperopt_results"]["time_this_iter_s"]) 146 | / total_training_steps 147 | ) 148 | formatted_time = "{:0>8}".format( 149 | str(datetime.timedelta(seconds=time_per_batch)) 150 | ) 151 | return formatted_time 152 | 153 | 154 | @ray.remote(num_gpus=1, num_returns=1, max_calls=1) 155 | def get_model_flops(model_path: str, **kwargs) -> int: 156 | """ 157 | Computes total model flops 158 | 159 | # Inputs 160 | :param model_path: (str) filepath to pre-trained model. 
161 | 162 | # Return 163 | :return: (int) total number of flops. 164 | """ 165 | tf.compat.v1.reset_default_graph() 166 | session = tf.compat.v1.Session() 167 | graph = tf.compat.v1.get_default_graph() 168 | flops = None 169 | with graph.as_default(): 170 | with session.as_default(): 171 | model = LudwigModel.load(model_path) 172 | run_meta = tf.compat.v1.RunMetadata() 173 | opts = tf.compat.v1.profiler.ProfileOptionBuilder.float_operation() 174 | flops = tf.compat.v1.profiler.profile( 175 | graph=graph, run_meta=run_meta, cmd="op", options=opts 176 | ) 177 | tf.compat.v1.reset_default_graph() 178 | session.close() 179 | return flops.total_float_ops 180 | 181 | 182 | @ray.remote(num_gpus=1, num_returns=1, max_calls=1) 183 | def get_model_size(model_path: str, **kwargs): 184 | """ 185 | Computes minimum bytes required to store model to memory 186 | 187 | # Inputs 188 | :param model_path: (str) filepath to pre-trained model. 189 | 190 | # Return 191 | :return: (int) total bytes 192 | :return: (str) total bytes scaled in string format 193 | """ 194 | tensor_filepaths = collect_weights( 195 | model_path=model_path, tensors=None, output_directory=".model_tensors" 196 | ) 197 | total_size = 0 198 | for fp in tensor_filepaths: 199 | weight_tensor = np.load(fp) 200 | total_size += weight_tensor.size 201 | total_bytes = total_size * 32 202 | scaled_bytes = scale_bytes(total_bytes) 203 | model_size = {"total_bytes": total_bytes, "scaled_bytes": scaled_bytes} 204 | return model_size 205 | 206 | 207 | def append_experiment_metadata( 208 | document: dict, 209 | model_path: str, 210 | data_path: str, 211 | run_stats: dict, 212 | train_batch_size: int = 16, 213 | ): 214 | print("METADATA tracking") 215 | for key, metrics_func in metadata_registry.items(): 216 | print("currently processing: {}".format(key)) 217 | try: 218 | output = globals()[metrics_func].remote( 219 | model_path=model_path, 220 | dataset_path=data_path, 221 | train_batch_size=train_batch_size, 222 | run_stats=run_stats, 223 | ) 224 | document.update({key: ray.get(output)}) 225 | except: 226 | print(f"failure processing: {key}") 227 | pass 228 | 229 | 230 | metadata_registry = { 231 | "inference_latency": "get_inference_latency", 232 | "time_per_train_step": "get_train_speed", 233 | "model_size": "get_model_size", 234 | "model_flops": "get_model_flops", 235 | "hardware_metadata": "get_hardware_metadata", 236 | "ludwig_version": "get_ludwig_version", 237 | "training_cost": "get_training_cost", 238 | } 239 | -------------------------------------------------------------------------------- /lbt/utils/test_utils.py: -------------------------------------------------------------------------------- 1 | from metadata_utils import * 2 | 3 | DATAPATH = "/sailhome/avanika/.ludwig_cache/sst2_1.0/processed/sst2.csv" 4 | MODEL_PATH = "/juice/scr/avanika/ludwig-benchmark-dev/ludwig-benchmark/experiment-outputs/sst2_bert/hyperopt_0_config_sst2_bert/model" 5 | 6 | machine_info = get_hardware_metadata() 7 | print(machine_info) 8 | 9 | #model_flops = model_flops(MODEL_PATH) 10 | #print(model_flops) 11 | 12 | #model_size = get_model_size(MODEL_PATH) 13 | #print(model_size) 14 | 15 | #latency = get_inference_latency(MODEL_PATH, DATAPATH) 16 | #print(latency) 17 | 18 | print(DATAPATH) 19 | train_speed = get_train_speed(MODEL_PATH, DATAPATH, train_batch_size=16) 20 | print(train_speed) 21 | -------------------------------------------------------------------------------- /lbt/visualizations/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .visualize import ( 2 | hyperopt_viz, 3 | compare_performance_viz, 4 | learning_curves_viz, 5 | ) 6 | -------------------------------------------------------------------------------- /lbt/visualizations/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/ludwig-benchmarking-toolkit/5cae39dba91f83717c467e1f3db982bb0fabaf70/lbt/visualizations/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /lbt/visualizations/__pycache__/visualize.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/ludwig-benchmarking-toolkit/5cae39dba91f83717c467e1f3db982bb0fabaf70/lbt/visualizations/__pycache__/visualize.cpython-37.pyc -------------------------------------------------------------------------------- /lbt/visualizations/visualize.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List, Union 3 | 4 | import globals 5 | import json 6 | import pickle 7 | from lbt.datasets import DATASET_REGISTRY 8 | from ludwig.visualize import ( 9 | compare_performance, 10 | hyperopt_report, 11 | learning_curves, 12 | ) 13 | 14 | 15 | def hyperopt_viz( 16 | hyperopt_stats_path: str = None, 17 | dataset_name: str = None, 18 | model_name: str = None, 19 | output_dir: str = None, 20 | ): 21 | """ 22 | Produces a report about hyperparameter optimization. 23 | Creating one graph per hyperparameter to show the distribution of results 24 | and one additional graph of pairwise hyperparameters interactions 25 | """ 26 | 27 | if hyperopt_stats_path: 28 | return hyperopt_report( 29 | hyperopt_stats_path=hyperopt_stats_path, 30 | output_directory=output_dir, 31 | ) 32 | elif dataset_name and model_name: 33 | if dataset_name not in DATASET_REGISTRY.keys(): 34 | raise ValueError("The specified dataset is not valid") 35 | elif model_name not in globals.ENCODER_HYPEROPT_FILENAMES.keys(): 36 | raise ValueError("The specified model name is not valid") 37 | 38 | exp_name = "_".join([dataset_name, model_name]) 39 | experiment_folder = os.path.join( 40 | globals.EXPERIMENT_OUTPUT_DIR, exp_name 41 | ) 42 | 43 | hyperopt_stats_json = os.path.join( 44 | experiment_folder, 45 | "hyperopt_statistics.json", 46 | ) 47 | json_file = json.load(open(hyperopt_stats_json, "rb")) 48 | 49 | # decode json 50 | hyperopt_results = [] 51 | for result in json_file["hyperopt_results"]: 52 | for key, val in result.items(): 53 | try: 54 | val = json.loads(val) 55 | result[key] = val 56 | except: 57 | pass 58 | hyperopt_results.append(result) 59 | json_file["hyperopt_results"] = hyperopt_results 60 | 61 | with open( 62 | os.path.join( 63 | experiment_folder, "hyperopt_statistics_decoded.json" 64 | ), 65 | "w", 66 | ) as outfile: 67 | json.dump(json_file, outfile) 68 | 69 | hyperopt_stats_path = os.path.join( 70 | experiment_folder, 71 | "hyperopt_statistics_decoded.json", 72 | ) 73 | return hyperopt_report( 74 | hyperopt_stats_path=hyperopt_stats_path, 75 | output_directory=output_dir, 76 | ) 77 | raise ValueError( 78 | "Please specify either a path to the hyperopt output stats json file" 79 | "or the dataset and model name of the experiment" 80 | ) 81 | 82 | 83 | def learning_curves_viz( 84 | model_name: str, 85 | dataset_name: str, 86 | 
output_feature_name: str, 87 | output_directory=None, 88 | file_format="pdf", 89 | ): 90 | """ 91 | Visualize how model metrics change over training and validation data 92 | epochs. 93 | """ 94 | 95 | exp_name = "_".join([dataset_name, model_name]) 96 | experiment_folder = os.path.join(globals.EXPERIMENT_OUTPUT_DIR, exp_name) 97 | 98 | results_file = os.path.join( 99 | experiment_folder, f"{exp_name}_hyperopt_results.pkl" 100 | ) 101 | hyperopt_results = pickle.load(open(results_file, "rb")) 102 | 103 | training_stats = [] 104 | experiment_ids = [] 105 | 106 | for model_results in hyperopt_results: 107 | training_stats.append(json.loads(model_results["training_stats"])) 108 | experiment_ids.append(model_results["experiment_id"]) 109 | 110 | return learning_curves( 111 | train_stats_per_model=training_stats, 112 | output_feature_name=output_feature_name, 113 | model_names=experiment_ids, 114 | output_directory=output_directory, 115 | file_format=file_format, 116 | ) 117 | 118 | 119 | def compare_performance_viz( 120 | model_name: str, 121 | dataset_name: str, 122 | output_feature_name: str, 123 | output_directory=None, 124 | file_format="pdf", 125 | ): 126 | """ Barplot visualization for each overall metric """ 127 | 128 | exp_name = "_".join([dataset_name, model_name]) 129 | experiment_folder = os.path.join(globals.EXPERIMENT_OUTPUT_DIR, exp_name) 130 | 131 | results_file = os.path.join( 132 | experiment_folder, f"{exp_name}_hyperopt_results.pkl" 133 | ) 134 | hyperopt_results = pickle.load(open(results_file, "rb")) 135 | 136 | eval_stats = [] 137 | experiment_ids = [] 138 | 139 | for model_results in hyperopt_results: 140 | eval_stats.append(json.loads(model_results["eval_stats"])) 141 | experiment_ids.append(model_results["experiment_id"]) 142 | 143 | return compare_performance( 144 | test_stats_per_model=eval_stats, 145 | output_feature_name=output_feature_name, 146 | model_names=experiment_ids, 147 | output_directory=output_directory, 148 | file_format=file_format, 149 | ) -------------------------------------------------------------------------------- /model-configs/bert_hyperopt.yaml: -------------------------------------------------------------------------------- 1 | input_features: 2 | - preprocessing: 3 | word_tokenizer: hf_tokenizer 4 | pretrained_model_name_or_path: bert-base-uncased 5 | 6 | training: 7 | batch_size: 16 8 | early_stop: 3 9 | 10 | parameters: 11 | input_features.name.encoder: bert 12 | input_features.name.reduced_output: 13 | space: choice 14 | type: category 15 | categories: ["cls_pooled", "sum", "avg"] 16 | output_features.name.fc_layers: 17 | # if space is grid_search, change 'categories' to 'values' 18 | space: choice 19 | type: category 20 | categories: 21 | [ 22 | [{ fc_size: 512 }, { fc_size: 256 }], 23 | [{ fc_size: 512 }], 24 | [{ fc_size: 256 }], 25 | ] 26 | -------------------------------------------------------------------------------- /model-configs/distilbert_hyperopt.yaml: -------------------------------------------------------------------------------- 1 | input_features: 2 | - preprocessing: 3 | word_tokenizer: hf_tokenizer 4 | pretrained_model_name_or_path: distilbert-base-uncased 5 | 6 | training: 7 | batch_size: 16 8 | early_stop: 3 9 | 10 | parameters: 11 | input_features.name.encoder: distilbert 12 | output_features.name.fc_layers: 13 | space: choice 14 | type: category 15 | categories: 16 | [ 17 | [{ fc_size: 512 }, { fc_size: 256 }], 18 | [{ fc_size: 512 }], 19 | [{ fc_size: 256 }], 20 | ] 21 | 
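The dotted parameter keys in the model configs above take the form section_or_feature.parameter; keys of the same shape come back in the sampled hyperopt results, and substitute_dict_parameters from lbt/utils/experiment_utils.py (shown earlier in this dump) folds those sampled values back into a concrete Ludwig config. A minimal sketch, using made-up feature names and values:

from lbt.utils.experiment_utils import substitute_dict_parameters

# made-up config and sampled values, for illustration only
config = {
    "input_features": [{"name": "text", "type": "text", "encoder": "distilbert"}],
    "output_features": [{"name": "label", "type": "category"}],
    "training": {"batch_size": 16},
}
sampled = {
    "training.batch_size": 32,              # top-level section key
    "label.fc_layers": [{"fc_size": 256}],  # keyed by the output feature's name
}
resolved = substitute_dict_parameters(config, sampled)
assert resolved["training"]["batch_size"] == 32
assert resolved["output_features"][0]["fc_layers"] == [{"fc_size": 256}]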
-------------------------------------------------------------------------------- /model-configs/electra_hyperopt.yaml: -------------------------------------------------------------------------------- 1 | input_features: 2 | - preprocessing: 3 | word_tokenizer: hf_tokenizer 4 | pretrained_model_name_or_path: google/electra-base-generator 5 | 6 | training: 7 | batch_size: 16 8 | early_stop: 3 9 | 10 | parameters: 11 | input_features.name.encoder: electra 12 | output_features.name.fc_layers: 13 | space: choice 14 | type: category 15 | categories: 16 | [ 17 | [{ fc_size: 512 }, { fc_size: 256 }], 18 | [{ fc_size: 512 }], 19 | [{ fc_size: 256 }], 20 | ] 21 | -------------------------------------------------------------------------------- /model-configs/resnet_hyperopt.yaml: -------------------------------------------------------------------------------- 1 | input_features: 2 | - encoder: resnet 3 | resnet_size: 14 4 | 5 | training: 6 | batch_size: 32 7 | early_stop: 5 8 | 9 | parameters: 10 | input_features.name.encoder: resnet 11 | output_features.name.fc_layers: 12 | # if space is grid_search, change 'categories' to 'values' 13 | space: choice 14 | categories: 15 | [ 16 | [{ fc_size: 512 }, { fc_size: 256 }], 17 | [{ fc_size: 512 }], 18 | [{ fc_size: 256 }], 19 | ] 20 | -------------------------------------------------------------------------------- /model-configs/rnn_hyperopt.yaml: -------------------------------------------------------------------------------- 1 | input_features: 2 | - encoder: rnn 3 | preprocessing: 4 | word_tokenizer: space 5 | pretrained_model_name_or_path: None 6 | dropout: 0.5 7 | 8 | training: 9 | early_stop: 7 10 | batch_size: 128 11 | eval_batch_size: 256 12 | 13 | parameters: 14 | input_features.name.encoder: rnn 15 | input_features.name.num_layers: 16 | space: randint 17 | type: int 18 | lower: 1 19 | upper: 5 20 | input_features.name.cell_type: 21 | space: choice 22 | type: category 23 | categories: [rnn, gru, lstm] 24 | input_features.name.state_size: 25 | space: choice 26 | type: category 27 | categories: [256, 512] 28 | input_features.name.fc_layers: 29 | space: choice 30 | type: category 31 | categories: 32 | [ 33 | [{ fc_size: 512 }, { fc_size: 256 }], 34 | [{ fc_size: 512 }], 35 | [{ fc_size: 256 }], 36 | ] 37 | -------------------------------------------------------------------------------- /model-configs/roberta_hyperopt.yaml: -------------------------------------------------------------------------------- 1 | input_features: 2 | - preprocessing: 3 | word_tokenizer: hf_tokenizer 4 | pretrained_model_name_or_path: roberta-base 5 | 6 | training: 7 | batch_size: 16 8 | early_stop: 3 9 | 10 | parameters: 11 | input_features.name.encoder: roberta 12 | input_features.name.reduced_output: 13 | space: choice 14 | type: category 15 | categories: [cls_pooled, sum, avg] 16 | output_features.name.fc_layers: 17 | space: choice 18 | type: category 19 | categories: 20 | [ 21 | [{ fc_size: 512 }, { fc_size: 256 }], 22 | [{ fc_size: 512 }], 23 | [{ fc_size: 256 }], 24 | ] 25 | -------------------------------------------------------------------------------- /model-configs/stackedcnn_hyperopt.yaml: -------------------------------------------------------------------------------- 1 | input_features: 2 | - encoder: stacked_cnn 3 | 4 | training: 5 | batch_size: 32 6 | early_stop: 5 7 | 8 | parameters: 9 | input_features.name.encoder: stacked_cnn 10 | output_features.name.fc_layers: 11 | # if space is grid_search, change 'categories' to 'values' 12 | space: choice 13 | type: 
category 14 | categories: 15 | [ 16 | [{ fc_size: 512 }, { fc_size: 256 }], 17 | [{ fc_size: 512 }], 18 | [{ fc_size: 256 }], 19 | ] 20 | -------------------------------------------------------------------------------- /model-configs/stackedparallelcnn_hyperopt.yaml: -------------------------------------------------------------------------------- 1 | input_features: 2 | - type: sequence 3 | encoder: stacked_parallel_cnn 4 | pretrained_embeddings: PATH_TO_PRETRAINED_EMBEDDINGS 5 | dropout: 0.5 6 | preprocessing: 7 | word_tokenizer: space 8 | 9 | training: 10 | early_stop: 7 11 | eval_batch_size: 256 12 | batch_size: 128 13 | 14 | parameters: 15 | input_features.name.encoder: stacked_parallel_cnn 16 | 17 | input_features.name.stacked_layers: 18 | space: choice 19 | type: category 20 | categories: 21 | [ 22 | [[{ filter_size: 1 }, { filter_size: 2 }, { filter_size: 3 }]], 23 | [ 24 | [{ filter_size: 1 }, { filter_size: 2 }, { filter_size: 3 }], 25 | [{ filter_size: 1 }, { filter_size: 2 }, { filter_size: 3 }], 26 | ], 27 | [ 28 | [{ filter_size: 1 }, { filter_size: 2 }, { filter_size: 3 }], 29 | [{ filter_size: 1 }, { filter_size: 2 }, { filter_size: 3 }], 30 | [{ filter_size: 1 }, { filter_size: 2 }, { filter_size: 3 }], 31 | ], 32 | [ 33 | [{ filter_size: 1 }, { filter_size: 2 }, { filter_size: 3 }], 34 | [{ filter_size: 1 }, { filter_size: 2 }, { filter_size: 3 }], 35 | [{ filter_size: 1 }, { filter_size: 2 }, { filter_size: 3 }], 36 | [{ filter_size: 1 }, { filter_size: 2 }, { filter_size: 3 }], 37 | ], 38 | [[{ filter_size: 2 }, { filter_size: 3 }, { filter_size: 4 }]], 39 | [ 40 | [{ filter_size: 2 }, { filter_size: 3 }, { filter_size: 4 }], 41 | [{ filter_size: 2 }, { filter_size: 3 }, { filter_size: 4 }], 42 | ], 43 | [ 44 | [{ filter_size: 2 }, { filter_size: 3 }, { filter_size: 4 }], 45 | [{ filter_size: 2 }, { filter_size: 3 }, { filter_size: 4 }], 46 | [{ filter_size: 2 }, { filter_size: 3 }, { filter_size: 4 }], 47 | ], 48 | [ 49 | [{ filter_size: 2 }, { filter_size: 3 }, { filter_size: 4 }], 50 | [{ filter_size: 2 }, { filter_size: 3 }, { filter_size: 4 }], 51 | [{ filter_size: 2 }, { filter_size: 3 }, { filter_size: 4 }], 52 | [{ filter_size: 2 }, { filter_size: 3 }, { filter_size: 4 }], 53 | ], 54 | [[{ filter_size: 3 }, { filter_size: 4 }, { filter_size: 5 }]], 55 | [ 56 | [{ filter_size: 3 }, { filter_size: 4 }, { filter_size: 5 }], 57 | [{ filter_size: 3 }, { filter_size: 4 }, { filter_size: 5 }], 58 | ], 59 | [ 60 | [{ filter_size: 3 }, { filter_size: 4 }, { filter_size: 5 }], 61 | [{ filter_size: 3 }, { filter_size: 4 }, { filter_size: 5 }], 62 | [{ filter_size: 3 }, { filter_size: 4 }, { filter_size: 5 }], 63 | ], 64 | [ 65 | [{ filter_size: 3 }, { filter_size: 4 }, { filter_size: 5 }], 66 | [{ filter_size: 3 }, { filter_size: 4 }, { filter_size: 5 }], 67 | [{ filter_size: 3 }, { filter_size: 4 }, { filter_size: 5 }], 68 | [{ filter_size: 3 }, { filter_size: 4 }, { filter_size: 5 }], 69 | ], 70 | [[{ filter_size: 4 }, { filter_size: 5 }, { filter_size: 6 }]], 71 | [ 72 | [{ filter_size: 4 }, { filter_size: 5 }, { filter_size: 6 }], 73 | [{ filter_size: 4 }, { filter_size: 5 }, { filter_size: 6 }], 74 | ], 75 | [ 76 | [{ filter_size: 4 }, { filter_size: 5 }, { filter_size: 6 }], 77 | [{ filter_size: 4 }, { filter_size: 5 }, { filter_size: 6 }], 78 | [{ filter_size: 4 }, { filter_size: 5 }, { filter_size: 6 }], 79 | ], 80 | [ 81 | [{ filter_size: 4 }, { filter_size: 5 }, { filter_size: 6 }], 82 | [{ filter_size: 4 }, { filter_size: 5 }, { filter_size: 6 }], 83 | [{ 
filter_size: 4 }, { filter_size: 5 }, { filter_size: 6 }], 84 | [{ filter_size: 4 }, { filter_size: 5 }, { filter_size: 6 }], 85 | ], 86 | [[{ filter_size: 5 }, { filter_size: 6 }, { filter_size: 7 }]], 87 | [ 88 | [{ filter_size: 5 }, { filter_size: 6 }, { filter_size: 7 }], 89 | [{ filter_size: 5 }, { filter_size: 6 }, { filter_size: 7 }], 90 | ], 91 | [ 92 | [{ filter_size: 5 }, { filter_size: 6 }, { filter_size: 7 }], 93 | [{ filter_size: 5 }, { filter_size: 6 }, { filter_size: 7 }], 94 | [{ filter_size: 5 }, { filter_size: 6 }, { filter_size: 7 }], 95 | ], 96 | [ 97 | [{ filter_size: 5 }, { filter_size: 6 }, { filter_size: 7 }], 98 | [{ filter_size: 5 }, { filter_size: 6 }, { filter_size: 7 }], 99 | [{ filter_size: 5 }, { filter_size: 6 }, { filter_size: 7 }], 100 | [{ filter_size: 5 }, { filter_size: 6 }, { filter_size: 7 }], 101 | ], 102 | [[{ filter_size: 6 }, { filter_size: 7 }, { filter_size: 8 }]], 103 | [ 104 | [{ filter_size: 6 }, { filter_size: 7 }, { filter_size: 8 }], 105 | [{ filter_size: 6 }, { filter_size: 7 }, { filter_size: 8 }], 106 | ], 107 | [ 108 | [{ filter_size: 6 }, { filter_size: 7 }, { filter_size: 8 }], 109 | [{ filter_size: 6 }, { filter_size: 7 }, { filter_size: 8 }], 110 | [{ filter_size: 6 }, { filter_size: 7 }, { filter_size: 8 }], 111 | ], 112 | [ 113 | [{ filter_size: 6 }, { filter_size: 7 }, { filter_size: 8 }], 114 | [{ filter_size: 6 }, { filter_size: 7 }, { filter_size: 8 }], 115 | [{ filter_size: 6 }, { filter_size: 7 }, { filter_size: 8 }], 116 | [{ filter_size: 6 }, { filter_size: 7 }, { filter_size: 8 }], 117 | ], 118 | [[{ filter_size: 7 }, { filter_size: 8 }, { filter_size: 9 }]], 119 | [ 120 | [{ filter_size: 7 }, { filter_size: 8 }, { filter_size: 9 }], 121 | [{ filter_size: 7 }, { filter_size: 8 }, { filter_size: 9 }], 122 | ], 123 | [ 124 | [{ filter_size: 7 }, { filter_size: 8 }, { filter_size: 9 }], 125 | [{ filter_size: 7 }, { filter_size: 8 }, { filter_size: 9 }], 126 | [{ filter_size: 7 }, { filter_size: 8 }, { filter_size: 9 }], 127 | ], 128 | [ 129 | [{ filter_size: 7 }, { filter_size: 8 }, { filter_size: 9 }], 130 | [{ filter_size: 7 }, { filter_size: 8 }, { filter_size: 9 }], 131 | [{ filter_size: 7 }, { filter_size: 8 }, { filter_size: 9 }], 132 | [{ filter_size: 7 }, { filter_size: 8 }, { filter_size: 9 }], 133 | ], 134 | [[{ filter_size: 8 }, { filter_size: 9 }, { filter_size: 10 }]], 135 | [ 136 | [{ filter_size: 8 }, { filter_size: 9 }, { filter_size: 10 }], 137 | [{ filter_size: 8 }, { filter_size: 9 }, { filter_size: 10 }], 138 | ], 139 | [ 140 | [{ filter_size: 8 }, { filter_size: 9 }, { filter_size: 10 }], 141 | [{ filter_size: 8 }, { filter_size: 9 }, { filter_size: 10 }], 142 | [{ filter_size: 8 }, { filter_size: 9 }, { filter_size: 10 }], 143 | ], 144 | [ 145 | [{ filter_size: 8 }, { filter_size: 9 }, { filter_size: 10 }], 146 | [{ filter_size: 8 }, { filter_size: 9 }, { filter_size: 10 }], 147 | [{ filter_size: 8 }, { filter_size: 9 }, { filter_size: 10 }], 148 | [{ filter_size: 8 }, { filter_size: 9 }, { filter_size: 10 }], 149 | ], 150 | ] 151 | 152 | input_features.name.num_filters: 153 | space: qrandint 154 | type: int 155 | lower: 100 156 | upper: 600 157 | steps: 100 158 | scale: linear 159 | 160 | input_features.name.activation: 161 | space: choice 162 | type: category 163 | categories: [tanh, relu] 164 | 165 | input_features.name.fc_layers: 166 | space: choice 167 | type: category 168 | categories: 169 | [ 170 | [{ fc_size: 512 }, { fc_size: 256 }], 171 | [{ fc_size: 512 }], 172 | [{ fc_size: 256 }], 173 | ] 
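# Note: the stacked_layers space above enumerates 1-4 identical stacks for each
# window of three consecutive filter sizes (1-2-3 through 8-9-10), i.e. 32
# candidate layer configurations in total.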
174 | -------------------------------------------------------------------------------- /model-configs/t5_hyperopt.yaml: -------------------------------------------------------------------------------- 1 | input_features: 2 | - preprocessing: 3 | word_tokenizer: hf_tokenizer 4 | pretrained_model_name_or_path: t5-base 5 | 6 | training: 7 | batch_size: 16 8 | early_stop: 3 9 | 10 | parameters: 11 | input_features.name.encoder: t5 12 | output_features.name.fc_layers: 13 | space: choice 14 | type: category 15 | categories: 16 | [ 17 | [{ fc_size: 512 }, { fc_size: 256 }], 18 | [{ fc_size: 512 }], 19 | [{ fc_size: 256 }], 20 | ] 21 | -------------------------------------------------------------------------------- /upload_to_db.py: -------------------------------------------------------------------------------- 1 | import ray 2 | from database import Database, save_results_to_es 3 | from utils.experiment_utils import * 4 | 5 | # from experiment_driver import map_runstats_to_modelpath 6 | import pickle 7 | import os 8 | import json 9 | from utils.metadata_utils import append_experiment_metadata 10 | 11 | ray.init(address="auto") 12 | 13 | datasets = ["agnews"] 14 | encoders = ["rnn", "distilbert", "t5", "electra"] 15 | 16 | elastic_config_file = "./elasticsearch_config.yaml" 17 | paths_to_dataset = { 18 | "agnews": "/experiments/ludwig-bench-textclassification/data/agnews_1.0/processed/agnews.csv" 19 | } 20 | 21 | 22 | def main(): 23 | elastic_config = None 24 | elastic_config = load_yaml(elastic_config_file) 25 | 26 | exp_info = [] 27 | for dataset in datasets: 28 | for enc in encoders: 29 | path_to_stats_file = f"/experiments/ludwig-bench-textclassification/experiment-outputs/{dataset}_{enc}/{dataset}_{enc}_hyperopt_results.pkl" 30 | path_to_output_dir = f"/experiments/ludwig-bench-textclassification/experiment-outputs/{dataset}_{enc}/" 31 | path_to_model_config = f"/experiments/ludwig-bench-textclassification/experiment-configs/config_{dataset}_{enc}.yaml" 32 | model_config = load_yaml(path_to_model_config) 33 | path_to_dataset = paths_to_dataset[dataset] 34 | experiment_attr = { 35 | "model_config": copy.deepcopy(model_config), 36 | "dataset_path": path_to_dataset, 37 | "top_n_trials": None, 38 | "model_name": f"config_{dataset}_{enc}", 39 | "output_dir": path_to_output_dir, 40 | "encoder": enc, 41 | "dataset": dataset, 42 | "elastic_config": elastic_config, 43 | } 44 | hyperopt_results = pickle.load(open(path_to_stats_file, "rb")) 45 | exp_info.append((experiment_attr, hyperopt_results)) 46 | 47 | outputs = ray.get( 48 | [ 49 | save_results_to_es.remote(info[0], info[1], "ray") 50 | for info in exp_info 51 | ] 52 | ) 53 | 54 | 55 | if __name__ == "__main__": 56 | main() --------------------------------------------------------------------------------
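Closing usage sketch (not part of the repo): the augment() helper defined in lbt/tools/textattack/textattack.py above can be driven directly against one of the bundled toy datasets; the column names below are assumptions rather than values read from the CSV.

from lbt.tools.textattack import augment

augmented_df = augment(
    dataset_name="toy_agnews",
    path_to_dataset="lbt/datasets/toy-datasets/toy_agnews.csv",
    input_feature_name="description",    # assumed input column
    output_feature_name="class_index",   # assumed label column
    augmenter_name="CharSwapAugmenter",  # default recipe registered in textattack.py
    pct_words_to_swap=0.1,
    transformations_per_example=1,
    save_path="toy_agnews_charswap.csv",
)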