├── LICENSE.md ├── README.md ├── database.py ├── elasticsearch_config.yaml ├── environments ├── environment-linux.yaml └── environment-osx.yaml ├── experiment-templates ├── dataset_metadata.yaml ├── hyperopt_config.yaml └── task_template.yaml ├── experiment_driver.py ├── globals.py ├── lbt ├── __init__.py ├── build_def_files.py ├── datasets │ ├── __init__.py │ ├── base_dataset.py │ ├── toy-datasets │ │ ├── fever.csv │ │ ├── goemotions.csv │ │ └── toy_agnews.csv │ └── toy_datasets.py ├── experiments.py ├── metrics │ ├── __init__.py │ ├── base_metric.py │ ├── instance_prices.json │ ├── lbt_metrics.py │ └── utils.py ├── tools │ ├── __init__.py │ ├── robustnessgym │ │ ├── __init__.py │ │ ├── base_subpopulation.py │ │ ├── lbt_subpopulations.py │ │ └── robustnessgym.py │ ├── textattack │ │ ├── __init__.py │ │ └── textattack.py │ └── utils.py ├── utils │ ├── __pycache__ │ │ ├── experiment_utils.cpython-36.pyc │ │ ├── experiment_utils.cpython-37.pyc │ │ ├── experiment_utils.cpython-38.pyc │ │ ├── metadata_utils.cpython-36.pyc │ │ └── metadata_utils.cpython-37.pyc │ ├── experiment_utils.py │ ├── metadata_utils.py │ └── test_utils.py └── visualizations │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-37.pyc │ └── visualize.cpython-37.pyc │ └── visualize.py ├── model-configs ├── bert_hyperopt.yaml ├── distilbert_hyperopt.yaml ├── electra_hyperopt.yaml ├── resnet_hyperopt.yaml ├── rnn_hyperopt.yaml ├── roberta_hyperopt.yaml ├── stackedcnn_hyperopt.yaml ├── stackedparallelcnn_hyperopt.yaml └── t5_hyperopt.yaml └── upload_to_db.py /LICENSE.md: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2021 Stanford Hazy Research 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ludwig Benchmarking Toolkit 2 | The Ludwig Benchmarking Toolkit is a personalized benchmarking toolkit for running end-to-end benchmark studies across an extensible set of tasks, deep learning models, standard datasets and evaluation metrics. 3 | 4 | # Getting set-up 5 | To get started, use the following commands to set-up your conda environment. 
6 | ```
7 | git clone https://github.com/HazyResearch/ludwig-benchmarking-toolkit.git
8 | cd ludwig-benchmarking-toolkit
9 | conda env create -f environments/{environment-osx.yaml, environment-linux.yaml}
10 | conda activate lbt
11 | ```
12 | 
13 | # Relevant files and directories
14 | `experiment-templates/task_template.yaml`: Every task (e.g. text classification) has its own task template. The template specifies the model architecture (encoder and decoder structure), training parameters, and a hyperopt configuration for the task at hand. Most of the template's values are populated from the hyperopt_config.yaml and dataset_metadata.yaml files at training time. The sample task template located in `experiment-templates/task_template.yaml` is for text classification. See `sample-task-templates/` for other examples.
15 | 
16 | `experiment-templates/hyperopt_config.yaml`: provides ranges of values for training parameters and hyperopt parameters that will populate the hyperopt configuration in the model template.
17 | 
18 | `experiment-templates/dataset_metadata.yaml`: contains a list of all available datasets (and associated metadata) that hyperparameter optimization can be performed over.
19 | 
20 | `model-configs/`: contains all encoder-specific YAML files. Each file specifies possible values for the relevant encoder parameters that will be optimized over. Each file in this directory adheres to the naming convention `{encoder_name}_hyperopt.yaml`.
21 | 
22 | `hyperopt-experiment-configs/`: houses all experiment configs built from the templates specified above (note: this folder is populated at run-time) and used when the hyperopt experiment is called. At a high level, each config file specifies the training and hyperopt information for a (task, dataset, architecture) combination. An example might be (text classification, SST2, BERT).
23 | 
24 | `elasticsearch_config.yaml`: an optional file to define if experiment data will be saved to an Elasticsearch database.
25 | 
26 | 
27 | # USAGE
28 | ### **Command-Line Usage**
29 | 
30 | ### *Running your first TOY experiment*:
31 | 
32 | For testing/setup purposes, we have included a toy dataset called toy_agnews. This dataset contains a small set of training, test and validation samples from the original agnews dataset.
33 | 
34 | Before running a full-scale experiment, we recommend running an experiment locally on the toy dataset:
35 | ```
36 | python experiment_driver.py --run_environment local --datasets toy_agnews --custom_model_list rnn
37 | ```
38 | 
39 | ### *Running your first REAL experiment*:
40 | 
41 | Steps for configuring + running an experiment:
42 | 1. Declare and configure the search space of all non-model-specific training and preprocessing hyperparameters in the `experiment-templates/hyperopt_config.yaml` file. The parameters specified in this file will be used across all model experiments.
43 | 2. Declare and configure the search space of model-specific hyperparameters in the `{encoder}_hyperopt.yaml` files in `./model-configs`
44 | 
45 | **NOTE**: 
46 | * for both (1) and (2) see the [Ludwig Hyperparameter Optimization guide](https://ludwig-ai.github.io/ludwig-docs/user_guide/#hyper-parameter-optimization) for the training, preprocessing, and input/output feature parameters
47 | that can be used in the hyperopt search
48 | * if the executor type is `Ray`, the available search spaces and input format differ slightly from the built-in Ludwig types. Please see the [Ray Tune search space docs](https://docs.ray.io/en/master/tune/api_docs/search_space.html) for more information.
49 | 
50 | 3. Run the following command specifying the datasets, encoders, path to elastic DB index config file, run environment and more:
51 | 
52 | ```
53 | python experiment_driver.py \
54 |     --experiment_output_dir \
55 |     --run_environment {local, gcp} \
56 |     --elasticsearch_config \
57 |     --dataset_cache_dir \
58 |     --custom_model_list \
59 |     --datasets \
60 |     --resume_existing_exp bool
61 | 
62 | ```
63 | 
64 | **NOTE:** Please use `python experiment_driver.py -h` to see the list of available datasets, encoders and args.
65 | 
66 | ### **API Usage**
67 | It is also possible to run and customize experiments using LBT's APIs. In the following section,
68 | we describe the three flavors of APIs included in LBT.
69 | 
70 | ### `experiment` API
71 | This API provides an alternative method for running experiments. Note that running experiments via the API still requires populating the aforementioned configuration files.
72 | 
73 | ```python
74 | from lbt.experiments import experiment
75 | 
76 | experiment(
77 |     models=['rnn', 'bert'],
78 |     datasets=['agnews'],
79 |     run_environment="local",
80 |     elastic_search_config=None,
81 |     resume_existing_exp=False,
82 | )
83 | ```
84 | 
85 | ### `tools` API
86 | This API provides access to two tooling integrations (TextAttack and Robustness Gym (RG)). The TextAttack API can be used to generate adversarial attacks. Moreover, users can use the TextAttack interface to augment data files. The RG API empowers users to inspect model performance on a set of generic, pre-built slices and to add more slices for their specific datasets and use cases.
87 | 
88 | ```python
89 | from lbt.tools.robustnessgym import RG
90 | from lbt.tools.textattack import attack, augment
91 | 
92 | # Robustness Gym API Usage
93 | RG(dataset_name="AGNews",
94 |    models=["bert", "rnn"],
95 |    path_to_dataset="agnews.csv",
96 |    subpopulations=["entities", "positive_words", "negative_words"])
97 | 
98 | # TextAttack API Usage
99 | attack(dataset_name="AGNews", path_to_model="agnews/model/rnn_model",
100 |     path_to_dataset="agnews.csv", attack_recipe=["CharSwapAugmenter"])
101 | 
102 | augment(dataset_name="AGNews", transformations_per_example=1,
103 |     path_to_dataset="agnews.csv", augmenter=["WordNetAugmenter"])
104 | ```
105 | 
106 | ### `visualizations` API
107 | This API provides out-of-the-box visualizations of learning behavior, model performance, and hyperparameter optimization, using the training and evaluation statistics generated during model training.
108 | 
109 | ```python
110 | from lbt.visualizations import compare_performance_viz, learning_curves_viz, hyperopt_viz
111 | 
112 | # compare model performance
113 | compare_performance_viz(
114 |     dataset_name="toy_agnews",
115 |     model_name="rnn",
116 |     output_feature_name="class_index",
117 | )
118 | 
119 | # compare training and validation trajectory
120 | learning_curves_viz(
121 |     dataset_name="toy_agnews",
122 |     model_name="rnn",
123 |     output_feature_name="class_index",
124 | )
125 | 
126 | # visualize hyperparameter optimization search
127 | hyperopt_viz(
128 |     dataset_name="toy_agnews",
129 |     model_name="rnn",
130 |     output_dir=".",
131 | )
132 | ```
133 | 
134 | # EXPERIMENT EXTENSIBILITY
135 | ### **Adding new custom datasets**
136 | 
137 | Adding a custom dataset requires creating a new `LBTDataset` class and adding it
138 | to the dataset registry. Creating an `LBTDataset` object requires implementing
139 | three class methods: download, process and load. Please see the [`ToyAGNews`](lbt/datasets/toy_datasets.py) dataset as an example.
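For illustration, here is a minimal sketch of what such a class might look like. It is not a drop-in implementation: the `my_csv_dataset` name, the `csv_path` argument, and the local-CSV layout are hypothetical and only meant to show the required methods and the registration decorator; a real dataset would typically fetch its raw data in `download` and do more substantial cleaning in `process`.

```python
import os

import pandas as pd

from lbt.datasets import register_dataset
from lbt.datasets.base_dataset import LBTDataset


@register_dataset("my_csv_dataset")  # hypothetical registry name
class MyCSVDataset(LBTDataset):
    """Toy example backed by a local CSV file that already has the columns Ludwig expects."""

    def __init__(self, cache_dir, csv_path="my_dataset.csv"):
        super().__init__(
            dataset_name="my_csv_dataset",
            processed_file_name="my_csv_dataset_processed.csv",
            cache_dir=cache_dir,
        )
        self.csv_path = csv_path  # hypothetical local file

    def download(self) -> None:
        # Nothing to fetch for a local file; a real dataset would download
        # its raw data into self.cache_dir here.
        os.makedirs(self.cache_dir, exist_ok=True)

    def process(self) -> None:
        # Convert the raw data into the columnar CSV format used for training
        # and write it to the processed dataset path.
        self.download()
        df = pd.read_csv(self.csv_path)
        df.to_csv(self.processed_dataset_path, index=False)

    def load(self) -> pd.DataFrame:
        # Return the processed data as a Pandas DataFrame.
        if not os.path.exists(self.processed_dataset_path):
            self.process()
        return pd.read_csv(self.processed_dataset_path)

    @property
    def processed_dataset_path(self) -> str:
        return os.path.join(self.cache_dir, self.config["csv_filename"])
```

Once registered, such a dataset can be built through the registry like the pre-built ones, e.g. `lbt.datasets.build_dataset("my_csv_dataset", cache_dir=...)`.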
140 | 
141 | ### **Adding new metrics**
142 | 
143 | Adding custom evaluation metrics requires creating a new `LBTMetric` class and adding it
144 | to the metrics registry. Creating an `LBTMetric` object requires implementing
145 | the `run` class method, which takes as potential inputs a path to a model directory, a path to a dataset, the training batch size, and training statistics. Please see the [`pre-built LBT metrics`](lbt/metrics/lbt_metrics.py) for examples.
146 | 
147 | # ELASTICSEARCH RESEARCH DATABASE
148 | To get credentials to upload experiments to the shared Elasticsearch research database, please fill out this [form](https://forms.gle/rSQqQ3gAtTAURsxKA).
149 | 
150 | 
151 | 
--------------------------------------------------------------------------------
/database.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import json
3 | import logging
4 | import os
5 | import ray
6 | import socket
7 | from elasticsearch import Elasticsearch
8 | 
9 | from lbt.utils.experiment_utils import (
10 |     format_fields_float,
11 |     get_model_ckpt_paths,
12 |     hash_dict,
13 |     substitute_dict_parameters,
14 | )
15 | 
16 | # from utils.metadata_utils import append_experiment_metadata
17 | from lbt.metrics import get_experiment_metadata
18 | 
19 | hostname = socket.gethostbyname(socket.gethostname())
20 | 
21 | 
22 | # TODO: ASN --> DECOUPLE BUILDING ES DOCUMENT W/SAVING
23 | @ray.remote(num_cpus=0, resources={f"node:{hostname}": 0.001})
24 | def save_results_to_es(
25 |     experiment_attr: dict,
26 |     hyperopt_results: list,
27 |     tune_executor: str,
28 |     top_n_trials: int = None,
29 |     reupload=False,
30 |     num_gpus=0,
31 | ):
32 |     elastic_config = experiment_attr["elastic_config"]
33 | 
34 |     es_db = Database(
35 |         elastic_config["host"],
36 |         (elastic_config["username"], elastic_config["password"]),
37 |         elastic_config["username"],
38 |         elastic_config["index"],
39 |     )
40 |     # save top_n model configs to elastic
41 |     if top_n_trials is not None and len(hyperopt_results) > top_n_trials:
42 |         hyperopt_results = hyperopt_results[0:top_n_trials]
43 | 
44 |     hyperopt_run_data = get_model_ckpt_paths(
45 |         hyperopt_results, experiment_attr["output_dir"], executor=tune_executor
46 |     )
47 | 
48 |     sampled_params = {}
49 | 
50 |     # ensures that all numerical values are of type float
51 |     format_fields_float(hyperopt_results)
52 |     for run in hyperopt_run_data:
53 |         new_config = substitute_dict_parameters(
54 |             copy.deepcopy(experiment_attr["model_config"]),
55 |             parameters=run["hyperopt_results"]["parameters"],
56 |         )
57 |         del new_config["hyperopt"]
58 | 
59 |         # do some accounting of duplicate hyperparam configs (this count will
60 |         # be added to the dict which will be hashed for the elastic document
61 |         # id)
62 |         param_hash = hash_dict(run["hyperopt_results"]["parameters"])
63 |         if param_hash in sampled_params:
64 |             sampled_params[param_hash] += 1
65 |         else:
66 |             sampled_params[param_hash] = 1
67 | 
68 |         document = {
69 |             "hyperopt_results": run["hyperopt_results"],
70 |             "model_path": run["model_path"],
71 |         }
72 | 
73 |         try:
74 |             get_experiment_metadata(
75 |                 document,
76 |                 model_path=run["model_path"],
77 |                 data_path=experiment_attr["dataset_path"],
78 |                 run_stats=run,
79 |                 num_gpus=num_gpus,
80 |             )
81 |         except:
82 |             pass
83 | 
84 |         formatted_document = es_db.format_document(
85 |             document,
86 |             encoder=experiment_attr["encoder"],
87 |             dataset=experiment_attr["dataset"],
88 | 
config=experiment_attr["model_config"], 89 | ) 90 | 91 | formatted_document["sampled_run_config"] = new_config 92 | ds = experiment_attr["dataset"] 93 | enc = experiment_attr["encoder"] 94 | # doc_key = run["hyperopt_results"]["eval_stats"] 95 | 96 | trial_count = sampled_params[param_hash] 97 | 98 | doc_key = copy.deepcopy(new_config) 99 | doc_key["trial"] = trial_count 100 | try: 101 | es_db.upload_document(hash_dict(doc_key), formatted_document) 102 | logging.info(f"{ds} x {enc}" f"uploaded to elastic.") 103 | except: 104 | logging.warning( 105 | f"error uploading" f"{ds} x {enc}" f"to elastic..." 106 | ) 107 | return 1 108 | 109 | 110 | class Database: 111 | def __init__(self, host, http_auth, user_id, index): 112 | self.host = host 113 | self.http_auth = http_auth 114 | self.user_id = user_id 115 | self.index = index 116 | self._initialize_db() 117 | self._create_index(self.index) 118 | 119 | def _initialize_db(self): 120 | self.es_connection = Elasticsearch( 121 | [self.host], http_auth=self.http_auth 122 | ) 123 | 124 | def _create_index(self, index_name: str): 125 | mapping = { 126 | "mappings": { 127 | "_doc": { 128 | "properties": {"sampled_run_config": {"type": "nested"}} 129 | } 130 | } 131 | } 132 | self.es_connection.indices.create( 133 | index=index_name, body=mapping, include_type_name=True, ignore=400 134 | ) 135 | 136 | def upload_document(self, id, document): 137 | self.es_connection.index(index=self.index, id=id, body=document) 138 | 139 | def remove_document(self, id): 140 | self.es_connection.delete(index=self.index, id=id) 141 | 142 | def document_exists(self, id): 143 | return self.es_connection.exists(index=self.index, id=id) 144 | 145 | def search(self, query, size=1000): 146 | return self.es_connection.search( 147 | index=self.index, body=query, size=size 148 | ) 149 | 150 | def upload_document_from_outputdir( 151 | self, 152 | dir_path, 153 | encoder, 154 | dataset, 155 | ): 156 | hyperopt_stats = json.load( 157 | open(os.path.join(dir_path, "hyperopt_statistics.json"), "rb"), 158 | parse_int=float, 159 | ) 160 | 161 | formatted_document = self.format_document( 162 | hyperopt_stats, encoder, dataset 163 | ) 164 | 165 | self.es_connection.index( 166 | index=self.index, 167 | id=hash_dict(hyperopt_stats["hyperopt_config"]), 168 | body=formatted_document, 169 | ) 170 | 171 | def format_document(self, document, encoder, dataset, config=None): 172 | formatted_document = { 173 | "user_id": self.user_id, 174 | "encoder": encoder, 175 | "dataset": dataset, 176 | } 177 | formatted_document.update(document) 178 | if config is not None: 179 | formatted_document.update({"hyperopt_exp_config": config}) 180 | 181 | return formatted_document 182 | -------------------------------------------------------------------------------- /elasticsearch_config.yaml: -------------------------------------------------------------------------------- 1 | host : "" 2 | username : "" 3 | password : "" 4 | index : "" 5 | -------------------------------------------------------------------------------- /environments/environment-linux.yaml: -------------------------------------------------------------------------------- 1 | name: lbt 2 | channels: 3 | - defaults 4 | dependencies: 5 | - ca-certificates=2021.5.25 6 | - certifi=2020.5.30 7 | - libffi=3.3 8 | - ncurses=6.2 9 | - openssl=1.1.1k 10 | - pip=21.1.1 11 | - python=3.8.10 12 | - readline=8.1 13 | - setuptools=52.0.0 14 | - sqlite=3.35.4 15 | - tk=8.6.10 16 | - wheel=0.36.2 17 | - xz=5.2.5 18 | - zlib=1.2.11 19 | - pip: 20 | - absl-py==0.12.0 
21 | - aiohttp==3.7.4.post0 22 | - aiohttp-cors==0.7.0 23 | - aioredis==1.3.1 24 | - astunparse==1.6.3 25 | - async-timeout==3.0.1 26 | - attrs==20.3.0 27 | - bayesmark==0.0.8 28 | - blessings==1.7 29 | - cachetools==4.2.1 30 | - cffi==1.14.5 31 | - chardet==4.0.0 32 | - click==7.1.2 33 | - cloudpickle==1.6.0 34 | - colorama==0.4.4 35 | - colorful==0.5.4 36 | - configspace==0.4.18 37 | - cython==0.29.22 38 | - dill==0.3.3 39 | - docker==4.4.4 40 | - elasticsearch==7.11.0 41 | - et-xmlfile==1.0.1 42 | - fiber==0.2.1 43 | - filelock==3.0.12 44 | - flatbuffers==1.12 45 | - gast==0.3.3 46 | - gitdb==4.0.5 47 | - gitpython==3.1.14 48 | - google-api-core==1.26.1 49 | - google-auth==1.27.1 50 | - google-auth-oauthlib==0.4.3 51 | - google-pasta==0.2.0 52 | - googleapis-common-protos==1.53.0 53 | - gpustat==0.6.0 54 | - gputil==1.4.0 55 | - grpcio==1.32.0 56 | - h5py==2.10.0 57 | - hiredis==1.1.0 58 | - idna==2.10 59 | - importlib-metadata==3.7.2 60 | - iniconfig==1.1.1 61 | - joblib==1.0.1 62 | - jsonschema==3.2.0 63 | - keras-preprocessing==1.1.2 64 | - kubernetes==12.0.1 65 | - git+https://github.com/ANarayan/ludwig.git@09dfe62a389226e9a125f2a66bb6eb6569f25130 66 | - git+https://github.com/Breakend/experiment-impact-tracker.git 67 | - git+https://github.com/robustness-gym/robustness-gym.git@8be2b1124e1a4fecdad15d73da073b9115f0f289 68 | - lxml==4.6.2 69 | - markdown==3.3.4 70 | - msgpack==1.0.2 71 | - multidict==5.1.0 72 | - nnpy-bundle==1.4.2.post1 73 | - numexpr==2.7.3 74 | - numpy>=1.18.0 75 | - nvidia-ml-py3==7.352.0 76 | - oauthlib==3.1.0 77 | - opencensus==0.7.12 78 | - opencensus-context==0.1.2 79 | - openpyxl==3.0.7 80 | - opt-einsum==3.3.0 81 | - packaging==20.9 82 | - pandas==1.1.4 83 | - pathvalidate==2.3.2 84 | - pluggy==0.13.1 85 | - poap==0.1.26 86 | - prometheus-client==0.9.0 87 | - protobuf==3.15.6 88 | - psutil==5.8.0 89 | - py==1.10.0 90 | - py-spy==0.3.4 91 | - pyaml==20.4.0 92 | - pyarrow==3.0.0 93 | - pyasn1==0.4.8 94 | - pyasn1-modules==0.2.8 95 | - pycparser==2.20 96 | - pydoe2==1.3.0 97 | - pyparsing==2.4.7 98 | - pyrsistent==0.17.3 99 | - pysot==0.3.3 100 | - pytest==6.2.2 101 | - python-dateutil==2.8.1 102 | - pytz==2021.1 103 | - pyyaml==5.4.1 104 | - https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-2.0.0.dev0-cp38-cp38-manylinux2014_x86_64.whl 105 | - redis==3.5.3 106 | - requests==2.25.1 107 | - requests-oauthlib==1.3.0 108 | - rsa==4.7.2 109 | - scikit-learn==0.24.1 110 | - scikit-optimize==0.8.1 111 | - scipy==1.4.1 112 | - six==1.15.0 113 | - smmap==3.0.5 114 | - tables==3.6.1 115 | - tabulate==0.8.9 116 | - tensorboard==2.4.1 117 | - tensorboard-plugin-wit==1.8.0 118 | - tensorboardx==2.1 119 | - tensorflow==2.3.1 120 | - tensorflow-estimator<2.4.0 121 | - termcolor==1.1.0 122 | - textattack==0.2.15 123 | - tfa-nightly==0.12.0.dev20201215223743 124 | - threadpoolctl==2.1.0 125 | - toml==0.10.2 126 | - tqdm>=4.27.0 127 | - transformers==4.2.1 128 | - typeguard==2.11.1 129 | - typing-extensions==3.7.4.3 130 | - urllib3==1.24.3 131 | - websocket-client==0.58.0 132 | - werkzeug==1.0.1 133 | - wget==3.2 134 | - wrapt==1.12.1 135 | - xarray==0.17.0 136 | - xlrd==2.0.1 137 | - xlwt==1.3.0 138 | - yarl==1.6.3 139 | - zipp==3.4.1 140 | -------------------------------------------------------------------------------- /environments/environment-osx.yaml: -------------------------------------------------------------------------------- 1 | name: lbt 2 | channels: 3 | - defaults 4 | dependencies: 5 | - ca-certificates=2021.5.25 6 | - certifi=2021.5.30 7 | - libcxx=10.0.0 
8 | - libffi=3.3 9 | - ncurses=6.2 10 | - openssl=1.1.1k 11 | - pip=21.1.1 12 | - python=3.8.10 13 | - readline=8.1 14 | - setuptools=52.0.0 15 | - sqlite=3.35.4 16 | - tk=8.6.10 17 | - wheel=0.36.2 18 | - xz=5.2.5 19 | - zlib=1.2.11 20 | - pip: 21 | - absl-py==0.12.0 22 | - aiohttp==3.7.4.post0 23 | - aiohttp-cors==0.7.0 24 | - aioredis==1.3.1 25 | - astunparse==1.6.3 26 | - async-timeout==3.0.1 27 | - attrs==20.3.0 28 | - bayesmark==0.0.8 29 | - blessings==1.7 30 | - cachetools==4.2.1 31 | - cffi==1.14.5 32 | - chardet==4.0.0 33 | - click==7.1.2 34 | - cloudpickle==1.6.0 35 | - colorama==0.4.4 36 | - colorful==0.5.4 37 | - configspace==0.4.18 38 | - cython==0.29.22 39 | - dill==0.3.3 40 | - docker==4.4.4 41 | - elasticsearch==7.11.0 42 | - et-xmlfile==1.0.1 43 | - fiber==0.2.1 44 | - filelock==3.0.12 45 | - flatbuffers==1.12 46 | - gast==0.3.3 47 | - gitdb==4.0.5 48 | - gitpython==3.1.14 49 | - google-api-core==1.26.1 50 | - google-auth==1.27.1 51 | - google-auth-oauthlib==0.4.3 52 | - google-pasta==0.2.0 53 | - googleapis-common-protos==1.53.0 54 | - gpustat==0.6.0 55 | - gputil==1.4.0 56 | - grpcio==1.32.0 57 | - h5py==2.10.0 58 | - hiredis==1.1.0 59 | - idna==2.10 60 | - importlib-metadata==3.7.2 61 | - iniconfig==1.1.1 62 | - joblib==1.0.1 63 | - jsonschema==3.2.0 64 | - keras-preprocessing==1.1.2 65 | - kubernetes==12.0.1 66 | - git+https://github.com/ANarayan/ludwig.git@09dfe62a389226e9a125f2a66bb6eb6569f25130 67 | - git+https://github.com/Breakend/experiment-impact-tracker.git 68 | - git+https://github.com/robustness-gym/robustness-gym.git@8be2b1124e1a4fecdad15d73da073b9115f0f289 69 | - lxml==4.6.2 70 | - markdown==3.3.4 71 | - msgpack==1.0.2 72 | - multidict==5.1.0 73 | - nnpy-bundle==1.4.2.post1 74 | - numexpr==2.7.3 75 | - numpy>=1.18.0 76 | - nvidia-ml-py3==7.352.0 77 | - oauthlib==3.1.0 78 | - opencensus==0.7.12 79 | - opencensus-context==0.1.2 80 | - openpyxl==3.0.7 81 | - opt-einsum==3.3.0 82 | - packaging==20.9 83 | - pandas==1.1.4 84 | - pathvalidate==2.3.2 85 | - pluggy==0.13.1 86 | - poap==0.1.26 87 | - prometheus-client==0.9.0 88 | - protobuf==3.15.6 89 | - psutil==5.8.0 90 | - py==1.10.0 91 | - py-spy==0.3.4 92 | - pyaml==20.4.0 93 | - pyarrow==3.0.0 94 | - pyasn1==0.4.8 95 | - pyasn1-modules==0.2.8 96 | - pycparser==2.20 97 | - pydoe2==1.3.0 98 | - pyparsing==2.4.7 99 | - pyrsistent==0.17.3 100 | - pysot==0.3.3 101 | - pytest==6.2.2 102 | - python-dateutil==2.8.1 103 | - pytz==2021.1 104 | - pyyaml==5.4.1 105 | - https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-2.0.0.dev0-cp38-cp38-macosx_10_13_x86_64.whl 106 | - redis==3.5.3 107 | - requests==2.25.1 108 | - requests-oauthlib==1.3.0 109 | - rsa==4.7.2 110 | - scikit-learn==0.24.1 111 | - scikit-optimize==0.8.1 112 | - scipy==1.4.1 113 | - six==1.15.0 114 | - smmap==3.0.5 115 | - tables==3.6.1 116 | - tabulate==0.8.9 117 | - tensorboard==2.4.1 118 | - tensorboard-plugin-wit==1.8.0 119 | - tensorboardx==2.1 120 | - tensorflow==2.3.1 121 | - textattack==0.2.15 122 | - tensorflow-estimator<2.4.0 123 | - termcolor==1.1.0 124 | - tfa-nightly==0.12.0.dev20201215223743 125 | - threadpoolctl==2.1.0 126 | - toml==0.10.2 127 | - tqdm>=4.27.0 128 | - transformers==4.2.1 129 | - typeguard==2.11.1 130 | - typing-extensions==3.7.4.3 131 | - urllib3==1.24.3 132 | - websocket-client==0.58.0 133 | - werkzeug==1.0.1 134 | - wget==3.2 135 | - wrapt==1.12.1 136 | - xarray==0.17.0 137 | - xlrd==2.0.1 138 | - xlwt==1.3.0 139 | - yarl==1.6.3 140 | - zipp==3.4.1 141 | 
-------------------------------------------------------------------------------- /experiment-templates/dataset_metadata.yaml: -------------------------------------------------------------------------------- 1 | AGNews: 2 | data_class: AGNews 3 | input_features: 4 | - name: description 5 | type: text 6 | output_features: 7 | - name: class_index 8 | type: category 9 | 10 | AmazonPolarity: 11 | data_class: AmazonPolarity 12 | input_features: 13 | - name: review_text 14 | type: text 15 | output_features: 16 | - name: label 17 | type: category 18 | 19 | AmazonReviews: 20 | data_class: AmazonReviews 21 | input_features: 22 | - name: review_text 23 | type: text 24 | output_features: 25 | - name: label 26 | type: category 27 | 28 | DBPedia: 29 | data_class: DBPedia 30 | input_features: 31 | - name: content 32 | type: text 33 | output_features: 34 | - name: label 35 | type: category 36 | 37 | EthosBinary: 38 | data_class: EthosBinary 39 | input_features: 40 | - name: comment 41 | type: text 42 | output_features: isHate 43 | type: category 44 | 45 | GoEmotions: 46 | data_class: GoEmotions 47 | input_features: 48 | - name: text 49 | type: text 50 | output_features: 51 | - name: emotion_ids 52 | type: set 53 | 54 | Irony: 55 | data_class: Irony 56 | input_features: 57 | - name: comment_text 58 | type: text 59 | output_features: 60 | - name: label 61 | type: category 62 | 63 | SST2: 64 | data_class: SST2 65 | input_features: 66 | - name: sentence 67 | type: text 68 | output_features: 69 | - name: label 70 | type: category 71 | 72 | SST5: 73 | data_class: SST5 74 | input_features: 75 | - name: sentence 76 | type: text 77 | output_features: 78 | - name: label 79 | type: category 80 | 81 | YahooAnswers: 82 | data_class: YahooAnswers 83 | input_features: 84 | - name: question 85 | type: text 86 | output_features: 87 | - name: label 88 | type: category 89 | 90 | YelpPolarity: 91 | data_class: YelpPolarity 92 | input_features: 93 | - name: text 94 | type: text 95 | 96 | output_features: 97 | - name: label 98 | type: category 99 | 100 | YelpReviews: 101 | data_class: YelpReviews 102 | input_features: 103 | - name: text 104 | type: text 105 | output_features: 106 | - name: label 107 | type: category 108 | 109 | HateSpeech: 110 | data_class: HateSpeech 111 | input_features: 112 | - name: tweet 113 | type: text 114 | output_features: 115 | - name: class 116 | type: category 117 | 118 | SocialBiasFrames: 119 | data_class: SocialBiasFrames 120 | input_features: 121 | - name: post 122 | type: text 123 | output_features: 124 | - name: sexYN 125 | type: category 126 | - name: offensiveYN 127 | type: category 128 | - name: intentYN 129 | type: category 130 | - name: speakerMinorityYN 131 | type: category 132 | type: category 133 | 134 | MDGenderBias: 135 | data_class: MDGenderBias 136 | input_features: 137 | - name: text 138 | type: text 139 | output_features: 140 | - name: gender 141 | type: category 142 | type: category 143 | 144 | CIFAR10: 145 | data_class: CIFAR10 146 | input_features: 147 | - name: image_path 148 | type: image 149 | output_features: 150 | - name: label 151 | type: category 152 | type: category 153 | 154 | Mnist: 155 | data_class: Mnist 156 | input_features: 157 | - name: image_path 158 | type: image 159 | output_features: 160 | - name: label 161 | type: category 162 | type: category 163 | 164 | toy_agnews: 165 | data_class: toy_agnews 166 | input_features: 167 | - name: description 168 | type: text 169 | output_features: 170 | - name: class_index 171 | type: category 172 | 
-------------------------------------------------------------------------------- /experiment-templates/hyperopt_config.yaml: -------------------------------------------------------------------------------- 1 | goal: maximize 2 | metric: accuracy 3 | parameters: 4 | training.learning_rate: 5 | type: float 6 | lower: 0.00002 7 | upper: 0.01 8 | space: loguniform 9 | sampler: 10 | type: ray 11 | search_alg: 12 | type: skopt 13 | max_concurrent: 1 14 | num_samples: 1 15 | executor: 16 | type: ray 17 | cpu_resources_per_trial: 1 18 | gpu_resources_per_trial: 0 19 | #kubernetes_namespace: "ray" 20 | -------------------------------------------------------------------------------- /experiment-templates/task_template.yaml: -------------------------------------------------------------------------------- 1 | input_features: 2 | - name: ~ 3 | type: text 4 | level: word 5 | encoder: rnn 6 | preprocessing: 7 | word_tokenizer: space 8 | pretrained_model_name_or_path: None 9 | 10 | output_features: 11 | - name: ~ 12 | type: category 13 | 14 | training: 15 | learning_rate: 0.01 16 | batch_size: 16 17 | eval_batch_size: 64 18 | early_stop: 3 19 | epochs: 25 20 | validation_metric: accuracy 21 | 22 | hyperopt: 23 | output_feature: 24 | metric: ~ 25 | strategy: 26 | type: ~ 27 | num_samples: ~ 28 | parameters: 29 | training.learning_rate: 30 | min: ~ 31 | max: ~ 32 | scale: ~ 33 | -------------------------------------------------------------------------------- /experiment_driver.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import datetime 3 | import logging 4 | 5 | import ray 6 | import globals 7 | 8 | from lbt.utils.experiment_utils import set_globals, load_yaml 9 | from lbt.experiments import ( 10 | run_experiments, 11 | reproduce_experiment, 12 | download_data, 13 | ) 14 | from lbt.datasets import DATASET_REGISTRY 15 | from lbt.experiments import ( 16 | run_experiments, 17 | reproduce_experiment, 18 | download_data, 19 | ) 20 | import lbt.build_def_files 21 | from lbt.build_def_files import build_config_files 22 | 23 | logging.basicConfig( 24 | format=logging.basicConfig( 25 | format="[\N{books} LUDWIG-BENCHMARKING-TOOLKIT \N{books}] => %(levelname)s::%(message)s", 26 | level=logging.DEBUG, 27 | ), 28 | level=logging.DEBUG, 29 | ) 30 | 31 | 32 | def main(): 33 | parser = argparse.ArgumentParser( 34 | description="Ludwig Benchmarking Toolkit experiment driver script", 35 | ) 36 | 37 | parser.add_argument( 38 | "-hcd", 39 | "--hyperopt_config_dir", 40 | help="directory to save all model config", 41 | type=str, 42 | default=globals.EXPERIMENT_CONFIGS_DIR, 43 | ) 44 | 45 | parser.add_argument( 46 | "--resume_existing_exp", 47 | help="resume a previously stopped experiment", 48 | type=bool, 49 | default=False, 50 | ) 51 | 52 | parser.add_argument( 53 | "-eod", 54 | "--experiment_output_dir", 55 | help="directory to save hyperopt runs", 56 | type=str, 57 | default=globals.EXPERIMENT_OUTPUT_DIR, 58 | ) 59 | 60 | parser.add_argument( 61 | "--datasets", 62 | help="list of datasets to run experiemnts on", 63 | nargs="+", 64 | choices=list(DATASET_REGISTRY.keys()), 65 | default=None, 66 | required=True, 67 | ) 68 | parser.add_argument( 69 | "-re", 70 | "--run_environment", 71 | help="environment in which experiment will be run", 72 | choices=["local", "gcp"], 73 | default="local", 74 | ) 75 | parser.add_argument( 76 | "-esc", 77 | "--elasticsearch_config", 78 | help="path to elastic db config file", 79 | type=str, 80 | default=None, 81 | ) 82 | 83 | 
parser.add_argument( 84 | "-dcd", 85 | "--dataset_cache_dir", 86 | help="path to cache downloaded datasets", 87 | type=str, 88 | default=globals.DATASET_CACHE_DIR, 89 | ) 90 | 91 | # list of encoders to run hyperopt search over : 92 | # default is 23 ludwig encoders 93 | parser.add_argument( 94 | "-mel", 95 | "--custom_model_list", 96 | help="list of encoders to run hyperopt experiments on. \ 97 | The default setting is to use all 23 Ludwig encoders", 98 | nargs="+", 99 | choices=[ 100 | "all", 101 | "bert", 102 | "rnn", 103 | "stacked_parallel_cnn", 104 | "roberta", 105 | "distilbert", 106 | "electra", 107 | "resnet", 108 | "stacked_cnn", 109 | "t5", 110 | ], 111 | default="all", 112 | ) 113 | 114 | parser.add_argument( 115 | "-topn", 116 | "--top_n_trials", 117 | help="top n trials to save model performance for.", 118 | type=int, 119 | default=None, 120 | ) 121 | 122 | parser.add_argument( 123 | "-reproduce", 124 | "--experiment_to_reproduce", 125 | help="path to LBT experiment config to reproduce and experiment", 126 | type=str, 127 | default=None, 128 | ) 129 | 130 | args = parser.parse_args() 131 | set_globals(args) 132 | 133 | data_file_paths = download_data(args.dataset_cache_dir, args.datasets) 134 | logging.info("Datasets succesfully downloaded...") 135 | 136 | config_files = build_config_files() 137 | logging.info("Experiment configuration files built...") 138 | 139 | elastic_config = None 140 | if args.elasticsearch_config is not None: 141 | elastic_config = load_yaml(args.elasticsearch_config) 142 | 143 | experiment_config = None 144 | if args.experiment_to_reproduce is not None: 145 | experiment_config = load_yaml(args.experiment_to_reproduce) 146 | 147 | if args.run_environment == "gcp": 148 | ray.init(address="auto") 149 | 150 | if experiment_config: 151 | reproduce_experiment( 152 | model=args.custom_model_list[0], 153 | dataset=args.datasets[0], 154 | data_file_paths=data_file_paths, 155 | experiment_to_replicate=args.experiment_to_reproduce, 156 | run_environment=args.run_environment, 157 | ) 158 | else: 159 | run_experiments( 160 | data_file_paths, 161 | config_files, 162 | top_n_trials=args.top_n_trials, 163 | elastic_config=elastic_config, 164 | run_environment=args.run_environment, 165 | resume_existing_exp=args.resume_existing_exp, 166 | ) 167 | 168 | 169 | if __name__ == "__main__": 170 | main() 171 | -------------------------------------------------------------------------------- /globals.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | PATH_HERE = os.path.abspath(os.path.dirname(__file__)) 4 | ENCODER_CONFIG_DIR = os.path.join(PATH_HERE, "model-configs") 5 | # EXPERIMENT_CONFIGS_DIR = '/experiments/ludwig-bench-textclassification/experiment-configs' 6 | EXPERIMENT_CONFIGS_DIR = os.path.join(PATH_HERE, "hyperopt-experiment-configs") 7 | DATASET_CACHE_DIR = os.path.join(PATH_HERE,"datasets") 8 | ENERGY_LOGGING_DIR = os.path.join(PATH_HERE, "energy_logging") 9 | 10 | ENCODER_HYPEROPT_FILENAMES = { 11 | "bert": "bert_hyperopt.yaml", 12 | "rnn": "rnn_hyperopt.yaml", 13 | "distilbert": "distilbert_hyperopt.yaml", 14 | "electra": "electra_hyperopt.yaml", 15 | "roberta": "roberta_hyperopt.yaml", 16 | "stacked_parallel_cnn": "stackedparallelcnn_hyperopt.yaml", 17 | "t5": "t5_hyperopt.yaml", 18 | "resnet" : "resnet_hyperopt.yaml", 19 | "stacked_cnn" : "stackedcnn_hyperopt.yaml" 20 | } 21 | 22 | ENCODER_FILE_LIST = ENCODER_HYPEROPT_FILENAMES.values() 23 | DATASETS_LIST = None 24 | 25 | CONFIG_TEMPLATE_FILE = 
"./experiment-templates/task_template.yaml" 26 | DATASET_METADATA_FILE = "./experiment-templates/dataset_metadata.yaml" 27 | HYPEROPT_CONFIG_FILE = "./experiment-templates/hyperopt_config.yaml" 28 | EXPERIMENT_OUTPUT_DIR = "./experiment-outputs" 29 | 30 | PATH_TO_PRETRAINED_EMBEDDINGS = None 31 | 32 | RUNTIME_ENV = "local" 33 | -------------------------------------------------------------------------------- /lbt/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.3.0.post1" 2 | -------------------------------------------------------------------------------- /lbt/build_def_files.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pdb 4 | 5 | from copy import deepcopy 6 | 7 | import yaml 8 | 9 | import globals 10 | from globals import * 11 | from lbt.utils.experiment_utils import load_yaml 12 | 13 | template = load_yaml(CONFIG_TEMPLATE_FILE) 14 | dataset_metadata = load_yaml(DATASET_METADATA_FILE) 15 | hyperopt_config = load_yaml(HYPEROPT_CONFIG_FILE) 16 | 17 | 18 | def insert_global_vars(config): 19 | """ replace global variable placeholders with respective values """ 20 | for key, value in config.items(): 21 | if type(value) != dict and value in vars(globals): 22 | config[key] = getattr(globals, value) 23 | 24 | 25 | def build_config_files(): 26 | config_fps = {} 27 | config = deepcopy(template) 28 | 29 | encoder_hyperopt_vals = [] 30 | # select relevant encoders 31 | for encoder_filename in globals.ENCODER_FILE_LIST: 32 | with open(os.path.join(ENCODER_CONFIG_DIR, encoder_filename)) as f: 33 | encoder_hyperopt_params = yaml.load(f, Loader=yaml.SafeLoader) 34 | encoder_hyperopt_vals.append(encoder_hyperopt_params) 35 | 36 | # select relevant datasets 37 | selected_datasets = {} 38 | for dataset_name in globals.DATASETS_LIST: 39 | if dataset_name in dataset_metadata.keys(): 40 | selected_datasets[dataset_name] = dataset_metadata[dataset_name] 41 | else: 42 | raise ValueError( 43 | "The dataset you provided is not available." 
44 | "Please see list of available datasets here: " 45 | "python experiment_drivery.py --h" 46 | ) 47 | 48 | config["hyperopt"].update(hyperopt_config) 49 | 50 | for dataset, metadata in selected_datasets.items(): 51 | # each dataset will have a model specific config file 52 | config_fps[dataset] = [] 53 | 54 | for idx, input_feature_name in enumerate(metadata["input_features"]): 55 | ipt_feat = deepcopy(config["input_features"][0]) 56 | ipt_feat["name"] = input_feature_name["name"] 57 | ipt_feat["type"] = input_feature_name["type"] 58 | if idx == 0: 59 | config["input_features"] = [ipt_feat] 60 | else: 61 | config["input_features"].append(ipt_feat) 62 | for idx, output_feature_info in enumerate(metadata["output_features"]): 63 | out_feat = deepcopy(config["output_features"][0]) 64 | out_feat["name"] = output_feature_info["name"] 65 | out_feat["type"] = output_feature_info["type"] 66 | if idx == 0: 67 | config["output_features"] = [out_feat] 68 | else: 69 | config["output_features"].append(out_feat) 70 | 71 | if len(metadata["output_features"]) > 1: 72 | config["hyperopt"]["output_feature"] = "combined" 73 | else: 74 | config["hyperopt"]["output_feature"] = metadata["output_features"][ 75 | 0 76 | ]["name"] 77 | 78 | input_feature_names = metadata["input_features"] 79 | output_feature_names = metadata["output_features"] 80 | 81 | for encoder_hyperopt_params in encoder_hyperopt_vals: 82 | curr_config = deepcopy(config) 83 | encoder_name = encoder_hyperopt_params["parameters"][ 84 | "input_features.name.encoder" 85 | ] 86 | 87 | # update input and output parameters (not preprocessing) 88 | for idx in range(len(curr_config["input_features"])): 89 | curr_config["input_features"][idx].update( 90 | encoder_hyperopt_params["input_features"][idx] 91 | ) 92 | insert_global_vars(curr_config["input_features"][idx]) 93 | 94 | for idx in range(len(curr_config["output_features"])): 95 | if "output_features" in encoder_hyperopt_params.keys(): 96 | curr_config["output_features"][idx].update( 97 | encoder_hyperopt_params["output_features"][idx] 98 | ) 99 | insert_global_vars(curr_config["output_features"][idx]) 100 | 101 | # handle encoder specific preprocessing 102 | for idx in range(len(curr_config["input_features"])): 103 | try: 104 | preprocessing = curr_config["input_features"][idx][ 105 | "preprocessing" 106 | ] 107 | for key, _ in preprocessing.items(): 108 | preprocessing[key] = encoder_hyperopt_params[ 109 | "input_features" 110 | ][idx]["preprocessing"][key] 111 | 112 | except: 113 | pass #no preprocessing param 114 | # handle encoder specific training params 115 | if "training" in encoder_hyperopt_params.keys(): 116 | curr_config["training"].update( 117 | encoder_hyperopt_params["training"] 118 | ) 119 | 120 | def input_or_output_feature(param_key): 121 | if param_key.split(".")[0] == "input_features": 122 | return True 123 | return False 124 | 125 | # handle encoder specific hyperopt 126 | input_encoder_hyperopt_params = { 127 | "parameters": { 128 | input_feat["name"] + "." + key.split(".")[-1]: value 129 | for input_feat in input_feature_names 130 | for key, value in encoder_hyperopt_params[ 131 | "parameters" 132 | ].items() 133 | if key.split(".")[-1] != "encoder" 134 | and input_or_output_feature(key) 135 | } 136 | } 137 | 138 | # handle encoder specific hyperopt 139 | output_encoder_hyperopt_params = { 140 | "parameters": { 141 | output_feat["name"] + "." 
+ key.split(".")[-1]: value 142 | for output_feat in output_feature_names 143 | for key, value in encoder_hyperopt_params[ 144 | "parameters" 145 | ].items() 146 | if key.split(".")[-1] != "encoder" 147 | and not input_or_output_feature(key) 148 | } 149 | } 150 | 151 | ds_encoder_hyperopt_params = { 152 | "parameters": { 153 | **output_encoder_hyperopt_params["parameters"], 154 | **input_encoder_hyperopt_params["parameters"], 155 | } 156 | } 157 | curr_config["input_features"][0]["encoder"] = encoder_name 158 | 159 | # populate hyperopt parameters w/encoder specific settings 160 | curr_config["hyperopt"].update( 161 | { 162 | "parameters": { 163 | **ds_encoder_hyperopt_params["parameters"], 164 | **hyperopt_config["parameters"], 165 | } 166 | } 167 | ) 168 | 169 | config_fp = os.path.join( 170 | EXPERIMENT_CONFIGS_DIR, f"config_{dataset}_{encoder_name}.yaml" 171 | ) 172 | with open(config_fp, "w") as f: 173 | yaml.dump(curr_config, f) 174 | 175 | config_fps[dataset].append(config_fp) 176 | 177 | return config_fps 178 | -------------------------------------------------------------------------------- /lbt/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import inspect 3 | 4 | from lbt.datasets.base_dataset import LBTDataset 5 | from ludwig.datasets.base_dataset import BaseDataset 6 | 7 | DATASET_REGISTRY = {} 8 | 9 | 10 | def register_dataset(name): 11 | """ 12 | New dataset types can be added to LBT with the `register_dataset` 13 | function decorator. 14 | : 15 | @register_dataset('personal_dataset') 16 | class PersonalDataset(): 17 | (...) 18 | Args: 19 | name (str): the name of the dataset 20 | """ 21 | 22 | def register_dataset_cls(cls): 23 | if not issubclass(cls, LBTDataset): 24 | raise ValueError( 25 | "Dataset ({}: {}) must extend lbt.base_datast.LBTDataset".format( 26 | name, cls.__name__ 27 | ) 28 | ) 29 | DATASET_REGISTRY[name] = cls 30 | return cls 31 | 32 | return register_dataset_cls 33 | 34 | 35 | def build_dataset(dataset_name: str, cache_dir: str, **kwargs): 36 | if dataset_name not in DATASET_REGISTRY: 37 | if dataset_name in PRE_BUILT_DATASETS: 38 | importlib.import_module(PRE_BUILT_DATASETS[dataset_name]) 39 | else: 40 | raise ValueError( 41 | "Dataset ({}) is not supported by LBT".format(dataset_name) 42 | ) 43 | exit(1) 44 | 45 | dataset = DATASET_REGISTRY[dataset_name](cache_dir=cache_dir, **kwargs) 46 | dataset.load() 47 | return dataset 48 | 49 | 50 | PRE_BUILT_DATASETS = { 51 | "AGNews": "ludwig.datasets.agnews", 52 | "SST5": "ludwig.datasets.sst5", 53 | "GoEmotions": "ludwig.datasets.goemotions", 54 | "Fever": "ludwig.datasets.fever", 55 | "SST2": "ludwig.datasets.sst2", 56 | "EthosBinary": "ludwig.datasets.ethos_binary", 57 | "YelpPolarity": "ludwig.datasets.yelp_review_polarity", 58 | "DBPedia": "ludwig.datasets.dbpedia", 59 | "Irony": "ludwig.datasets.irony", 60 | "YelpReviews": "ludwig.datasets.yelp_reviews", 61 | "YahooAnswers": "ludwig.datasets.yahoo_answers", 62 | "AmazonPolarity": "ludwig.datasets.amazon_review_polarity", 63 | "AmazonReviews": "ludwig.datasets.amazon_reviews", 64 | "HateSpeech": "ludwig.datasets.hate_speech", 65 | "MDGenderBias": "ludwig.datasets.md_gender_bias", 66 | "toyAGNews": "lbt.datasets.toy_datasets", 67 | "Mnist" : "ludwig.datasets.mnist", 68 | "CIFAR10" : "ludwig.datasets.cifar10", 69 | } 70 | 71 | # TODO: ASN -> CHECK PLACEMENT 72 | for dataset_name, module_path in PRE_BUILT_DATASETS.items(): 73 | module = importlib.import_module(module_path) 74 | for 
obj in dir(module): 75 | if obj != "BaseDataset" and inspect.isclass(getattr(module, obj)): 76 | if issubclass(getattr(module, obj), BaseDataset): 77 | DATASET_REGISTRY[dataset_name] = getattr(module, obj) 78 | -------------------------------------------------------------------------------- /lbt/datasets/base_dataset.py: -------------------------------------------------------------------------------- 1 | from ludwig.datasets.base_dataset import BaseDataset, DEFAULT_CACHE_LOCATION 2 | import abc 3 | import pandas as pd 4 | 5 | 6 | class LBTDataset(BaseDataset): 7 | """Base LBT Dataset -- subclass wrapper around Ludwig data class""" 8 | 9 | def __init__(self, dataset_name, processed_file_name, cache_dir): 10 | self.name = dataset_name 11 | self.config = {"csv_filename": processed_file_name} 12 | self.cache_dir = cache_dir 13 | 14 | @abc.abstractmethod 15 | def download(self) -> None: 16 | """ Download the file from config url that represents the raw unprocessed training data.""" 17 | raise NotImplementedError() 18 | 19 | @abc.abstractmethod 20 | def process(self) -> None: 21 | """Process the dataset to get it ready to be plugged into a dataframe. 22 | Converts into a format to be used by the ludwig training API. To do this we create 23 | a new dictionary that contains the KV pairs in the format that we need. 24 | """ 25 | raise NotImplementedError() 26 | 27 | @abc.abstractmethod 28 | def load(self) -> pd.DataFrame: 29 | """ Load the processed data into a Pandas DataFrame """ 30 | raise NotImplementedError() 31 | 32 | @property 33 | def processed_dataset_path(self) -> str: 34 | """ Return path of the processed dataset """ 35 | raise NotImplementedError() 36 | 37 | def __repr__(self): 38 | return "{}()".format(self.name) 39 | -------------------------------------------------------------------------------- /lbt/datasets/toy-datasets/fever.csv: -------------------------------------------------------------------------------- 1 | ,id,verifiable,label,claim,evidence,split 2 | 155448,113501,NOT VERIFIABLE,NOT ENOUGH INFO,Grease had bad reviews.,"[[[133128, None, None, None]]]",1 3 | 155449,163803,VERIFIABLE,SUPPORTS,Ukrainian Soviet Socialist Republic was a founding participant of the UN.,"[[[296950, 288668, 'Ukrainian_Soviet_Socialist_Republic', 7]], [[298602, 290067, 'Ukrainian_Soviet_Socialist_Republic', 7], [298602, 290067, 'United_Nations', 0]], [[300696, 291816, 'Ukrainian_Soviet_Socialist_Republic', 7]], [[344347, 327887, 'Ukrainian_Soviet_Socialist_Republic', 7]], [[344994, 328433, 'Ukrainian_Soviet_Socialist_Republic', 7]], [[344997, 328435, 'Ukrainian_Soviet_Socialist_Republic', 7]]]",1 4 | 155450,70041,VERIFIABLE,SUPPORTS,2 Hearts is a musical composition by Minogue.,"[[[225394, 230056, '2_Hearts_-LRB-Kylie_Minogue_song-RRB-', 0]], [[317953, 306972, '2_Hearts_-LRB-Kylie_Minogue_song-RRB-', 0]], [[319638, 308345, '2_Hearts_-LRB-Kylie_Minogue_song-RRB-', 0]], [[319643, 308348, '2_Hearts_-LRB-Kylie_Minogue_song-RRB-', 0]]]",1 5 | 155451,202314,VERIFIABLE,REFUTES,The New Jersey Turnpike has zero shoulders.,"[[[238335, 240393, 'New_Jersey_Turnpike', 15]]]",1 6 | 155452,57085,NOT VERIFIABLE,NOT ENOUGH INFO,Legendary Entertainment is the owner of Wanda Cinemas.,"[[[178035, None, None, None], [182093, None, None, None], [314120, None, None, None], [314126, None, None, None], [314131, None, None, None]]]",1 7 | 155453,6032,VERIFIABLE,REFUTES,Aruba is the only ABC Island.,"[[[22769, 28071, 'ABC_islands_-LRB-Lesser_Antilles-RRB-', 0]], [[22769, 28072, 'ABC_islands_-LRB-Lesser_Antilles-RRB-', 1]]]",1 
8 | 155454,176630,NOT VERIFIABLE,NOT ENOUGH INFO,Great white sharks do not prefer dolphins as prey.,"[[[204612, None, None, None]]]",1 9 | 155455,130048,VERIFIABLE,REFUTES,"Burbank, California has always been completely void of industry.","[[[152264, 167060, 'Burbank,_California', 7]]]",1 10 | 155456,100046,NOT VERIFIABLE,NOT ENOUGH INFO,The Guthrie Theater's second building began operating in 1963.,"[[[117690, None, None, None]]]",1 11 | 155457,204575,VERIFIABLE,REFUTES,Commodore is ranked above a rear admiral.,"[[[241594, 243126, 'Commodore_-LRB-rank-RRB-', 0]], [[241594, 243127, 'Commodore_-LRB-rank-RRB-', 9], [241594, 243127, 'Rear_admiral', 0]]]",1 12 | 0,75397,VERIFIABLE,SUPPORTS,Nikolaj Coster-Waldau worked with the Fox Broadcasting Company.,"[[[92206, 104971, 'Nikolaj_Coster-Waldau', 7], [92206, 104971, 'Fox_Broadcasting_Company', 0]]]",0 13 | 1,150448,VERIFIABLE,SUPPORTS,Roman Atwood is a content creator.,"[[[174271, 187498, 'Roman_Atwood', 1]], [[174271, 187499, 'Roman_Atwood', 3]]]",0 14 | 2,214861,VERIFIABLE,SUPPORTS,"History of art includes architecture, dance, sculpture, music, painting, poetry literature, theatre, narrative, film, photography and graphic arts.","[[[255136, 254645, 'History_of_art', 2]]]",0 15 | 3,156709,VERIFIABLE,REFUTES,Adrienne Bailon is an accountant.,"[[[180804, 193183, 'Adrienne_Bailon', 0]]]",0 16 | 4,83235,NOT VERIFIABLE,NOT ENOUGH INFO,System of a Down briefly disbanded in limbo.,"[[[100277, None, None, None]]]",0 17 | 5,129629,VERIFIABLE,SUPPORTS,Homeland is an American television spy thriller based on the Israeli television series Prisoners of War.,"[[[151831, 166598, 'Homeland_-LRB-TV_series-RRB-', 0], [151831, 166598, 'Prisoners_of_War_-LRB-TV_series-RRB-', 0]]]",0 18 | 6,149579,NOT VERIFIABLE,NOT ENOUGH INFO,Beautiful reached number two on the Billboard Hot 100 in 2003.,"[[[173384, None, None, None]]]",0 19 | 7,229289,NOT VERIFIABLE,NOT ENOUGH INFO,Neal Schon was named in 1954.,"[[[273626, None, None, None]]]",0 20 | 8,33078,VERIFIABLE,SUPPORTS,The Boston Celtics play their home games at TD Garden.,"[[[49158, 58489, 'Boston_Celtics', 3]], [[49159, 58490, 'Boston_Celtics', 3]]]",0 21 | 9,6744,VERIFIABLE,SUPPORTS,The Ten Commandments is an epic film.,"[[[23513, 28977, 'The_Ten_Commandments_-LRB-1956_film-RRB-', 0]], [[23513, 28978, 'The_Ten_Commandments_-LRB-1956_film-RRB-', 20]]]",0 22 | 145449,91198,NOT VERIFIABLE,NOT ENOUGH INFO,Colin Kaepernick became a starting quarterback during the 49ers 63rd season in the National Football League.,"[[[108548, None, None, None]]]",2 23 | 145450,194462,NOT VERIFIABLE,NOT ENOUGH INFO,Tilda Swinton is a vegan.,"[[[227768, None, None, None]]]",2 24 | 145451,137334,VERIFIABLE,SUPPORTS,Fox 2000 Pictures released the film Soul Food.,"[[[289914, 283015, 'Soul_Food_-LRB-film-RRB-', 0]], [[291259, 284217, 'Soul_Food_-LRB-film-RRB-', 0]], [[293412, 285960, 'Soul_Food_-LRB-film-RRB-', 0]], [[337212, 322620, 'Soul_Food_-LRB-film-RRB-', 0]], [[337214, 322622, 'Soul_Food_-LRB-film-RRB-', 0]]]",2 25 | 145452,166626,NOT VERIFIABLE,NOT ENOUGH INFO,Anne Rice was born in New Jersey.,"[[[191656, None, None, None], [191657, None, None, None]]]",2 26 | 145453,111897,VERIFIABLE,REFUTES,Telemundo is a English-language television network.,"[[[131371, 146144, 'Telemundo', 0]], [[131371, 146148, 'Telemundo', 1]], [[131371, 146150, 'Telemundo', 4], [131371, 146150, 'Hispanic_and_Latino_Americans', 0]], [[131371, 146151, 'Telemundo', 5]]]",2 27 | 145454,89891,VERIFIABLE,REFUTES,Damon Albarn's debut album was released in 
2011.,"[[[107201, 120581, 'Damon_Albarn', 17]]]",2 28 | 145455,181634,VERIFIABLE,SUPPORTS,There is a capital called Mogadishu.,"[[[210946, 218608, 'Mogadishu', 0]]]",2 29 | 145456,219028,VERIFIABLE,REFUTES,Savages was exclusively a German film.,"[[[260471, 258880, 'Savages_-LRB-2012_film-RRB-', 3]], [[260473, 258882, 'Savages_-LRB-2012_film-RRB-', 3]]]",2 30 | 145457,194372,NOT VERIFIABLE,NOT ENOUGH INFO,Happiness in Slavery is a gospel song by Nine Inch Nails.,"[[[227658, None, None, None]]]",2 31 | 145458,108281,VERIFIABLE,REFUTES,Andrew Kevin Walker is only Chinese.,"[[[127089, 141573, 'Andrew_Kevin_Walker', 0]]]",2 32 | -------------------------------------------------------------------------------- /lbt/datasets/toy-datasets/goemotions.csv: -------------------------------------------------------------------------------- 1 | ,text,emotion_ids,comment_id,split 2 | 48837,Is this in New Orleans?? I really feel like this is New Orleans.,27,edgurhb,1 3 | 48838,"You know the answer man, you are programmed to capture those codes they send you, don’t avoid them!",4 27,ee84bjg,1 4 | 48839,I've never been this sad in my life!,25,edcu99z,1 5 | 48840,"The economy is heavily controlled and subsidized by the government. In any case, I was poking at the lack of nuance in US politics today",4 27,edc32e2,1 6 | 48841,He could have easily taken a real camera from a legitimate source and change the price in Word/Photoshop and then print it out.,20,eepig6r,1 7 | 48842,"Thank you for your vote of confidence, but we statistically can't get to 10 wins.",15,eczm50f,1 8 | 48843,"Wah Mum other people call me on my bullshit and I can't ban them , Go out side son.",2,ed4yr9r,1 9 | 48844,There it is!,27,ede4v0m,1 10 | 48845,At least now [NAME] has more time to gain his confidence,20,eekez9p,1 11 | 48846,Good. We don't want more thrash liberal offspring in this world.,10,ee0fxpu,1 12 | 0,My favourite food is anything I didn't have to cook myself.,27,eebbqej,0 13 | 1,"Now if he does off himself, everyone will think hes having a laugh screwing with people instead of actually dead",27,ed00q6i,0 14 | 2,WHY THE FUCK IS BAYLESS ISOING,2,eezlygj,0 15 | 3,To make her feel threatened,14,ed7ypvh,0 16 | 4,Dirty Southern Wankers,3,ed0bdzj,0 17 | 5,OmG pEyToN iSn'T gOoD eNoUgH tO hElP uS iN tHe PlAyOfFs! Dumbass Broncos fans circa December 2015.,26,edvnz26,0 18 | 6,Yes I heard abt the f bombs! That has to be why. Thanks for your reply:) until then hubby and I will anxiously wait 😝,15,ee3b6wu,0 19 | 7,We need more boards and to create a bit more space for [NAME]. Then we’ll be good.,8 20,ef4qmod,0 20 | 8,Damn youtube and outrage drama is super lucrative for reddit,0,ed8wbdn,0 21 | 9,It might be linked to the trust factor of your friend.,27,eczgv1o,0 22 | 43410,"I’m really sorry about your situation :( Although I love the names Sapphira, Cirilla, and Scarlett!",25,eecwqtt,2 23 | 43411,It's wonderful because it's awful. At not with.,0,ed5f85d,2 24 | 43412,"Kings fan here, good luck to you guys! Will be an interesting game to watch! ",13,een27c3,2 25 | 43413,"I didn't know that, thank you for teaching me something today!",15,eelgwd1,2 26 | 43414,They got bored from haunting earth for thousands of years and ultimately moved on to the afterlife.,27,eem5uti,2 27 | 43415,Thank you for asking questions and recognizing that there may be things that you don’t know or understand about police tactics. Seriously. Thank you.,15,ef2nq7i,2 28 | 43416,You’re welcome,15,efdbh17,2 29 | 43417,100%! 
Congrats on your job too!,15,ef0ec3b,2 30 | 43418,I’m sorry to hear that friend :(. It’s for the best most likely if she didn’t accept you for who you are,24,ee8utmi,2 31 | 43419,"Girlfriend weak as well, that jump was pathetic.",25,eeni74k,2 32 | -------------------------------------------------------------------------------- /lbt/datasets/toy-datasets/toy_agnews.csv: -------------------------------------------------------------------------------- 1 | ,class_index,title,description,split 2 | 80453,1,Guarding of Ukraine central election commission relieved,"KIEV, November 3 (Itar-Tass) - Water cannons and armoured personnel carriers were removed from the territory of the Ukrainian central election commission on Wednesday.",0 3 | 59399,4,FDA Approves Use of Chip in Patients (AP),"AP - The Food and Drug Administration on Wednesday approved an implantable computer chip that can pass a patient's medical details to doctors, speeding care.",0 4 | 97947,3,Chiefs agree on oil prices,THE Group of 20 finance chiefs agreed yesterday that quot;abrupt changes quot; in foreign exchange rates and oil prices were unwelcome but showed no appetite for intervention to strengthen the US currency.,0 5 | 53073,4,Love DRM or my family starves: why Steve Ballmer doesn #39;t Get It,"Last Sunday Microsoft CEO Steve Ballmer kicked off this week #39;s European tour by sitting down with a small group* of British journalists and dispensing pearls of wisdom, notably on the future of Apple in home networking (it has none, natch, says Steve).",0 6 | 16329,4,Video Game Sales Seen Pausing After Record 2003,"Video game sales soared to a record \$18.2 billion last year, but the days of strong growth are on pause as players await a new generation of consoles.",0 7 | 85504,3,Google Blazes Lonely IPO Trail,"Few experts who tried to predict how Google's much-antipated IPO would perform managed to get it right, and no firms have opted to follow its auction approach. By Joanna Glasner.",1 8 | 86595,1,"Confusion Over Arafat, Palestinians Prepare Burial (Reuters)","Reuters - Palestinians looked to their leadership\to lift confusion over Yasser Arafat's fate in a French\hospital on Wednesday as preparations gathered pace to bury the\icon of their fight for statehood. Arafat, 75, suffered a brain\hemorrhage on Tuesday at the hospital where he was flown from\the West Bank on Oct. 29 and had lain in a coma. Officials\insisted in public that he was alive, though aides said\privately that he was dead.",0 9 | 104152,4,InfoWorld Announces 2005 Technology of the Year Awards,Apple Xserve G5 won InfoWorld #146;s #147;Best Server Hardware #148; award and Mac OS X v10.3 Panther won the #147;Best Operating System #148; award. Jan 03,1 10 | 17522,4,Retailer to follow RFID test with full rollout,Germany's Metro Group tested radio tag inventory control for a year and found it good enough to deploy--but not perfect.,0 11 | 104930,3,Asian Stocks Rise for Fourth Day in US Trading; Sony Climbs,Asian stocks rose in US trading. Sony Corp. gained after the world #39;s No. 2 consumer-electronics maker agreed to resolve a dispute over patents related to digital cameras.,1 12 | 20654,4,Toxic waste ship sinks off Turkey,A ship containing toxic power station waste has sunk after being moored in a Turkish harbour for four years. 
The Ulla had sat in the port of Iskenderun in south-eastern Turkey amid confusion over the ship #39;s future.,1 13 | 66666,1,"Cabinet set to agree troop movement, but delay likely",The cabinet is expected to agree in principle today to a request from George Bush #39;s military commanders for British troops to be redeployed to the so-called quot;triangle of death quot; near Baghdad.,1 14 | 58164,4,Virgin to Unveil Portable Music Player,"The consumer electronics arm of the Virgin Group is introducing a new 5-gigabyte hard-disk portable music player, bringing a powerful brand name in music to the increasingly crowded product space. <FONT face=""verdana,MS Sans Serif,arial,helvetica"" size=""-2""\ color=""#666666""><B>-The Associated Press</B></FONT>",1 15 | 41068,4,AT T Wireless unveils messaging-only handheld,"The company says the device, dubbed the Ogo, won't be bogged down by features that are ""hardly used.""",0 16 | 84690,4,Online spammer free on bond,"LEESBURG, Va. A North Carolina man convicted of illegally sending (m) millions of pieces of junk e-mail to America Online subscribers was granted bond today while he awaits sentencing.",0 17 | 79395,4,"Intel pushes Pentium 4 bus to 1,066-MHz for gamers","SAN JOSE, Calif. - Intel Corp. (Santa Clara, Calif.) on Monday (Nov. 1) rolled out a new Pentium 4 processor and chip set for PC gamers, based on a 1,066-MHz front-side bus technology.",0 18 | 12514,4,Intel #39;s #39;BTX #39; Motherboard Debuts on Gateway #39;s PC,"Intel #39;s (Quote, Chart) next-generation motherboard design has made its debut in a new Gateway (Quote, Chart) desktop, the company said today.",0 19 | 47284,1,Bush and Kerry Follow Debate With Sharp Jabs,"President Bush attacked John Kerry on Iraq, taking a more aggressive approach than he had during the debate.",0 20 | 164,4,Insecurity: (Or Why Americans Aren't Feeling The Recovery),"The New Republic's website is currently carrying an interesting piece which tries to explain the anomaly that although the US economy is growing, a lot of its citizens are still feeling worse off. The article explains the results of a 40 year panel study which has shown that although mean incomes have increased, income variability has increased massively, causing many Americans to feel less well off, despite the growing economy. ",0 21 | 96209,3,Stocks Fall on Greenspan Deficit Warning,Stocks fell modestly Friday as Federal Reserve Chairman Alan Greenspan sounded a warning over the nation #39;s spiraling trade deficit.,0 22 | 45653,1,House Defeats Gay Marriage Ban Amendment,"WASHINGTON - The Republican-controlled House emphatically defeated a constitutional amendment banning gay marriage Thursday, the latest in a string of conservative pet causes pushed to a vote by GOP leaders in the run-up to Election Day. 
The vote was 227-186, far short of the two-thirds needed for approval on a measure that President Bush backed but the Senate had previously rejected...",0 23 | 94095,2,"Garcia, Jimenez Favorites at Home for Cup (AP)","AP - Chomping on a cigar, his unruly ponytail flowing from his cap, Miguel Angel Jimenez smiled as he said, ""Buenos dias"" to the clubhouse guard at the Real Club de Golf.",0 24 | 102756,3,FCC is watching SBC #39;s VoIP charge,WASHINGTON--The Federal Communications Commission is keeping and eye on SBC Communications #39; new connection charge for calls made over the Internet.,0 25 | 4200,4,Martian hill shows signs of ancient water,"LOS ANGELES - NASA #39;s Spirit rover has found more evidence of past water on the hills of Mars, while its twin, Opportunity, has observed a field of dunes inside a crater. ",0 26 | 88362,2,Baseball GMs Split on Instant Replay (AP),"AP - Upon further review, baseball will hold off on taking a look at instant replay. After watching umpires reverse almost every missed call in the postseason, major league general managers split 15-15 Thursday on whether to keep exploring the subject.",0 27 | 68015,3,Briefly: EU #39;s Mercosur talks extended,"The European Union and the Mercosur group of South American economies agreed Thursday to let negotiations on opening their markets extend into next year, missing a month-end deadline to build the world #39;s largest trade area.",0 28 | 97817,2,Malice at the Palace - the last thing the NBA needed for their <b>...</b>,"Take your pick of culprits in the stunning melee Friday night at The Palace of Auburn Hills, in a heated game between Central Division rivals: the defending NBA champion Detroit Pistons and Indiana Pacers.",0 29 | 34312,2,Soccer Legend Maradona Arrives in Cuba (AP),AP - Former soccer great Diego Maradona returned to Cuba on Monday to resume treatment for cocaine addiction after a relapse confined him to a psychiatric hospital in his native Argentina and sparked unsuccessful attempts by his family to keep him at home.,0 30 | 55160,2,No. 24 LSU at No. 12 Florida,Coaches: Ron Zook is 19-11 in his third year at UF; Nick Saban is 42-15 in his fifth year at LSU and 85-41-1 in his 11th year overall.,0 31 | 35819,4,"Hello, halitosis","ZDNet #39;s survey of IT professionals in August kept Wired amp; Wireless on top for the 18th month in a row. 
Siemens Mobile is developing the first mobile phone that will alert people when their breath stinks, the company said Tuesday.",0 32 | 93263,4,"More Than 15,000 Species Said to Be Facing Extinction (Reuters)","Reuters - More than 15,000 species, from sharks\to frogs to fir trees, are facing extinction and the total is\rising faster than ever before, conservationists and scientists\said Wednesday.",0 33 | 17850,4,Archaeologists Discover Tomb Near Egypt's Pyramids (Reuters),"Reuters - Archaeologists have found a\2,500-year-old tomb near Egypt's ancient pyramids in Giza, the\head of the excavation team told Reuters on Thursday.",0 34 | 37562,4,Sony to support MP3 in future players,Sony #39;s apparently gotten the message: Some of its new MP3 player models will feature direct support for MP3 in addition to its proprietary ATRAC format.,0 35 | 70546,3,US Airways #39; pilots vote for 18 salary reduction,US Airways #39; pilots voted to approve a new labor agreement yesterday that will reduce their salaries by 18 percent but will save the airline \$300 million a year.,0 36 | 29634,4,Triumphant return of the big <cite>Reg</cite> logo t-shirt,"<strong>Cash'n'Carrion</strong> Cue trumpets, etc",0 37 | 9206,3,HHG buoyed by return to profits,British insurer and fund manager HHG was back in the black at the half-year stage today as it recovered from losses of 902m (1.,0 38 | 125556,4,Outsourcing to Arkansas,A new kid on the block promises to give offshore outsourcing a run for its money--by routing technology work to rural America. Outsourcing Blog,2 39 | 122189,4,Noah's Ark Quest Dead in Water -- Was It a Stunt?,"In April a Christian activist announced a summer 2004 expedition to search for Noah's ark. The quest didn't happen, and now critics are questioning the project's credibility.",2 40 | -------------------------------------------------------------------------------- /lbt/datasets/toy_datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pdb 3 | import pandas as pd 4 | from lbt.datasets import register_dataset 5 | from lbt.datasets.base_dataset import LBTDataset 6 | 7 | 8 | @register_dataset("toy_agnews") 9 | class ToyAGNews(LBTDataset): 10 | def __init__( 11 | self, 12 | dataset_name="toy_agnews", 13 | processed_file_name="toy_agnews.csv", 14 | cache_dir=os.path.join(os.getcwd(), "lbt/datasets/toy-datasets"), 15 | ): 16 | super().__init__( 17 | dataset_name=dataset_name, 18 | processed_file_name=processed_file_name, 19 | cache_dir=os.path.join(os.getcwd(), "lbt/datasets/toy-datasets"), 20 | ) 21 | 22 | def download(self) -> None: 23 | pass 24 | 25 | def process(self) -> None: 26 | pass 27 | 28 | def load(self) -> pd.DataFrame: 29 | toy_agnews_ds = pd.read_csv( 30 | os.path.join(self.cache_dir, self.config["csv_filename"]) 31 | ) 32 | return toy_agnews_ds 33 | 34 | @property 35 | def processed_dataset_path(self): 36 | return self.cache_dir 37 | -------------------------------------------------------------------------------- /lbt/experiments.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import datetime 3 | import logging 4 | import os 5 | import pickle 6 | import socket 7 | from typing import Union 8 | from collections import defaultdict 9 | 10 | import numpy as np 11 | import ray 12 | 13 | import globals 14 | from .build_def_files import * 15 | from database import save_results_to_es 16 | from ludwig.hyperopt.run import hyperopt 17 | from lbt.utils.experiment_utils import 
*
18 | from lbt.datasets import DATASET_REGISTRY
19 | 
20 | hostname = socket.gethostbyname(socket.gethostname())
21 | 
22 | 
23 | def download_data(cache_dir=None, datasets: list = None):
24 |     """ Returns file paths for all datasets """
25 |     data_file_paths = {}
26 |     for dataset in datasets:
27 |         # if dataset in dataset_metadata.keys():
28 |         if dataset in list(DATASET_REGISTRY.keys()):
29 |             data_class = dataset_metadata[dataset]["data_class"]
30 |             data_path = download_dataset(data_class, cache_dir)
31 |             process_dataset(data_path)
32 |             data_file_paths[dataset] = data_path
33 |         else:
34 |             raise ValueError(
35 |                 f"{dataset} is not a valid dataset. "
36 |                 "For a list of valid datasets see: "
37 |                 "python experiment_driver.py -h"
38 |             )
39 |     return data_file_paths
40 | 
41 | 
42 | def resume_training(model_config: dict, output_dir):
43 |     results, metrics, params = collect_completed_trial_results(output_dir)
44 |     original_num_samples = model_config["hyperopt"]["sampler"]["num_samples"]
45 |     new_num_samples = max(original_num_samples - len(metrics), 0)
46 |     model_config["hyperopt"]["sampler"]["search_alg"][
47 |         "points_to_evaluate"
48 |     ] = params
49 |     model_config["hyperopt"]["sampler"]["search_alg"][
50 |         "evaluated_rewards"
51 |     ] = metrics
52 |     model_config["hyperopt"]["sampler"]["num_samples"] = new_num_samples
53 |     return model_config, results
54 | 
55 | 
56 | def run_hyperopt_exp(
57 |     experiment_attr: dict,
58 |     is_resume_training: bool = False,
59 |     runtime_env: str = "local",
60 | ) -> int:
61 | 
62 |     dataset = experiment_attr["dataset"]
63 |     encoder = experiment_attr["encoder"]
64 |     model_config = experiment_attr["model_config"]
65 | 
66 |     # the following are temp solutions for issues in Ray
67 |     if runtime_env == "local":
68 |         # temp solution to ray problems
69 |         os.environ["TUNE_PLACEMENT_GROUP_AUTO_DISABLED"] = "1"
70 |         os.environ["TUNE_PLACEMENT_GROUP_CLEANUP_DISABLED"] = "1"
71 | 
72 |     try:
73 |         start = datetime.datetime.now()
74 | 
75 |         tune_executor = model_config["hyperopt"]["executor"]["type"]
76 | 
77 |         num_gpus = 0
78 |         try:
79 |             num_gpus = model_config["hyperopt"]["executor"][
80 |                 "gpu_resources_per_trial"
81 |             ]
82 |         except:
83 |             pass
84 | 
85 |         if tune_executor == "ray" and runtime_env == "gcp":
86 | 
87 |             if (
88 |                 "kubernetes_namespace"
89 |                 not in model_config["hyperopt"]["executor"].keys()
90 |             ):
91 |                 raise ValueError(
92 |                     "Please specify the kubernetes namespace of the Ray cluster"
93 |                 )
94 | 
95 |         if tune_executor == "ray" and runtime_env == "local":
96 |             if (
97 |                 "kubernetes_namespace"
98 |                 in model_config["hyperopt"]["executor"].keys()
99 |             ):
100 |                 raise ValueError(
101 |                     "You are running locally. 
" 102 | "Please remove the kubernetes_namespace param in hyperopt_config.yaml" 103 | ) 104 | 105 | gpu_list = None 106 | if tune_executor != "ray": 107 | gpu_list = get_gpu_list() 108 | if len(gpu_list) > 0: 109 | num_gpus = 1 110 | 111 | new_model_config = copy.deepcopy(experiment_attr["model_config"]) 112 | existing_results = None 113 | if is_resume_training: 114 | new_model_config, existing_results = resume_training( 115 | new_model_config, experiment_attr["output_dir"] 116 | ) 117 | 118 | hyperopt_results = hyperopt( 119 | new_model_config, 120 | dataset=experiment_attr["dataset_path"], 121 | model_name=experiment_attr["model_name"], 122 | gpus=gpu_list, 123 | output_directory=experiment_attr["output_dir"], 124 | ) 125 | 126 | if existing_results is not None: 127 | hyperopt_results.extend(existing_results) 128 | hyperopt_results.sort(key=lambda result: result["metric_score"]) 129 | 130 | logging.info( 131 | "time to complete: {}".format(datetime.datetime.now() - start) 132 | ) 133 | 134 | # Save output locally 135 | try: 136 | pickle.dump( 137 | hyperopt_results, 138 | open( 139 | os.path.join( 140 | experiment_attr["output_dir"], 141 | f"{dataset}_{encoder}_hyperopt_results.pkl", 142 | ), 143 | "wb", 144 | ), 145 | ) 146 | except: 147 | pass 148 | 149 | # save lbt output w/additional metrics computed locall 150 | results_w_additional_metrics = compute_additional_metadata( 151 | experiment_attr, hyperopt_results, tune_executor 152 | ) 153 | try: 154 | pickle.dump( 155 | results_w_additional_metrics, 156 | open( 157 | os.path.join( 158 | experiment_attr["output_dir"], 159 | f"{dataset}_{encoder}_hyperopt_results_w_lbt_metrics.pkl", 160 | ), 161 | "wb", 162 | ), 163 | ) 164 | except: 165 | pass 166 | 167 | # create .completed file to indicate that experiment is completed 168 | _ = open( 169 | os.path.join(experiment_attr["output_dir"], ".completed"), "wb" 170 | ) 171 | 172 | logging.info( 173 | "time to complete: {}".format(datetime.datetime.now() - start) 174 | ) 175 | 176 | # save output to db 177 | if experiment_attr["elastic_config"]: 178 | try: 179 | save_results_to_es( 180 | experiment_attr, 181 | hyperopt_results, 182 | tune_executor=tune_executor, 183 | top_n_trials=experiment_attr["top_n_trials"], 184 | runtime_env="local", 185 | num_gpus=num_gpus, 186 | ) 187 | except: 188 | logging.warning("Not all files were uploaded to elastic db!") 189 | return 1 190 | except: 191 | logging.warning("Error running experiment...not completed") 192 | return 0 193 | 194 | 195 | def run_experiments( 196 | data_file_paths: dict, 197 | config_files: dict, 198 | top_n_trials: int, 199 | elastic_config=None, 200 | run_environment: str = "local", 201 | resume_existing_exp: bool = False, 202 | ): 203 | logging.info("Running hyperopt experiments...") 204 | # check if overall experiment has already been run 205 | if os.path.exists( 206 | os.path.join(globals.EXPERIMENT_OUTPUT_DIR, ".completed") 207 | ): 208 | logging.info("Experiment is already completed!") 209 | return 210 | 211 | completed_runs, experiment_queue = [], [] 212 | for dataset_name, file_path in data_file_paths.items(): 213 | logging.info("Dataset: {}".format(dataset_name)) 214 | 215 | for model_config_path in config_files[dataset_name]: 216 | config_name = model_config_path.split("/")[-1].split(".")[0] 217 | dataset = config_name.split("_")[1] 218 | encoder = "_".join(config_name.split("_")[2:]) 219 | experiment_name = dataset + "_" + encoder 220 | 221 | logging.info("Experiment: {}".format(experiment_name)) 222 | 223 | output_dir = 
os.path.join( 224 | globals.EXPERIMENT_OUTPUT_DIR, experiment_name 225 | ) 226 | 227 | if not os.path.isdir(output_dir): 228 | os.mkdir(output_dir) 229 | 230 | output_dir = os.path.join( 231 | globals.EXPERIMENT_OUTPUT_DIR, experiment_name 232 | ) 233 | 234 | if not os.path.exists(os.path.join(output_dir, ".completed")): 235 | 236 | model_config = load_yaml(model_config_path) 237 | experiment_attr = defaultdict() 238 | experiment_attr = { 239 | "model_config": copy.deepcopy(model_config), 240 | "dataset_path": file_path, 241 | "top_n_trials": top_n_trials, 242 | "model_name": config_name, 243 | "output_dir": output_dir, 244 | "encoder": encoder, 245 | "dataset": dataset, 246 | "elastic_config": elastic_config, 247 | } 248 | if run_environment == "local": 249 | completed_runs.append( 250 | run_hyperopt_exp( 251 | experiment_attr, 252 | resume_existing_exp, 253 | run_environment, 254 | ) 255 | ) 256 | 257 | experiment_queue.append(experiment_attr) 258 | else: 259 | logging.info( 260 | f"The {dataset} x {encoder} exp. has already completed!" 261 | ) 262 | 263 | if run_environment != "local": 264 | completed_runs = ray.get( 265 | [ 266 | ray.remote(num_cpus=0, resources={f"node:{hostname}": 0.001})( 267 | run_hyperopt_exp 268 | ).remote(exp, resume_existing_exp, run_environment) 269 | for exp in experiment_queue 270 | ] 271 | ) 272 | 273 | if len(completed_runs) == len(experiment_queue): 274 | # create .completed file to indicate that entire hyperopt experiment 275 | # is completed 276 | _ = open( 277 | os.path.join(globals.EXPERIMENT_OUTPUT_DIR, ".completed"), "wb" 278 | ) 279 | else: 280 | logging.warning("Not all experiments completed!") 281 | 282 | 283 | def reproduce_experiment( 284 | model, 285 | dataset, 286 | data_file_paths, 287 | elastic_config=None, 288 | experiment_to_replicate=None, 289 | run_environment: str = "local", 290 | ): 291 | experiment_config = load_yaml(experiment_to_replicate) 292 | experiment_name = dataset + "_" + model 293 | for dataset_name, file_path in data_file_paths.items(): 294 | 295 | output_dir = os.path.join( 296 | globals.EXPERIMENT_OUTPUT_DIR, experiment_name 297 | ) 298 | 299 | if not os.path.isdir(output_dir): 300 | os.mkdir(output_dir) 301 | 302 | output_dir = os.path.join( 303 | globals.EXPERIMENT_OUTPUT_DIR, experiment_name 304 | ) 305 | 306 | experiment_attr = defaultdict() 307 | experiment_attr = { 308 | "model_config": experiment_config, 309 | "dataset_path": file_path, 310 | "model_name": model, 311 | "output_dir": output_dir, 312 | "encoder": model, 313 | "dataset": dataset, 314 | "elastic_config": elastic_config, 315 | } 316 | run_hyperopt_exp( 317 | experiment_attr, 318 | False, 319 | run_environment, 320 | ) 321 | 322 | 323 | def experiment( 324 | models: Union[str, list], 325 | datasets: Union[str, list], 326 | experiment_configs_dir: str = globals.EXPERIMENT_CONFIGS_DIR, 327 | experiment_output_dir: str = globals.EXPERIMENT_OUTPUT_DIR, 328 | datasets_cache_dir: str = globals.DATASET_CACHE_DIR, 329 | run_environment: str = "local", 330 | elastic_search_config: str = None, 331 | resume_existing_exp: bool = False, 332 | ): 333 | if isinstance(datasets, str): 334 | datasets = [datasets] 335 | data_file_paths = download_data(datasets_cache_dir, datasets) 336 | 337 | config_files = build_config_files() 338 | elastic_config = None 339 | if elastic_search_config is not None: 340 | elastic_config = load_yaml(elastic_search_config) 341 | 342 | if run_environment == "gcp": 343 | ray.init(address="auto") 344 | 345 | run_experiments( 346 | 
data_file_paths,
347 |         config_files,
348 |         top_n_trials=None,
349 |         elastic_config=elastic_config,
350 |         run_environment=run_environment,
351 |         resume_existing_exp=resume_existing_exp,
352 |     )
353 | 
-------------------------------------------------------------------------------- /lbt/metrics/__init__.py: --------------------------------------------------------------------------------
1 | from lbt.metrics.base_metric import LBTMetric
2 | import ray
3 | import importlib
4 | import sys
5 | import json
6 | import os
7 | 
8 | LOCATION = os.path.abspath(os.path.dirname(__file__))
9 | INSTANCE_PRICES_FILEPATH = os.path.join(LOCATION, "instance_prices.json")
10 | METRIC_REGISTERY = {}
11 | INSTANCE_PRICES = {}
12 | 
13 | 
14 | def register_metric(name):
15 |     """
16 |     New metrics can be added to LBT with the `register_metric`
17 |     function decorator.
18 |     For example:
19 |         @register_metric('personal_metric')
20 |         class PersonalMetric():
21 |             (...)
22 |     Args:
23 |         name (str): the name of the metric
24 |     """
25 | 
26 |     def register_metric_cls(cls):
27 |         if not issubclass(cls, LBTMetric):
28 |             raise ValueError(
29 |                 "Metric ({}: {}) must extend lbt.metrics.base_metric".format(
30 |                     name, cls.__name__
31 |                 )
32 |             )
33 |         METRIC_REGISTERY[name] = cls
34 |         return cls
35 | 
36 |     return register_metric_cls
37 | 
38 | 
39 | def get_experiment_metadata(
40 |     document: dict,
41 |     model_path: str,
42 |     data_path: str,
43 |     run_stats: dict,
44 |     train_batch_size: int = 16,
45 |     num_gpus=0,
46 | ):
47 |     for key, metrics_class in METRIC_REGISTERY.items():
48 |         try:
49 |             remote_class = ray.remote(num_cpus=1, num_gpus=num_gpus)(
50 |                 metrics_class
51 |             ).remote()
52 |             output = remote_class.run.remote(
53 |                 model_path=model_path,
54 |                 dataset_path=data_path,
55 |                 train_batch_size=train_batch_size,
56 |                 run_stats=run_stats,
57 |             )
58 |             document.update({key: ray.get(output)})
59 |         except:
60 |             print(f"FAILURE PROCESSING: {key}")
61 | 
62 | 
63 | INSTANCE_PRICES = json.load(open(INSTANCE_PRICES_FILEPATH, "rb"))
64 | 
65 | PRE_BUILT_METRICS = {
66 |     "lbt_metrics": "lbt.metrics.lbt_metrics",
67 | }
68 | 
69 | for name, module in PRE_BUILT_METRICS.items():
70 |     if module not in sys.modules:
71 |         importlib.import_module(module)
72 | 
-------------------------------------------------------------------------------- /lbt/metrics/base_metric.py: --------------------------------------------------------------------------------
1 | import abc
2 | from abc import ABC, ABCMeta, abstractmethod
3 | from typing import Tuple, Union
4 | 
5 | import pandas as pd
6 | from ludwig.api import LudwigModel
7 | 
8 | 
9 | class LBTMetric(ABC):
10 |     def __init__(self):
11 |         super().__init__()
12 | 
13 |     @classmethod
14 |     def run(cls, model_path, dataset_path, train_batch_size, run_stats):
15 |         pass
16 | 
17 |     def load_model(self, model_path: str) -> LudwigModel:
18 |         return LudwigModel.load(model_path)
19 | 
20 |     def evaluate(
21 |         self,
22 |         model: LudwigModel,
23 |         dataset: Union[str, dict, pd.DataFrame] = None,
24 |         **kwargs
25 |     ) -> Tuple[dict, Union[dict, pd.DataFrame], str]:
26 |         return model.evaluate(dataset, **kwargs)
27 | 
28 |     def predict(
29 |         self,
30 |         model: LudwigModel,
31 |         dataset: Union[str, dict, pd.DataFrame] = None,
32 |         **kwargs
33 |     ) -> Tuple[Union[dict, pd.DataFrame], str]:
34 |         return model.predict(dataset, **kwargs)
35 | 
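The two files above define LBT's metric extension point: `register_metric` stores a class in `METRIC_REGISTERY`, and `LBTMetric` provides the model-loading and evaluation helpers that `get_experiment_metadata` later drives through Ray. The sketch below shows how a custom metric might be registered; the name `dataset_row_count` and its row-counting behavior are purely illustrative and are not part of the toolkit.

import pandas as pd

from lbt.metrics import register_metric
from lbt.metrics.base_metric import LBTMetric


@register_metric("dataset_row_count")  # hypothetical metric name, for illustration only
class DatasetRowCount(LBTMetric):
    num_gpus = 0

    def run(cls, dataset_path, **kwargs):
        # Mirrors the run(...) convention used by the built-in metrics below:
        # get_experiment_metadata passes model_path, dataset_path,
        # train_batch_size and run_stats as keyword arguments, and any the
        # metric does not need are absorbed by **kwargs.
        return len(pd.read_csv(dataset_path))

As with the built-in metrics, the module defining such a class would have to be imported (for example via an entry like the ones in `PRE_BUILT_METRICS`) before `get_experiment_metadata` iterates over `METRIC_REGISTERY`.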
"A100": 2.93, 3 | "Tesla T4": 0.35, 4 | "Tesla P4": 0.60, 5 | "Tesla V100": 2.48, 6 | "Tesla P100": 1.46, 7 | "Tesla K80": 0.45 8 | } -------------------------------------------------------------------------------- /lbt/metrics/lbt_metrics.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import shutil 4 | import tempfile 5 | 6 | import GPUtil 7 | import ludwig 8 | import numpy as np 9 | import pandas as pd 10 | import psutil 11 | import ray 12 | from experiment_impact_tracker.compute_tracker import ImpactTracker 13 | from experiment_impact_tracker.data_interface import DataInterface 14 | from globals import ENERGY_LOGGING_DIR 15 | from lbt.metrics import register_metric 16 | from lbt.metrics import INSTANCE_PRICES 17 | from lbt.metrics.base_metric import LBTMetric 18 | from lbt.metrics.utils import scale_bytes 19 | from ludwig.api import LudwigModel 20 | from ludwig.collect import collect_weights 21 | 22 | 23 | @register_metric("ludwig_version") 24 | class LudwigVersion(LBTMetric): 25 | def __init__(self): 26 | pass 27 | 28 | def run(cls, **kwargs): 29 | return ludwig.__version__ 30 | 31 | 32 | @register_metric("hardware_metadata") 33 | class HardwareMetadata(LBTMetric): 34 | num_gpus = 0 35 | 36 | def run(cls, **kwargs): 37 | machine_info = {} 38 | # GPU 39 | gpus = GPUtil.getGPUs() 40 | if len(gpus) != 0: 41 | machine_info["total_gpus"] = len(gpus) 42 | gpu_type = {} 43 | for gpu_id, gpu in enumerate(gpus): 44 | gpu_type[gpu_id] = gpu.name 45 | machine_info["gpu_info"] = gpu_type 46 | else: 47 | machine_info["total_gpus"] = 0 48 | # CPU 49 | total_cores = psutil.cpu_count(logical=True) 50 | machine_info["total_cores"] = total_cores 51 | # RAM 52 | svmem = psutil.virtual_memory() 53 | total_RAM = scale_bytes(svmem.total) 54 | machine_info["RAM"] = total_RAM 55 | return machine_info 56 | 57 | 58 | @register_metric("inference_latency") 59 | class InferenceLatencyMetric(LBTMetric): 60 | num_samples = 25 61 | num_gpus = 0 62 | 63 | def run(cls, model_path, dataset_path, **kwargs): 64 | """ 65 | Returns avg. time to perform inference on 1 sample 66 | 67 | # Inputs 68 | :param model_path: (str) filepath to pre-trained model (directory that 69 | contains the model_hyperparameters.json). 70 | :param dataset_path: (str) filepath to dataset 71 | :param dataset_path: (int) number of dev samples to randomly sample 72 | 73 | # Return 74 | :return: (str) avg. 
time per inference step 75 | """ 76 | # Create smaller datasets w/10 samples from original dev set 77 | full_dataset = pd.read_csv(dataset_path) 78 | # Note: split == 1 indicates the dev set 79 | if "split" in full_dataset: 80 | if len(full_dataset[full_dataset["split"] == 1]) > 0: 81 | sampled_dataset = full_dataset[ 82 | full_dataset["split"] == 1 83 | ].sample(n=cls.num_samples) 84 | elif len(full_dataset[full_dataset["split"] == 2]) > 0: 85 | sampled_dataset = full_dataset[ 86 | full_dataset["split"] == 2 87 | ].sample(n=cls.num_samples) 88 | else: 89 | sampled_dataset = full_dataset[ 90 | full_dataset["split"] == 0 91 | ].sample(n=cls.num_samples) 92 | else: 93 | sampled_dataset = full_dataset.sample(n=cls.num_samples) 94 | ludwig_model = LudwigModel.load(model_path) 95 | start = datetime.datetime.now() 96 | _, _ = ludwig_model.predict( 97 | dataset=sampled_dataset, 98 | batch_size=1, 99 | ) 100 | total_time = datetime.datetime.now() - start 101 | avg_time_per_sample = total_time / cls.num_samples 102 | formatted_time = "{:0>8}".format(str(avg_time_per_sample)) 103 | return formatted_time 104 | 105 | 106 | @register_metric("training_cost") 107 | class TrainingCost(LBTMetric): 108 | default_gpu_cost_per_hr = 0.35 # GCP cost for Tesla T4 109 | 110 | def run(cls, run_stats: dict, **kwargs) -> float: 111 | """ 112 | Return total cost to train model using GCP compute resource 113 | """ 114 | get_GPUS = GPUtil.getGPUs() 115 | instance_cost = None 116 | if len(get_GPUS) > 0: 117 | gpu_type = get_GPUS[0].name 118 | if gpu_type in INSTANCE_PRICES.keys(): 119 | instance_cost = INSTANCE_PRICES[gpu_type] 120 | if instance_cost is None: 121 | instance_cost = cls.default_gpu_cost_per_hr 122 | 123 | total_time_s = int(run_stats["hyperopt_results"]["time_total_s"]) 124 | total_time_hr = total_time_s / 3600 125 | return float(total_time_hr * instance_cost) 126 | 127 | 128 | @register_metric("training_speed") 129 | class TrainingSpeed(LBTMetric): 130 | num_gpus = 0 131 | 132 | def run( 133 | cls, 134 | dataset_path: str, 135 | train_batch_size: int, 136 | run_stats: dict, 137 | **kwargs, 138 | ) -> str: 139 | """ 140 | Returns avg. time per training step 141 | 142 | # Inputs 143 | :param model_path: (str) filepath to pre-trained model (directory that 144 | contains the model_hyperparameters.json). 145 | :param dataset_path: (str) filepath to dataset 146 | 147 | # Return 148 | :return: (str) avg. time per training step 149 | """ 150 | 151 | train_split_size = 0.7 152 | full_dataset = pd.read_csv(dataset_path) 153 | if "split" in full_dataset: 154 | total_samples = len(full_dataset[full_dataset["split"] == 0]) 155 | else: 156 | total_samples = int(train_split_size * len(full_dataset)) 157 | total_training_steps = int(total_samples / train_batch_size) 158 | time_per_batch = ( 159 | int(run_stats["hyperopt_results"]["time_this_iter_s"]) 160 | / total_training_steps 161 | ) 162 | formatted_time = "{:0>8}".format( 163 | str(datetime.timedelta(seconds=time_per_batch)) 164 | ) 165 | return formatted_time 166 | 167 | 168 | @register_metric("model_size") 169 | class ModelSize(LBTMetric): 170 | num_gpus = 0 171 | 172 | def run(cls, model_path: str, **kwargs): 173 | """ 174 | Computes minimum bytes required to store model to memory 175 | 176 | # Inputs 177 | :param model_path: (str) filepath to pre-trained model. 
178 | 
179 |         # Return
180 |         :return: (int) total bytes
181 |         :return: (str) total bytes scaled in string format
182 |         """
183 |         tensor_filepaths = collect_weights(
184 |             model_path=model_path,
185 |             tensors=None,
186 |             output_directory=".model_tensors",
187 |         )
188 |         total_size = 0
189 |         for fp in tensor_filepaths:
190 |             weight_tensor = np.load(fp)
191 |             total_size += weight_tensor.size
192 |         total_bytes = total_size * 32
193 |         scaled_bytes = scale_bytes(total_bytes)
194 |         model_size = {"total_bytes": total_bytes, "scaled_bytes": scaled_bytes}
195 |         return model_size
196 | 
197 | 
198 | @register_metric("carbon_footprint")
199 | class Energy(LBTMetric):
200 |     num_gpus = 0
201 | 
202 |     def run(cls, model_path: str, dataset_path, train_batch_size, run_stats):
203 |         """
204 |         Computes energy metrics for one training epoch
205 | 
206 |         # Inputs
207 |         :param model_path: (str) filepath to pre-trained model.
208 | 
209 |         # Return
210 |         :return: (dict) energy metrics for one pass of online training:
211 |             kg_carbon, total_power, PUE, and duration_of_train_step
212 |         """
213 |         # First copy model_path to temp directory
214 |         logging_path = os.path.join(
215 |             ENERGY_LOGGING_DIR, run_stats["hyperopt_results"]["experiment_id"]
216 |         )
217 |         tempdir = os.path.join(logging_path, "temp_model")
218 |         shutil.copytree(model_path, tempdir)
219 |         model = LudwigModel.load(tempdir)
220 | 
221 |         with ImpactTracker(logging_path):
222 |             model.train_online(dataset=dataset_path)
223 | 
224 |         data_interface = DataInterface([logging_path])
225 |         carbon_output = {
226 |             "kg_carbon": data_interface.kg_carbon,
227 |             "total_power": data_interface.total_power,
228 |             "PUE": data_interface.PUE,
229 |             "duration_of_train_step": data_interface.exp_len_hours,
230 |         }
231 | 
232 |         shutil.rmtree(tempdir)
233 | 
234 |         return carbon_output
235 | 
-------------------------------------------------------------------------------- /lbt/metrics/utils.py: --------------------------------------------------------------------------------
1 | def scale_bytes(bytes: int, suffix: str = "B") -> str:
2 |     factor = 1024
3 |     for unit in ["", "K", "M", "G", "T", "P"]:
4 |         if bytes < factor:
5 |             return f"{bytes:.2f}{unit}{suffix}"
6 |         bytes /= factor
7 | 
-------------------------------------------------------------------------------- /lbt/tools/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/HazyResearch/ludwig-benchmarking-toolkit/5cae39dba91f83717c467e1f3db982bb0fabaf70/lbt/tools/__init__.py
-------------------------------------------------------------------------------- /lbt/tools/robustnessgym/__init__.py: --------------------------------------------------------------------------------
1 | RGSUBPOPULATION_REGISTRY = {}
2 | 
3 | import importlib
4 | import sys
5 | import inspect
6 | 
7 | from .base_subpopulation import BaseSubpopulation
8 | from .robustnessgym import RG
9 | from robustnessgym.slicebuilders.subpopulation import Subpopulation
10 | 
11 | # from lbt.tools.robustnessgym import RG
12 | 
13 | 
14 | def register_lbtsubpop(name):
15 |     def register_subpop_cls(cls):
16 |         if not issubclass(cls, BaseSubpopulation):
17 |             raise ValueError(
18 |                 "Subpopulation ({}: {}) must extend lbt.tools.robustnessgym.base_subpopulation".format(
19 |                     name, cls.__name__
20 |                 )
21 |             )
22 |         RGSUBPOPULATION_REGISTRY[name] = cls
23 |         return cls
24 | 
25 |     return register_subpop_cls
26 | 
27 | 
28 | LBT_SUBPOPULATIONS = {
29 |     "lbt_subpops": "lbt.tools.robustnessgym.lbt_subpopulations",
30 | }
31 | 
32 | RG_SUBPOPULATIONS = {
33 |     "hans": 
"robustnessgym.slicebuilders.subpopulations.hans", 34 | "phrase": "robustnessgym.slicebuilders.subpopulations.phrase", 35 | } 36 | 37 | for name, module_name in LBT_SUBPOPULATIONS.items(): 38 | if module_name not in sys.modules: 39 | importlib.import_module(module_name) 40 | 41 | for name, module_name in RG_SUBPOPULATIONS.items(): 42 | for name, obj in inspect.getmembers(sys.modules[module_name]): 43 | if inspect.isclass(obj): 44 | if issubclass(obj, Subpopulation): 45 | RGSUBPOPULATION_REGISTRY[name] = obj 46 | -------------------------------------------------------------------------------- /lbt/tools/robustnessgym/base_subpopulation.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from abc import ABC 3 | import pandas as pd 4 | 5 | 6 | class BaseSubpopulation(ABC): 7 | def __init__(self, name): 8 | self.name = name 9 | 10 | @abc.abstractmethod 11 | def score_fn(self): 12 | """ scores a sample based on subpopulation the sample is a part of """ 13 | raise NotImplementedError() 14 | 15 | @abc.abstractmethod 16 | def get_subpops(self): 17 | raise NotImplementedError() 18 | 19 | @property 20 | def slice_name(self): 21 | return self.name -------------------------------------------------------------------------------- /lbt/tools/robustnessgym/lbt_subpopulations.py: -------------------------------------------------------------------------------- 1 | from lbt.tools.robustnessgym.base_subpopulation import BaseSubpopulation 2 | from lbt.tools.robustnessgym import register_lbtsubpop 3 | from robustnessgym import ( 4 | LengthSubpopulation, 5 | HasPhrase, 6 | HasAnyPhrase, 7 | ) 8 | 9 | import requests 10 | 11 | from robustnessgym import Spacy 12 | from robustnessgym import ScoreSubpopulation, Identifier 13 | import pandas as pd 14 | import itertools 15 | from functools import partial 16 | 17 | # TODO: ASN --> Identity Phrases, Emoji, 18 | 19 | 20 | @register_lbtsubpop("entities") 21 | class EntitySubpopulation(BaseSubpopulation): 22 | def __init__(self): 23 | self.name = "entities" 24 | self.entity_types = [ 25 | "PERSON", 26 | "NORP", 27 | "FAC", 28 | "ORG", 29 | "GPE", 30 | "LOC", 31 | "PRODUCT", 32 | "EVENT", 33 | "WORK_OF_ART", 34 | "LAW", 35 | "LANGUAGE", 36 | "DATE", 37 | "TIME", 38 | "PERCENT", 39 | "MONEY", 40 | "QUANTITY", 41 | "ORDINAL", 42 | "CARDINAL", 43 | ] 44 | 45 | def score_fn(self, batch, columns, entity, spacy): 46 | try: 47 | entites_list = Spacy.retrieve( 48 | batch, columns, proc_fns=Spacy.entities 49 | ) 50 | except ValueError: 51 | spacy_op = spacy(batch, columns) 52 | entites_list = Spacy.retrieve( 53 | spacy_op, columns, proc_fns=Spacy.entities 54 | ) 55 | overall_batch_score = [] 56 | for entities in entites_list: 57 | ents = set(entity["label"] for entity in entities) 58 | if entity in ents: 59 | overall_batch_score.append(1) 60 | else: 61 | overall_batch_score.append(0) 62 | return overall_batch_score 63 | 64 | def get_subpops(self, spacy): 65 | EntitiesSubpopulation = lambda entity, score_fn: ScoreSubpopulation( 66 | identifiers=[Identifier(f"{entity}")], 67 | intervals=[(1, 1)], 68 | score_fn=score_fn, 69 | ) 70 | 71 | entity_subpops = [] 72 | for entity in self.entity_types: 73 | entity_subpops.append( 74 | EntitiesSubpopulation( 75 | entity, partial(self.score_fn, entity=entity, spacy=spacy) 76 | ) 77 | ) 78 | return entity_subpops 79 | 80 | 81 | @register_lbtsubpop("pos") 82 | class POSSubpopulation(BaseSubpopulation): 83 | def __init__(self): 84 | self.name = "POS" 85 | self.universalpos = [ 86 | "ADJ", 87 | 
"ADP", 88 | "ADV", 89 | "AUX", 90 | "CONJ", 91 | "CCONJ", 92 | "DET", 93 | "INTJ", 94 | "NOUN", 95 | "NUM", 96 | "PART", 97 | "PRON", 98 | "PROPN", 99 | "PUNCT", 100 | "SCONJ", 101 | "SYM", 102 | "VERB", 103 | "X", 104 | "EOL", 105 | "SPACE", 106 | ] 107 | 108 | def score_fn(self, batch, columns, pos, spacy): 109 | try: 110 | spacy_annotations = Spacy.retrieve(batch, columns) 111 | except ValueError: 112 | spacy_op = spacy(batch, columns) 113 | spacy_annotations = Spacy.retrieve(spacy_op, columns) 114 | 115 | overall_batch_score = [] 116 | for sample_annotation in spacy_annotations: 117 | pos_in_sample = set( 118 | token["pos"] for token in sample_annotation["tokens"] 119 | ) 120 | if pos in pos_in_sample: 121 | overall_batch_score.append(1) 122 | else: 123 | overall_batch_score.append(0) 124 | 125 | return overall_batch_score 126 | 127 | def get_subpops(self, spacy): 128 | POSSubpopulation = lambda pos, score_fn: ScoreSubpopulation( 129 | identifiers=[Identifier(f"{pos}")], 130 | intervals=[(1, 1)], 131 | score_fn=score_fn, 132 | ) 133 | 134 | pos_subpops = [] 135 | for pos in self.universalpos: 136 | pos_subpops.append( 137 | POSSubpopulation( 138 | pos, partial(self.score_fn, pos=pos, spacy=spacy) 139 | ) 140 | ) 141 | return pos_subpops 142 | 143 | 144 | @register_lbtsubpop("gender_bias") 145 | class GenderBiasSubpopulation(BaseSubpopulation): 146 | def __init__(self): 147 | """ 148 | Measures performance on gender co-occurence pairs 149 | """ 150 | self.name = "gender_bias" 151 | self.female_identity = [ 152 | "she", 153 | "her", 154 | "herself", 155 | "girl", 156 | "woman", 157 | "women", 158 | "females", 159 | "female", 160 | "girls", 161 | "feminine", 162 | ] 163 | self.male_identity = [ 164 | "he", 165 | "him", 166 | "himself", 167 | "boy", 168 | "man", 169 | "men", 170 | "males", 171 | "male", 172 | "boys", 173 | "masculine", 174 | ] 175 | self.non_binary_identity = [ 176 | "they", 177 | "them", 178 | "theirs", 179 | "their", 180 | "themself", 181 | ] 182 | self.gender_categories = { 183 | "female": self.female_identity, 184 | "male": self.male_identity, 185 | "non_binary": self.non_binary_identity, 186 | } 187 | 188 | self.career_words = [ 189 | "executive", 190 | "professional", 191 | "corporation", 192 | "salary", 193 | "office", 194 | "business", 195 | "career", 196 | ] 197 | self.family_words = [ 198 | "home", 199 | "parents", 200 | "children", 201 | "family", 202 | "cousin", 203 | "marriage", 204 | "wedding", 205 | "relatives", 206 | ] 207 | self.math_words = [ 208 | "math", 209 | "algebra", 210 | "geometry", 211 | "calculus", 212 | "equation", 213 | "compute", 214 | "numbers", 215 | "addition", 216 | ] 217 | self.arts_words = [ 218 | "poetry", 219 | "art", 220 | "dance", 221 | "literature", 222 | "novel", 223 | "symphony", 224 | "drama", 225 | ] 226 | self.science_words = [ 227 | "science", 228 | "technology", 229 | "physics", 230 | "chemistry", 231 | "Einstein", 232 | "NASA", 233 | "experiment", 234 | "astronomy", 235 | ] 236 | 237 | self.domains = { 238 | "career": self.career_words, 239 | "family": self.family_words, 240 | "math": self.math_words, 241 | "arts": self.arts_words, 242 | "science": self.science_words, 243 | } 244 | 245 | def score_fn(self, batch, columns, pair): 246 | overall_batch_score = [] 247 | for text in batch[columns[0]]: 248 | if pair[0] in text and pair[1] in text: 249 | overall_batch_score.append(1) 250 | else: 251 | overall_batch_score.append(0) 252 | return overall_batch_score 253 | 254 | def build_cooccurence_pairs(self, gender_categories: 
dict, domains: dict):
255 |         bias_pairs = []
256 |         for _, gender_list in gender_categories.items():
257 |             for _, phrase_list in domains.items():
258 |                 bias_pairs.extend(
259 |                     [
260 |                         pair
261 |                         for pair in itertools.product(gender_list, phrase_list)
262 |                     ]
263 |                 )
264 |         return bias_pairs
265 | 
266 |     def get_subpops(self, spacy):
267 |         bias_pairs = self.build_cooccurence_pairs(
268 |             self.gender_categories, self.domains
269 |         )
270 |         BiasCooccurenceSubpopulation = (
271 |             lambda pair, score_fn: ScoreSubpopulation(
272 |                 identifiers=[Identifier(f"{pair[0]}_{pair[1]}")],
273 |                 intervals=[(1, 1)],
274 |                 score_fn=score_fn,
275 |             )
276 |         )
277 | 
278 |         bias_subpops = []
279 |         for pair in bias_pairs:
280 |             bias_subpops.append(
281 |                 BiasCooccurenceSubpopulation(
282 |                     pair, partial(self.score_fn, pair=pair)
283 |                 )
284 |             )
285 |         return bias_subpops
286 | 
287 | 
288 | @register_lbtsubpop("positive_sentiment")
289 | class PositiveSentimentSubpopulation(BaseSubpopulation):
290 |     def __init__(self):
291 |         """
292 |         Slice of dataset which contains positive sentiment carrying words
293 |         """
294 |         self.name = "positive_sentiment"
295 |         self.positive_words_list = "https://gist.githubusercontent.com/mkulakowski2/4289437/raw/1bb4d7f9ee82150f339f09b5b1a0e6823d633958/positive-words.txt"
296 | 
297 |     def score_fn(self, batch, columns):
298 |         pass
299 | 
300 |     def get_positive_words(self):
301 |         response = requests.get(self.positive_words_list)
302 |         _, words = (
303 |             response.text.split("\n\n")[0],
304 |             response.text.split("\n\n")[1],
305 |         )
306 |         word_list = words.split("\n")
307 |         return word_list
308 | 
309 |     def get_subpops(self, spacy):
310 |         return [
311 |             HasAnyPhrase(
312 |                 phrase_groups=[self.get_positive_words()],
313 |                 identifiers=[Identifier("Positive Sentiment Words")],
314 |             )
315 |         ]
316 | 
317 | 
318 | @register_lbtsubpop("negative_sentiment")
319 | class NegativeSentimentSubpopulation(BaseSubpopulation):
320 |     def __init__(self):
321 |         """
322 |         Slice of dataset which contains negative sentiment carrying words
323 |         """
324 |         self.name = "negative_sentiment"
325 |         self.negative_words_list = "https://gist.githubusercontent.com/mkulakowski2/4289441/raw/dad8b64b307cd6df8068a379079becbb3f91101a/negative-words.txt"
326 | 
327 |     def score_fn(self, batch, columns):
328 |         pass
329 | 
330 |     def get_negative_words(self):
331 |         response = requests.get(self.negative_words_list)
332 |         _, words = (
333 |             response.text.split("\n\n")[0],
334 |             response.text.split("\n\n")[1],
335 |         )
336 |         word_list = words.split("\n")
337 |         return word_list
338 | 
339 |     def get_subpops(self, spacy):
340 |         return [
341 |             HasAnyPhrase(
342 |                 phrase_groups=[self.get_negative_words()],
343 |                 identifiers=[Identifier("Negative Sentiment Words")],
344 |             )
345 |         ]
346 | 
347 | 
348 | @register_lbtsubpop("naughty_and_obscene")
349 | class NaughtyObsceneSubpopulation(BaseSubpopulation):
350 |     def __init__(self):
351 |         """
352 |         Slice of dataset which contains naughty + obscene words
353 |         """
354 |         self.name = "naughty_and_obscene"
355 |         self.word_list = "https://raw.githubusercontent.com/LDNOOBW/List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words/master/en"
356 | 
357 |     def score_fn(self, batch, columns):
358 |         pass
359 | 
360 |     def get_naughty_obscene_word_list(self):
361 |         response = requests.get(self.word_list)
362 |         return response.text.split("\n")
363 | 
364 |     def get_subpops(self, spacy):
365 |         return [
366 |             HasAnyPhrase(
367 |                 phrase_groups=[self.get_naughty_obscene_word_list()],
368 |                 identifiers=[Identifier("Naughty and Obscene 
Words")], 369 | ) 370 | ] 371 | 372 | 373 | @register_lbtsubpop("sentence_length") 374 | class SentenceLengthSubpopulation(BaseSubpopulation): 375 | def __init__(self): 376 | """ 377 | Sentence length based slices 378 | """ 379 | self.name = "sentence_length" 380 | 381 | def score_fn(self, batch, columns): 382 | pass 383 | 384 | def get_subpops(self, spacy): 385 | return [ 386 | LengthSubpopulation( 387 | intervals=[ 388 | (0, 20), 389 | (20, 40), 390 | (40, 60), 391 | (60, 80), 392 | (80, 100), 393 | (100, 120), 394 | (120, 140), 395 | ] 396 | ) 397 | ] 398 | -------------------------------------------------------------------------------- /lbt/tools/robustnessgym/robustnessgym.py: -------------------------------------------------------------------------------- 1 | import os 2 | from functools import partial 3 | from typing import Union 4 | 5 | import numpy as np 6 | import pandas as pd 7 | from lbt.datasets import DATASET_REGISTRY 8 | from lbt.tools.robustnessgym import RGSUBPOPULATION_REGISTRY 9 | from ludwig.api import LudwigModel 10 | from lbt.tools.utils import get_dataset_features 11 | 12 | from robustnessgym import Dataset, Identifier, Spacy 13 | from robustnessgym.core.testbench import DevBench 14 | 15 | from .base_subpopulation import BaseSubpopulation 16 | 17 | OUTPUT_FEATURES = None 18 | 19 | 20 | def get_dataset_with_predictions( 21 | dataset: pd.DataFrame, 22 | models: dict, 23 | output_features: list, 24 | ): 25 | for model_name, path_to_model in models.items(): 26 | model = LudwigModel.load(model_dir=path_to_model) 27 | (predictions, output_directory) = model.predict(dataset) 28 | for output_feat in output_features: 29 | dataset[f"{model_name}_{output_feat}_pred"] = ( 30 | predictions[f"{output_feat}_predictions"] 31 | .astype(float) 32 | .tolist() 33 | ) 34 | dataset.rename( 35 | {output_feat: f"{output_feat}_label"}, axis=1, inplace=True 36 | ) 37 | return dataset 38 | 39 | 40 | def accuracy_eval_fn(model, dataset): 41 | global OUTPUT_FEATURES 42 | output_feat_accuracy = [] 43 | # aggregate accuracy over all output features 44 | for output_feat in OUTPUT_FEATURES: 45 | accuracy = np.mean( 46 | np.array(dataset[f"{model}_{output_feat}_pred"]) 47 | == (np.array(dataset[f"{output_feat}_label"])) 48 | ) 49 | output_feat_accuracy.append(accuracy) 50 | return np.mean(output_feat_accuracy) 51 | 52 | 53 | def RG( 54 | dataset_name: str, 55 | models: dict, 56 | path_to_dataset: str, 57 | subpopulations: list, 58 | output_directory: str, 59 | input_features: Union[str, list] = None, 60 | output_features: Union[str, list] = None, 61 | output_report_name: str = "rg_report.png", 62 | ): 63 | """ 64 | Runs RG evaluation on dataset across specified models 65 | 66 | # Inputs 67 | :param dataset_name: (str) name of dataset 68 | :param models: (dict) mapping between model name and saved model directory 69 | :param path_to_dataset: (str) location of dataset 70 | :param input_features: (list or str) names of input feature 71 | :param output_features: (list or str) names of output feature 72 | :param subpopulations: (list) subpopulations to evaluate model performance 73 | :param output_directory: (str) location to save all outputs of RG analysis 74 | :param output_report_name: (str) name of generated file 75 | 76 | 77 | # Return 78 | :return: (pd.DataFrame) performance metrics from RG analysis 79 | """ 80 | 81 | # first check if slices are valid 82 | for subpop in subpopulations: 83 | if subpop not in RGSUBPOPULATION_REGISTRY.keys(): 84 | raise ValueError( 85 | f"{subpop} is not in the list of 
supported RG Subpopulations\n"
86 |                 f"Please see lbt.tools.robustnessgym.RGSUBPOPULATION_REGISTRY for available subpopulations"
87 |             )
88 | 
89 |     # if user has not provided input/output feature info, collect it manually
90 |     if input_features is None or output_features is None:
91 |         (input_features, output_features) = get_dataset_features(dataset_name)
92 | 
93 |     else:
94 |         if isinstance(input_features, str):
95 |             input_features = [input_features]
96 |         if isinstance(output_features, str):
97 |             output_features = [output_features]
98 | 
99 |     global OUTPUT_FEATURES
100 |     OUTPUT_FEATURES = output_features
101 | 
102 |     # load data
103 |     # TODO (ASN): fix logic for extracting eval set
104 |     dataset = pd.read_csv(path_to_dataset)
105 | 
106 |     # get preds
107 |     dataset = get_dataset_with_predictions(dataset, models, output_features)
108 |     # cast as RG Dataset
109 |     dataset = Dataset.from_pandas(dataset, Identifier(dataset_name))
110 | 
111 |     # initialize spacy
112 |     spacy = Spacy()
113 |     dataset = spacy(dataset, input_features)
114 | 
115 |     # for each subpopulation, get subpopulation functions
116 |     selected_subpopulations = []
117 |     for subpop in subpopulations:
118 |         if issubclass(RGSUBPOPULATION_REGISTRY[subpop], BaseSubpopulation):
119 |             subpops = RGSUBPOPULATION_REGISTRY[subpop]().get_subpops(spacy)
120 |         else:
121 |             subpops = RGSUBPOPULATION_REGISTRY[subpop]()
122 |         if not isinstance(subpops, list):
123 |             subpops = [subpops]
124 |         selected_subpopulations.extend(subpops)
125 | 
126 |     # for each subpopulation, get slices
127 |     slices = []
128 |     for subpop in selected_subpopulations:
129 |         slices.extend(subpop(dataset, input_features)[0])
130 | 
131 |     # build test bench
132 |     dataset_db = DevBench(dataset)
133 |     # add slices to test bench
134 |     dataset_db.add_slices(slices)
135 | 
136 |     dataset_db.add_aggregators(
137 |         {
138 |             model: {"accuracy": partial(accuracy_eval_fn, model)}
139 |             for model in models.keys()
140 |         }
141 |     )
142 |     # compute metrics
143 |     metrics = dataset_db.metrics
144 | 
145 |     # save metrics dataframe
146 |     metrics_df = pd.DataFrame(metrics)
147 |     metrics_df.to_csv(os.path.join(output_directory, f"{dataset_name}_rg.csv"))
148 | 
149 |     # create report
150 |     dataset_db.create_report().figure().write_image(
151 |         output_report_name, engine="kaleido"
152 |     )
153 |     return metrics
154 | 
-------------------------------------------------------------------------------- /lbt/tools/textattack/__init__.py: --------------------------------------------------------------------------------
1 | from .textattack import (
2 |     attack,
3 |     augment,
4 |     ATTACKRECIPE_REGISTRY,
5 |     AUGMENTATIONRECIPE_REGISTRY,
6 | )
7 | 
-------------------------------------------------------------------------------- /lbt/tools/textattack/textattack.py: --------------------------------------------------------------------------------
1 | import inspect
2 | import sys
3 | import os
4 | import pandas as pd
5 | from pandas.core.common import SettingWithCopyWarning
6 | 
7 | import warnings
8 | 
9 | warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
10 | 
11 | 
12 | from ludwig.api import LudwigModel
13 | 
14 | from textattack.attack_recipes import AttackRecipe
15 | from textattack.attack_results import (
16 |     MaximizedAttackResult,
17 |     SuccessfulAttackResult,
18 | )
19 | from textattack.augmentation import Augmenter
20 | from textattack.models.wrappers import ModelWrapper
21 | 
22 | from lbt.tools.utils import get_dataset_features
23 | 
24 | 
25 | ATTACKRECIPE_REGISTRY = {}
26 | AUGMENTATIONRECIPE_REGISTRY = {}
27 | 
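# Note (illustrative): the two loops below fill these registries by introspecting
# the textattack package. Every AttackRecipe subclass exposed by
# textattack.attack_recipes (except the AttackRecipe base class) and every
# Augmenter subclass exposed by textattack.augmentation (except Augmenter itself)
# is stored under its class name. Assuming textattack exposes the recipes used as
# defaults later in this module, lookups after import would resemble:
#   ATTACKRECIPE_REGISTRY["DeepWordBugGao2018"]       # an attack recipe class
#   AUGMENTATIONRECIPE_REGISTRY["CharSwapAugmenter"]  # an augmenter class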
28 | for key, obj in inspect.getmembers(sys.modules["textattack.attack_recipes"]): 29 | if inspect.isclass(obj): 30 | if issubclass(obj, AttackRecipe) and key != "AttackRecipe": 31 | ATTACKRECIPE_REGISTRY[key] = obj 32 | 33 | 34 | for key, obj in inspect.getmembers(sys.modules["textattack.augmentation"]): 35 | if inspect.isclass(obj): 36 | if issubclass(obj, Augmenter) and key != "Augmenter": 37 | AUGMENTATIONRECIPE_REGISTRY[key] = obj 38 | 39 | 40 | class CustomLudwigModelWrapper(ModelWrapper): 41 | def __init__( 42 | self, 43 | path_to_model: str, 44 | input_feature_name: str, 45 | output_feature_name: str, 46 | ): 47 | self.model = LudwigModel.load(path_to_model) 48 | self.input_feature_name = input_feature_name 49 | self.output_feature_name = output_feature_name 50 | 51 | def __call__(self, text_list): 52 | input_text_df = pd.DataFrame( 53 | text_list, columns=[self.input_feature_name] 54 | ) 55 | model_outputs = self.model.predict(input_text_df) 56 | pred_outputs = model_outputs[0] 57 | columns = [ 58 | col 59 | for col in pred_outputs.columns 60 | if self.output_feature_name in col 61 | ] 62 | preds = pred_outputs[columns].iloc[:, 1:-1].to_numpy() 63 | return preds 64 | 65 | 66 | def load_dataset( 67 | path_to_dataset: str, input_feature_name: str, output_feature_name: str 68 | ): 69 | dataset = pd.read_csv(path_to_dataset) 70 | dataset = dataset[0:10] 71 | if "split" not in dataset.columns: 72 | warnings.warn( 73 | "Dataset doesn't contain split column. Attacking entire dataset" 74 | ) 75 | test_split = dataset[[input_feature_name, output_feature_name]] 76 | else: 77 | test_split = dataset[dataset["split"] == 2][ 78 | [input_feature_name, output_feature_name] 79 | ] 80 | return test_split 81 | 82 | 83 | def build_custom_ta_dataset( 84 | path_to_dataset: str, input_feature_name: str, output_feature_name: str 85 | ): 86 | dataset = load_dataset( 87 | path_to_dataset, input_feature_name, output_feature_name 88 | ) 89 | dataset[output_feature_name] = ( 90 | dataset[output_feature_name].astype(int).tolist() 91 | ) 92 | tupelize = dataset.to_records(index=False) 93 | return list(tupelize) 94 | 95 | 96 | def attack( 97 | dataset_name: str, 98 | path_to_dataset: str, 99 | path_to_model: str, 100 | input_feature_name: str = None, 101 | output_feature_name: str = None, 102 | attack_recipe: str = "DeepWordBugGao2018", 103 | output_directory: str = "./", 104 | ): 105 | if input_feature_name is None or output_feature_name is None: 106 | (input_features, output_features) = get_dataset_features(dataset_name) 107 | input_feature_name = input_features[0] 108 | output_feature_name = output_features[0] 109 | 110 | custom_model = CustomLudwigModelWrapper( 111 | path_to_model=path_to_model, 112 | input_feature_name=input_feature_name, 113 | output_feature_name=output_feature_name, 114 | ) 115 | 116 | custom_datset = build_custom_ta_dataset( 117 | path_to_dataset=path_to_dataset, 118 | input_feature_name=input_feature_name, 119 | output_feature_name=output_feature_name, 120 | ) 121 | 122 | if attack_recipe not in ATTACKRECIPE_REGISTRY.keys(): 123 | raise ValueError( 124 | f"{attack_recipe} not valid.\n" 125 | f"Please check ATTACKRECIPE_REGISTRY to see valid recipes" 126 | ) 127 | attack = ATTACKRECIPE_REGISTRY[attack_recipe].build(custom_model) 128 | results_iterable = attack.attack_dataset(custom_datset) 129 | 130 | results = { 131 | "original_text": [], 132 | "perturbed_text": [], 133 | "original_result": [], 134 | "original_confidence_score": [], 135 | "perturbed_result": [], 136 | 
"perturbed_confidence_score": [], 137 | "success": [], 138 | } 139 | 140 | for result in results_iterable: 141 | results["original_text"].append(result.original_text()) 142 | results["perturbed_text"].append(result.perturbed_text()) 143 | results["original_result"].append( 144 | result.original_result.raw_output.argmax().item() 145 | ) 146 | results["original_confidence_score"].append( 147 | result.original_result.raw_output[ 148 | result.original_result.raw_output.argmax() 149 | ].item() 150 | ) 151 | results["perturbed_result"].append( 152 | result.perturbed_result.raw_output.argmax().item() 153 | ) 154 | results["perturbed_confidence_score"].append( 155 | result.perturbed_result.raw_output[ 156 | result.perturbed_result.raw_output.argmax() 157 | ].item() 158 | ) 159 | if type(result) in [SuccessfulAttackResult, MaximizedAttackResult]: 160 | results["success"].append(1) 161 | else: 162 | results["success"].append(0) 163 | 164 | results_df = pd.DataFrame.from_dict(results) 165 | output_path = os.path.join( 166 | output_directory, f"{dataset_name}_{attack_recipe}.csv" 167 | ) 168 | results_df.to_csv(output_path) 169 | return results_df 170 | 171 | 172 | def augment( 173 | dataset_name: str, 174 | path_to_dataset: str, 175 | input_feature_name: str = None, 176 | output_feature_name: str = None, 177 | augmenter_name: str = "CharSwapAugmenter", 178 | pct_words_to_swap: float = 0.1, 179 | transformations_per_example: int = 1, 180 | save_path: str = "augmented_ds.csv", 181 | save=True, 182 | ): 183 | if input_feature_name is None or output_feature_name is None: 184 | (input_features, output_features) = get_dataset_features(dataset_name) 185 | input_feature_name = input_features[0] 186 | output_feature_name = output_features[0] 187 | 188 | dataset = load_dataset( 189 | path_to_dataset, input_feature_name, output_feature_name 190 | ) 191 | 192 | if augmenter_name not in AUGMENTATIONRECIPE_REGISTRY.keys(): 193 | raise ValueError( 194 | f"{augmenter_name} not valid.\n" 195 | f"Please check AUGMENTATIONRECIPE_REGISTRY to see valid recipes" 196 | ) 197 | 198 | augmenter = AUGMENTATIONRECIPE_REGISTRY[augmenter_name]( 199 | pct_words_to_swap=pct_words_to_swap, 200 | transformations_per_example=transformations_per_example, 201 | ) 202 | 203 | text_df = ( 204 | dataset[[input_feature_name]] 205 | .applymap(augmenter.augment) 206 | .applymap(lambda sent: sent[0]) 207 | ) 208 | 209 | augmented_ds = dataset 210 | augmented_ds.loc[:, input_feature_name] = text_df[input_feature_name] 211 | 212 | if save: 213 | augmented_ds.to_csv(save_path) 214 | 215 | return augmented_ds 216 | -------------------------------------------------------------------------------- /lbt/tools/utils.py: -------------------------------------------------------------------------------- 1 | from lbt.utils.experiment_utils import load_yaml 2 | from globals import DATASET_METADATA_FILE 3 | from lbt.datasets import DATASET_REGISTRY 4 | 5 | 6 | def get_dataset_features(dataset_name): 7 | 8 | if dataset_name not in DATASET_REGISTRY: 9 | raise ValueError( 10 | f"{dataset_name} not found in dataset registry\n" 11 | f"Please check that it has been properly added" 12 | ) 13 | 14 | dataset_metadata = load_yaml(DATASET_METADATA_FILE) 15 | for ds, ds_metadata in dataset_metadata.items(): 16 | if dataset_name == ds_metadata["data_class"]: 17 | input_features = [ 18 | input_feat["name"] 19 | for input_feat in ds_metadata["input_features"] 20 | ] 21 | output_features = [ 22 | output_feat["name"] 23 | for output_feat in ds_metadata["output_features"] 24 
| ] 25 | return (input_features, output_features) 26 | 27 | raise ValueError( 28 | f"{dataset_name} not found in {DATASET_METADATA_FILE}\n" 29 | f"Please check that it has been properly added" 30 | ) 31 | -------------------------------------------------------------------------------- /lbt/utils/__pycache__/experiment_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/ludwig-benchmarking-toolkit/5cae39dba91f83717c467e1f3db982bb0fabaf70/lbt/utils/__pycache__/experiment_utils.cpython-36.pyc -------------------------------------------------------------------------------- /lbt/utils/__pycache__/experiment_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/ludwig-benchmarking-toolkit/5cae39dba91f83717c467e1f3db982bb0fabaf70/lbt/utils/__pycache__/experiment_utils.cpython-37.pyc -------------------------------------------------------------------------------- /lbt/utils/__pycache__/experiment_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/ludwig-benchmarking-toolkit/5cae39dba91f83717c467e1f3db982bb0fabaf70/lbt/utils/__pycache__/experiment_utils.cpython-38.pyc -------------------------------------------------------------------------------- /lbt/utils/__pycache__/metadata_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/ludwig-benchmarking-toolkit/5cae39dba91f83717c467e1f3db982bb0fabaf70/lbt/utils/__pycache__/metadata_utils.cpython-36.pyc -------------------------------------------------------------------------------- /lbt/utils/__pycache__/metadata_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/ludwig-benchmarking-toolkit/5cae39dba91f83717c467e1f3db982bb0fabaf70/lbt/utils/__pycache__/metadata_utils.cpython-37.pyc -------------------------------------------------------------------------------- /lbt/utils/experiment_utils.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import copy 3 | import hashlib 4 | import json 5 | import logging 6 | import math 7 | import os 8 | from typing import Union 9 | from lbt.datasets import build_dataset 10 | from lbt.metrics import get_experiment_metadata 11 | 12 | import globals 13 | import pandas as pd 14 | import yaml 15 | 16 | 17 | def get_gpu_list(): 18 | try: 19 | return os.environ["CUDA_VISIBLE_DEVICES"] 20 | except KeyError: 21 | return None 22 | 23 | 24 | def compute_additional_metadata( 25 | experiment_attr: dict, 26 | hyperopt_results: list, 27 | tune_executor: str, 28 | ): 29 | hyperopt_run_data = get_model_ckpt_paths( 30 | hyperopt_results, experiment_attr["output_dir"], executor=tune_executor 31 | ) 32 | sampled_params = {} 33 | all_experiment_results = [] 34 | # ensures that all numerical values are of type float 35 | format_fields_float(hyperopt_results) 36 | for run in hyperopt_run_data: 37 | new_config = substitute_dict_parameters( 38 | copy.deepcopy(experiment_attr["model_config"]), 39 | parameters=run["hyperopt_results"]["parameters"], 40 | ) 41 | del new_config["hyperopt"] 42 | 43 | # do some accounting of duplicate hyperparam configs (this count will 44 | # be added to the dict which will be 
hashed for the elastic document 45 | # id 46 | param_hash = hash_dict(run["hyperopt_results"]["parameters"]) 47 | if param_hash in sampled_params: 48 | sampled_params[param_hash] += 1 49 | else: 50 | sampled_params[param_hash] = 1 51 | 52 | document = { 53 | "hyperopt_results": run["hyperopt_results"], 54 | "model_path": run["model_path"], 55 | } 56 | 57 | num_gpus = len(GPUtil.getGPUs()) 58 | 59 | get_experiment_metadata( 60 | document, 61 | model_path=run["model_path"], 62 | data_path=experiment_attr["dataset_path"], 63 | run_stats=run, 64 | num_gpus=num_gpus 65 | ) 66 | 67 | formatted_document = { 68 | "encoder": experiment_attr["encoder"], 69 | "dataset": experiment_attr["dataset"], 70 | } 71 | formatted_document.update(document) 72 | formatted_document.update( 73 | {"hyperopt_exp_config": experiment_attr["model_config"]} 74 | ) 75 | 76 | formatted_document["sampled_run_config"] = new_config 77 | all_experiment_results.append(formatted_document) 78 | return all_experiment_results 79 | 80 | 81 | def download_dataset(dataset_class: str, cache_dir: str) -> str: 82 | data = build_dataset(dataset_name=dataset_class, cache_dir=cache_dir) 83 | if dataset_class == "SST2": 84 | data = build_dataset( 85 | dataset_name=dataset_class, 86 | cache_dir=cache_dir, 87 | include_subtrees=True, 88 | remove_duplicates=True, 89 | ) 90 | elif dataset_class == "SST5": 91 | data = build_dataset( 92 | dataset_name=dataset_class, 93 | cache_dir=cache_dir, 94 | include_subtrees=True, 95 | ) 96 | elif dataset_class == "MDGenderBias": 97 | data = build_dataset( 98 | dataset_name=dataset_class, 99 | cache_dir=cache_dir, 100 | task="wizard", 101 | ) 102 | 103 | return os.path.join( 104 | data.processed_dataset_path, data.config["csv_filename"] 105 | ) 106 | 107 | 108 | def process_dataset(dataset_path: str): 109 | dataset = pd.read_csv(dataset_path) 110 | if "split" in dataset.columns: 111 | train_df = dataset[dataset["split"] == 0] 112 | val_df = dataset[dataset["split"] == 1] 113 | test_df = dataset[dataset["split"] == 2] 114 | 115 | # no validation set provided, sample 10% of train set 116 | if len(val_df) == 0: 117 | val_df = train_df.sample(frac=0.1, replace=False) 118 | train_df = train_df.drop(val_df.index) 119 | 120 | val_df.split = 1 121 | 122 | concat_df = pd.concat([train_df, val_df, test_df], ignore_index=True) 123 | concat_df.to_csv(dataset_path, index=False) 124 | return 125 | 126 | 127 | def hash_dict(d: dict, max_length: Union[int, None] = 6) -> bytes: 128 | s = json.dumps(d, sort_keys=True, ensure_ascii=True) 129 | h = hashlib.md5(s.encode()) 130 | d = h.digest() 131 | b = base64.b64encode(d) 132 | return b[:max_length] 133 | 134 | 135 | def load_yaml(filename: str) -> dict: 136 | with open(filename) as f: 137 | file_contents = yaml.load(f, Loader=yaml.SafeLoader) 138 | return file_contents 139 | 140 | 141 | def set_globals(args): 142 | """ set global vars based on command line args """ 143 | globals.EXPERIMENT_CONFIGS_DIR = args.hyperopt_config_dir 144 | logging.info(f"EXPERIMENT_CONFIG_DIR set to {args.hyperopt_config_dir}") 145 | globals.EXPERIMENT_OUTPUT_DIR = args.experiment_output_dir 146 | logging.info(f"EXPERIMENT_OUTPUT_DIR set to {args.experiment_output_dir}") 147 | globals.RUNTIME_ENV = args.run_environment 148 | logging.info(f"RUNTIME_ENV set to {args.run_environment}") 149 | globals.DATASET_CACHE_DIR = args.dataset_cache_dir 150 | logging.info(f"DATASET_CACHE_DIR set to {args.dataset_cache_dir}") 151 | 152 | if args.datasets is None: 153 | raise ValueError( 154 | "Please specify a 
dataset or list of dataset." 155 | "Use python experiment_driver.py --h to see: list of available datasets." 156 | ) 157 | else: 158 | if "smoke" in args.datasets: 159 | globals.DATASET_LIST = list(globals.SMOKE_DATASETS.keys()) 160 | logging.info("Setting global datasets list to smoke datasets...") 161 | else: 162 | globals.DATASETS_LIST = args.datasets 163 | logging.info(f"Setting global datasets list to {args.datasets}") 164 | 165 | if "all" not in args.custom_model_list: 166 | encoders_list = [] 167 | for enc_name in args.custom_model_list: 168 | if enc_name in globals.ENCODER_HYPEROPT_FILENAMES.keys(): 169 | encoders_list.append( 170 | globals.ENCODER_HYPEROPT_FILENAMES[enc_name] 171 | ) 172 | globals.ENCODER_FILE_LIST = encoders_list 173 | 174 | # create experiment output directories (if they don't already exist) 175 | for exp_dir in [ 176 | globals.EXPERIMENT_CONFIGS_DIR, 177 | globals.EXPERIMENT_OUTPUT_DIR, 178 | globals.DATASET_CACHE_DIR, 179 | globals.ENERGY_LOGGING_DIR, 180 | ]: 181 | if not os.path.isdir(exp_dir): 182 | os.mkdir(exp_dir) 183 | 184 | 185 | def format_fields_float(field_list: list) -> list: 186 | """ formats fields in elastic db entries """ 187 | 188 | def replace_ints(d): 189 | for k, v in d.items(): 190 | if isinstance(v, dict): 191 | replace_ints(v) 192 | else: 193 | if type(v) == int: 194 | v = float(v) 195 | if type(v) == list and type(v[0]) not in [list, dict]: 196 | new_v = [] 197 | for x in v: 198 | if isinstance(x, (int, float)) and math.isnan(x): 199 | new_v.append(0.0) 200 | else: 201 | new_v.append(x) 202 | v = new_v 203 | if isinstance(v, (int, float)) and math.isnan(v): 204 | v = 0.0 205 | d.update({k: v}) 206 | return d 207 | 208 | formatted_out = [replace_ints(d) for d in field_list] 209 | return formatted_out 210 | 211 | 212 | def decode_str_dicts(d: str) -> dict: 213 | json_acceptable_string = d.replace("'", '"') 214 | dct = json.loads(json_acceptable_string) 215 | return dct 216 | 217 | 218 | def substitute_dict_parameters(original_dict: dict, parameters: dict) -> dict: 219 | """ Fills in original ludwig config w/actual sampled hyperopt values """ 220 | 221 | def subsitute_param(dct: dict, path: list, val): 222 | if len(path) == 1: 223 | dct[path[0]] = val 224 | return dct 225 | else: 226 | key = path.pop(0) 227 | subsitute_param(dct[key], path, val) 228 | 229 | # in some cases the dict is encoded as a str 230 | if type(parameters) == str: 231 | parameters = decode_str_dicts(parameters) 232 | 233 | for key, value in parameters.items(): 234 | path = key.split(".") 235 | # Check for input/output parameter edge cases 236 | if path[0] not in original_dict.keys(): 237 | # check if param is associate with output feature 238 | for idx, out_feature in enumerate( 239 | original_dict["output_features"] 240 | ): 241 | if out_feature["name"] == path[0]: 242 | original_dict["output_features"][idx][path[1]] = value 243 | break 244 | 245 | for idx, out_feature in enumerate(original_dict["input_features"]): 246 | if out_feature["name"] == path[0]: 247 | original_dict["input_features"][idx][path[1]] = value 248 | break 249 | else: 250 | subsitute_param(original_dict, path, value) 251 | return original_dict 252 | 253 | 254 | def compare_json_enc_configs(cf_non_encoded, cf_json_encoded): 255 | """ compars to json encoded dicts """ 256 | for key, value in cf_non_encoded.items(): 257 | value_other = cf_json_encoded[key] 258 | if type(value) == list: 259 | value_other = json.loads(value_other) 260 | if type(value) == str: 261 | value_other = json.loads(value_other) 
262 | if type(value) == int: 263 | value_other = int(value_other) 264 | if value_other != value: 265 | return False 266 | else: 267 | return True 268 | 269 | 270 | def decode_json_enc_dict(encoded_dict, json_enc_params: list): 271 | for key, value in encoded_dict.items(): 272 | if key in json_enc_params and type(value) == str: 273 | encoded_dict[key] = json.loads(value) 274 | return encoded_dict 275 | 276 | 277 | def get_ray_tune_trial_dirs(base_dir: str, trial_dirs): 278 | """ returns all output directories of individual ray.tune trials """ 279 | if "params.json" in os.listdir(base_dir): 280 | trial_dirs.append(base_dir) 281 | else: 282 | for d in os.scandir(base_dir): 283 | if os.path.isdir(d): 284 | get_ray_tune_trial_dirs(d, trial_dirs) 285 | return trial_dirs 286 | 287 | 288 | def get_lastest_checkpoint(trial_dir: str, idx: int = -1): 289 | checkpoints = [ 290 | ckpt_dir 291 | for ckpt_dir in os.scandir(trial_dir) 292 | if os.path.isdir(ckpt_dir) and "checkpoint" in ckpt_dir.path 293 | ] 294 | sorted_cps = sorted(checkpoints, key=lambda d: d.path) 295 | if idx >= len(sorted_cps): 296 | idx = -1 297 | return sorted_cps[idx] 298 | 299 | 300 | def get_model_ckpt_paths( 301 | hyperopt_training_stats: list, output_dir: str, executor: str = "ray" 302 | ): 303 | """ 304 | maps output of individual tial run statistics to associated 305 | output directories. Necessary for accessing model checkpoints 306 | """ 307 | if executor == "ray": # folder construction is different 308 | hyperopt_run_metadata = [] 309 | # populate paths 310 | trial_dirs = [] 311 | for path in os.scandir(output_dir): 312 | if os.path.isdir(path): 313 | trial_dirs.extend(get_ray_tune_trial_dirs(path, [])) 314 | for hyperopt_run in hyperopt_training_stats: 315 | hyperopt_run_metadata.append( 316 | { 317 | "hyperopt_results": decode_json_enc_dict( 318 | hyperopt_run, 319 | ["parameters", "training_stats", "eval_stats"], 320 | ), 321 | "model_path": None, 322 | } 323 | ) 324 | for path in trial_dirs: 325 | if os.path.getsize(os.path.join(path, "progress.csv")) > 0: 326 | training_progress = pd.read_csv( 327 | os.path.join(path, "progress.csv") 328 | ) 329 | out_parameters = json.loads( 330 | training_progress.iloc[-1]["parameters"] 331 | ) 332 | out_eval_stats = json.loads( 333 | training_progress.iloc[-1]["eval_stats"] 334 | ) 335 | # compare total time, metric score, and parameters 336 | output_total_time = training_progress.iloc[-1]["time_total_s"] 337 | output_metric_score = training_progress.iloc[-1][ 338 | "metric_score" 339 | ] 340 | for hyperopt_run in hyperopt_run_metadata: 341 | run_total_time = hyperopt_run["hyperopt_results"][ 342 | "time_total_s" 343 | ] 344 | run_metric_score = hyperopt_run["hyperopt_results"][ 345 | "metric_score" 346 | ] 347 | run_params = hyperopt_run["hyperopt_results"]["parameters"] 348 | run_eval_stats = hyperopt_run["hyperopt_results"][ 349 | "eval_stats" 350 | ] 351 | if hash_dict(run_eval_stats) == hash_dict(out_eval_stats): 352 | best_ckpt_idx = training_progress[ 353 | abs( 354 | training_progress["metric_score"] 355 | - hyperopt_run["hyperopt_results"][ 356 | "metric_score" 357 | ] 358 | ) 359 | < 1e-04 360 | ].iloc[0]["training_iteration"] 361 | best_ckpt_idx -= 1 362 | model_path = get_lastest_checkpoint( 363 | path, best_ckpt_idx 364 | ) 365 | if hyperopt_run["model_path"] is None: 366 | hyperopt_run["model_path"] = os.path.join( 367 | model_path, "model" 368 | ) 369 | break 370 | 371 | else: 372 | hyperopt_run_metadata = [] 373 | for run_dir in os.scandir(output_dir): 374 | if 
os.path.isdir(run_dir): 375 | sample_training_stats = json.load( 376 | open( 377 | os.path.join(run_dir.path, "training_statistics.json"), 378 | "rb", 379 | ) 380 | ) 381 | for hyperopt_run in hyperopt_training_stats: 382 | if hyperopt_run["training_stats"] == sample_training_stats: 383 | hyperopt_run_metadata.append( 384 | { 385 | "hyperopt_results": hyperopt_run, 386 | "model_path": os.path.join( 387 | run_dir.path, "model" 388 | ), 389 | } 390 | ) 391 | 392 | return hyperopt_run_metadata 393 | 394 | 395 | def collect_completed_trial_results(output_dir: str): 396 | results, metrics, params = [], [], [] 397 | trial_dirs = get_ray_tune_trial_dirs(output_dir, []) 398 | for trial_dir in trial_dirs: 399 | for f in os.scandir(trial_dir): 400 | if "progress" in f.name: 401 | try: 402 | progress = pd.read_csv(f) 403 | last_iter = len(progress) - 1 404 | last_iter_eval_stats = json.loads( 405 | progress.iloc[last_iter]["eval_stats"] 406 | ) 407 | if ( 408 | "overall_stats" 409 | in last_iter_eval_stats[ 410 | list(last_iter_eval_stats.keys())[0] 411 | ].keys() 412 | ): 413 | trial_results = decode_json_enc_dict( 414 | progress.iloc[last_iter].to_dict(), 415 | ["parameters", "training_stats", "eval_stats"], 416 | ) 417 | trial_results["done"] = True 418 | metrics.append( 419 | progress.iloc[last_iter]["metric_score"] 420 | ) 421 | curr_path = f.path 422 | params_path = curr_path.replace( 423 | "progress.csv", "params.json" 424 | ) 425 | trial_params = json.load(open(params_path, "rb")) 426 | params.append(trial_params) 427 | for key, value in trial_params.items(): 428 | config_key = "config" + "." + key 429 | trial_results[config_key] = value 430 | results.append(trial_results) 431 | except: 432 | pass 433 | return results, metrics, params 434 | 435 | 436 | def conditional_decorator(decorator, condition, *args): 437 | def wrapper(function): 438 | if condition(*args): 439 | return decorator(function) 440 | else: 441 | return function 442 | 443 | return wrapper 444 | -------------------------------------------------------------------------------- /lbt/utils/metadata_utils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import platform 4 | 5 | import GPUtil 6 | import ludwig 7 | import numpy as np 8 | import pandas as pd 9 | import psutil 10 | import ray 11 | import tensorflow as tf 12 | from ludwig.api import LudwigModel 13 | from ludwig.collect import collect_weights 14 | 15 | 16 | @ray.remote 17 | def get_ludwig_version(**kwargs): 18 | return ludwig.__version__ 19 | 20 | 21 | def scale_bytes(bytes: int, suffix: str = "B") -> str: 22 | factor = 1024 23 | for unit in ["", "K", "M", "G", "T", "P"]: 24 | if bytes < factor: 25 | return f"{bytes:.2f}{unit}{suffix}" 26 | bytes /= factor 27 | 28 | 29 | @ray.remote(num_gpus=1, num_returns=1) 30 | def get_hardware_metadata(**kwargs) -> dict: 31 | """Returns GPU, CPU and RAM information""" 32 | 33 | machine_info = {} 34 | # GPU 35 | gpus = GPUtil.getGPUs() 36 | if len(gpus) != 0: 37 | machine_info["total_gpus"] = len(gpus) 38 | gpu_type = {} 39 | for gpu_id, gpu in enumerate(gpus): 40 | gpu_type[gpu_id] = gpu.name 41 | machine_info["gpu_info"] = gpu_type 42 | else: 43 | machine_info["total_gpus"] = 0 44 | # CPU 45 | total_cores = psutil.cpu_count(logical=True) 46 | machine_info["total_cores"] = total_cores 47 | # RAM 48 | svmem = psutil.virtual_memory() 49 | total_RAM = scale_bytes(svmem.total) 50 | machine_info["RAM"] = total_RAM 51 | return machine_info 52 | 53 | 54 | 
@ray.remote(num_gpus=1, num_returns=1, max_calls=1) 55 | def get_inference_latency( 56 | model_path: str, dataset_path: str, num_samples: int = 20, **kwargs 57 | ) -> str: 58 | """ 59 | Returns avg. time to perform inference on 1 sample 60 | 61 | # Inputs 62 | :param model_path: (str) filepath to pre-trained model (directory that 63 | contains the model_hyperparameters.json). 64 | :param dataset_path: (str) filepath to dataset 65 | :param dataset_path: (int) number of dev samples to randomly sample 66 | 67 | # Return 68 | :return: (str) avg. time per training step 69 | """ 70 | 71 | # Create smaller datasets w/10 samples from original dev set 72 | full_dataset = pd.read_csv(dataset_path) 73 | # Note: split == 1 indicates the dev set 74 | if "split" in full_dataset: 75 | if len(full_dataset[full_dataset["split"] == 1]) > 0: 76 | sampled_dataset = full_dataset[full_dataset["split"] == 1].sample( 77 | n=num_samples 78 | ) 79 | elif len(full_dataset[full_dataset["split"] == 2]) > 0: 80 | sampled_dataset = full_dataset[full_dataset["split"] == 2].sample( 81 | n=num_samples 82 | ) 83 | else: 84 | sampled_dataset = full_dataset[full_dataset["split"] == 0].sample( 85 | n=num_samples 86 | ) 87 | else: 88 | sampled_dataset = full_dataset.sample(n=num_samples) 89 | ludwig_model = LudwigModel.load(model_path) 90 | start = datetime.datetime.now() 91 | _, _ = ludwig_model.predict( 92 | dataset=sampled_dataset, 93 | batch_size=1, 94 | ) 95 | total_time = datetime.datetime.now() - start 96 | avg_time_per_sample = total_time / num_samples 97 | formatted_time = "{:0>8}".format(str(avg_time_per_sample)) 98 | return formatted_time 99 | 100 | 101 | @ray.remote(num_returns=1) 102 | def get_training_cost( 103 | model_path: str, 104 | dataset_path: str, 105 | train_batch_size: int, 106 | run_stats: dict, 107 | gpu_cost_per_hr: float = 0.35, # GCP cost for Tesla T4 108 | ) -> float: 109 | """ 110 | Return total cost to train model using GCP compute resource 111 | """ 112 | total_time_s = int(run_stats["hyperopt_results"]["time_total_s"]) 113 | total_time_hr = total_time_s / 3600 114 | return float(total_time_hr * gpu_cost_per_hr) 115 | 116 | 117 | @ray.remote(num_gpus=1, num_returns=1, max_calls=1) 118 | def get_train_speed( 119 | model_path: str, 120 | dataset_path: str, 121 | train_batch_size: int, 122 | run_stats: dict, 123 | **kwargs, 124 | ) -> str: 125 | """ 126 | Returns avg. time per training step 127 | 128 | # Inputs 129 | :param model_path: (str) filepath to pre-trained model (directory that 130 | contains the model_hyperparameters.json). 131 | :param dataset_path: (str) filepath to dataset 132 | 133 | # Return 134 | :return: (str) avg. time per training step 135 | """ 136 | 137 | train_split_size = 0.7 138 | full_dataset = pd.read_csv(dataset_path) 139 | if "split" in full_dataset: 140 | total_samples = len(full_dataset[full_dataset["split"] == 0]) 141 | else: 142 | total_samples = int(train_split_size * len(full_dataset)) 143 | total_training_steps = int(total_samples / train_batch_size) 144 | time_per_batch = ( 145 | int(run_stats["hyperopt_results"]["time_this_iter_s"]) 146 | / total_training_steps 147 | ) 148 | formatted_time = "{:0>8}".format( 149 | str(datetime.timedelta(seconds=time_per_batch)) 150 | ) 151 | return formatted_time 152 | 153 | 154 | @ray.remote(num_gpus=1, num_returns=1, max_calls=1) 155 | def get_model_flops(model_path: str, **kwargs) -> int: 156 | """ 157 | Computes total model flops 158 | 159 | # Inputs 160 | :param model_path: (str) filepath to pre-trained model. 
161 | 162 | # Return 163 | :return: (int) total number of flops. 164 | """ 165 | tf.compat.v1.reset_default_graph() 166 | session = tf.compat.v1.Session() 167 | graph = tf.compat.v1.get_default_graph() 168 | flops = None 169 | with graph.as_default(): 170 | with session.as_default(): 171 | model = LudwigModel.load(model_path) 172 | run_meta = tf.compat.v1.RunMetadata() 173 | opts = tf.compat.v1.profiler.ProfileOptionBuilder.float_operation() 174 | flops = tf.compat.v1.profiler.profile( 175 | graph=graph, run_meta=run_meta, cmd="op", options=opts 176 | ) 177 | tf.compat.v1.reset_default_graph() 178 | session.close() 179 | return flops.total_float_ops 180 | 181 | 182 | @ray.remote(num_gpus=1, num_returns=1, max_calls=1) 183 | def get_model_size(model_path: str, **kwargs): 184 | """ 185 | Computes minimum bytes required to store model to memory 186 | 187 | # Inputs 188 | :param model_path: (str) filepath to pre-trained model. 189 | 190 | # Return 191 | :return: (int) total bytes 192 | :return: (str) total bytes scaled in string format 193 | """ 194 | tensor_filepaths = collect_weights( 195 | model_path=model_path, tensors=None, output_directory=".model_tensors" 196 | ) 197 | total_size = 0 198 | for fp in tensor_filepaths: 199 | weight_tensor = np.load(fp) 200 | total_size += weight_tensor.size 201 | total_bytes = total_size * 32 202 | scaled_bytes = scale_bytes(total_bytes) 203 | model_size = {"total_bytes": total_bytes, "scaled_bytes": scaled_bytes} 204 | return model_size 205 | 206 | 207 | def append_experiment_metadata( 208 | document: dict, 209 | model_path: str, 210 | data_path: str, 211 | run_stats: dict, 212 | train_batch_size: int = 16, 213 | ): 214 | print("METADATA tracking") 215 | for key, metrics_func in metadata_registry.items(): 216 | print("currently processing: {}".format(key)) 217 | try: 218 | output = globals()[metrics_func].remote( 219 | model_path=model_path, 220 | dataset_path=data_path, 221 | train_batch_size=train_batch_size, 222 | run_stats=run_stats, 223 | ) 224 | document.update({key: ray.get(output)}) 225 | except: 226 | print(f"failure processing: {key}") 227 | pass 228 | 229 | 230 | metadata_registry = { 231 | "inference_latency": "get_inference_latency", 232 | "time_per_train_step": "get_train_speed", 233 | "model_size": "get_model_size", 234 | "model_flops": "get_model_flops", 235 | "hardware_metadata": "get_hardware_metadata", 236 | "ludwig_version": "get_ludwig_version", 237 | "training_cost": "get_training_cost", 238 | } 239 | -------------------------------------------------------------------------------- /lbt/utils/test_utils.py: -------------------------------------------------------------------------------- 1 | from metadata_utils import * 2 | 3 | DATAPATH = "/sailhome/avanika/.ludwig_cache/sst2_1.0/processed/sst2.csv" 4 | MODEL_PATH = "/juice/scr/avanika/ludwig-benchmark-dev/ludwig-benchmark/experiment-outputs/sst2_bert/hyperopt_0_config_sst2_bert/model" 5 | 6 | machine_info = get_hardware_metadata() 7 | print(machine_info) 8 | 9 | #model_flops = model_flops(MODEL_PATH) 10 | #print(model_flops) 11 | 12 | #model_size = get_model_size(MODEL_PATH) 13 | #print(model_size) 14 | 15 | #latency = get_inference_latency(MODEL_PATH, DATAPATH) 16 | #print(latency) 17 | 18 | print(DATAPATH) 19 | train_speed = get_train_speed(MODEL_PATH, DATAPATH, train_batch_size=16) 20 | print(train_speed) 21 | -------------------------------------------------------------------------------- /lbt/visualizations/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .visualize import ( 2 | hyperopt_viz, 3 | compare_performance_viz, 4 | learning_curves_viz, 5 | ) 6 | -------------------------------------------------------------------------------- /lbt/visualizations/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/ludwig-benchmarking-toolkit/5cae39dba91f83717c467e1f3db982bb0fabaf70/lbt/visualizations/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /lbt/visualizations/__pycache__/visualize.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/ludwig-benchmarking-toolkit/5cae39dba91f83717c467e1f3db982bb0fabaf70/lbt/visualizations/__pycache__/visualize.cpython-37.pyc -------------------------------------------------------------------------------- /lbt/visualizations/visualize.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List, Union 3 | 4 | import globals 5 | import json 6 | import pickle 7 | from lbt.datasets import DATASET_REGISTRY 8 | from ludwig.visualize import ( 9 | compare_performance, 10 | hyperopt_report, 11 | learning_curves, 12 | ) 13 | 14 | 15 | def hyperopt_viz( 16 | hyperopt_stats_path: str = None, 17 | dataset_name: str = None, 18 | model_name: str = None, 19 | output_dir: str = None, 20 | ): 21 | """ 22 | Produces a report about hyperparameter optimization. 23 | Creating one graph per hyperparameter to show the distribution of results 24 | and one additional graph of pairwise hyperparameters interactions 25 | """ 26 | 27 | if hyperopt_stats_path: 28 | return hyperopt_report( 29 | hyperopt_stats_path=hyperopt_stats_path, 30 | output_directory=output_dir, 31 | ) 32 | elif dataset_name and model_name: 33 | if dataset_name not in DATASET_REGISTRY.keys(): 34 | raise ValueError("The specified dataset is not valid") 35 | elif model_name not in globals.ENCODER_HYPEROPT_FILENAMES.keys(): 36 | raise ValueError("The specified model name is not valid") 37 | 38 | exp_name = "_".join([dataset_name, model_name]) 39 | experiment_folder = os.path.join( 40 | globals.EXPERIMENT_OUTPUT_DIR, exp_name 41 | ) 42 | 43 | hyperopt_stats_json = os.path.join( 44 | experiment_folder, 45 | "hyperopt_statistics.json", 46 | ) 47 | json_file = json.load(open(hyperopt_stats_json, "rb")) 48 | 49 | # decode json 50 | hyperopt_results = [] 51 | for result in json_file["hyperopt_results"]: 52 | for key, val in result.items(): 53 | try: 54 | val = json.loads(val) 55 | result[key] = val 56 | except: 57 | pass 58 | hyperopt_results.append(result) 59 | json_file["hyperopt_results"] = hyperopt_results 60 | 61 | with open( 62 | os.path.join( 63 | experiment_folder, "hyperopt_statistics_decoded.json" 64 | ), 65 | "w", 66 | ) as outfile: 67 | json.dump(json_file, outfile) 68 | 69 | hyperopt_stats_path = os.path.join( 70 | experiment_folder, 71 | "hyperopt_statistics_decoded.json", 72 | ) 73 | return hyperopt_report( 74 | hyperopt_stats_path=hyperopt_stats_path, 75 | output_directory=output_dir, 76 | ) 77 | raise ValueError( 78 | "Please specify either a path to the hyperopt output stats json file" 79 | "or the dataset and model name of the experiment" 80 | ) 81 | 82 | 83 | def learning_curves_viz( 84 | model_name: str, 85 | dataset_name: str, 86 | 
output_feature_name: str, 87 | output_directory=None, 88 | file_format="pdf", 89 | ): 90 | """ 91 | Visualize how model metrics change over training and validation data 92 | epochs. 93 | """ 94 | 95 | exp_name = "_".join([dataset_name, model_name]) 96 | experiment_folder = os.path.join(globals.EXPERIMENT_OUTPUT_DIR, exp_name) 97 | 98 | results_file = os.path.join( 99 | experiment_folder, f"{exp_name}_hyperopt_results.pkl" 100 | ) 101 | hyperopt_results = pickle.load(open(results_file, "rb")) 102 | 103 | training_stats = [] 104 | experiment_ids = [] 105 | 106 | for model_results in hyperopt_results: 107 | training_stats.append(json.loads(model_results["training_stats"])) 108 | experiment_ids.append(model_results["experiment_id"]) 109 | 110 | return learning_curves( 111 | train_stats_per_model=training_stats, 112 | output_feature_name=output_feature_name, 113 | model_names=experiment_ids, 114 | output_directory=output_directory, 115 | file_format=file_format, 116 | ) 117 | 118 | 119 | def compare_performance_viz( 120 | model_name: str, 121 | dataset_name: str, 122 | output_feature_name: str, 123 | output_directory=None, 124 | file_format="pdf", 125 | ): 126 | """ Barplot visualization for each overall metric """ 127 | 128 | exp_name = "_".join([dataset_name, model_name]) 129 | experiment_folder = os.path.join(globals.EXPERIMENT_OUTPUT_DIR, exp_name) 130 | 131 | results_file = os.path.join( 132 | experiment_folder, f"{exp_name}_hyperopt_results.pkl" 133 | ) 134 | hyperopt_results = pickle.load(open(results_file, "rb")) 135 | 136 | eval_stats = [] 137 | experiment_ids = [] 138 | 139 | for model_results in hyperopt_results: 140 | eval_stats.append(json.loads(model_results["eval_stats"])) 141 | experiment_ids.append(model_results["experiment_id"]) 142 | 143 | return compare_performance( 144 | test_stats_per_model=eval_stats, 145 | output_feature_name=output_feature_name, 146 | model_names=experiment_ids, 147 | output_directory=output_directory, 148 | file_format=file_format, 149 | ) -------------------------------------------------------------------------------- /model-configs/bert_hyperopt.yaml: -------------------------------------------------------------------------------- 1 | input_features: 2 | - preprocessing: 3 | word_tokenizer: hf_tokenizer 4 | pretrained_model_name_or_path: bert-base-uncased 5 | 6 | training: 7 | batch_size: 16 8 | early_stop: 3 9 | 10 | parameters: 11 | input_features.name.encoder: bert 12 | input_features.name.reduced_output: 13 | space: choice 14 | type: category 15 | categories: ["cls_pooled", "sum", "avg"] 16 | output_features.name.fc_layers: 17 | # if space is grid_search, change 'categories' to 'values' 18 | space: choice 19 | type: category 20 | categories: 21 | [ 22 | [{ fc_size: 512 }, { fc_size: 256 }], 23 | [{ fc_size: 512 }], 24 | [{ fc_size: 256 }], 25 | ] 26 | -------------------------------------------------------------------------------- /model-configs/distilbert_hyperopt.yaml: -------------------------------------------------------------------------------- 1 | input_features: 2 | - preprocessing: 3 | word_tokenizer: hf_tokenizer 4 | pretrained_model_name_or_path: distilbert-base-uncased 5 | 6 | training: 7 | batch_size: 16 8 | early_stop: 3 9 | 10 | parameters: 11 | input_features.name.encoder: distilbert 12 | output_features.name.fc_layers: 13 | space: choice 14 | type: category 15 | categories: 16 | [ 17 | [{ fc_size: 512 }, { fc_size: 256 }], 18 | [{ fc_size: 512 }], 19 | [{ fc_size: 256 }], 20 | ] 21 | 
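The dotted parameter keys in the model configs above take the form section_or_feature.parameter; keys of the same shape come back in the sampled hyperopt results, and substitute_dict_parameters from lbt/utils/experiment_utils.py (shown earlier in this dump) folds those sampled values back into a concrete Ludwig config. A minimal sketch, using made-up feature names and values:

from lbt.utils.experiment_utils import substitute_dict_parameters

# made-up config and sampled values, for illustration only
config = {
    "input_features": [{"name": "text", "type": "text", "encoder": "distilbert"}],
    "output_features": [{"name": "label", "type": "category"}],
    "training": {"batch_size": 16},
}
sampled = {
    "training.batch_size": 32,              # top-level section key
    "label.fc_layers": [{"fc_size": 256}],  # keyed by the output feature's name
}
resolved = substitute_dict_parameters(config, sampled)
assert resolved["training"]["batch_size"] == 32
assert resolved["output_features"][0]["fc_layers"] == [{"fc_size": 256}]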
-------------------------------------------------------------------------------- /model-configs/electra_hyperopt.yaml: -------------------------------------------------------------------------------- 1 | input_features: 2 | - preprocessing: 3 | word_tokenizer: hf_tokenizer 4 | pretrained_model_name_or_path: google/electra-base-generator 5 | 6 | training: 7 | batch_size: 16 8 | early_stop: 3 9 | 10 | parameters: 11 | input_features.name.encoder: electra 12 | output_features.name.fc_layers: 13 | space: choice 14 | type: category 15 | categories: 16 | [ 17 | [{ fc_size: 512 }, { fc_size: 256 }], 18 | [{ fc_size: 512 }], 19 | [{ fc_size: 256 }], 20 | ] 21 | -------------------------------------------------------------------------------- /model-configs/resnet_hyperopt.yaml: -------------------------------------------------------------------------------- 1 | input_features: 2 | - encoder: resnet 3 | resnet_size: 14 4 | 5 | training: 6 | batch_size: 32 7 | early_stop: 5 8 | 9 | parameters: 10 | input_features.name.encoder: resnet 11 | output_features.name.fc_layers: 12 | # if space is grid_search, change 'categories' to 'values' 13 | space: choice 14 | categories: 15 | [ 16 | [{ fc_size: 512 }, { fc_size: 256 }], 17 | [{ fc_size: 512 }], 18 | [{ fc_size: 256 }], 19 | ] 20 | -------------------------------------------------------------------------------- /model-configs/rnn_hyperopt.yaml: -------------------------------------------------------------------------------- 1 | input_features: 2 | - encoder: rnn 3 | preprocessing: 4 | word_tokenizer: space 5 | pretrained_model_name_or_path: None 6 | dropout: 0.5 7 | 8 | training: 9 | early_stop: 7 10 | batch_size: 128 11 | eval_batch_size: 256 12 | 13 | parameters: 14 | input_features.name.encoder: rnn 15 | input_features.name.num_layers: 16 | space: randint 17 | type: int 18 | lower: 1 19 | upper: 5 20 | input_features.name.cell_type: 21 | space: choice 22 | type: category 23 | categories: [rnn, gru, lstm] 24 | input_features.name.state_size: 25 | space: choice 26 | type: category 27 | categories: [256, 512] 28 | input_features.name.fc_layers: 29 | space: choice 30 | type: category 31 | categories: 32 | [ 33 | [{ fc_size: 512 }, { fc_size: 256 }], 34 | [{ fc_size: 512 }], 35 | [{ fc_size: 256 }], 36 | ] 37 | -------------------------------------------------------------------------------- /model-configs/roberta_hyperopt.yaml: -------------------------------------------------------------------------------- 1 | input_features: 2 | - preprocessing: 3 | word_tokenizer: hf_tokenizer 4 | pretrained_model_name_or_path: roberta-base 5 | 6 | training: 7 | batch_size: 16 8 | early_stop: 3 9 | 10 | parameters: 11 | input_features.name.encoder: roberta 12 | input_features.name.reduced_output: 13 | space: choice 14 | type: category 15 | categories: [cls_pooled, sum, avg] 16 | output_features.name.fc_layers: 17 | space: choice 18 | type: category 19 | categories: 20 | [ 21 | [{ fc_size: 512 }, { fc_size: 256 }], 22 | [{ fc_size: 512 }], 23 | [{ fc_size: 256 }], 24 | ] 25 | -------------------------------------------------------------------------------- /model-configs/stackedcnn_hyperopt.yaml: -------------------------------------------------------------------------------- 1 | input_features: 2 | - encoder: stacked_cnn 3 | 4 | training: 5 | batch_size: 32 6 | early_stop: 5 7 | 8 | parameters: 9 | input_features.name.encoder: stacked_cnn 10 | output_features.name.fc_layers: 11 | # if space is grid_search, change 'categories' to 'values' 12 | space: choice 13 | type: 
category 14 | categories: 15 | [ 16 | [{ fc_size: 512 }, { fc_size: 256 }], 17 | [{ fc_size: 512 }], 18 | [{ fc_size: 256 }], 19 | ] 20 | -------------------------------------------------------------------------------- /model-configs/stackedparallelcnn_hyperopt.yaml: -------------------------------------------------------------------------------- 1 | input_features: 2 | - type: sequence 3 | encoder: stacked_parallel_cnn 4 | pretrained_embeddings: PATH_TO_PRETRAINED_EMBEDDINGS 5 | dropout: 0.5 6 | preprocessing: 7 | word_tokenizer: space 8 | 9 | training: 10 | early_stop: 7 11 | eval_batch_size: 256 12 | batch_size: 128 13 | 14 | parameters: 15 | input_features.name.encoder: stacked_parallel_cnn 16 | 17 | input_features.name.stacked_layers: 18 | space: choice 19 | type: category 20 | categories: 21 | [ 22 | [[{ filter_size: 1 }, { filter_size: 2 }, { filter_size: 3 }]], 23 | [ 24 | [{ filter_size: 1 }, { filter_size: 2 }, { filter_size: 3 }], 25 | [{ filter_size: 1 }, { filter_size: 2 }, { filter_size: 3 }], 26 | ], 27 | [ 28 | [{ filter_size: 1 }, { filter_size: 2 }, { filter_size: 3 }], 29 | [{ filter_size: 1 }, { filter_size: 2 }, { filter_size: 3 }], 30 | [{ filter_size: 1 }, { filter_size: 2 }, { filter_size: 3 }], 31 | ], 32 | [ 33 | [{ filter_size: 1 }, { filter_size: 2 }, { filter_size: 3 }], 34 | [{ filter_size: 1 }, { filter_size: 2 }, { filter_size: 3 }], 35 | [{ filter_size: 1 }, { filter_size: 2 }, { filter_size: 3 }], 36 | [{ filter_size: 1 }, { filter_size: 2 }, { filter_size: 3 }], 37 | ], 38 | [[{ filter_size: 2 }, { filter_size: 3 }, { filter_size: 4 }]], 39 | [ 40 | [{ filter_size: 2 }, { filter_size: 3 }, { filter_size: 4 }], 41 | [{ filter_size: 2 }, { filter_size: 3 }, { filter_size: 4 }], 42 | ], 43 | [ 44 | [{ filter_size: 2 }, { filter_size: 3 }, { filter_size: 4 }], 45 | [{ filter_size: 2 }, { filter_size: 3 }, { filter_size: 4 }], 46 | [{ filter_size: 2 }, { filter_size: 3 }, { filter_size: 4 }], 47 | ], 48 | [ 49 | [{ filter_size: 2 }, { filter_size: 3 }, { filter_size: 4 }], 50 | [{ filter_size: 2 }, { filter_size: 3 }, { filter_size: 4 }], 51 | [{ filter_size: 2 }, { filter_size: 3 }, { filter_size: 4 }], 52 | [{ filter_size: 2 }, { filter_size: 3 }, { filter_size: 4 }], 53 | ], 54 | [[{ filter_size: 3 }, { filter_size: 4 }, { filter_size: 5 }]], 55 | [ 56 | [{ filter_size: 3 }, { filter_size: 4 }, { filter_size: 5 }], 57 | [{ filter_size: 3 }, { filter_size: 4 }, { filter_size: 5 }], 58 | ], 59 | [ 60 | [{ filter_size: 3 }, { filter_size: 4 }, { filter_size: 5 }], 61 | [{ filter_size: 3 }, { filter_size: 4 }, { filter_size: 5 }], 62 | [{ filter_size: 3 }, { filter_size: 4 }, { filter_size: 5 }], 63 | ], 64 | [ 65 | [{ filter_size: 3 }, { filter_size: 4 }, { filter_size: 5 }], 66 | [{ filter_size: 3 }, { filter_size: 4 }, { filter_size: 5 }], 67 | [{ filter_size: 3 }, { filter_size: 4 }, { filter_size: 5 }], 68 | [{ filter_size: 3 }, { filter_size: 4 }, { filter_size: 5 }], 69 | ], 70 | [[{ filter_size: 4 }, { filter_size: 5 }, { filter_size: 6 }]], 71 | [ 72 | [{ filter_size: 4 }, { filter_size: 5 }, { filter_size: 6 }], 73 | [{ filter_size: 4 }, { filter_size: 5 }, { filter_size: 6 }], 74 | ], 75 | [ 76 | [{ filter_size: 4 }, { filter_size: 5 }, { filter_size: 6 }], 77 | [{ filter_size: 4 }, { filter_size: 5 }, { filter_size: 6 }], 78 | [{ filter_size: 4 }, { filter_size: 5 }, { filter_size: 6 }], 79 | ], 80 | [ 81 | [{ filter_size: 4 }, { filter_size: 5 }, { filter_size: 6 }], 82 | [{ filter_size: 4 }, { filter_size: 5 }, { filter_size: 6 }], 83 | [{ 
filter_size: 4 }, { filter_size: 5 }, { filter_size: 6 }], 84 | [{ filter_size: 4 }, { filter_size: 5 }, { filter_size: 6 }], 85 | ], 86 | [[{ filter_size: 5 }, { filter_size: 6 }, { filter_size: 7 }]], 87 | [ 88 | [{ filter_size: 5 }, { filter_size: 6 }, { filter_size: 7 }], 89 | [{ filter_size: 5 }, { filter_size: 6 }, { filter_size: 7 }], 90 | ], 91 | [ 92 | [{ filter_size: 5 }, { filter_size: 6 }, { filter_size: 7 }], 93 | [{ filter_size: 5 }, { filter_size: 6 }, { filter_size: 7 }], 94 | [{ filter_size: 5 }, { filter_size: 6 }, { filter_size: 7 }], 95 | ], 96 | [ 97 | [{ filter_size: 5 }, { filter_size: 6 }, { filter_size: 7 }], 98 | [{ filter_size: 5 }, { filter_size: 6 }, { filter_size: 7 }], 99 | [{ filter_size: 5 }, { filter_size: 6 }, { filter_size: 7 }], 100 | [{ filter_size: 5 }, { filter_size: 6 }, { filter_size: 7 }], 101 | ], 102 | [[{ filter_size: 6 }, { filter_size: 7 }, { filter_size: 8 }]], 103 | [ 104 | [{ filter_size: 6 }, { filter_size: 7 }, { filter_size: 8 }], 105 | [{ filter_size: 6 }, { filter_size: 7 }, { filter_size: 8 }], 106 | ], 107 | [ 108 | [{ filter_size: 6 }, { filter_size: 7 }, { filter_size: 8 }], 109 | [{ filter_size: 6 }, { filter_size: 7 }, { filter_size: 8 }], 110 | [{ filter_size: 6 }, { filter_size: 7 }, { filter_size: 8 }], 111 | ], 112 | [ 113 | [{ filter_size: 6 }, { filter_size: 7 }, { filter_size: 8 }], 114 | [{ filter_size: 6 }, { filter_size: 7 }, { filter_size: 8 }], 115 | [{ filter_size: 6 }, { filter_size: 7 }, { filter_size: 8 }], 116 | [{ filter_size: 6 }, { filter_size: 7 }, { filter_size: 8 }], 117 | ], 118 | [[{ filter_size: 7 }, { filter_size: 8 }, { filter_size: 9 }]], 119 | [ 120 | [{ filter_size: 7 }, { filter_size: 8 }, { filter_size: 9 }], 121 | [{ filter_size: 7 }, { filter_size: 8 }, { filter_size: 9 }], 122 | ], 123 | [ 124 | [{ filter_size: 7 }, { filter_size: 8 }, { filter_size: 9 }], 125 | [{ filter_size: 7 }, { filter_size: 8 }, { filter_size: 9 }], 126 | [{ filter_size: 7 }, { filter_size: 8 }, { filter_size: 9 }], 127 | ], 128 | [ 129 | [{ filter_size: 7 }, { filter_size: 8 }, { filter_size: 9 }], 130 | [{ filter_size: 7 }, { filter_size: 8 }, { filter_size: 9 }], 131 | [{ filter_size: 7 }, { filter_size: 8 }, { filter_size: 9 }], 132 | [{ filter_size: 7 }, { filter_size: 8 }, { filter_size: 9 }], 133 | ], 134 | [[{ filter_size: 8 }, { filter_size: 9 }, { filter_size: 10 }]], 135 | [ 136 | [{ filter_size: 8 }, { filter_size: 9 }, { filter_size: 10 }], 137 | [{ filter_size: 8 }, { filter_size: 9 }, { filter_size: 10 }], 138 | ], 139 | [ 140 | [{ filter_size: 8 }, { filter_size: 9 }, { filter_size: 10 }], 141 | [{ filter_size: 8 }, { filter_size: 9 }, { filter_size: 10 }], 142 | [{ filter_size: 8 }, { filter_size: 9 }, { filter_size: 10 }], 143 | ], 144 | [ 145 | [{ filter_size: 8 }, { filter_size: 9 }, { filter_size: 10 }], 146 | [{ filter_size: 8 }, { filter_size: 9 }, { filter_size: 10 }], 147 | [{ filter_size: 8 }, { filter_size: 9 }, { filter_size: 10 }], 148 | [{ filter_size: 8 }, { filter_size: 9 }, { filter_size: 10 }], 149 | ], 150 | ] 151 | 152 | input_features.name.num_filters: 153 | space: qrandint 154 | type: int 155 | lower: 100 156 | upper: 600 157 | steps: 100 158 | scale: linear 159 | 160 | input_features.name.activation: 161 | space: choice 162 | type: category 163 | categories: [tanh, relu] 164 | 165 | input_features.name.fc_layers: 166 | space: choice 167 | type: category 168 | categories: 169 | [ 170 | [{ fc_size: 512 }, { fc_size: 256 }], 171 | [{ fc_size: 512 }], 172 | [{ fc_size: 256 }], 173 | ] 
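# Note: the stacked_layers space above enumerates 1-4 identical stacks for each
# window of three consecutive filter sizes (1-2-3 through 8-9-10), i.e. 32
# candidate layer configurations in total.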
174 | -------------------------------------------------------------------------------- /model-configs/t5_hyperopt.yaml: -------------------------------------------------------------------------------- 1 | input_features: 2 | - preprocessing: 3 | word_tokenizer: hf_tokenizer 4 | pretrained_model_name_or_path: t5-base 5 | 6 | training: 7 | batch_size: 16 8 | early_stop: 3 9 | 10 | parameters: 11 | input_features.name.encoder: t5 12 | output_features.name.fc_layers: 13 | space: choice 14 | type: category 15 | categories: 16 | [ 17 | [{ fc_size: 512 }, { fc_size: 256 }], 18 | [{ fc_size: 512 }], 19 | [{ fc_size: 256 }], 20 | ] 21 | -------------------------------------------------------------------------------- /upload_to_db.py: -------------------------------------------------------------------------------- 1 | import ray 2 | from database import Database, save_results_to_es 3 | from utils.experiment_utils import * 4 | 5 | # from experiment_driver import map_runstats_to_modelpath 6 | import pickle 7 | import os 8 | import json 9 | from utils.metadata_utils import append_experiment_metadata 10 | 11 | ray.init(address="auto") 12 | 13 | datasets = ["agnews"] 14 | encoders = ["rnn", "distilbert", "t5", "electra"] 15 | 16 | elastic_config_file = "./elasticsearch_config.yaml" 17 | paths_to_dataset = { 18 | "agnews": "/experiments/ludwig-bench-textclassification/data/agnews_1.0/processed/agnews.csv" 19 | } 20 | 21 | 22 | def main(): 23 | elastic_config = None 24 | elastic_config = load_yaml(elastic_config_file) 25 | 26 | exp_info = [] 27 | for dataset in datasets: 28 | for enc in encoders: 29 | path_to_stats_file = f"/experiments/ludwig-bench-textclassification/experiment-outputs/{dataset}_{enc}/{dataset}_{enc}_hyperopt_results.pkl" 30 | path_to_output_dir = f"/experiments/ludwig-bench-textclassification/experiment-outputs/{dataset}_{enc}/" 31 | path_to_model_config = f"/experiments/ludwig-bench-textclassification/experiment-configs/config_{dataset}_{enc}.yaml" 32 | model_config = load_yaml(path_to_model_config) 33 | path_to_dataset = paths_to_dataset[dataset] 34 | experiment_attr = { 35 | "model_config": copy.deepcopy(model_config), 36 | "dataset_path": path_to_dataset, 37 | "top_n_trials": None, 38 | "model_name": f"config_{dataset}_{enc}", 39 | "output_dir": path_to_output_dir, 40 | "encoder": enc, 41 | "dataset": dataset, 42 | "elastic_config": elastic_config, 43 | } 44 | hyperopt_results = pickle.load(open(path_to_stats_file, "rb")) 45 | exp_info.append((experiment_attr, hyperopt_results)) 46 | 47 | outputs = ray.get( 48 | [ 49 | save_results_to_es.remote(info[0], info[1], "ray") 50 | for info in exp_info 51 | ] 52 | ) 53 | 54 | 55 | if __name__ == "__main__": 56 | main() --------------------------------------------------------------------------------
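Closing usage sketch (not part of the repo): the augment() helper defined in lbt/tools/textattack/textattack.py above can be driven directly against one of the bundled toy datasets; the column names below are assumptions rather than values read from the CSV.

from lbt.tools.textattack import augment

augmented_df = augment(
    dataset_name="toy_agnews",
    path_to_dataset="lbt/datasets/toy-datasets/toy_agnews.csv",
    input_feature_name="description",    # assumed input column
    output_feature_name="class_index",   # assumed label column
    augmenter_name="CharSwapAugmenter",  # default recipe registered in textattack.py
    pct_words_to_swap=0.1,
    transformations_per_example=1,
    save_path="toy_agnews_charswap.csv",
)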