├── .gitignore
├── CODE_OF_CONDUCT.md
├── LICENSE
├── README.md
├── SECURITY.md
├── SUPPORT.md
├── aml-tutorial
│   ├── ReadMe.md
│   ├── components
│   │   ├── jsonl_guidance_component.yaml
│   │   ├── jsonl_score_multiplechoice_component.yaml
│   │   └── src
│   │       ├── jsonl_guidance_aoai.py
│   │       └── jsonl_score_multiplechoice.py
│   ├── create_dataset.py
│   ├── environments
│   │   └── promptbase-basic-env.yaml
│   ├── guidance_programs
│   │   └── zero_shot.py
│   ├── other_configs_example.json
│   ├── requirements.txt
│   └── run_experiment.py
├── azureml
│   ├── ReadMe.md
│   ├── components
│   │   ├── jsonl_embeddings_aoai_component.yaml
│   │   ├── jsonl_filter_correct_multiplechoice_component.yaml
│   │   ├── jsonl_guidance_component.yaml
│   │   ├── jsonl_guidance_phi2_component.yaml
│   │   ├── jsonl_key_filter_component.yaml
│   │   ├── jsonl_key_rename_component.yaml
│   │   ├── jsonl_knn_cosine_similarity_component.yaml
│   │   ├── jsonl_mmlu_fetch_component.yaml
│   │   ├── jsonl_random_examples_component.yaml
│   │   ├── jsonl_schema_checker_component.yaml
│   │   ├── jsonl_score_biosbias_json_component.yaml
│   │   ├── jsonl_score_multiplechoice_component.yaml
│   │   ├── jsonl_to_json_component.yaml
│   │   ├── src
│   │   │   ├── jsonl_embeddings_aoai.py
│   │   │   ├── jsonl_filter_correct_multiplechoice.py
│   │   │   ├── jsonl_guidance_aoai.py
│   │   │   ├── jsonl_guidance_phi2.py
│   │   │   ├── jsonl_key_filter.py
│   │   │   ├── jsonl_key_rename.py
│   │   │   ├── jsonl_knn_cosine_similarity.py
│   │   │   ├── jsonl_mmlu_fetch.py
│   │   │   ├── jsonl_random_examples.py
│   │   │   ├── jsonl_schema_check.py
│   │   │   ├── jsonl_score_biosbias_json.py
│   │   │   ├── jsonl_score_multiplechoice.py
│   │   │   └── jsonl_to_json.py
│   │   └── uri_folder_to_file_component.yaml
│   ├── environments
│   │   ├── phi2transformer-env.yaml
│   │   └── promptbase-env.yaml
│   ├── json_schemas
│   │   ├── multichoice_schema.json
│   │   └── multiplechoice_cot_schema.json
│   ├── pipelines
│   │   ├── azureml_pipelines.py
│   │   ├── azureml_utils.py
│   │   ├── configs.py
│   │   ├── configs
│   │   │   ├── aml_config_template.yaml
│   │   │   ├── aoai_config_template.yaml
│   │   │   ├── biosbias_json_config.yaml
│   │   │   ├── biosbias_json_phi2_config.yaml
│   │   │   ├── fewshot_knn_config.yaml
│   │   │   ├── fewshot_random_config.yaml
│   │   │   ├── knn_fewshot_cot_config.yaml
│   │   │   ├── knn_fewshot_cot_ensemble_config.yaml
│   │   │   ├── random_fewshot_cot_config.yaml
│   │   │   ├── zeroshot_config.yaml
│   │   │   └── zeroshot_cot_config.yaml
│   │   ├── constants.py
│   │   ├── logging_utils.py
│   │   ├── submit_mmlu_fewshot_knn_cot.py
│   │   ├── submit_mmlu_fewshot_random_cot.py
│   │   ├── submit_mmlu_knn_fewshot.py
│   │   ├── submit_mmlu_random_fewshot.py
│   │   ├── submit_mmlu_zeroshot.py
│   │   ├── submit_mmlu_zeroshot_cot.py
│   │   ├── submit_simple_biosbias_json.py
│   │   └── submit_simple_biosbias_json_phi2.py
│   └── requirements.txt
├── guidance_programs
│   ├── fewshot.py
│   ├── fewshot_as_conversation.py
│   ├── fewshot_cot_as_conversation.py
│   ├── fewshot_cot_as_conversation_ensemble.py
│   ├── simple_biosbias_json.py
│   ├── simple_biosbias_json_completion.py
│   ├── simple_biosbias_json_completion_v2.py
│   ├── zero_or_few_shot.py
│   ├── zero_or_few_shot_alpha.py
│   ├── zero_or_few_shot_expert.py
│   ├── zero_or_few_shot_fortran.py
│   └── zero_shot_cot.py
├── images
│   ├── medprompt_radar.png
│   ├── medprompt_sa_graphic.png
│   └── mmlu_accuracy_ablation.png
└── src
    ├── promptbase
    │   ├── __init__.py
    │   ├── __main__.py
    │   ├── bigbench
    │   │   ├── __init__.py
    │   │   ├── bigbench.py
    │   │   ├── bigbench_answer.py
    │   │   ├── bigbench_cot.py
    │   │   ├── bigbench_score.py
    │   │   └── consts.py
    │   ├── datasets
    │   │   └── put_datasets_here.txt
    │   ├── drop
    │   │   ├── __init__.py
    │   │   └── drop.py
    │   ├── format
    │   │   ├── format_hellaswag.py
    │   │   └── format_mmlu.py
    │   ├── generations
    │   │   └── README.md
    │   ├── gsm8k
    │   │   ├── __init__.py
    │   │   └── gsm8k.py
    │   ├── humaneval
    │   │   ├── __init__.py
    │   │   └── humaneval.py
    │   ├── math
    │   │   ├── __init__.py
    │   │   └── math.py
    │   ├── mmlu
    │   │   ├── MMLU.py
    │   │   ├── __init__.py
    │   │   ├── analyze.py
    │   │   ├── embed_problems.py
    │   │   ├── eval.py
    │   │   ├── experiment.py
    │   │   ├── generate.py
    │   │   ├── mmlu_paths.py
    │   │   ├── print_results.py
    │   │   ├── problem_utils.py
    │   │   ├── prompt_templates.py
    │   │   ├── test.py
    │   │   ├── tune_parameter
    │   │   │   ├── analyze.py
    │   │   │   └── summarize.py
    │   │   └── utils.py
    │   └── utils
    │       ├── __init__.py
    │       └── helpers.py
    └── setup.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
**/datasets/*
**/log.md
**/warnings.log
*.pyc
*.json
*.json.gz
env/
src/*.egg-info/*
src/promptbase/generations/*
*.log
*.jsonl
src/promptbase/datasets/BigBench/**


notebooks/*

# Don't include my actual configs
azureml/pipelines/configs/aml_config.yaml
azureml/pipelines/configs/aoai_config.yaml
azureml/pipelines/configs/aoai_embedding_config.yaml

# Don't include Hydra output directory
azureml/**/outputs/*

# DO include our schema jsons
!azureml/json_schemas/*.json

# Do include the example other_config
!aml-tutorial/other_configs_example.json

--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
# Microsoft Open Source Code of Conduct

This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).

Resources:

- [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
- [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
- Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) Microsoft Corporation.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------

## Security

Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin).

If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below.

## Reporting Security Issues

**Please do not report security vulnerabilities through public GitHub issues.**

Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report).

If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp).

You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).

Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:

* Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
* Full paths of source file(s) related to the manifestation of the issue
* The location of the affected source code (tag/branch/commit or direct URL)
* Any special configuration required to reproduce the issue
* Step-by-step instructions to reproduce the issue
* Proof-of-concept or exploit code (if possible)
* Impact of the issue, including how an attacker might exploit the issue

This information will help us triage your report more quickly.

If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs.

## Preferred Languages

We prefer all communications to be in English.

## Policy

Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd).

--------------------------------------------------------------------------------
/SUPPORT.md:
--------------------------------------------------------------------------------
# TODO: The maintainer of this repo has not yet edited this file

**REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project?

- **No CSS support:** Fill out this template with information about how to file issues and get help.
- **Yes CSS support:** Fill out an intake form at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). CSS will work with/help you to determine next steps.
- **Not sure?** Fill out an intake as though the answer were "Yes". CSS will help you decide.

*Then remove this first heading from this SUPPORT.MD file before publishing your repo.*

# Support

## How to file issues and get help

This project uses GitHub Issues to track bugs and feature requests. Please search the existing
issues before filing new issues to avoid duplicates. For new issues, file your bug or
feature request as a new Issue.

For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE
FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER
CHANNEL. WHERE WILL YOU HELP PEOPLE?**.

## Microsoft Support Policy

Support for this **PROJECT or PRODUCT** is limited to the resources listed above.

--------------------------------------------------------------------------------
/aml-tutorial/components/jsonl_guidance_component.yaml:
--------------------------------------------------------------------------------
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json

name: jsonl_guidance_aoai
version: 0.0.1pre1
display_name: JSONL Guidance Azure OpenAI
type: command
description: Runs a supplied Guidance program on every line of a JSONL file
is_deterministic: false

inputs:
  guidance_program:
    type: uri_file
    optional: false
    description: Python file containing the guidance program
  guidance_workers:
    type: integer
    optional: false
    default: 4
    description: Number of workers to use
  max_errors:
    type: integer
    optional: false
    default: 5
    description: Maximum number of failed lines to tolerate
  input_dataset:
    type: uri_file
    optional: false
    description: Dataset containing JSONL input
  azure_openai_endpoint:
    type: string
    optional: false
    description: The Azure OpenAI endpoint to call
  azure_openai_deployment:
    type: string
    optional: false
    description: The name of the deployment from the portal
  azure_openai_model:
    type: string
    optional: false
    default: gpt-3.5-turbo
    description: The OpenAI model behind the endpoint
  azure_openai_api_version:
    type: string
    optional: false
    description: The API version in use

outputs:
  output_dataset:
    type: uri_file
    description: JSONL file
  error_dataset:
    type: uri_file
    description: JSONL file containing failed lines

code: ./src/

command: >-
  python ./jsonl_guidance_aoai.py
  --guidance_program ${{ inputs.guidance_program }}
  --guidance_workers ${{ inputs.guidance_workers }}
  --max_errors ${{ inputs.max_errors }}
  --input_dataset ${{ inputs.input_dataset }}
  --azure_openai_endpoint ${{ inputs.azure_openai_endpoint }}
  --azure_openai_deployment ${{ inputs.azure_openai_deployment }}
  --azure_openai_model ${{ inputs.azure_openai_model }}
  --azure_openai_api_version ${{ inputs.azure_openai_api_version }}
  --output_dataset ${{ outputs.output_dataset }}
  --error_dataset ${{ outputs.error_dataset }}

environment:
  # Will be updated when component uploads
  image: azureml:promptbase_basic@latest

--------------------------------------------------------------------------------
/aml-tutorial/components/jsonl_score_multiplechoice_component.yaml:
--------------------------------------------------------------------------------
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json

name: jsonl_score_multiplechoice
version: 0.0.1pre1
display_name: JSONL Multiple Choice Scorer
type: command
description: |
  Takes a JSONL file of multiple choice questions and correct answers and responses
  from a model, and produces the overall score.
  Results are stored in JSON
is_deterministic: true

inputs:
  input_dataset:
    type: uri_file
    optional: false
    description: Dataset containing JSONL input
  correct_key:
    type: string
    optional: false
    description: Which key contains the correct answer
  response_key:
    type: string
    optional: false
    description: Which key contains the answer produced by the model

outputs:
  output_dataset:
    type: uri_file
    description: JSON file containing score summary


code: ./src/

command: >-
  python ./jsonl_score_multiplechoice.py
  --input_dataset ${{ inputs.input_dataset }}
  --output_dataset ${{ outputs.output_dataset }}
  --correct_key ${{ inputs.correct_key }}
  --response_key ${{ inputs.response_key }}

environment:
  # Will be updated when component uploads
  image: azureml:promptbase_basic@latest

--------------------------------------------------------------------------------
/aml-tutorial/components/src/jsonl_guidance_aoai.py:
--------------------------------------------------------------------------------
import argparse
import importlib.util
import json
import pathlib

from typing import Any, Callable, Dict

from azure.identity import DefaultAzureCredential, get_bearer_token_provider

import guidance

from aether_utils.jsonl_utils_multiprocessing import line_map_mp, ItemMapper
from aether_utils.logging_utils import get_standard_logger_for_file


_logger = get_standard_logger_for_file(__file__)

USER_MODULE = "user_module"
GUIDANCE_FUNCTION = "guidance_generation"


def parse_args():
    parser = argparse.ArgumentParser(add_help=True)

    # Information about the datasets
    datasets_group = parser.add_argument_group("Datasets")
    datasets_group.add_argument("--input_dataset", type=pathlib.Path, required=True)
    datasets_group.add_argument("--output_dataset", type=pathlib.Path, required=True)
    datasets_group.add_argument("--error_dataset", type=pathlib.Path, required=True)

    # Information about the guidance program
    parser.add_argument("--guidance_program", type=pathlib.Path, required=True)
    parser.add_argument("--guidance_workers", type=int, required=True)
    parser.add_argument("--max_errors", type=int, required=True)

    # Information about the model
    model_group = parser.add_argument_group("Model Endpoint")
    model_group.add_argument("--azure_openai_endpoint", type=str, required=True)
    model_group.add_argument("--azure_openai_deployment", type=str, required=True)
    model_group.add_argument("--azure_openai_model", type=str, required=True)
    model_group.add_argument("--azure_openai_api_version", type=str, required=True)

    args = parser.parse_args()
    return args


def get_guidance_function(
    program_path: pathlib.Path,
) -> Callable[[Dict[str, Any]], Dict[str, Any]]:
    _logger.info(f"Importing guidance file: {program_path}")
    spec = importlib.util.spec_from_file_location(USER_MODULE, program_path)
    module_definition = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module_definition)

    guidance_func = getattr(module_definition, GUIDANCE_FUNCTION)
    _logger.info("Guidance program imported")

    return guidance_func


class GuidanceAzureML(ItemMapper):
    def __init__(
        self,
        *,
        program_path: pathlib.Path,
        endpoint: str,
        deployment: str,
        model: str,
        api_version: str,
    ):
        super().__init__()
        self._program_path = program_path
        self._endpoint = endpoint
        self._deployment = deployment
        self._model = model
        self._api_version = api_version

    def start_up(self, worker_id: int) -> None:
        _logger.info(f"Starting up {worker_id}")
        self._guidance_function = get_guidance_function(self._program_path)
        self._azure_credential = DefaultAzureCredential()
        _logger.info(f"Start up complete {worker_id}")

    def _get_model(self) -> guidance.models.Model:
        token_provider = get_bearer_token_provider(
            self._azure_credential, "https://cognitiveservices.azure.com/.default"
        )
        assert token_provider is not None
        _logger.info("Got token_provider")

        azureai_model = guidance.models.AzureOpenAI(
            model=self._model,
            azure_endpoint=self._endpoint,
            azure_deployment=self._deployment,
            version=self._api_version,
            azure_ad_token_provider=token_provider,
        )
        _logger.info("Created AzureOpenAI model")

        return azureai_model

    def map(self, item: dict[str, Any]) -> dict[str, Any] | None:
        _logger.info(f"map: {item}")
        language_model = self._get_model()
        result = self._guidance_function(language_model, item)
        _logger.debug("Checking keys")
        for k in result.keys():
            assert k not in item, f"Duplicate key: {k}"

        _logger.info("Updating item")
        item.update(**result)

        return item


def main():
    args = parse_args()

    # Bind arguments to the processor function
    processor = GuidanceAzureML(
        program_path=args.guidance_program,
        endpoint=args.azure_openai_endpoint,
        deployment=args.azure_openai_deployment,
        model=args.azure_openai_model,
        api_version=args.azure_openai_api_version,
    )

    # Run the processing
    line_map_mp(
        mapper=processor,
        source_file=args.input_dataset,
        dest_file=args.output_dataset,
        source_encoding="utf-8-sig",
        dest_encoding="utf-8-sig",
        error_file=args.error_dataset,
        error_encoding="utf-8-sig",
        n_worker_tasks=args.guidance_workers,
        max_errors=args.max_errors,
    )

    _logger.info("Complete")


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/aml-tutorial/components/src/jsonl_score_multiplechoice.py:
--------------------------------------------------------------------------------
import argparse
import functools
import json
import pathlib

from typing import Any

import mlflow
import sklearn.metrics as skm

from aether_utils.jsonl_utils import line_reduce
from aether_utils.logging_utils import get_standard_logger_for_file

_logger = get_standard_logger_for_file(__file__)


class Scorer:
    def __init__(self, correct_key: str, response_key: str):
        self.y_true = []
        self.y_pred = []
        self.correct_key = correct_key
        self.response_key = response_key

    def __call__(self, line: dict[str, Any]):
        correct_answer = line[self.correct_key]
        response_answer = line[self.response_key]
        self.y_true.append(correct_answer)
        self.y_pred.append(response_answer)

    def generate_summary(self) -> dict[str, Any]:
        result = dict()
        result["count"] = len(self.y_true)
        result["accuracy"] = skm.accuracy_score(self.y_true, self.y_pred)
        result["n_correct"] = skm.accuracy_score(
            self.y_true, self.y_pred, normalize=False
        )
        return result


def parse_args():
    parser = argparse.ArgumentParser(add_help=True)

    # Information about the ports
    ports_group = parser.add_argument_group("Ports")
    ports_group.add_argument("--input_dataset", type=pathlib.Path, required=True)
    ports_group.add_argument("--output_dataset", type=pathlib.Path, required=True)

    # Information about the keys
    keys_group = parser.add_argument_group("Keys")
    keys_group.add_argument("--correct_key", type=str, required=True)
    keys_group.add_argument("--response_key", type=str, required=True)

    args = parser.parse_args()

    return args


def main():
    args = parse_args()

    scorer = Scorer(correct_key=args.correct_key, response_key=args.response_key)
    line_reduce(
        reducer=scorer,
        source_file=args.input_dataset,
        source_encoding="utf-8-sig",
    )
    summary = scorer.generate_summary()

    _logger.info("Logging with mlflow")
    mlflow.log_metrics(summary)
    _logger.info("Writing output file")

    with open(args.output_dataset, encoding="utf-8-sig", mode="w") as jf:
        json.dump(summary, jf, indent=4)


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/aml-tutorial/create_dataset.py:
--------------------------------------------------------------------------------
import argparse
import pathlib
import tempfile
import time

from typing import Any

import datasets

from azure.ai.ml import MLClient
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml.entities import Data

from azure.identity import DefaultAzureCredential

from aether_utils.jsonl_file_utils import save_jsonl
from aether_utils.logging_utils import get_standard_logger_for_file

_logger = get_standard_logger_for_file(__file__)

MMLU_DATASETS = [
    "abstract_algebra",
    "anatomy",
    "astronomy",
    "business_ethics",
    "clinical_knowledge",
    "college_biology",
    "college_chemistry",
    "college_computer_science",
    "college_mathematics",
    "college_medicine",
    "college_physics",
    "computer_security",
    "conceptual_physics",
    "econometrics",
    "electrical_engineering",
    "elementary_mathematics",
    "formal_logic",
    "global_facts",
    "high_school_biology",
    "high_school_chemistry",
    "high_school_computer_science",
    "high_school_european_history",
    "high_school_geography",
    "high_school_government_and_politics",
"high_school_macroeconomics", 47 | "high_school_mathematics", 48 | "high_school_microeconomics", 49 | "high_school_physics", 50 | "high_school_psychology", 51 | "high_school_statistics", 52 | "high_school_us_history", 53 | "high_school_world_history", 54 | "human_aging", 55 | "human_sexuality", 56 | "international_law", 57 | "jurisprudence", 58 | "logical_fallacies", 59 | "machine_learning", 60 | "management", 61 | "marketing", 62 | "medical_genetics", 63 | "miscellaneous", 64 | "moral_disputes", 65 | "moral_scenarios", 66 | "nutrition", 67 | "philosophy", 68 | "prehistory", 69 | "professional_accounting", 70 | "professional_law", 71 | "professional_medicine", 72 | "professional_psychology", 73 | "public_relations", 74 | "security_studies", 75 | "sociology", 76 | "us_foreign_policy", 77 | "virology", 78 | "world_religions", 79 | ] 80 | 81 | SPLITS = ["test", "validation", "dev"] 82 | 83 | 84 | def parse_args(): 85 | parser = argparse.ArgumentParser(add_help=True) 86 | 87 | mmlu_group = parser.add_argument_group( 88 | "MMLU Information", description="Options pertaining to the data" 89 | ) 90 | mmlu_group.add_argument( 91 | "--mmlu_dataset", 92 | type=str, 93 | choices=MMLU_DATASETS, 94 | required=True, 95 | help="The name of the desired MMLU dataset", 96 | ) 97 | mmlu_group.add_argument( 98 | "--split", 99 | type=str, 100 | choices=SPLITS, 101 | default="validation", 102 | help="Which of the splits to use", 103 | ) 104 | 105 | aml_group = parser.add_argument_group( 106 | "AzureML Information", description="Options pertaining to AzureML" 107 | ) 108 | aml_group.add_argument( 109 | "--workspace_config", 110 | type=pathlib.Path, 111 | default=pathlib.Path("./config.json"), 112 | help="Path to config.json downloaded from AzureML workspace", 113 | ) 114 | 115 | args = parser.parse_args() 116 | return args 117 | 118 | 119 | def process_data_split(data, subject: str) -> list[dict[str, Any]]: 120 | all_questions = [] 121 | for line in data: 122 | nxt = dict( 123 | dataset="mmlu", 124 | subject=subject, 125 | question=line["question"], 126 | choices=line["choices"], 127 | correct_answer=line["answer"], 128 | ) 129 | all_questions.append(nxt) 130 | 131 | return all_questions 132 | 133 | 134 | def main(): 135 | args = parse_args() 136 | assert args.workspace_config.exists(), f"Could not find {args.workspace_config}" 137 | 138 | _logger.info("Creating AzureML client") 139 | credential = DefaultAzureCredential(exclude_shared_token_cache_credential=True) 140 | ml_client = MLClient.from_config(credential, path=args.workspace_config) 141 | 142 | _logger.info(f"Fetching {args.mmlu_dataset}") 143 | hf_data = datasets.load_dataset("tasksource/mmlu", args.mmlu_dataset) 144 | 145 | _logger.info(f"Reformatting data") 146 | all_questions = process_data_split(hf_data[args.split], args.mmlu_dataset) 147 | 148 | with tempfile.TemporaryDirectory() as temp_dir: 149 | out_dir = pathlib.Path(temp_dir) 150 | 151 | dataset_name = f"mmlu_{args.mmlu_dataset}_{args.split}" 152 | 153 | out_file = out_dir / f"{dataset_name}.jsonl" 154 | save_jsonl(out_file, data=all_questions, destination_encoding="utf-8-sig") 155 | 156 | aml_data = Data( 157 | name=dataset_name, 158 | version=str(int(time.time())), 159 | description="Sample multiple choice dataset", 160 | path=out_file, 161 | type=AssetTypes.URI_FILE, 162 | ) 163 | returned_data = ml_client.data.create_or_update(aml_data) 164 | _logger.info( 165 | f"Created dataset {returned_data.name} at version {returned_data.version}" 166 | ) 167 | 168 | _logger.info("Complete") 169 | 170 

if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/aml-tutorial/environments/promptbase-basic-env.yaml:
--------------------------------------------------------------------------------
$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json

name: promptbase_basic
description: |
  A simple environment for promptbase

image: mcr.microsoft.com/azureml/inference-base-2004
conda_file:
  channels:
    - defaults
  dependencies:
    - python=3.12
    - pip
    - pip:
        - aether-utils==0.0.1.dev1
        - azure-identity
        - azure-keyvault-secrets
        - azureml-mlflow
        - guidance>0.1.5
        - jsonschema
        - mlflow
        - numpy
        - openai>=1
        - scikit-learn

--------------------------------------------------------------------------------
/aml-tutorial/guidance_programs/zero_shot.py:
--------------------------------------------------------------------------------
# This is a very naive guidance program for doing zero shot multiple choice questions
# It is not what generated the reported results

import logging
import sys

from typing import Any, Dict

import guidance
from guidance import gen, select, system, user, assistant


_logger = logging.getLogger(__file__)
_logger.setLevel(logging.INFO)
_logger.addHandler(logging.StreamHandler(stream=sys.stdout))


@guidance
def zero_shot_multiple_choice(
    lm: guidance.models.Model,
    question: str,
    choices: list[str],
):
    # Some general instruction to the model
    with system():
        lm += """You are a student taking a multiple choice test.
You will be shown a question, followed by numbered multiple choice answers.
Respond with the number corresponding to the best answer.
29 | """ 30 | 31 | with user(): 32 | lm += question + "\n" 33 | for i, choice in enumerate(choices): 34 | lm += f"{i} : {choice}\n" 35 | lm += "Correct Answer: " 36 | 37 | with assistant(): 38 | lm += select([str(i) for i in range(len(choices))], name="string_choice") 39 | 40 | return lm 41 | 42 | 43 | def guidance_generation( 44 | lm: guidance.models.Model, 45 | input: Dict[str, Any], 46 | ) -> Dict[str, Any]: 47 | _logger.info("Starting guidance_generation") 48 | result = lm + zero_shot_multiple_choice( 49 | question=input["question"], choices=input["choices"] 50 | ) 51 | 52 | _logger.info(f"Result: {result}") 53 | 54 | result = dict(zero_shot_choice=int(result["string_choice"])) 55 | return result 56 | -------------------------------------------------------------------------------- /aml-tutorial/other_configs_example.json: -------------------------------------------------------------------------------- 1 | { 2 | "aoai_endpoint": "https://SOMETHING.openai.azure.com/", 3 | "aoai_deployment": "A_DEPLOYMENT_NAME", 4 | "aoai_model": "gpt-4-32k", 5 | "aoai_api_version": "2024-02-01", 6 | "aoai_compute": "cluster_with_endpoint_permission", 7 | "general_compute": "any_other_cluster" 8 | } -------------------------------------------------------------------------------- /aml-tutorial/requirements.txt: -------------------------------------------------------------------------------- 1 | aether-utils==0.0.1.dev1 2 | azure-ai-ml 3 | datasets -------------------------------------------------------------------------------- /azureml/ReadMe.md: -------------------------------------------------------------------------------- 1 | # AzureML Pipelines 2 | 3 | This directory contains [AzureML pipelines](https://learn.microsoft.com/en-us/azure/machine-learning/concept-ml-pipelines?view=azureml-api-2) to run various datasets through a given Azure AI endpoints, and assess the results. 4 | The LLM prompting is done using the [`guidance` package](https://github.com/guidance-ai/guidance). 5 | It is provided as an 'extra' and was not used to generated the reported results. 6 | 7 | ## Contents 8 | 9 | - `components` 10 | This directory contains the Python [components](https://learn.microsoft.com/en-us/azure/machine-learning/concept-component?view=azureml-api-2) which are used in the AzureML pipelines 11 | - `environments` 12 | This directory contains the definition of the [AzureML environment](https://learn.microsoft.com/en-us/azure/machine-learning/concept-environments?view=azureml-api-2) shared by the various components 13 | - `pipelines` 14 | This directory contains the code required to submit the pipelines 15 | - `requirements.txt` 16 | A standard `pip` file which will install the necessary packages for the pipeline submission to work 17 | 18 | Furthermore, the actual `guidance` programs are in the top level `guidance_programs` directory in this repository. 19 | 20 | ## Preparing to submit a pipeline 21 | 22 | In order to submit a pipeline, you will need to give various pieces of information to the submission script (e.g. the AzureML workspace information). 23 | Look in the `pipelines/configs` directory, and you will see a number of `*_template.yaml` files. 24 | You will need to make copies without the '_template' suffix, and fill out the contents. 25 | For exmaple, the `aml_config_template.yaml` needs to be copied to `aml_config.yaml` (in the same directory) and filled out with appropriate information. 

## Submitting a pipeline

The pipeline submission scripts all have names prefixed with `submit_`.
To run one:
```bash
python ./submit_mmlu_zeroshot.py -cn zeroshot_config
```
where `zeroshot_config` means the `zeroshot_config.yaml` file in the `configs` directory.

--------------------------------------------------------------------------------
/azureml/components/jsonl_embeddings_aoai_component.yaml:
--------------------------------------------------------------------------------
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json

name: jsonl_embeddings_aoai
version: 0.0.1pre1
display_name: JSONL Embeddings Azure OpenAI
type: command
description: Get the AOAI embeddings for a given key in a JSONL file
is_deterministic: false

inputs:
  workers:
    type: integer
    optional: false
    default: 4
    description: Number of workers to use
  max_errors:
    type: integer
    optional: false
    default: 5
    description: Maximum number of failed lines to tolerate
  input_dataset:
    type: uri_file
    optional: false
    description: Dataset containing JSONL input
  input_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the input dataset
  azure_openai_endpoint:
    type: string
    optional: false
    description: The Azure OpenAI endpoint to call
  source_key:
    type: string
    optional: false
    description: Generate embeddings for this key
  destination_key:
    type: string
    optional: false
    description: Store embeddings in this key

  output_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the output dataset
  error_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the error dataset

outputs:
  output_dataset:
    type: uri_file
    description: JSONL file
  error_dataset:
    type: uri_file
    description: JSONL file containing failed lines

code: ./src/

command: >-
  python ./jsonl_embeddings_aoai.py
  --workers ${{ inputs.workers }}
  --max_errors ${{ inputs.max_errors }}
  --input_dataset ${{ inputs.input_dataset }}
  --input_encoding ${{ inputs.input_encoding }}
  --azure_openai_endpoint ${{ inputs.azure_openai_endpoint }}
  --output_dataset ${{ outputs.output_dataset }}
  --output_encoding ${{ inputs.output_encoding }}
  --error_dataset ${{ outputs.error_dataset }}
  --error_encoding ${{ inputs.error_encoding }}
  --source_key ${{ inputs.source_key }}
  --destination_key ${{ inputs.destination_key }}

environment:
  # Will be updated when component uploads
  image: azureml:guidance_aml_env@latest

--------------------------------------------------------------------------------
/azureml/components/jsonl_filter_correct_multiplechoice_component.yaml:
--------------------------------------------------------------------------------
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json

name: jsonl_filter_correct_multiplechoice
version: 0.0.1pre1
display_name: JSONL Multiple Choice Filter Correct
type: command
description: |
  Takes a JSONL file of multiple choice questions and correct answers and responses
  from a model, and filters out incorrect responses
is_deterministic: true

inputs:
  input_dataset:
    type: uri_file
    optional: false
    description: Dataset containing JSONL input
  input_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the input dataset
  correct_key:
    type: string
    optional: false
    description: Which key contains the correct answer
  response_key:
    type: string
    optional: false
    description: Which key contains the answer produced by the model
  output_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the output dataset

outputs:
  output_dataset:
    type: uri_file
    description: JSONL file containing the correctly answered lines


code: ./src/

command: >-
  python ./jsonl_filter_correct_multiplechoice.py
  --input_dataset ${{ inputs.input_dataset }}
  --input_encoding ${{ inputs.input_encoding }}
  --output_dataset ${{ outputs.output_dataset }}
  --output_encoding ${{ inputs.output_encoding }}
  --correct_key ${{ inputs.correct_key }}
  --response_key ${{ inputs.response_key }}

environment:
  # Will be updated when component uploads
  image: azureml:promptbase_aml@latest

--------------------------------------------------------------------------------
/azureml/components/jsonl_guidance_component.yaml:
--------------------------------------------------------------------------------
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json

name: jsonl_guidance_aoai
version: 0.0.1pre1
display_name: JSONL Guidance Azure OpenAI
type: command
description: Runs a supplied Guidance program on every line of a JSONL file
is_deterministic: false

inputs:
  guidance_program:
    type: uri_file
    optional: false
    description: Python file containing the guidance program
  guidance_workers:
    type: integer
    optional: false
    default: 4
    description: Number of workers to use
  max_errors:
    type: integer
    optional: false
    default: 5
    description: Maximum number of failed lines to tolerate
  input_dataset:
    type: uri_file
    optional: false
    description: Dataset containing JSONL input
  input_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the input dataset
  common_dataset:
    type: uri_file
    optional: true
    description: Dataset containing data to be shared with all rows in input
  common_encoding:
    type: string
    optional: true
    default: utf-8-sig
    description: Encoding format of the common dataset
  azure_openai_endpoint:
    type: string
    optional: false
    description: The Azure OpenAI endpoint to call
  azure_openai_deployed_model:
    type: string
    optional: false
    default: gpt-3.5-turbo
    description: The OpenAI model behind the endpoint
  output_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the output dataset
  error_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the error dataset

outputs:
  output_dataset:
    type: uri_file
    description: JSONL file
  error_dataset:
    type: uri_file
    description: JSONL file containing failed lines

code: ./src/

command: >-
  python ./jsonl_guidance_aoai.py
  --guidance_program ${{ inputs.guidance_program }}
  --guidance_workers ${{ inputs.guidance_workers }}
  --max_errors ${{ inputs.max_errors }}
  --input_dataset ${{ inputs.input_dataset }}
  --input_encoding ${{ inputs.input_encoding }}
  $[[--common_dataset ${{ inputs.common_dataset }} ]]
  $[[--common_encoding ${{ inputs.common_encoding }} ]]
  --azure_openai_endpoint ${{ inputs.azure_openai_endpoint }}
  --azure_openai_deployed_model ${{ inputs.azure_openai_deployed_model }}
  --output_dataset ${{ outputs.output_dataset }}
  --output_encoding ${{ inputs.output_encoding }}
  --error_dataset ${{ outputs.error_dataset }}
  --error_encoding ${{ inputs.error_encoding }}

environment:
  # Will be updated when component uploads
  image: azureml:guidance_aml_env@latest

--------------------------------------------------------------------------------
/azureml/components/jsonl_guidance_phi2_component.yaml:
--------------------------------------------------------------------------------
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json

name: jsonl_guidance_phi2
version: 0.0.1pre1
display_name: JSONL Guidance Phi2
type: command
description: Runs a supplied Guidance program on every line of a JSONL file via Phi2
is_deterministic: false

inputs:
  guidance_program:
    type: uri_file
    optional: false
    description: Python file containing the guidance program
  input_dataset:
    type: uri_file
    optional: false
    description: Dataset containing JSONL input
  input_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the input dataset
  common_dataset:
    type: uri_file
    optional: true
    description: Dataset containing data to be shared with all rows in input
  common_encoding:
    type: string
    optional: true
    default: utf-8-sig
    description: Encoding format of the common dataset
  output_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the output dataset
  error_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the error dataset

outputs:
  output_dataset:
    type: uri_file
    description: JSONL file
  error_dataset:
    type: uri_file
    description: JSONL file containing failed lines

code: ./src/

command: >-
  python ./jsonl_guidance_phi2.py
  --guidance_program ${{ inputs.guidance_program }}
  --input_dataset ${{ inputs.input_dataset }}
  --input_encoding ${{ inputs.input_encoding }}
  $[[--common_dataset ${{ inputs.common_dataset }} ]]
  $[[--common_encoding ${{ inputs.common_encoding }} ]]
  --output_dataset ${{ outputs.output_dataset }}
  --output_encoding ${{ inputs.output_encoding }}
  --error_dataset ${{ outputs.error_dataset }}
  --error_encoding ${{ inputs.error_encoding }}

environment:
  # Will be updated when component uploads
  image: azureml:guidance_phi2_env@latest

--------------------------------------------------------------------------------
/azureml/components/jsonl_key_filter_component.yaml:
--------------------------------------------------------------------------------
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json

name: jsonl_key_filter
display_name: 'JSONL Key Filter'
type: command
description: |
  Filters keys in a JSONL file. Either keeps keys from a specified list, or
  drops keys from a specified list
is_deterministic: true

inputs:
  input_dataset:
    type: uri_file
    optional: false
    description: Dataset containing JSONL input
  input_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the input dataset
  keep_keys:
    type: string
    optional: true
    description: Stringified JSON list of keys to keep. Mutually exclusive with drop_keys
  drop_keys:
    type: string
    optional: true
    description: Stringified JSON list of keys to drop. Mutually exclusive with keep_keys
  output_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the output dataset

outputs:
  output_dataset:
    type: uri_file
    description: Dataset containing JSONL filtered keys

code: ./src

command: >-
  python ./jsonl_key_filter.py
  --input_dataset ${{ inputs.input_dataset }}
  --input_encoding ${{ inputs.input_encoding }}
  $[[--keep_keys '${{ inputs.keep_keys }}']]
  $[[--drop_keys '${{ inputs.drop_keys }}']]
  --output_dataset ${{ outputs.output_dataset }}
  --output_encoding ${{ inputs.output_encoding }}

environment:
  # Will be updated when component uploads
  image: azureml:promptbase_aml@latest

--------------------------------------------------------------------------------
/azureml/components/jsonl_key_rename_component.yaml:
--------------------------------------------------------------------------------
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json

name: jsonl_key_rename
display_name: 'JSONL Key Rename'
type: command
description: |
  Renames keys in a JSONL file.
  For example, if the `rename_keys` parameter is set to:
  ```json
  { "a": "a_new" }
  ```
  then the file:
  ```
  { "a": 1, "b": 2 }
  { "a": 2, "b": 3 }
  ```
  will become:
  ```
  { "a_new": 1, "b": 2 }
  { "a_new": 2, "b": 3 }
  ```
is_deterministic: true

inputs:
  input_dataset:
    type: uri_file
    optional: false
    description: Dataset containing JSONL input
  input_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the input dataset
  rename_keys:
    type: string
    optional: false
    description: Stringified JSON dictionary of keys to rename
  output_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the output dataset

outputs:
  output_dataset:
    type: uri_file
    description: Dataset containing JSONL with renamed keys

code: ./src

command: >-
  python jsonl_key_rename.py
  --input_dataset ${{ inputs.input_dataset }}
  --input_encoding ${{ inputs.input_encoding }}
  --rename_keys '${{ inputs.rename_keys }}'
  --output_dataset ${{ outputs.output_dataset }}
  --output_encoding ${{ inputs.output_encoding }}

environment:
  # Will be updated when component uploads
  image: azureml:promptbase_aml@latest

--------------------------------------------------------------------------------
/azureml/components/jsonl_knn_cosine_similarity_component.yaml:
--------------------------------------------------------------------------------
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json

name: jsonl_knn_cosine_similarity
version: 0.0.1pre1
display_name: JSONL k-Nearest Neighbours Cosine Similarity
type: command
description: |
  Takes two JSONL files, 'input' and 'examples'.
  Given a key containing a vector in each file, for each line in the input:

  1. Compute the cosine similarity to every line in the examples
  2. Store the examples with the k largest values in the designated output key
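# For reference, the cosine similarity between vectors a and b is
# dot(a, b) / (norm(a) * norm(b)); values near 1 indicate near-identical directions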
is_deterministic: true

inputs:
  input_dataset:
    type: uri_file
    optional: false
    description: Dataset containing JSONL input
  input_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the input dataset
  example_dataset:
    type: uri_file
    optional: false
    description: Dataset containing JSONL example data
  example_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the example dataset
  output_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the output dataset
  input_vector_key:
    type: string
    optional: false
    description: Key in the input dataset which contains the vector
  example_vector_key:
    type: string
    optional: false
    description: Key in the example dataset which contains the vector
  output_key:
    type: string
    optional: false
    description: Key in which to store the list of k-nearest neighbours
  k_nearest:
    type: integer
    optional: false
    description: How many neighbours to select


outputs:
  output_dataset:
    type: uri_file
    description: JSONL file containing inputs with k-nearest neighbours appended


code: ./src/

command: >-
  python ./jsonl_knn_cosine_similarity.py
  --input_dataset ${{ inputs.input_dataset }}
  --input_encoding ${{ inputs.input_encoding }}
  --example_dataset ${{ inputs.example_dataset }}
  --example_encoding ${{ inputs.example_encoding }}
  --output_dataset ${{ outputs.output_dataset }}
  --output_encoding ${{ inputs.output_encoding }}
  --input_vector_key ${{ inputs.input_vector_key }}
  --example_vector_key ${{ inputs.example_vector_key }}
  --output_key ${{ inputs.output_key }}
  --k_nearest ${{ inputs.k_nearest }}


environment:
  # Will be updated when component uploads
  image: azureml:promptbase_aml@latest

--------------------------------------------------------------------------------
/azureml/components/jsonl_mmlu_fetch_component.yaml:
--------------------------------------------------------------------------------
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json

name: jsonl_mmlu_fetch
version: 0.0.1pre1
display_name: JSONL MMLU Fetcher
type: command
description: Fetches a given MMLU dataset and exports to JSONL
is_deterministic: true

inputs:
  mmlu_dataset:
    type: string
    optional: false
    enum:
      - anatomy
      - astronomy
      - clinical_knowledge
      - college_biology
      - college_medicine
      - medical_genetics
      - professional_medicine
  output_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the output datasets

outputs:
  output_dataset:
    type: uri_folder
    description: |
      Folder which will contain 'test.jsonl', 'validation.jsonl' and 'dev.jsonl'

code: ./src/

command: >-
  python ./jsonl_mmlu_fetch.py
  --mmlu_dataset ${{ inputs.mmlu_dataset }}
  --output_encoding ${{ inputs.output_encoding }}
  --output_dataset ${{ outputs.output_dataset }}

environment:
  # Will be updated when component uploads
  image: azureml:promptbase_aml@latest

--------------------------------------------------------------------------------
/azureml/components/jsonl_random_examples_component.yaml:
--------------------------------------------------------------------------------
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json

name: jsonl_random_examples
version: 0.0.1pre1
display_name: JSONL Random Examples
type: command
description: |
  Takes two JSONL files, 'input' and 'examples'.
  For each line in the input, selects a random set of examples
  to include in the output key
is_deterministic: true

inputs:
  input_dataset:
    type: uri_file
    optional: false
    description: Dataset containing JSONL input
  input_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the input dataset
  example_dataset:
    type: uri_file
    optional: false
    description: Dataset containing JSONL example data
  example_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the example dataset
  output_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the output dataset
  output_key:
    type: string
    optional: false
    description: Key in which to store the list of examples
  num_examples:
    type: integer
    optional: false
    description: How many examples to select
  random_seed:
    type: integer
    optional: false
    description: Seed for selecting random numbers


outputs:
  output_dataset:
    type: uri_file
    description: JSONL file containing inputs with examples appended


code: ./src/

command: >-
  python ./jsonl_random_examples.py
  --input_dataset ${{ inputs.input_dataset }}
  --input_encoding ${{ inputs.input_encoding }}
  --example_dataset ${{ inputs.example_dataset }}
  --example_encoding ${{ inputs.example_encoding }}
  --output_dataset ${{ outputs.output_dataset }}
  --output_encoding ${{ inputs.output_encoding }}
  --output_key ${{ inputs.output_key }}
  --num_examples ${{ inputs.num_examples }}
  --random_seed ${{ inputs.random_seed }}


environment:
  # Will be updated when component uploads
  image: azureml:promptbase_aml@latest

--------------------------------------------------------------------------------
/azureml/components/jsonl_schema_checker_component.yaml:
--------------------------------------------------------------------------------
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json

name: jsonl_schema_check
display_name: 'JSONL Schema Check'
type: command
description: |
  Checks lines in a JSONL file against a schema, removing those which do not match
is_deterministic: true

inputs:
  input_dataset:
    type: uri_file
    optional: false
    description: Dataset containing JSONL input
  input_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the input dataset
  schema_dataset:
    type: uri_file
    optional: false
    description: Dataset containing a JSON schema file
  schema_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the schema dataset
  forbidden_keys:
    type: string
    optional: false
    default: "[]"
    description: Stringified JSON list of keys which must not appear in the input
  max_errors:
    type: integer
    optional: false
    default: 10
    description: Maximum number of schema errors to tolerate
  output_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the output dataset
  error_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the error dataset


outputs:
  output_dataset:
    type: uri_file
    description: Dataset containing the JSONL lines which matched the schema
  error_dataset:
    type: uri_file
    description: JSONL file containing failed lines

code: ./src

command: >-
  python ./jsonl_schema_check.py
  --input_dataset ${{ inputs.input_dataset }}
  --input_encoding ${{ inputs.input_encoding }}
  --schema_dataset ${{ inputs.schema_dataset }}
  --schema_encoding ${{ inputs.schema_encoding }}
  --forbidden_keys '${{ inputs.forbidden_keys }}'
  --output_dataset ${{ outputs.output_dataset }}
  --output_encoding ${{ inputs.output_encoding }}
  --error_dataset ${{ outputs.error_dataset }}
  --error_encoding ${{ inputs.error_encoding }}
  --max_errors ${{ inputs.max_errors }}

environment:
  # Will be updated when component uploads
  image: azureml:promptbase_aml@latest

--------------------------------------------------------------------------------
/azureml/components/jsonl_score_biosbias_json_component.yaml:
--------------------------------------------------------------------------------
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json

name: jsonl_score_biosbias_json
version: 0.0.1pre1
display_name: JSONL Score BIOSBIAS JSON Component
type: command
description: |
  Takes a JSONL file of results from running the JSON extraction
  test on BIOSBIAS, and computes correct answers.
  Has a lot of hard-coded knowledge
is_deterministic: true

inputs:
  input_dataset:
    type: uri_file
    optional: false
    description: Dataset containing JSONL input
  input_encoding:
    type: string
    optional: false
    default: utf-8-sig
    description: Encoding format of the input dataset
  response_key:
    type: string
    optional: false
    description: Which key contains the answer produced by the model


code: ./src/

command: >-
  python ./jsonl_score_biosbias_json.py
  --input_dataset ${{ inputs.input_dataset }}
  --input_encoding ${{ inputs.input_encoding }}
  --response_key ${{ inputs.response_key }}

environment:
  # Will be updated when component uploads
  image: azureml:promptbase_aml@latest

--------------------------------------------------------------------------------
/azureml/components/jsonl_score_multiplechoice_component.yaml:
--------------------------------------------------------------------------------
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json

name: jsonl_score_multiplechoice
version: 0.0.1pre1
display_name: JSONL Multiple Choice Scorer
type: command
description: |
  Takes a JSONL file of multiple choice questions and correct answers and responses
  from a model, and produces the overall score.
10 | Results are stored in JSON 11 | is_deterministic: true 12 | 13 | inputs: 14 | input_dataset: 15 | type: uri_file 16 | optional: false 17 | description: Dataset containing JSONL input 18 | input_encoding: 19 | type: string 20 | optional: false 21 | default: utf-8-sig 22 | description: Encoding format of the input dataset 23 | correct_key: 24 | type: string 25 | optional: false 26 | description: Which key contains the correct answer 27 | response_key: 28 | type: string 29 | optional: false 30 | description: Which key contains the answer produced by the model 31 | output_encoding: 32 | type: string 33 | optional: false 34 | default: utf-8-sig 35 | description: Encoding format of the output dataset 36 | 37 | outputs: 38 | output_dataset: 39 | type: uri_file 40 | description: JSON file containing score summary 41 | 42 | 43 | code: ./src/ 44 | 45 | command: >- 46 | python ./jsonl_score_multiplechoice.py 47 | --input_dataset ${{ inputs.input_dataset }} 48 | --input_encoding ${{ inputs.input_encoding }} 49 | --output_dataset ${{ outputs.output_dataset }} 50 | --output_encoding ${{ inputs.output_encoding }} 51 | --correct_key ${{ inputs.correct_key }} 52 | --response_key ${{ inputs.response_key }} 53 | 54 | environment: 55 | # Will be updated when component uploads 56 | image: azureml:promptbase_aml@latest -------------------------------------------------------------------------------- /azureml/components/jsonl_to_json_component.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | type: command 3 | 4 | name: jsonl_to_json 5 | display_name: 'JSONL to JSON' 6 | description: Convert a JSONL file to JSON 7 | is_deterministic: true 8 | 9 | inputs: 10 | input_dataset: 11 | type: uri_folder 12 | optional: false 13 | description: | 14 | The source JSONL file 15 | input_encoding: 16 | type: string 17 | optional: false 18 | default: utf-8-sig 19 | description: Encoding format of the input dataset 20 | output_encoding: 21 | type: string 22 | optional: false 23 | default: utf-8-sig 24 | description: Encoding format of the output dataset 25 | 26 | outputs: 27 | output_dataset: 28 | type: uri_file 29 | description: The converted JSON file 30 | 31 | 32 | code: ./src/ 33 | 34 | command: >- 35 | python ./jsonl_to_json.py 36 | --input_dataset ${{ inputs.input_dataset }} 37 | --input_encoding ${{ inputs.input_encoding }} 38 | --output_dataset ${{ outputs.output_dataset }} 39 | --output_encoding ${{ inputs.output_encoding }} 40 | 41 | environment: 42 | # Will be updated when component uploads 43 | image: azureml:guidance_aml_env@latest -------------------------------------------------------------------------------- /azureml/components/src/jsonl_embeddings_aoai.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pathlib 3 | 4 | from urllib.parse import urlparse, parse_qs 5 | 6 | 7 | from azure.identity import DefaultAzureCredential, get_bearer_token_provider 8 | 9 | from openai import AzureOpenAI 10 | 11 | from aether_utils.jsonl_utils_multiprocessing import line_map_mp, ItemMapper 12 | from aether_utils.logging_utils import get_standard_logger_for_file 13 | 14 | 15 | _logger = get_standard_logger_for_file(__file__) 16 | 17 | 18 | def parse_args(): 19 | parser = argparse.ArgumentParser(add_help=True) 20 | 21 | # Information about the datasets 22 | datasets_group = parser.add_argument_group("Datasets") 23 | 
    datasets_group.add_argument("--input_dataset", type=pathlib.Path, required=True)
24 |     datasets_group.add_argument("--input_encoding", type=str, required=True)
25 |     datasets_group.add_argument("--output_dataset", type=pathlib.Path, required=True)
26 |     datasets_group.add_argument("--output_encoding", type=str, required=True)
27 |     datasets_group.add_argument("--error_dataset", type=pathlib.Path, required=True)
28 |     datasets_group.add_argument("--error_encoding", type=str, required=True)
29 | 
30 |     # Processing configuration
31 |     processing_group = parser.add_argument_group("Processing configuration")
32 |     processing_group.add_argument("--workers", type=int, required=True)
33 |     processing_group.add_argument("--max_errors", type=int, required=True)
34 | 
35 |     # Information about the embeddings model endpoint
36 |     model_group = parser.add_argument_group("Model Endpoint")
37 |     model_group.add_argument("--azure_openai_endpoint", type=str, required=True)
38 | 
39 |     # Information about the keys
40 |     keys_group = parser.add_argument_group("JSON Keys")
41 |     keys_group.add_argument("--source_key", type=str, required=True)
42 |     keys_group.add_argument("--destination_key", type=str, required=True)
43 | 
44 |     args = parser.parse_args()
45 |     return args
46 | 
47 | 
48 | class AOAIEmbedder(ItemMapper):
49 |     def __init__(self, endpoint: str, src_key: str, dst_key: str):
50 |         super().__init__()
51 |         self._endpoint = endpoint
52 |         self._src_key = src_key
53 |         self._dst_key = dst_key
54 | 
55 |     def start_up(self, worker_id: int) -> None:
56 |         _logger.info(f"Starting up {worker_id}")
57 |         self._azure_credential = DefaultAzureCredential()
58 | 
59 |     def _get_aoai_client(self) -> AzureOpenAI:
60 |         token_provider = get_bearer_token_provider(
61 |             self._azure_credential, "https://cognitiveservices.azure.com/.default"
62 |         )
63 |         assert token_provider is not None
64 | 
65 |         # The endpoint is expected to embed the deployment name and api-version
66 |         # (see configs/aoai_config_template.yaml), so parse them out of the URL
67 |         parsed_url = urlparse(self._endpoint)
68 |         parsed_query = parse_qs(parsed_url.query)
69 | 
70 |         client = AzureOpenAI(
71 |             azure_endpoint=self._endpoint,
72 |             azure_ad_token_provider=token_provider,
73 |             api_version=parsed_query["api-version"][0],  # parse_qs returns a list per key
74 |         )
75 |         return client
76 | 
77 |     def map(self, item: dict[str, any]) -> dict[str, any] | None:
78 |         _logger.debug(f"map: {item}")
79 | 
80 |         client = self._get_aoai_client()
81 | 
82 |         parsed_url = urlparse(self._endpoint)
83 |         deployment_name = parsed_url.path.split("/")[3]  # /openai/deployments/<name>/embeddings
84 |         _logger.debug(f"Got Deployment: {deployment_name}")
85 | 
86 |         embeddings = (
87 |             client.embeddings.create(input=[item[self._src_key]], model=deployment_name)
88 |             .data[0]
89 |             .embedding
90 |         )
91 | 
92 |         _logger.debug("Updating item")
93 |         item[self._dst_key] = embeddings
94 | 
95 |         return item
96 | 
97 | 
98 | def main():
99 |     args = parse_args()
100 | 
101 |     # Bind arguments to the processor function
102 |     processor = AOAIEmbedder(
103 |         src_key=args.source_key,
104 |         dst_key=args.destination_key,
105 |         endpoint=args.azure_openai_endpoint,
106 |     )
107 | 
108 |     # Run the processing
109 |     line_map_mp(
110 |         mapper=processor,
111 |         source_file=args.input_dataset,
112 |         dest_file=args.output_dataset,
113 |         source_encoding=args.input_encoding,
114 |         dest_encoding=args.output_encoding,
115 |         error_file=args.error_dataset,
116 |         error_encoding=args.error_encoding,
117 |         n_worker_tasks=args.workers,
118 |         max_errors=args.max_errors,
119 |     )
120 | 
121 |     _logger.info("Complete")
122 | 
123 | 
124 | if __name__ == "__main__":
125 |     main()
126 | 
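A minimal, self-contained sketch (not part of the repository) of the endpoint parsing that AOAIEmbedder relies on. It assumes the embeddings endpoint follows the format documented in pipelines/configs/aoai_config_template.yaml; the resource and deployment names below are hypothetical:

from urllib.parse import urlparse, parse_qs

endpoint = (
    "https://myresource.openai.azure.com/openai/deployments"
    "/my-embedding-deployment/embeddings?api-version=2023-05-15"
)
parsed_url = urlparse(endpoint)

# path is '/openai/deployments/my-embedding-deployment/embeddings', so
# split('/') yields ['', 'openai', 'deployments', 'my-embedding-deployment', 'embeddings']
assert parsed_url.path.split("/")[3] == "my-embedding-deployment"

# parse_qs maps each query key to a *list* of values, hence the [0]
assert parse_qs(parsed_url.query)["api-version"][0] == "2023-05-15"

Because parse_qs returns a list for every query key, the client construction in _get_aoai_client has to index with [0] to obtain the api-version as a string.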
-------------------------------------------------------------------------------- /azureml/components/src/jsonl_filter_correct_multiplechoice.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import functools 3 | import pathlib 4 | 5 | from aether_utils.jsonl_utils import line_map 6 | from aether_utils.logging_utils import get_standard_logger_for_file 7 | 8 | _logger = get_standard_logger_for_file(__file__) 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser(add_help=True) 13 | 14 | # Information about the ports 15 | ports_group = parser.add_argument_group("Ports") 16 | ports_group.add_argument("--input_dataset", type=pathlib.Path, required=True) 17 | ports_group.add_argument("--input_encoding", type=str, required=True) 18 | ports_group.add_argument("--output_dataset", type=pathlib.Path, required=True) 19 | ports_group.add_argument("--output_encoding", type=str, required=True) 20 | 21 | # Information about the keys 22 | keys_group = parser.add_argument_group("Keys") 23 | keys_group.add_argument("--correct_key", type=str, required=True) 24 | keys_group.add_argument("--response_key", type=str, required=True) 25 | 26 | args = parser.parse_args() 27 | 28 | return args 29 | 30 | 31 | def process_item( 32 | item: dict[str, any], *, correct_key: str, response_key: str 33 | ) -> dict[str, any]: 34 | result = None 35 | if item[correct_key] == item[response_key]: 36 | result = item 37 | return result 38 | 39 | 40 | def main(): 41 | args = parse_args() 42 | 43 | processor = functools.partial( 44 | process_item, correct_key=args.correct_key, response_key=args.response_key 45 | ) 46 | 47 | s, f = line_map( 48 | map_func=processor, 49 | source_file=args.input_dataset, 50 | dest_file=args.output_dataset, 51 | source_encoding=args.input_encoding, 52 | dest_encoding=args.output_encoding, 53 | ) 54 | _logger.info(f"Complete with {s} successes and {f} failures") 55 | 56 | 57 | if __name__ == "__main__": 58 | main() 59 | -------------------------------------------------------------------------------- /azureml/components/src/jsonl_guidance_phi2.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import importlib.util 3 | import json 4 | import pathlib 5 | 6 | from typing import Any, Callable, Dict 7 | 8 | import guidance 9 | 10 | import torch 11 | from transformers import AutoModelForCausalLM, AutoTokenizer 12 | 13 | from aether_utils.jsonl_utils import line_map 14 | from aether_utils.logging_utils import get_standard_logger_for_file 15 | 16 | 17 | _logger = get_standard_logger_for_file(__file__) 18 | 19 | USER_MODULE = "user_module" 20 | GUIDANCE_FUNCTION = "guidance_generation" 21 | 22 | 23 | def parse_args(): 24 | parser = argparse.ArgumentParser(add_help=True) 25 | 26 | # Information about the datasets 27 | datasets_group = parser.add_argument_group("Datasets") 28 | datasets_group.add_argument("--input_dataset", type=pathlib.Path, required=True) 29 | datasets_group.add_argument("--input_encoding", type=str, required=True) 30 | datasets_group.add_argument("--output_dataset", type=pathlib.Path, required=True) 31 | datasets_group.add_argument("--output_encoding", type=str, required=True) 32 | datasets_group.add_argument("--error_dataset", type=pathlib.Path, required=True) 33 | datasets_group.add_argument("--error_encoding", type=str, required=True) 34 | datasets_group.add_argument( 35 | "--common_dataset", type=pathlib.Path, required=False, default=None 36 | ) 37 | 
datasets_group.add_argument("--common_encoding", type=str, required=False) 38 | 39 | # Information about the guidance program 40 | parser.add_argument("--guidance_program", type=pathlib.Path, required=True) 41 | 42 | args = parser.parse_args() 43 | return args 44 | 45 | 46 | class Phi2Processor: 47 | def __init__( 48 | self, 49 | program_path, 50 | model: guidance.models.Model, 51 | common_data: dict[str, any] | None, 52 | ): 53 | self._program_path = program_path 54 | self._model = model 55 | self._guidance_function = self._get_guidance_function() 56 | self._common_data = common_data 57 | 58 | def __call__(self, item: Dict[str, Any]) -> dict[str, any]: 59 | _logger.debug(f"__call__: {item}") 60 | result = self._guidance_function(self._model, item, common=self._common_data) 61 | _logger.debug(f"Checking keys") 62 | for k in result.keys(): 63 | assert k not in item, f"Duplicate key: {k}" 64 | 65 | _logger.debug(f"Updating item") 66 | item.update(**result) 67 | 68 | return item 69 | 70 | def _get_guidance_function( 71 | self, 72 | ) -> Callable[[Dict[str, Any]], Dict[str, Any]]: 73 | _logger.debug("Importing guidance file") 74 | spec = importlib.util.spec_from_file_location(USER_MODULE, self._program_path) 75 | module_definition = importlib.util.module_from_spec(spec) 76 | spec.loader.exec_module(module_definition) 77 | 78 | guidance_func = getattr(module_definition, GUIDANCE_FUNCTION) 79 | 80 | return guidance_func 81 | 82 | 83 | def main(): 84 | args = parse_args() 85 | 86 | # Load the common data (if required) 87 | common_data = None 88 | if args.common_dataset is not None: 89 | _logger.info("Loading common dataset") 90 | with open(args.common_dataset, "r", encoding=args.common_encoding) as jf: 91 | common_data = json.load(jf) 92 | else: 93 | _logger.info("No common dataset present") 94 | 95 | torch.set_default_device("cuda") 96 | guidance_model = guidance.models.Transformers( 97 | "microsoft/phi-2", 98 | device_map="cuda:0", 99 | echo=False, 100 | trust_remote_code=True, 101 | ) 102 | _logger.info(f"guidance_model.device: {guidance_model.engine.device}") 103 | 104 | processor = Phi2Processor( 105 | program_path=args.guidance_program, 106 | model=guidance_model, 107 | common_data=common_data, 108 | ) 109 | 110 | s, f = line_map( 111 | map_func=processor, 112 | source_file=args.input_dataset, 113 | dest_file=args.output_dataset, 114 | source_encoding=args.input_encoding, 115 | dest_encoding=args.output_encoding, 116 | ) 117 | 118 | _logger.info(f"Complete with {s} successes and {f} failures") 119 | 120 | 121 | if __name__ == "__main__": 122 | main() 123 | -------------------------------------------------------------------------------- /azureml/components/src/jsonl_key_filter.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import functools 3 | import pathlib 4 | 5 | from typing import Any, Dict, List 6 | 7 | from aether_utils.argparse_utils import json_loads_fixer 8 | from aether_utils.jsonl_utils import line_map 9 | from aether_utils.logging_utils import get_standard_logger_for_file 10 | 11 | _logger = get_standard_logger_for_file(__file__) 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser(add_help=True) 16 | 17 | # Information about the datasets 18 | datasets_group = parser.add_argument_group("Datasets") 19 | datasets_group.add_argument("--input_dataset", type=pathlib.Path, required=True) 20 | datasets_group.add_argument("--input_encoding", type=str, required=True) 21 | 
datasets_group.add_argument("--output_dataset", type=pathlib.Path, required=True) 22 | datasets_group.add_argument("--output_encoding", type=str, required=True) 23 | 24 | # Filtering config 25 | filtering_group = parser.add_mutually_exclusive_group(required=True) 26 | filtering_group.add_argument( 27 | "--keep_keys", 28 | type=json_loads_fixer, 29 | default=[], 30 | help="JSON list of keys to keep", 31 | ) 32 | filtering_group.add_argument( 33 | "--drop_keys", 34 | type=json_loads_fixer, 35 | default=[], 36 | help="JSON list of keys to drop", 37 | ) 38 | 39 | args = parser.parse_args() 40 | return args 41 | 42 | 43 | def process_item( 44 | item: Dict[str, Any], *, keep: List[str], drop: List[str] 45 | ) -> Dict[str, Any]: 46 | result = dict() 47 | 48 | if len(keep) > 0: 49 | _logger.info("Processing keeps") 50 | for k in keep: 51 | result[k] = item[k] 52 | elif len(drop) > 0: 53 | _logger.info("Processing drops") 54 | for k, v in item.items(): 55 | assert k in item, f"Key {k} not in original!" 56 | if k not in drop: 57 | result[k] = v 58 | else: 59 | raise ValueError("Shouldn't get here") 60 | 61 | return result 62 | 63 | 64 | def main(): 65 | args = parse_args() 66 | 67 | # Exclusivity taken care of by add_mutually_exclusive_group 68 | assert ( 69 | len(args.keep_keys) > 0 or len(args.drop_keys) > 0 70 | ), "Must either keep or drop something!" 71 | 72 | processor = functools.partial( 73 | process_item, keep=args.keep_keys, drop=args.drop_keys 74 | ) 75 | 76 | line_map( 77 | map_func=processor, 78 | source_file=args.input_dataset, 79 | dest_file=args.output_dataset, 80 | source_encoding=args.input_encoding, 81 | dest_encoding=args.output_encoding, 82 | ) 83 | 84 | 85 | if __name__ == "__main__": 86 | main() 87 | -------------------------------------------------------------------------------- /azureml/components/src/jsonl_key_rename.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import functools 3 | import pathlib 4 | 5 | from typing import Any, Dict, List 6 | 7 | from aether_utils.argparse_utils import json_loads_fixer 8 | from aether_utils.jsonl_utils import line_map 9 | from aether_utils.logging_utils import get_standard_logger_for_file 10 | 11 | 12 | _logger = get_standard_logger_for_file(__file__) 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser(add_help=True) 17 | 18 | # Information about the datasets 19 | datasets_group = parser.add_argument_group("Datasets") 20 | datasets_group.add_argument("--input_dataset", type=pathlib.Path, required=True) 21 | datasets_group.add_argument("--input_encoding", type=str, required=True) 22 | datasets_group.add_argument("--output_dataset", type=pathlib.Path, required=True) 23 | datasets_group.add_argument("--output_encoding", type=str, required=True) 24 | 25 | # Renaming config 26 | parser.add_argument("--rename_keys", type=json_loads_fixer, required=True) 27 | 28 | args = parser.parse_args() 29 | return args 30 | 31 | 32 | def process_item(item: Dict[str, Any], *, rename: Dict[str, str]) -> Dict[str, Any]: 33 | result = dict() 34 | 35 | _logger.info("Processing renames") 36 | for k in item: 37 | if k in rename: 38 | result[rename[k]] = item[k] 39 | else: 40 | result[k] = item[k] 41 | return result 42 | 43 | 44 | def main(): 45 | args = parse_args() 46 | 47 | assert len(args.rename_keys) > 0, "Must rename at least one key!" 
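    # For illustration (hypothetical key names): --rename_keys '{"answer": "correct_answer"}'
    # turns {"answer": 1, "q": "..."} into {"correct_answer": 1, "q": "..."} on every line.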
48 | 49 | processor = functools.partial(process_item, rename=args.rename_keys) 50 | line_map( 51 | map_func=processor, 52 | source_file=args.input_dataset, 53 | dest_file=args.output_dataset, 54 | source_encoding=args.input_encoding, 55 | dest_encoding=args.output_encoding, 56 | ) 57 | 58 | 59 | if __name__ == "__main__": 60 | main() 61 | -------------------------------------------------------------------------------- /azureml/components/src/jsonl_knn_cosine_similarity.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import functools 3 | import pathlib 4 | 5 | import numpy as np 6 | 7 | 8 | from aether_utils.jsonl_file_utils import load_jsonl 9 | from aether_utils.jsonl_utils import line_map 10 | from aether_utils.logging_utils import get_standard_logger_for_file 11 | 12 | 13 | _logger = get_standard_logger_for_file(__file__) 14 | 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser(add_help=True) 18 | 19 | # Information about the datasets 20 | datasets_group = parser.add_argument_group("Datasets") 21 | datasets_group.add_argument("--input_dataset", type=pathlib.Path, required=True) 22 | datasets_group.add_argument("--input_encoding", type=str, required=True) 23 | datasets_group.add_argument("--output_dataset", type=pathlib.Path, required=True) 24 | datasets_group.add_argument("--output_encoding", type=str, required=True) 25 | datasets_group.add_argument("--example_dataset", type=pathlib.Path, required=True) 26 | datasets_group.add_argument("--example_encoding", type=str, required=True) 27 | 28 | # Information about keys 29 | key_group = parser.add_argument_group("Keys") 30 | key_group.add_argument("--input_vector_key", type=str, required=True) 31 | key_group.add_argument("--example_vector_key", type=str, required=True) 32 | key_group.add_argument("--output_key", type=str, required=True) 33 | 34 | # Information about the algorithm 35 | algo_group = parser.add_argument_group("Algorithm") 36 | algo_group.add_argument("--k_nearest", type=int, required=True) 37 | 38 | args = parser.parse_args() 39 | return args 40 | 41 | 42 | def compute_knn( 43 | item: dict[str, any], 44 | *, 45 | examples: list[dict[str, any]], 46 | example_embedding_matrix: np.ndarray, 47 | input_vector_key: str, 48 | output_key: str, 49 | k_nearest: int, 50 | ) -> dict[str, any]: 51 | _logger.debug(f"process_item: {item}") 52 | 53 | item_embedding = np.asarray(item[input_vector_key]) 54 | _logger.debug(f"Item embedding {item_embedding.dtype} {item_embedding.shape}") 55 | 56 | similarities = np.matmul(example_embedding_matrix, item_embedding) 57 | # np.argsort is ascending, so we need to reverse 58 | sorted_indices = list(reversed(np.argsort(similarities).tolist())) 59 | top_k_indices = sorted_indices[0:k_nearest] 60 | _logger.debug(f"k nearest: {top_k_indices}") 61 | k_examples = [] 62 | for k in top_k_indices: 63 | k_examples.append(examples[k]) 64 | item[output_key] = k_examples 65 | del item[input_vector_key] 66 | 67 | return item 68 | 69 | 70 | def normalised_vector(input: list[float]) -> np.ndarray: 71 | result = np.asarray(input) 72 | result = result / np.linalg.norm(result) 73 | 74 | return result 75 | 76 | 77 | def main(): 78 | args = parse_args() 79 | 80 | example_data = load_jsonl(args.example_dataset, args.example_encoding) 81 | example_embedding_matrix = np.stack( 82 | [normalised_vector(e[args.example_vector_key]) for e in example_data], axis=0 83 | ) 84 | _logger.info( 85 | f"Embedding Matrix: {example_embedding_matrix.dtype} 
{example_embedding_matrix.shape}" 86 | ) 87 | 88 | # Remove the vectors 89 | for e in example_data: 90 | del e[args.example_vector_key] 91 | 92 | # Construct the mapping function 93 | processor = functools.partial( 94 | compute_knn, 95 | examples=example_data, 96 | example_embedding_matrix=example_embedding_matrix, 97 | input_vector_key=args.input_vector_key, 98 | output_key=args.output_key, 99 | k_nearest=args.k_nearest, 100 | ) 101 | 102 | s, f = line_map( 103 | map_func=processor, 104 | source_file=args.input_dataset, 105 | source_encoding=args.input_encoding, 106 | dest_file=args.output_dataset, 107 | dest_encoding=args.output_encoding, 108 | ) 109 | 110 | _logger.info(f"Complete with {s} successes and {f} failures") 111 | 112 | 113 | if __name__ == "__main__": 114 | main() 115 | -------------------------------------------------------------------------------- /azureml/components/src/jsonl_mmlu_fetch.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pathlib 3 | 4 | from typing import Any 5 | 6 | import datasets 7 | 8 | from aether_utils.jsonl_file_utils import JSONLWriter 9 | from aether_utils.logging_utils import get_standard_logger_for_file 10 | 11 | _logger = get_standard_logger_for_file(__file__) 12 | 13 | MMLU_DATASETS = [ 14 | "abstract_algebra", 15 | "anatomy", 16 | "astronomy", 17 | "business_ethics", 18 | "clinical_knowledge", 19 | "college_biology", 20 | "college_chemistry", 21 | "college_computer_science", 22 | "college_mathematics", 23 | "college_medicine", 24 | "college_physics", 25 | "computer_security", 26 | "conceptual_physics", 27 | "econometrics", 28 | "electrical_engineering", 29 | "elementary_mathematics", 30 | "formal_logic", 31 | "global_facts", 32 | "high_school_biology", 33 | "high_school_chemistry", 34 | "high_school_computer_science", 35 | "high_school_european_history", 36 | "high_school_geography", 37 | "high_school_government_and_politics", 38 | "high_school_macroeconomics", 39 | "high_school_mathematics", 40 | "high_school_microeconomics", 41 | "high_school_physics", 42 | "high_school_psychology", 43 | "high_school_statistics", 44 | "high_school_us_history", 45 | "high_school_world_history", 46 | "human_aging", 47 | "human_sexuality", 48 | "international_law", 49 | "jurisprudence", 50 | "logical_fallacies", 51 | "machine_learning", 52 | "management", 53 | "marketing", 54 | "medical_genetics", 55 | "miscellaneous", 56 | "moral_disputes", 57 | "moral_scenarios", 58 | "nutrition", 59 | "philosophy", 60 | "prehistory", 61 | "professional_accounting", 62 | "professional_law", 63 | "professional_medicine", 64 | "professional_psychology", 65 | "public_relations", 66 | "security_studies", 67 | "sociology", 68 | "us_foreign_policy", 69 | "virology", 70 | "world_religions", 71 | ] 72 | 73 | DATASET_OPTIONS = [*MMLU_DATASETS, "all_medicine_datasets", "all_mmlu_datasets"] 74 | 75 | SPLITS = ["test", "validation", "dev"] 76 | 77 | 78 | def parse_args(): 79 | parser = argparse.ArgumentParser(add_help=True) 80 | 81 | # Information about the ports 82 | ports_group = parser.add_argument_group("Ports") 83 | ports_group.add_argument("--output_dataset", type=pathlib.Path, required=True) 84 | ports_group.add_argument("--output_encoding", type=str, required=True) 85 | 86 | parser.add_argument( 87 | "--mmlu_dataset", type=str, choices=DATASET_OPTIONS, required=True 88 | ) 89 | 90 | args = parser.parse_args() 91 | return args 92 | 93 | 94 | def process_data_split(data, subject: str) -> list[dict[str, Any]]: 95 | 
all_questions = [] 96 | for line in data: 97 | nxt = dict( 98 | dataset="mmlu", 99 | subject=subject, 100 | question=line["question"], 101 | choices=line["choices"], 102 | correct_answer=line["answer"], 103 | ) 104 | all_questions.append(nxt) 105 | 106 | return all_questions 107 | 108 | 109 | def main(): 110 | args = parse_args() 111 | _logger.info(f"Fetching {args.mmlu_dataset}") 112 | 113 | if args.mmlu_dataset == "all_medicine_datasets": 114 | target_datasets = [ 115 | "anatomy", 116 | "clinical_knowledge", 117 | "college_biology", 118 | "college_medicine", 119 | "medical_genetics", 120 | "professional_medicine", 121 | ] 122 | elif args.mmlu_dataset == "all_mmlu_datasets": 123 | target_datasets = MMLU_DATASETS 124 | else: 125 | target_datasets = [args.mmlu_dataset] 126 | 127 | jsonl_writers: dict[str, JSONLWriter] = dict() 128 | for split in SPLITS: 129 | nxt_writer = JSONLWriter( 130 | args.output_dataset / f"{split}.jsonl", args.output_encoding 131 | ) 132 | nxt_writer.__enter__() 133 | jsonl_writers[split] = nxt_writer 134 | 135 | for nxt_ds in target_datasets: 136 | _logger.info(f"Processing dataset {nxt_ds}") 137 | # Note that tasksource skips the huge 'train' file 138 | hf_data = datasets.load_dataset("tasksource/mmlu", nxt_ds) 139 | 140 | for split in SPLITS: 141 | _logger.info(f"Extracting split {split}") 142 | extracted_data = process_data_split(hf_data[split], subject=nxt_ds) 143 | _logger.info(f"Saving split {split}") 144 | for line in extracted_data: 145 | jsonl_writers[split].write_line(line) 146 | 147 | _logger.info("Closing JSONL files") 148 | for v in jsonl_writers.values(): 149 | v.__exit__() 150 | 151 | _logger.info("Complete") 152 | 153 | 154 | if __name__ == "__main__": 155 | main() 156 | -------------------------------------------------------------------------------- /azureml/components/src/jsonl_random_examples.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import functools 3 | import pathlib 4 | import random 5 | 6 | 7 | from aether_utils.jsonl_file_utils import load_jsonl 8 | from aether_utils.jsonl_utils import line_map 9 | from aether_utils.logging_utils import get_standard_logger_for_file 10 | 11 | 12 | _logger = get_standard_logger_for_file(__file__) 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser(add_help=True) 17 | 18 | # Information about the datasets 19 | datasets_group = parser.add_argument_group("Datasets") 20 | datasets_group.add_argument("--input_dataset", type=pathlib.Path, required=True) 21 | datasets_group.add_argument("--input_encoding", type=str, required=True) 22 | datasets_group.add_argument("--output_dataset", type=pathlib.Path, required=True) 23 | datasets_group.add_argument("--output_encoding", type=str, required=True) 24 | datasets_group.add_argument("--example_dataset", type=pathlib.Path, required=True) 25 | datasets_group.add_argument("--example_encoding", type=str, required=True) 26 | 27 | # Information about keys 28 | key_group = parser.add_argument_group("Keys") 29 | key_group.add_argument("--output_key", type=str, required=True) 30 | 31 | # Information about the algorithm 32 | algo_group = parser.add_argument_group("Algorithm") 33 | algo_group.add_argument("--num_examples", type=int, required=True) 34 | algo_group.add_argument("--random_seed", type=int, required=True) 35 | 36 | args = parser.parse_args() 37 | return args 38 | 39 | 40 | def select_examples( 41 | item: dict[str, any], 42 | *, 43 | examples: list[dict[str, any]], 44 | num_examples: int, 
45 |     output_key: str,
46 | ) -> dict[str, any]:
47 |     # Note that random.sample() selects _without_ replacement
48 |     selected_examples = random.sample(examples, num_examples)
49 |     item[output_key] = selected_examples
50 |     return item
51 | 
52 | 
53 | def main():
54 |     args = parse_args()
55 | 
56 |     example_data = load_jsonl(args.example_dataset, args.example_encoding)
57 |     _logger.info("Loaded example file")
58 |     random.seed(args.random_seed)
59 | 
60 |     # Construct the mapping function
61 |     processor = functools.partial(
62 |         select_examples,
63 |         examples=example_data,
64 |         output_key=args.output_key,
65 |         num_examples=args.num_examples,
66 |     )
67 | 
68 |     s, f = line_map(
69 |         map_func=processor,
70 |         source_file=args.input_dataset,
71 |         source_encoding=args.input_encoding,
72 |         dest_file=args.output_dataset,
73 |         dest_encoding=args.output_encoding,
74 |     )
75 | 
76 |     _logger.info(f"Complete with {s} successes and {f} failures")
77 | 
78 | 
79 | if __name__ == "__main__":
80 |     main()
81 | 
--------------------------------------------------------------------------------
/azureml/components/src/jsonl_schema_check.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import functools
3 | import json
4 | import pathlib
5 | 
6 | from typing import Any, Dict, List
7 | 
8 | from jsonschema.protocols import Validator
9 | from jsonschema.validators import Draft202012Validator
10 | 
11 | from aether_utils.argparse_utils import json_loads_fixer
12 | from aether_utils.jsonl_utils import line_map
13 | from aether_utils.logging_utils import get_standard_logger_for_file
14 | 
15 | _logger = get_standard_logger_for_file(__file__)
16 | 
17 | 
18 | def parse_args():
19 |     parser = argparse.ArgumentParser(add_help=True)
20 | 
21 |     # Information about the datasets
22 |     datasets_group = parser.add_argument_group("Datasets")
23 |     datasets_group.add_argument("--input_dataset", type=pathlib.Path, required=True)
24 |     datasets_group.add_argument("--input_encoding", type=str, required=True)
25 |     datasets_group.add_argument("--schema_dataset", type=pathlib.Path, required=True)
26 |     datasets_group.add_argument("--schema_encoding", type=str, required=True)
27 |     datasets_group.add_argument("--output_dataset", type=pathlib.Path, required=True)
28 |     datasets_group.add_argument("--output_encoding", type=str, required=True)
29 |     datasets_group.add_argument("--error_dataset", type=pathlib.Path, required=True)
30 |     datasets_group.add_argument("--error_encoding", type=str, required=True)
31 | 
32 |     # Forbidden keys
33 |     parser.add_argument("--forbidden_keys", type=json_loads_fixer, required=True)
34 | 
35 |     # Maximum error count
36 |     parser.add_argument("--max_errors", type=int, required=True)
37 | 
38 |     args = parser.parse_args()
39 |     return args
40 | 
41 | 
42 | def process_item(
43 |     item: Dict[str, Any], *, json_validator: Validator, forbidden_keys: list[str]
44 | ) -> Dict[str, Any]:
45 |     for k in forbidden_keys:
46 |         assert k not in item, f"Key {k} not allowed"
47 | 
48 |     json_validator.validate(item)
49 | 
50 |     return item
51 | 
52 | 
53 | def main():
54 |     args = parse_args()
55 | 
56 |     # Load in the JSON schema
57 |     with open(args.schema_dataset, "r", encoding=args.schema_encoding) as sf:
58 |         json_schema = json.load(sf)
59 | 
60 |     # Check the schema
61 |     Draft202012Validator.check_schema(json_schema)
62 | 
63 |     # Create the validator object
64 |     validator = Draft202012Validator(schema=json_schema)
65 | 
66 |     processor = functools.partial(
67 |         process_item, json_validator=validator, forbidden_keys=args.forbidden_keys
68 |     )
69 | 
70 |     line_map(
71 |         map_func=processor,
72 |         source_file=args.input_dataset,
73 |         dest_file=args.output_dataset,
74 |         source_encoding=args.input_encoding,
75 |         dest_encoding=args.output_encoding,
76 |         error_file=args.error_dataset,
77 |         error_encoding=args.error_encoding,
78 |         max_errors=args.max_errors,
79 |     )
80 |     _logger.info("Complete")
81 | 
82 | 
83 | if __name__ == "__main__":
84 |     main()
85 | 
--------------------------------------------------------------------------------
/azureml/components/src/jsonl_score_biosbias_json.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import functools
3 | import json
4 | import pathlib
5 | 
6 | from typing import Any
7 | 
8 | import mlflow
9 | import sklearn.metrics as skm
10 | 
11 | from aether_utils.jsonl_utils import line_reduce
12 | from aether_utils.logging_utils import get_standard_logger_for_file
13 | 
14 | _logger = get_standard_logger_for_file(__file__)
15 | 
16 | 
17 | class Scorer:
18 |     def __init__(self, response_key: str):
19 |         self.total_count = 0
20 |         self.good_json_count = 0
21 |         self.json_keys_count = 0
22 |         self.correct_name_count = 0
23 |         self.correct_occupation_count = 0
24 |         self.response_key = response_key
25 | 
26 |     def __call__(self, line: dict[str, Any]):
27 |         self.total_count += 1
28 |         response_answer = line[self.response_key]
29 |         try:
30 |             decoded_response = json.loads(response_answer)
31 |             self.good_json_count += 1
32 | 
33 |             EXPECTED_KEYS = ["name", "occupation"]
34 | 
35 |             if all(k in decoded_response for k in EXPECTED_KEYS):
36 |                 self.json_keys_count += 1
37 | 
38 |             if self.fuzzy_string_match(
39 |                 generated=decoded_response["name"], target=line["entity"]
40 |             ):
41 |                 self.correct_name_count += 1
42 |             if self.fuzzy_string_match(
43 |                 generated=decoded_response["occupation"], target=line["target_mediated"]
44 |             ):
45 |                 self.correct_occupation_count += 1
46 |         except Exception:  # unparseable JSON or missing keys simply don't count
47 |             pass
48 | 
49 |     def fuzzy_string_match(self, *, target: str, generated: str) -> bool:
50 |         # I believe that this is the ultimate comparison done by:
51 |         # https://github.com/QingruZhang/PASTA/blob/b28e6307896df9f91c282ecf0201fa7bebdad0d6/evaluation/evaluator.py#L233
52 |         return target.lower() in generated.lower()
53 | 
54 |     def generate_summary(self) -> dict[str, Any]:
55 |         result = dict()
56 |         result["metrics"] = dict()
57 | 
58 |         result["metrics"]["total"] = self.total_count
59 |         result["metrics"]["good_json"] = self.good_json_count
60 |         result["metrics"]["json_keys"] = self.json_keys_count
61 |         result["metrics"]["correct_name"] = self.correct_name_count
62 |         result["metrics"]["correct_occupation"] = self.correct_occupation_count
63 |         return result
64 | 
65 | 
66 | def parse_args():
67 |     parser = argparse.ArgumentParser(add_help=True)
68 | 
69 |     # Information about the ports
70 |     ports_group = parser.add_argument_group("Ports")
71 |     ports_group.add_argument("--input_dataset", type=pathlib.Path, required=True)
72 |     ports_group.add_argument("--input_encoding", type=str, required=True)
73 | 
74 |     # Information about the keys
75 |     keys_group = parser.add_argument_group("Keys")
76 |     keys_group.add_argument("--response_key", type=str, required=True)
77 | 
78 |     args = parser.parse_args()
79 | 
80 |     return args
81 | 
82 | 
83 | def main():
84 |     args = parse_args()
85 | 
86 |     scorer = Scorer(response_key=args.response_key)
87 |     line_reduce(
88 |         reducer=scorer,
89 |         source_file=args.input_dataset,
90 |         source_encoding=args.input_encoding,
91 |     )
92 |     summary = 
scorer.generate_summary() 93 | 94 | _logger.info("Logging with mlflow") 95 | mlflow.log_metrics(summary["metrics"]) 96 | 97 | 98 | if __name__ == "__main__": 99 | main() 100 | -------------------------------------------------------------------------------- /azureml/components/src/jsonl_score_multiplechoice.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import functools 3 | import json 4 | import pathlib 5 | 6 | from typing import Any 7 | 8 | import fairlearn.metrics as flm 9 | import mlflow 10 | import sklearn.metrics as skm 11 | 12 | from aether_utils.jsonl_utils import line_reduce 13 | from aether_utils.logging_utils import get_standard_logger_for_file 14 | 15 | _logger = get_standard_logger_for_file(__file__) 16 | 17 | 18 | class Scorer: 19 | def __init__(self, correct_key: str, response_key: str): 20 | self.y_true = [] 21 | self.y_pred = [] 22 | self.dataset = [] 23 | self.subject = [] 24 | self.correct_key = correct_key 25 | self.response_key = response_key 26 | 27 | def __call__(self, line: dict[str, Any]): 28 | correct_answer = line[self.correct_key] 29 | response_answer = line[self.response_key] 30 | self.y_true.append(correct_answer) 31 | self.y_pred.append(response_answer) 32 | if "dataset" in line: 33 | self.dataset.append(line["dataset"]) 34 | else: 35 | self.dataset.append("No dataset") 36 | if "subject" in line: 37 | self.subject.append(line["subject"]) 38 | else: 39 | self.subject.append("No subject") 40 | 41 | def generate_summary(self) -> dict[str, Any]: 42 | metrics = { 43 | "count": flm.count, 44 | "accuracy": skm.accuracy_score, 45 | "n_correct": functools.partial(skm.accuracy_score, normalize=False), 46 | } 47 | 48 | mf = flm.MetricFrame( 49 | metrics=metrics, 50 | y_true=self.y_true, 51 | y_pred=self.y_pred, 52 | sensitive_features=dict(dataset=self.dataset, subject=self.subject), 53 | ) 54 | 55 | result = dict() 56 | result["metrics"] = mf 57 | result["figures"] = dict() 58 | cm_display = skm.ConfusionMatrixDisplay.from_predictions( 59 | self.y_true, self.y_pred 60 | ) 61 | result["figures"]["confusion_matrix"] = cm_display.figure_ 62 | return result 63 | 64 | 65 | def parse_args(): 66 | parser = argparse.ArgumentParser(add_help=True) 67 | 68 | # Information about the ports 69 | ports_group = parser.add_argument_group("Ports") 70 | ports_group.add_argument("--input_dataset", type=pathlib.Path, required=True) 71 | ports_group.add_argument("--input_encoding", type=str, required=True) 72 | ports_group.add_argument("--output_dataset", type=pathlib.Path, required=True) 73 | ports_group.add_argument("--output_encoding", type=str, required=True) 74 | 75 | # Information about the keys 76 | keys_group = parser.add_argument_group("Keys") 77 | keys_group.add_argument("--correct_key", type=str, required=True) 78 | keys_group.add_argument("--response_key", type=str, required=True) 79 | 80 | args = parser.parse_args() 81 | 82 | return args 83 | 84 | 85 | def main(): 86 | args = parse_args() 87 | 88 | scorer = Scorer(correct_key=args.correct_key, response_key=args.response_key) 89 | line_reduce( 90 | reducer=scorer, 91 | source_file=args.input_dataset, 92 | source_encoding=args.input_encoding, 93 | ) 94 | summary = scorer.generate_summary() 95 | 96 | _logger.info("Logging with mlflow") 97 | mlflow.log_metrics(summary["metrics"].overall.to_dict()) 98 | for k, v in summary["figures"].items(): 99 | mlflow.log_figure(v, f"{k}.png") 100 | 101 | _logger.info("Writing output file") 102 | 103 | by_group_dict = dict() 104 | # Due 
to how MetricFrame does its indexing, we have to unpack the 105 | # key into another level of nesting 106 | for k, v in summary["metrics"].by_group.to_dict(orient="index").items(): 107 | if k[0] not in by_group_dict: 108 | by_group_dict[k[0]] = dict() 109 | by_group_dict[k[0]][k[1]] = v 110 | 111 | output_dict = dict( 112 | overall=summary["metrics"].overall.to_dict(), 113 | details=by_group_dict, 114 | ) 115 | print(f"output_dict:\n {json.dumps(output_dict,indent=4)}") 116 | with open(args.output_dataset, encoding=args.output_encoding, mode="w") as jf: 117 | json.dump(output_dict, jf, indent=4) 118 | 119 | 120 | if __name__ == "__main__": 121 | main() 122 | -------------------------------------------------------------------------------- /azureml/components/src/jsonl_to_json.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import pathlib 4 | 5 | 6 | from aether_utils.jsonl_utils import line_reduce 7 | from aether_utils.logging_utils import get_standard_logger_for_file 8 | 9 | _logger = get_standard_logger_for_file(__file__) 10 | 11 | 12 | class ContentAccumulator: 13 | def __init__(self): 14 | self.contents = [] 15 | 16 | def __call__(self, line: dict[str, any]): 17 | self.contents.append(line) 18 | 19 | 20 | def parse_args(): 21 | parser = argparse.ArgumentParser(add_help=True) 22 | 23 | # Information about the ports 24 | ports_group = parser.add_argument_group("Ports") 25 | ports_group.add_argument("--input_dataset", type=pathlib.Path, required=True) 26 | ports_group.add_argument("--input_encoding", type=str, required=True) 27 | ports_group.add_argument("--output_dataset", type=pathlib.Path, required=True) 28 | ports_group.add_argument("--output_encoding", type=str, required=True) 29 | 30 | args = parser.parse_args() 31 | 32 | return args 33 | 34 | 35 | def main(): 36 | args = parse_args() 37 | 38 | _logger.info("Starting accumulation") 39 | acc = ContentAccumulator() 40 | line_reduce( 41 | reducer=acc, 42 | source_file=args.input_dataset, 43 | source_encoding=args.input_encoding, 44 | ) 45 | _logger.info("All lines accumulated") 46 | 47 | with open(args.output_dataset, "w", encoding=args.output_encoding) as jf: 48 | json.dump(acc.contents, jf, indent=4) 49 | 50 | 51 | if __name__ == "__main__": 52 | main() 53 | -------------------------------------------------------------------------------- /azureml/components/uri_folder_to_file_component.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | type: command 3 | 4 | name: uri_folder_to_file 5 | display_name: 'uri_folder to uri_file' 6 | description: Extract single uri_file from uri_folder 7 | is_deterministic: true 8 | 9 | inputs: 10 | input_dataset: 11 | type: uri_folder 12 | optional: false 13 | description: | 14 | A folder dataset containing the desired file 15 | 16 | filename_pattern: 17 | type: string 18 | optional: false 19 | description: Pattern to select the required file 20 | 21 | outputs: 22 | output_dataset: 23 | type: uri_file 24 | description: The matched file 25 | 26 | command: >- 27 | cp ${{ inputs.input_dataset }}/${{ inputs.filename_pattern }} ${{ outputs.output_dataset }} 28 | 29 | 30 | environment: 31 | image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20231011.v1 -------------------------------------------------------------------------------- /azureml/environments/phi2transformer-env.yaml: 
-------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json 2 | 3 | name: phi2_transformer 4 | description: | 5 | A simple environment running Phi2 from Hugging Face 6 | 7 | image: mcr.microsoft.com/azureml/minimal-ubuntu22.04-py39-cuda11.8-gpu-inference:20240122.v1 8 | conda_file: 9 | channels: 10 | - defaults 11 | dependencies: 12 | - python=3.11 13 | - pip 14 | - pip: 15 | # Note that we have to force torch to install from this index 16 | # in order to match the CUDA driver... 17 | - --index-url https://download.pytorch.org/whl/cu118 18 | - torch 19 | # ... so we have to add PyPI back in as an alternative index 20 | - --extra-index-url https://pypi.org/simple 21 | - accelerate 22 | - aether-utils==0.0.1.dev1 23 | - guidance>=0.1.13 24 | - transformers -------------------------------------------------------------------------------- /azureml/environments/promptbase-env.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json 2 | 3 | name: promptbase_aml 4 | description: | 5 | A simple environment for promptbase 6 | 7 | image: mcr.microsoft.com/azureml/inference-base-2004 8 | conda_file: 9 | channels: 10 | - defaults 11 | dependencies: 12 | - python=3.11 13 | - pip 14 | - pip: 15 | - aether-utils==0.0.1.dev1 16 | - azure-identity 17 | - azure-keyvault-secrets 18 | - azureml-mlflow 19 | - fairlearn 20 | - datasets 21 | - guidance>=0.1.13 22 | - jsonschema 23 | - mlflow 24 | - numpy 25 | - openai>=1 26 | - scikit-learn -------------------------------------------------------------------------------- /azureml/json_schemas/multichoice_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "promptbase.multiplechoice", 4 | "title": "Multiple Choice Question", 5 | "description": "A sample multiple choice question", 6 | "type": "object", 7 | "properties": { 8 | "question": { 9 | "description": "The question being asked", 10 | "type": "string" 11 | }, 12 | "choices": { 13 | "description": "A list of possible answers to the question", 14 | "type": "array", 15 | "items": { 16 | "type": "string" 17 | }, 18 | "minItems": 2, 19 | "uniqueItems": true 20 | }, 21 | "correct_answer": { 22 | "description": "The index of the correct answer within the 'choices' array", 23 | "type": "integer", 24 | "minimum": 0 25 | } 26 | }, 27 | "required": [ 28 | "question", 29 | "choices", 30 | "correct_answer" 31 | ] 32 | } -------------------------------------------------------------------------------- /azureml/json_schemas/multiplechoice_cot_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json-schema.org/draft/2020-12/schema", 3 | "$id": "promptbase.multiplechoice_cot", 4 | "title": "Multiple Choice Question with Chain-of-Thought", 5 | "description": "A sample multiple choice question with a chain of thought", 6 | "type": "object", 7 | "properties": { 8 | "question": { 9 | "description": "The question being asked", 10 | "type": "string" 11 | }, 12 | "choices": { 13 | "description": "A list of possible answers to the question", 14 | "type": "array", 15 | "items": { 16 | "type": "string" 17 | }, 18 | "minItems": 2, 19 | "uniqueItems": true 20 | }, 21 | "chain_of_thought": { 22 | "description": "A chain of thought leading to 
the correct answer", 23 | "type": "string" 24 | }, 25 | "correct_answer": { 26 | "description": "The index of the correct answer within the 'choices' array", 27 | "type": "integer", 28 | "minimum": 0 29 | } 30 | }, 31 | "required": [ 32 | "question", 33 | "choices", 34 | "chain_of_thought", 35 | "correct_answer" 36 | ] 37 | } -------------------------------------------------------------------------------- /azureml/pipelines/azureml_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import pathlib 3 | 4 | 5 | from azure.ai.ml import load_component, MLClient, load_environment 6 | from azure.ai.ml.entities import Component, Environment 7 | 8 | from constants import COMPONENTS_DIR, ENVIRONMENT_FILE, PHI2_ENVIRONMENT_FILE 9 | 10 | 11 | _logger = logging.getLogger(__file__) 12 | _logger.setLevel(logging.INFO) 13 | 14 | # This dictionary lists the attributes to be added to ComponentCollector 15 | ALL_COMPONENTS = dict( 16 | jsonl_embeddings="jsonl_embeddings_aoai_component.yaml", 17 | jsonl_filter_correct_multiplechoice="jsonl_filter_correct_multiplechoice_component.yaml", 18 | jsonl_guidance="jsonl_guidance_component.yaml", 19 | jsonl_key_filter="jsonl_key_filter_component.yaml", 20 | jsonl_key_rename="jsonl_key_rename_component.yaml", 21 | jsonl_knn_cosine_similarity="jsonl_knn_cosine_similarity_component.yaml", 22 | jsonl_mmlu_fetch="jsonl_mmlu_fetch_component.yaml", 23 | jsonl_random_examples="jsonl_random_examples_component.yaml", 24 | jsonl_schema_checker="jsonl_schema_checker_component.yaml", 25 | jsonl_score_biosbias_json="jsonl_score_biosbias_json_component.yaml", 26 | jsonl_score_multiplechoice="jsonl_score_multiplechoice_component.yaml", 27 | jsonl_to_json="jsonl_to_json_component.yaml", 28 | uri_folder_to_file="uri_folder_to_file_component.yaml", 29 | ) 30 | 31 | 32 | def create_component_from_yaml( 33 | ml_client: MLClient, 34 | yaml_path: pathlib.Path, 35 | version_string: str, 36 | environment: Environment = None, 37 | ) -> Component: 38 | _logger.info(f"Loading {yaml_path}") 39 | loaded_yaml = load_component(source=yaml_path) 40 | _logger.info("Changing version") 41 | loaded_yaml.version = version_string 42 | _logger.info("Changing environment") 43 | loaded_yaml.environment = environment 44 | _logger.info("Creating component") 45 | my_comp = ml_client.components.create_or_update(loaded_yaml) 46 | _logger.info(f"Component {my_comp.name}:{my_comp.version} created") 47 | return my_comp 48 | 49 | 50 | def create_environment_from_yaml( 51 | ml_client: MLClient, yaml_path: pathlib.Path, version_string: str 52 | ) -> Environment: 53 | _logger.info(f"Loading {yaml_path}") 54 | loaded_yaml = load_environment(source=yaml_path) 55 | _logger.info("Changing version") 56 | loaded_yaml.version = version_string 57 | _logger.info("Creating Environment") 58 | my_env = ml_client.environments.create_or_update(loaded_yaml) 59 | _logger.info(f"Environment {my_env.name}:{my_env.version} created") 60 | return my_env 61 | 62 | 63 | class ComponentCollector: 64 | def __init__( 65 | self, 66 | ml_client: MLClient, 67 | component_base_dir: pathlib.Path, 68 | version_string: str, 69 | ): 70 | self._client = ml_client 71 | self._base_dir = component_base_dir 72 | self._version_string = version_string 73 | 74 | def prepare(self): 75 | _logger.info(f"Creating environment") 76 | component_environment = create_environment_from_yaml( 77 | self._client, ENVIRONMENT_FILE, self._version_string 78 | ) 79 | for attr_name, component_string in 
ALL_COMPONENTS.items(): 80 | assert not hasattr(self, attr_name) 81 | _logger.info(f"Creating {component_string} from YAML") 82 | component = create_component_from_yaml( 83 | self._client, 84 | self._base_dir / component_string, 85 | environment=component_environment, 86 | version_string=self._version_string, 87 | ) 88 | _logger.info(f"Adding attribute {attr_name}") 89 | setattr(self, attr_name, component) 90 | 91 | # Quickly put in the Phi2 environment 92 | _logger.info("Working on Phi2 component") 93 | phi2_environment = create_environment_from_yaml( 94 | self._client, PHI2_ENVIRONMENT_FILE, self._version_string 95 | ) 96 | self.jsonl_guidance_phi2 = create_component_from_yaml( 97 | self._client, 98 | self._base_dir / "jsonl_guidance_phi2_component.yaml", 99 | environment=phi2_environment, 100 | version_string=self._version_string, 101 | ) 102 | 103 | _logger.info("Added all components") 104 | 105 | 106 | def get_component_collector( 107 | ml_client: MLClient, version_string: str 108 | ) -> ComponentCollector: 109 | components = ComponentCollector(ml_client, COMPONENTS_DIR, version_string) 110 | components.prepare() 111 | 112 | return components 113 | -------------------------------------------------------------------------------- /azureml/pipelines/configs.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Union 2 | 3 | from dataclasses import dataclass, field 4 | 5 | 6 | @dataclass 7 | class AMLConfig: 8 | workspace_name: str = str() 9 | resource_group: str = str() 10 | subscription_id: str = str() 11 | 12 | 13 | @dataclass 14 | class PipelineConfig: 15 | base_experiment_name: str = str() 16 | tags: Dict[str, str] = field(default_factory=dict) 17 | default_compute_target: str = str() 18 | 19 | 20 | @dataclass 21 | class AOAIConfig: 22 | endpoint: str = str() 23 | model: str = str() 24 | compute_target: str = str() 25 | max_errors: int = int() 26 | workers: int = int() 27 | 28 | 29 | @dataclass 30 | class Phi2Config: 31 | compute_target: str = str() 32 | 33 | 34 | @dataclass 35 | class ZeroShotRunConfig: 36 | pipeline: PipelineConfig = field(default_factory=PipelineConfig) 37 | mmlu_dataset: str = str() 38 | mmlu_split: str = str() 39 | guidance_programs: list[str] = field(default_factory=list) 40 | aoai_config: AOAIConfig = field(default_factory=AOAIConfig) 41 | 42 | 43 | @dataclass 44 | class FewShotConfig: 45 | pipeline: PipelineConfig = field(default_factory=PipelineConfig) 46 | mmlu_dataset: str = str() 47 | mmlu_split: str = str() 48 | fewshot_split: str = str() 49 | guidance_program: str = str() 50 | guidance_workers: int = 4 51 | max_errors: int = 5 52 | aoai_config: AOAIConfig = field(default_factory=AOAIConfig) 53 | 54 | 55 | @dataclass 56 | class KNNConfig: 57 | k_nearest: int = int() 58 | 59 | 60 | @dataclass 61 | class KNNFewshotConfig: 62 | pipeline: PipelineConfig = field(default_factory=PipelineConfig) 63 | mmlu_dataset: str = str() 64 | test_split: str = str() 65 | example_split: str = str() 66 | guidance_programs: list[str] = field(default_factory=list) 67 | answer_key: str = str() 68 | knn_config: KNNConfig = field(default_factory=KNNConfig) 69 | aoai_config: AOAIConfig = field(default_factory=AOAIConfig) 70 | aoai_embedding_config: AOAIConfig = field(default_factory=AOAIConfig) 71 | 72 | 73 | @dataclass 74 | class RandomExamplesConfig: 75 | num_examples: int = int() 76 | random_seed: int = int() 77 | 78 | 79 | @dataclass 80 | class RandomFewshotPipelineConfig: 81 | pipeline: PipelineConfig = 
field(default_factory=PipelineConfig) 82 | mmlu_dataset: str = str() 83 | test_split: str = str() 84 | example_split: str = str() 85 | guidance_programs: list[str] = field(default_factory=list) 86 | answer_key: str = str() 87 | random_examples: RandomExamplesConfig = field(default_factory=RandomExamplesConfig) 88 | aoai_config: AOAIConfig = field(default_factory=AOAIConfig) 89 | 90 | 91 | @dataclass 92 | class RandomFewshotCoTPipelineConfig: 93 | pipeline: PipelineConfig = field(default_factory=PipelineConfig) 94 | mmlu_dataset: str = str() 95 | test_split: str = str() 96 | example_split: str = str() 97 | zeroshot_cot_guidance_program: str = str() 98 | fewshot_cot_guidance_program: str = str() 99 | random_example_config: RandomExamplesConfig = field( 100 | default_factory=RandomExamplesConfig 101 | ) 102 | aoai_config: AOAIConfig = field(default_factory=AOAIConfig) 103 | 104 | 105 | @dataclass 106 | class KNNFewshotCoTPipelineConfig: 107 | pipeline: PipelineConfig = field(default_factory=PipelineConfig) 108 | mmlu_dataset: str = str() 109 | test_split: str = str() 110 | example_split: str = str() 111 | zeroshot_cot_guidance_program: str = str() 112 | fewshot_cot_guidance_program: str = str() 113 | knn_config: KNNConfig = field(default_factory=KNNConfig) 114 | aoai_config: AOAIConfig = field(default_factory=AOAIConfig) 115 | aoai_embedding_config: AOAIConfig = field(default_factory=AOAIConfig) 116 | 117 | 118 | @dataclass 119 | class BiosBiasJSONPipelineConfig: 120 | pipeline: PipelineConfig = field(default_factory=PipelineConfig) 121 | biosbias_dataset: str = str() 122 | json_guidance_program: str = str() 123 | aoai_config: AOAIConfig = field(default_factory=AOAIConfig) 124 | 125 | 126 | @dataclass 127 | class Phi2BiosBiasJSONPipelineConfig: 128 | pipeline: PipelineConfig = field(default_factory=PipelineConfig) 129 | biosbias_dataset: str = str() 130 | json_guidance_programs: list[str] = field(default_factory=list) 131 | phi2_config: Phi2Config = field(default_factory=Phi2Config) 132 | -------------------------------------------------------------------------------- /azureml/pipelines/configs/aml_config_template.yaml: -------------------------------------------------------------------------------- 1 | azureml_config: 2 | workspace_name: 3 | resource_group: 4 | subscription_id: -------------------------------------------------------------------------------- /azureml/pipelines/configs/aoai_config_template.yaml: -------------------------------------------------------------------------------- 1 | default_aoai_config: 2 | endpoint: 3 | model: 4 | compute_target: 5 | max_errors: 10 6 | workers: 10 7 | 8 | # If being used as an embedding config, then the endpoint will look like: 9 | # https://YOUR_RESOURCE_NAME.openai.azure.com/openai/deployments/YOUR_DEPLOYMENT_NAME/embeddings?api-version=2023-05-15 -------------------------------------------------------------------------------- /azureml/pipelines/configs/biosbias_json_config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - aml_config 4 | - aoai_config 5 | 6 | zeroshot_config: 7 | pipeline: 8 | base_experiment_name: biosbias_json 9 | tags: 10 | default_compute_target: isolatedcompute 11 | biosbias_dataset: biosbias_small:1 12 | json_guidance_program: simple_biosbias_json.py 13 | aoai_config: ${ default_aoai_config } 14 | -------------------------------------------------------------------------------- /azureml/pipelines/configs/biosbias_json_phi2_config.yaml: 
-------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - aml_config 4 | 5 | zeroshot_config: 6 | pipeline: 7 | base_experiment_name: biosbias_json_phi2 8 | tags: 9 | default_compute_target: isolatedcompute 10 | biosbias_dataset: biosbias_small:1 11 | json_guidance_programs: 12 | - simple_biosbias_json_completion.py 13 | - simple_biosbias_json_completion_v2.py 14 | phi2_config: 15 | compute_target: gput4 16 | -------------------------------------------------------------------------------- /azureml/pipelines/configs/fewshot_knn_config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - aml_config 4 | - aoai_config 5 | - aoai_embedding_config 6 | 7 | knn_fewshot_config: 8 | pipeline: 9 | base_experiment_name: fewshot_knn 10 | tags: 11 | default_compute_target: isolatedcompute 12 | mmlu_dataset: all_mmlu_datasets 13 | test_split: test 14 | example_split: validation 15 | guidance_programs: 16 | - fewshot.py 17 | - fewshot_as_conversation.py 18 | knn_config: 19 | k_nearest: 5 20 | answer_key: fewshot_answer 21 | aoai_config: ${ default_aoai_config } 22 | aoai_embedding_config: ${ default_aoai_embedding_config } -------------------------------------------------------------------------------- /azureml/pipelines/configs/fewshot_random_config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - aml_config 4 | - aoai_config 5 | 6 | random_fewshot_config: 7 | pipeline: 8 | base_experiment_name: fewshot_random 9 | tags: 10 | default_compute_target: isolatedcompute 11 | mmlu_dataset: all_mmlu_datasets 12 | test_split: test 13 | example_split: validation 14 | guidance_programs: 15 | - fewshot.py 16 | - fewshot_as_conversation.py 17 | random_examples: 18 | num_examples: 5 19 | random_seed: 1234987 20 | answer_key: fewshot_answer 21 | aoai_config: ${ default_aoai_config } -------------------------------------------------------------------------------- /azureml/pipelines/configs/knn_fewshot_cot_config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - aml_config 4 | - aoai_config 5 | - aoai_embedding_config 6 | 7 | knn_fewshot_cot_config: 8 | pipeline: 9 | base_experiment_name: fewshot_knn_cot 10 | tags: 11 | default_compute_target: isolatedcompute 12 | mmlu_dataset: all_mmlu_datasets 13 | test_split: test 14 | example_split: validation 15 | zeroshot_cot_guidance_program: zero_shot_cot.py 16 | fewshot_cot_guidance_program: fewshot_cot_as_conversation.py 17 | knn_config: 18 | k_nearest: 5 19 | aoai_config: ${ default_aoai_config } 20 | aoai_embedding_config: ${ default_aoai_embedding_config } -------------------------------------------------------------------------------- /azureml/pipelines/configs/knn_fewshot_cot_ensemble_config.yaml: -------------------------------------------------------------------------------- 1 | # This is also for the submit_mmlu_fewshot_knn_cot.py script 2 | 3 | defaults: 4 | - _self_ 5 | - aml_config 6 | - aoai_config 7 | - aoai_embedding_config 8 | 9 | knn_fewshot_cot_config: 10 | pipeline: 11 | base_experiment_name: fewshot_knn_cot_ensemble 12 | tags: 13 | default_compute_target: isolatedcompute 14 | mmlu_dataset: all_mmlu_datasets 15 | test_split: test 16 | example_split: validation 17 | zeroshot_cot_guidance_program: zero_shot_cot.py 18 | fewshot_cot_guidance_program: fewshot_cot_as_conversation_ensemble.py 19 | 
knn_config: 20 | k_nearest: 5 21 | aoai_config: ${ default_aoai_config } 22 | aoai_embedding_config: ${ default_aoai_embedding_config } -------------------------------------------------------------------------------- /azureml/pipelines/configs/random_fewshot_cot_config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - aml_config 4 | - aoai_config 5 | 6 | random_fewshot_cot_config: 7 | pipeline: 8 | base_experiment_name: fewshot_random_cot 9 | tags: 10 | default_compute_target: isolatedcompute 11 | mmlu_dataset: all_mmlu_datasets 12 | test_split: test 13 | example_split: validation 14 | zeroshot_cot_guidance_program: zero_shot_cot.py 15 | fewshot_cot_guidance_program: fewshot_cot_as_conversation.py 16 | random_example_config: 17 | num_examples: 5 18 | random_seed: 1234987 19 | aoai_config: ${ default_aoai_config } -------------------------------------------------------------------------------- /azureml/pipelines/configs/zeroshot_config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - aml_config 4 | - aoai_config 5 | 6 | zeroshot_config: 7 | pipeline: 8 | base_experiment_name: zeroshot 9 | tags: 10 | default_compute_target: isolatedcompute 11 | mmlu_dataset: all_mmlu_datasets 12 | mmlu_split: test 13 | guidance_programs: 14 | - zero_or_few_shot.py 15 | - zero_or_few_shot_fortran.py 16 | - zero_or_few_shot_alpha.py 17 | - zero_or_few_shot_expert.py 18 | aoai_config: ${ default_aoai_config } 19 | -------------------------------------------------------------------------------- /azureml/pipelines/configs/zeroshot_cot_config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - aml_config 4 | - aoai_config 5 | 6 | zeroshot_config: 7 | pipeline: 8 | base_experiment_name: zeroshot_cot 9 | tags: 10 | default_compute_target: isolatedcompute 11 | mmlu_dataset: all_mmlu_datasets 12 | mmlu_split: test 13 | guidance_programs: 14 | - zero_shot_cot.py 15 | aoai_config: ${ default_aoai_config } 16 | -------------------------------------------------------------------------------- /azureml/pipelines/constants.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | COMPONENTS_DIR = (Path(__file__).parent.parent / "components").absolute() 4 | 5 | ENVIRONMENTS_DIR = (Path(__file__).parent.parent / "environments").absolute() 6 | 7 | GUIDANCE_PROGRAMS_DIR = ( 8 | Path(__file__).parent.parent.parent / "guidance_programs" 9 | ).absolute() 10 | 11 | 12 | SCHEMA_DIR = (Path(__file__).parent.parent / "json_schemas").absolute() 13 | 14 | ENVIRONMENT_FILE = ENVIRONMENTS_DIR / "promptbase-env.yaml" 15 | 16 | PHI2_ENVIRONMENT_FILE = ENVIRONMENTS_DIR / "phi2transformer-env.yaml" 17 | 18 | assert COMPONENTS_DIR.exists(), f"Did not find {COMPONENTS_DIR}" 19 | assert ENVIRONMENT_FILE.exists(), f"Did not find {ENVIRONMENT_FILE}" 20 | assert GUIDANCE_PROGRAMS_DIR.exists(), f"Did not find {GUIDANCE_PROGRAMS_DIR}" 21 | assert SCHEMA_DIR.exists(), f"Did not find {SCHEMA_DIR}" 22 | -------------------------------------------------------------------------------- /azureml/pipelines/logging_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import pathlib 3 | 4 | 5 | def get_standard_logger_for_file(file_path: str) -> logging.Logger: 6 | _logger = logging.getLogger(pathlib.Path(file_path).name) 7 | 
_logger.setLevel(logging.INFO) 8 | return _logger 9 | -------------------------------------------------------------------------------- /azureml/pipelines/submit_mmlu_knn_fewshot.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from dataclasses import dataclass 4 | 5 | import hydra 6 | from hydra.core.config_store import ConfigStore 7 | 8 | import omegaconf 9 | 10 | from azure.identity import DefaultAzureCredential 11 | 12 | from azure.ai.ml import dsl, MLClient, Input 13 | from azure.ai.ml.entities import Pipeline 14 | 15 | from azureml_pipelines import create_knn_fewshot_pipeline 16 | from azureml_utils import get_component_collector 17 | from configs import AMLConfig, KNNFewshotConfig, AOAIConfig 18 | from constants import GUIDANCE_PROGRAMS_DIR 19 | from logging_utils import get_standard_logger_for_file 20 | 21 | _logger = get_standard_logger_for_file(__file__) 22 | 23 | 24 | @dataclass 25 | class PipelineConfig: 26 | knn_fewshot_config: KNNFewshotConfig = omegaconf.MISSING 27 | azureml_config: AMLConfig = omegaconf.MISSING 28 | aoai_config: AOAIConfig = omegaconf.MISSING 29 | aoai_embedding_config: AOAIConfig = omegaconf.MISSING 30 | 31 | 32 | cs = ConfigStore.instance() 33 | cs.store(name="config", node=PipelineConfig) 34 | 35 | 36 | def create_knn_fewshot_pipeline_mmlu( 37 | ml_client: MLClient, run_config: KNNFewshotConfig, version_string: str 38 | ): 39 | components = get_component_collector(ml_client, version_string) 40 | 41 | guidance_inputs = dict() 42 | for prog_filename in run_config.guidance_programs: 43 | k = prog_filename[0:-3] 44 | v = Input( 45 | type="uri_file", 46 | path=GUIDANCE_PROGRAMS_DIR / prog_filename, 47 | mode="download", 48 | ) 49 | guidance_inputs[k] = v 50 | _logger.info(f"Found {len(guidance_inputs)} guidance programs") 51 | 52 | @dsl.pipeline() 53 | def basic_pipeline() -> Pipeline: 54 | mmlu_fetch_job = components.jsonl_mmlu_fetch( 55 | mmlu_dataset=run_config.mmlu_dataset 56 | ) 57 | mmlu_fetch_job.name = f"fetch_mmlu_{run_config.mmlu_dataset}" 58 | 59 | split_outputs = dict() 60 | for k, v in dict( 61 | input=run_config.test_split, example=run_config.example_split 62 | ).items(): 63 | get_split_job = components.uri_folder_to_file( 64 | input_dataset=mmlu_fetch_job.outputs.output_dataset, 65 | filename_pattern=f"{v}.jsonl", 66 | ) 67 | get_split_job.name = f"extract_split_{k}" 68 | split_outputs[k] = get_split_job.outputs.output_dataset 69 | 70 | for progname, prog_input in guidance_inputs.items(): 71 | answer_ds = create_knn_fewshot_pipeline( 72 | components=components, 73 | embedding_config=run_config.aoai_embedding_config, 74 | inference_config=run_config.aoai_config, 75 | input_dataset=split_outputs["input"], 76 | example_dataset=split_outputs["example"], 77 | guidance_program=prog_input, 78 | num_examples=run_config.knn_config.k_nearest, 79 | output_key=run_config.answer_key, 80 | ) 81 | 82 | score_job = components.jsonl_score_multiplechoice( 83 | input_dataset=answer_ds, 84 | correct_key="correct_answer", # Set when MMLU fetching 85 | response_key=run_config.answer_key, 86 | ) 87 | score_job.name = f"score_fewshot_{progname}" 88 | 89 | pipeline = basic_pipeline() 90 | pipeline.experiment_name = ( 91 | f"{run_config.pipeline.base_experiment_name}_{run_config.mmlu_dataset}" 92 | ) 93 | pipeline.display_name = None 94 | pipeline.compute = run_config.pipeline.default_compute_target 95 | if run_config.pipeline.tags: 96 | pipeline.tags.update(run_config.pipeline.tags) 97 | _logger.info("Pipeline created")
created") 98 | 99 | return pipeline 100 | 101 | 102 | @hydra.main(config_path="configs", version_base="1.1") 103 | def main(config: PipelineConfig): 104 | version_string = str(int(time.time())) 105 | _logger.info(f"AzureML object version for this run: {version_string}") 106 | 107 | _logger.info(f"Azure Subscription: {config.azureml_config.subscription_id}") 108 | _logger.info(f"Resource Group: {config.azureml_config.resource_group}") 109 | _logger.info(f"Workspace : {config.azureml_config.workspace_name}") 110 | 111 | credential = DefaultAzureCredential(exclude_shared_token_cache_credential=True) 112 | 113 | ws_client = MLClient( 114 | credential=credential, 115 | subscription_id=config.azureml_config.subscription_id, 116 | resource_group_name=config.azureml_config.resource_group, 117 | workspace_name=config.azureml_config.workspace_name, 118 | logging_enable=False, 119 | ) 120 | 121 | pipeline = create_knn_fewshot_pipeline_mmlu( 122 | ws_client, config.knn_fewshot_config, version_string 123 | ) 124 | _logger.info("Submitting pipeline") 125 | submitted_job = ws_client.jobs.create_or_update(pipeline) 126 | _logger.info(f"Submitted: {submitted_job.name}") 127 | 128 | 129 | if __name__ == "__main__": 130 | main() 131 | -------------------------------------------------------------------------------- /azureml/pipelines/submit_mmlu_random_fewshot.py: -------------------------------------------------------------------------------- 1 | # Submit a run using: 2 | # python .\submit_mmlu_random_fewshot.py -cn fewshot_random_config 3 | 4 | import time 5 | 6 | from dataclasses import dataclass 7 | 8 | import hydra 9 | from hydra.core.config_store import ConfigStore 10 | 11 | import omegaconf 12 | 13 | from azure.identity import DefaultAzureCredential 14 | from azure.ai.ml import MLClient 15 | 16 | from azure.ai.ml import dsl, Input, MLClient 17 | from azure.ai.ml.entities import Pipeline 18 | 19 | from azureml_pipelines import create_random_fewshot_pipeline 20 | from azureml_utils import get_component_collector 21 | from configs import AMLConfig, RandomFewshotPipelineConfig 22 | from constants import GUIDANCE_PROGRAMS_DIR 23 | from logging_utils import get_standard_logger_for_file 24 | 25 | _logger = get_standard_logger_for_file(__file__) 26 | 27 | 28 | @dataclass 29 | class PipelineConfig: 30 | random_fewshot_config: RandomFewshotPipelineConfig = omegaconf.MISSING 31 | azureml_config: AMLConfig = omegaconf.MISSING 32 | 33 | 34 | cs = ConfigStore.instance() 35 | cs.store(name="config", node=PipelineConfig) 36 | 37 | 38 | def create_fewshot_pipeline( 39 | ml_client: MLClient, run_config: RandomFewshotPipelineConfig, version_string: str 40 | ): 41 | components = get_component_collector(ml_client, version_string) 42 | 43 | guidance_inputs = dict() 44 | for prog_filename in run_config.guidance_programs: 45 | k = prog_filename[0:-3] 46 | v = Input( 47 | type="uri_file", 48 | path=GUIDANCE_PROGRAMS_DIR / prog_filename, 49 | model="download", 50 | ) 51 | guidance_inputs[k] = v 52 | _logger.info(f"Found {len(guidance_inputs)} guidance programs") 53 | 54 | @dsl.pipeline() 55 | def basic_pipeline() -> Pipeline: 56 | mmlu_fetch_job = components.jsonl_mmlu_fetch( 57 | mmlu_dataset=run_config.mmlu_dataset 58 | ) 59 | mmlu_fetch_job.name = f"fetch_mmlu_{run_config.mmlu_dataset}" 60 | 61 | split_outputs = dict() 62 | for k, v in dict( 63 | input=run_config.test_split, example=run_config.example_split 64 | ).items(): 65 | get_split_job = components.uri_folder_to_file( 66 | 
69 | get_split_job.name = f"extract_split_{k}" 70 | split_outputs[k] = get_split_job.outputs.output_dataset 71 | 72 | for progname, prog_input in guidance_inputs.items(): 73 | answer_ds = create_random_fewshot_pipeline( 74 | components=components, 75 | inference_config=run_config.aoai_config, 76 | input_dataset=split_outputs["input"], 77 | example_dataset=split_outputs["example"], 78 | guidance_program=prog_input, 79 | random_examples=run_config.random_examples, 80 | output_key=run_config.answer_key, 81 | ) 82 | 83 | score_job = components.jsonl_score_multiplechoice( 84 | input_dataset=answer_ds, 85 | correct_key="correct_answer", # Set when MMLU fetching 86 | response_key=run_config.answer_key, 87 | ) 88 | score_job.name = f"score_fewshot_{progname}" 89 | 90 | pipeline = basic_pipeline() 91 | pipeline.experiment_name = ( 92 | f"{run_config.pipeline.base_experiment_name}_{run_config.mmlu_dataset}" 93 | ) 94 | pipeline.display_name = None 95 | pipeline.compute = run_config.pipeline.default_compute_target 96 | if run_config.pipeline.tags: 97 | pipeline.tags.update(run_config.pipeline.tags) 98 | _logger.info("Pipeline created") 99 | 100 | return pipeline 101 | 102 | 103 | @hydra.main(config_path="configs", version_base="1.1") 104 | def main(config: PipelineConfig): 105 | version_string = str(int(time.time())) 106 | _logger.info(f"AzureML object version for this run: {version_string}") 107 | 108 | _logger.info(f"Azure Subscription: {config.azureml_config.subscription_id}") 109 | _logger.info(f"Resource Group: {config.azureml_config.resource_group}") 110 | _logger.info(f"Workspace : {config.azureml_config.workspace_name}") 111 | 112 | credential = DefaultAzureCredential(exclude_shared_token_cache_credential=True) 113 | 114 | ws_client = MLClient( 115 | credential=credential, 116 | subscription_id=config.azureml_config.subscription_id, 117 | resource_group_name=config.azureml_config.resource_group, 118 | workspace_name=config.azureml_config.workspace_name, 119 | logging_enable=False, 120 | ) 121 | 122 | pipeline = create_fewshot_pipeline( 123 | ws_client, config.random_fewshot_config, version_string 124 | ) 125 | _logger.info("Submitting pipeline") 126 | submitted_job = ws_client.jobs.create_or_update(pipeline) 127 | _logger.info(f"Submitted: {submitted_job.name}") 128 | 129 | 130 | if __name__ == "__main__": 131 | main() 132 | -------------------------------------------------------------------------------- /azureml/pipelines/submit_mmlu_zeroshot.py: -------------------------------------------------------------------------------- 1 | # Submit a run using: 2 | # python .\submit_mmlu_zeroshot.py -cn zeroshot_config 3 | 4 | import time 5 | 6 | from dataclasses import dataclass 7 | 8 | import hydra 9 | from hydra.core.config_store import ConfigStore 10 | 11 | import omegaconf 12 | 13 | from azure.identity import DefaultAzureCredential 14 | from azure.ai.ml import MLClient 15 | 16 | from azure.ai.ml import dsl, Input 17 | from azure.ai.ml.entities import Pipeline 18 | 19 | from azureml_pipelines import create_zeroshot_pipeline 20 | from azureml_utils import get_component_collector 21 | from configs import AMLConfig, ZeroShotRunConfig 22 | from constants import GUIDANCE_PROGRAMS_DIR 23 | from logging_utils import get_standard_logger_for_file 24 | 25 | _logger = get_standard_logger_for_file(__file__) 26 | 27 | 28 | @dataclass 29 | class PipelineConfig: 30 | zeroshot_config: ZeroShotRunConfig = omegaconf.MISSING 31 | azureml_config: AMLConfig = omegaconf.MISSING 32 | 33 | 34 | cs = ConfigStore.instance() 35 | cs.store(name="config", node=PipelineConfig)
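# Hydra resolves the "-cn <name>" flag against the YAML files in configs/
# (e.g. zeroshot_config.yaml); the ConfigStore registration above makes the
# PipelineConfig dataclass available as a structured-config schema, which is
# presumably intended to validate the loaded YAML against these typed fields.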
36 | 37 | 38 | def create_mmlu_zeroshot_pipeline( 39 | ml_client: MLClient, run_config: ZeroShotRunConfig, version_string: str 40 | ): 41 | components = get_component_collector(ml_client, version_string) 42 | 43 | guidance_inputs = dict() 44 | for prog_filename in run_config.guidance_programs: 45 | k = prog_filename[0:-3] 46 | v = Input( 47 | type="uri_file", 48 | path=GUIDANCE_PROGRAMS_DIR / prog_filename, 49 | mode="download", 50 | ) 51 | guidance_inputs[k] = v 52 | _logger.info(f"Found {len(guidance_inputs)} guidance programs") 53 | 54 | answer_key = "zeroshot_answer" 55 | 56 | @dsl.pipeline() 57 | def basic_pipeline() -> Pipeline: 58 | mmlu_fetch_job = components.jsonl_mmlu_fetch( 59 | mmlu_dataset=run_config.mmlu_dataset 60 | ) 61 | mmlu_fetch_job.name = f"fetch_mmlu_{run_config.mmlu_dataset}" 62 | 63 | get_split_job = components.uri_folder_to_file( 64 | input_dataset=mmlu_fetch_job.outputs.output_dataset, 65 | filename_pattern=f"{run_config.mmlu_split}.jsonl", 66 | ) 67 | get_split_job.name = f"extract_split_{run_config.mmlu_split}" 68 | 69 | for progname, prog_input in guidance_inputs.items(): 70 | answer_ds = create_zeroshot_pipeline( 71 | pipeline_name=f"{progname}_zeroshot", 72 | pipeline_display_name=f"Zero Shot {progname}", 73 | components=components, 74 | inference_config=run_config.aoai_config, 75 | input_dataset=get_split_job.outputs.output_dataset, 76 | guidance_program=prog_input, 77 | output_key=answer_key, 78 | ) 79 | 80 | score_job = components.jsonl_score_multiplechoice( 81 | input_dataset=answer_ds, 82 | correct_key="correct_answer", # Set when MMLU fetching 83 | response_key=answer_key, 84 | ) 85 | score_job.name = f"zeroshot_score_{progname}" 86 | 87 | pipeline = basic_pipeline() 88 | pipeline.experiment_name = ( 89 | f"{run_config.pipeline.base_experiment_name}_{run_config.mmlu_dataset}" 90 | ) 91 | pipeline.display_name = None 92 | pipeline.compute = run_config.pipeline.default_compute_target 93 | if run_config.pipeline.tags: 94 | pipeline.tags.update(run_config.pipeline.tags) 95 | _logger.info("Pipeline created") 96 | 97 | return pipeline 98 | 99 | 100 | @hydra.main(config_path="configs", version_base="1.1") 101 | def main(config: PipelineConfig): 102 | version_string = str(int(time.time())) 103 | _logger.info(f"AzureML object version for this run: {version_string}") 104 | 105 | _logger.info(f"Azure Subscription: {config.azureml_config.subscription_id}") 106 | _logger.info(f"Resource Group: {config.azureml_config.resource_group}") 107 | _logger.info(f"Workspace : {config.azureml_config.workspace_name}") 108 | 109 | credential = DefaultAzureCredential(exclude_shared_token_cache_credential=True) 110 | 111 | ws_client = MLClient( 112 | credential=credential, 113 | subscription_id=config.azureml_config.subscription_id, 114 | resource_group_name=config.azureml_config.resource_group, 115 | workspace_name=config.azureml_config.workspace_name, 116 | logging_enable=False, 117 | ) 118 | 119 | pipeline = create_mmlu_zeroshot_pipeline( 120 | ws_client, config.zeroshot_config, version_string 121 | ) 122 | _logger.info("Submitting pipeline") 123 | submitted_job = ws_client.jobs.create_or_update(pipeline) 124 | _logger.info(f"Submitted: {submitted_job.name}") 125 | 126 | 127 | if __name__ == "__main__": 128 | main() 129 | --------------------------------------------------------------------------------
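Each submit script derives a job key from a guidance program's filename by slicing off the last three characters (prog_filename[0:-3]). The sketch below, using a hypothetical program_key helper that is not part of this repository, shows the equivalent pathlib spelling of that convention:

from pathlib import Path

def program_key(prog_filename: str) -> str:
    # "zero_shot_cot.py" -> "zero_shot_cot"; Path(...).stem is equivalent to
    # prog_filename[0:-3] for filenames ending in ".py"
    return Path(prog_filename).stem

assert program_key("zero_shot_cot.py") == "zero_shot_cot"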
/azureml/pipelines/submit_mmlu_zeroshot_cot.py: -------------------------------------------------------------------------------- 1 | # Submit a run using: 2 | # python .\submit_mmlu_zeroshot_cot.py -cn zeroshot_cot_config 3 | 4 | import time 5 | 6 | from dataclasses import dataclass 7 | 8 | import hydra 9 | from hydra.core.config_store import ConfigStore 10 | 11 | import omegaconf 12 | 13 | from azure.identity import DefaultAzureCredential 14 | from azure.ai.ml import MLClient 15 | 16 | from azure.ai.ml import dsl, Input 17 | from azure.ai.ml.entities import Pipeline 18 | 19 | from azureml_pipelines import create_zeroshot_cot_pipeline 20 | from azureml_utils import get_component_collector 21 | from configs import AMLConfig, ZeroShotRunConfig 22 | from constants import GUIDANCE_PROGRAMS_DIR 23 | from logging_utils import get_standard_logger_for_file 24 | 25 | _logger = get_standard_logger_for_file(__file__) 26 | 27 | 28 | @dataclass 29 | class PipelineConfig: 30 | zeroshot_config: ZeroShotRunConfig = omegaconf.MISSING 31 | azureml_config: AMLConfig = omegaconf.MISSING 32 | 33 | 34 | cs = ConfigStore.instance() 35 | cs.store(name="config", node=PipelineConfig) 36 | 37 | 38 | def create_mmlu_zeroshot_cot_pipeline( 39 | ml_client: MLClient, run_config: ZeroShotRunConfig, version_string: str 40 | ): 41 | components = get_component_collector(ml_client, version_string) 42 | 43 | guidance_inputs = dict() 44 | for prog_filename in run_config.guidance_programs: 45 | k = prog_filename[0:-3] 46 | v = Input( 47 | type="uri_file", 48 | path=GUIDANCE_PROGRAMS_DIR / prog_filename, 49 | mode="download", 50 | ) 51 | guidance_inputs[k] = v 52 | _logger.info(f"Found {len(guidance_inputs)} guidance programs") 53 | 54 | answer_key = "zeroshot_cot_answer" 55 | cot_key = "zeroshot_chain_of_thought" 56 | 57 | @dsl.pipeline() 58 | def basic_pipeline() -> Pipeline: 59 | mmlu_fetch_job = components.jsonl_mmlu_fetch( 60 | mmlu_dataset=run_config.mmlu_dataset 61 | ) 62 | mmlu_fetch_job.name = f"fetch_mmlu_{run_config.mmlu_dataset}" 63 | 64 | get_split_job = components.uri_folder_to_file( 65 | input_dataset=mmlu_fetch_job.outputs.output_dataset, 66 | filename_pattern=f"{run_config.mmlu_split}.jsonl", 67 | ) 68 | get_split_job.name = f"extract_split_{run_config.mmlu_split}" 69 | 70 | for progname, prog_input in guidance_inputs.items(): 71 | answer_ds = create_zeroshot_cot_pipeline( 72 | pipeline_name=f"{progname}_zeroshot_cot", 73 | pipeline_display_name=f"Zero Shot CoT {progname}", 74 | components=components, 75 | inference_config=run_config.aoai_config, 76 | input_dataset=get_split_job.outputs.output_dataset, 77 | guidance_program=prog_input, 78 | output_key=answer_key, 79 | cot_key=cot_key, 80 | ) 81 | 82 | score_job = components.jsonl_score_multiplechoice( 83 | input_dataset=answer_ds, 84 | correct_key="correct_answer", # Set when MMLU fetching 85 | response_key=answer_key, 86 | ) 87 | score_job.name = f"zeroshot_cot_score_{progname}" 88 | 89 | pipeline = basic_pipeline() 90 | pipeline.experiment_name = ( 91 | f"{run_config.pipeline.base_experiment_name}_{run_config.mmlu_dataset}" 92 | ) 93 | pipeline.display_name = None 94 | pipeline.compute = run_config.pipeline.default_compute_target 95 | if run_config.pipeline.tags: 96 | pipeline.tags.update(run_config.pipeline.tags) 97 | _logger.info("Pipeline created") 98 | 99 | return pipeline 100 | 101 | 102 | @hydra.main(config_path="configs", version_base="1.1") 103 | def main(config: PipelineConfig): 104 | version_string = str(int(time.time()))
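    # Epoch seconds serve as a strictly increasing version label for the
    # AzureML assets created by this run (via get_component_collector above),
    # so each submission works against freshly versioned components.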
_logger.info(f"AzureML object version for this run: {version_string}") 106 | 107 | _logger.info(f"Azure Subscription: {config.azureml_config.subscription_id}") 108 | _logger.info(f"Resource Group: {config.azureml_config.resource_group}") 109 | _logger.info(f"Workspace : {config.azureml_config.workspace_name}") 110 | 111 | credential = DefaultAzureCredential(exclude_shared_token_cache_credential=True) 112 | 113 | ws_client = MLClient( 114 | credential=credential, 115 | subscription_id=config.azureml_config.subscription_id, 116 | resource_group_name=config.azureml_config.resource_group, 117 | workspace_name=config.azureml_config.workspace_name, 118 | logging_enable=False, 119 | ) 120 | 121 | pipeline = create_mmlu_zeroshot_cot_pipeline( 122 | ws_client, config.zeroshot_config, version_string 123 | ) 124 | _logger.info("Submitting pipeline") 125 | submitted_job = ws_client.jobs.create_or_update(pipeline) 126 | _logger.info(f"Submitted: {submitted_job.name}") 127 | 128 | 129 | if __name__ == "__main__": 130 | main() 131 | -------------------------------------------------------------------------------- /azureml/pipelines/submit_simple_biosbias_json.py: -------------------------------------------------------------------------------- 1 | # Submit a run using: 2 | # python .\submit_mmlu_zeroshot.py -cn zeroshot_config 3 | 4 | import time 5 | 6 | from dataclasses import dataclass 7 | 8 | import hydra 9 | from hydra.core.config_store import ConfigStore 10 | 11 | import omegaconf 12 | 13 | from azure.identity import DefaultAzureCredential 14 | from azure.ai.ml import MLClient 15 | 16 | from azure.ai.ml import dsl, Input, MLClient 17 | from azure.ai.ml.entities import Pipeline 18 | 19 | from azureml_pipelines import create_zeroshot_pipeline 20 | from azureml_utils import get_component_collector 21 | from configs import AMLConfig, BiosBiasJSONPipelineConfig 22 | from constants import GUIDANCE_PROGRAMS_DIR 23 | from logging_utils import get_standard_logger_for_file 24 | 25 | _logger = get_standard_logger_for_file(__file__) 26 | 27 | 28 | @dataclass 29 | class PipelineConfig: 30 | zeroshot_config: BiosBiasJSONPipelineConfig = omegaconf.MISSING 31 | azureml_config: AMLConfig = omegaconf.MISSING 32 | 33 | 34 | cs = ConfigStore.instance() 35 | cs.store(name="config", node=PipelineConfig) 36 | 37 | 38 | def create_biosbias_simple_json_pipeline( 39 | ml_client: MLClient, run_config: BiosBiasJSONPipelineConfig, version_string: str 40 | ): 41 | components = get_component_collector(ml_client, version_string) 42 | 43 | guidance_input = Input( 44 | type="uri_file", 45 | path=GUIDANCE_PROGRAMS_DIR / run_config.json_guidance_program, 46 | model="download", 47 | ) 48 | 49 | ds_parts = run_config.biosbias_dataset.split(":") 50 | bios_ds = ml_client.data.get(ds_parts[0], version=ds_parts[1]) 51 | 52 | inference_config = run_config.aoai_config 53 | 54 | @dsl.pipeline() 55 | def basic_pipeline() -> Pipeline: 56 | guidance_job = components.jsonl_guidance( 57 | guidance_program=guidance_input, 58 | guidance_workers=inference_config.workers, 59 | max_errors=inference_config.max_errors, 60 | input_dataset=bios_ds, 61 | azure_openai_endpoint=inference_config.endpoint, 62 | azure_openai_deployed_model=inference_config.model, 63 | ) 64 | guidance_job.name = f"guidance_simple" 65 | guidance_job.compute = inference_config.compute_target 66 | 67 | score_job = components.jsonl_score_biosbias_json( 68 | input_dataset=guidance_job.outputs.output_dataset, 69 | response_key="model_answer", 70 | ) 71 | score_job.name = 
f"score_biosbias_json" 72 | 73 | pipeline = basic_pipeline() 74 | pipeline.experiment_name = ( 75 | f"{run_config.pipeline.base_experiment_name}_{ds_parts[0]}_{ds_parts[1]}" 76 | ) 77 | pipeline.display_name = None 78 | pipeline.compute = run_config.pipeline.default_compute_target 79 | if run_config.pipeline.tags: 80 | pipeline.tags.update(run_config.tags) 81 | _logger.info("Pipeline created") 82 | 83 | return pipeline 84 | 85 | 86 | @hydra.main(config_path="configs", version_base="1.1") 87 | def main(config: PipelineConfig): 88 | version_string = str(int(time.time())) 89 | _logger.info(f"AzureML object version for this run: {version_string}") 90 | 91 | _logger.info(f"Azure Subscription: {config.azureml_config.subscription_id}") 92 | _logger.info(f"Resource Group: {config.azureml_config.resource_group}") 93 | _logger.info(f"Workspace : {config.azureml_config.workspace_name}") 94 | 95 | credential = DefaultAzureCredential(exclude_shared_token_cache_credential=True) 96 | 97 | ws_client = MLClient( 98 | credential=credential, 99 | subscription_id=config.azureml_config.subscription_id, 100 | resource_group_name=config.azureml_config.resource_group, 101 | workspace_name=config.azureml_config.workspace_name, 102 | logging_enable=False, 103 | ) 104 | 105 | pipeline = create_biosbias_simple_json_pipeline( 106 | ws_client, config.zeroshot_config, version_string 107 | ) 108 | _logger.info("Submitting pipeline") 109 | submitted_job = ws_client.jobs.create_or_update(pipeline) 110 | _logger.info(f"Submitted: {submitted_job.name}") 111 | 112 | 113 | if __name__ == "__main__": 114 | main() 115 | -------------------------------------------------------------------------------- /azureml/pipelines/submit_simple_biosbias_json_phi2.py: -------------------------------------------------------------------------------- 1 | # Submit a run using: 2 | # python .\submit_simple_biosbias_json_phi2.py -cn biosbias_json_phi2_config.yaml 3 | 4 | import time 5 | 6 | from dataclasses import dataclass 7 | 8 | import hydra 9 | from hydra.core.config_store import ConfigStore 10 | 11 | import omegaconf 12 | 13 | from azure.identity import DefaultAzureCredential 14 | from azure.ai.ml import MLClient 15 | 16 | from azure.ai.ml import dsl, Input, MLClient 17 | from azure.ai.ml.entities import Pipeline 18 | 19 | from azureml_utils import get_component_collector 20 | from configs import AMLConfig, Phi2BiosBiasJSONPipelineConfig 21 | from constants import GUIDANCE_PROGRAMS_DIR 22 | from logging_utils import get_standard_logger_for_file 23 | 24 | _logger = get_standard_logger_for_file(__file__) 25 | 26 | 27 | @dataclass 28 | class PipelineConfig: 29 | zeroshot_config: Phi2BiosBiasJSONPipelineConfig = omegaconf.MISSING 30 | azureml_config: AMLConfig = omegaconf.MISSING 31 | 32 | 33 | cs = ConfigStore.instance() 34 | cs.store(name="config", node=PipelineConfig) 35 | 36 | 37 | def create_biosbias_simple_json_pipeline( 38 | ml_client: MLClient, run_config: Phi2BiosBiasJSONPipelineConfig, version_string: str 39 | ): 40 | components = get_component_collector(ml_client, version_string) 41 | 42 | guidance_inputs = dict() 43 | for prog_filename in run_config.json_guidance_programs: 44 | k = prog_filename[0:-3] 45 | v = Input( 46 | type="uri_file", 47 | path=GUIDANCE_PROGRAMS_DIR / prog_filename, 48 | model="download", 49 | ) 50 | guidance_inputs[k] = v 51 | _logger.info(f"Found {len(guidance_inputs)} guidance programs") 52 | 53 | ds_parts = run_config.biosbias_dataset.split(":") 54 | bios_ds = ml_client.data.get(ds_parts[0], 
55 | 56 | @dsl.pipeline() 57 | def basic_pipeline() -> Pipeline: 58 | for progname, prog_input in guidance_inputs.items(): 59 | guidance_job = components.jsonl_guidance_phi2( 60 | guidance_program=prog_input, 61 | input_dataset=bios_ds, 62 | ) 63 | guidance_job.compute = run_config.phi2_config.compute_target 64 | guidance_job.name = f"guidance_simple_{progname}" 65 | 66 | score_job = components.jsonl_score_biosbias_json( 67 | input_dataset=guidance_job.outputs.output_dataset, 68 | response_key="model_answer", 69 | ) 70 | score_job.name = f"score_biosbias_json_{progname}" 71 | 72 | pipeline = basic_pipeline() 73 | pipeline.experiment_name = ( 74 | f"{run_config.pipeline.base_experiment_name}_{ds_parts[0]}_{ds_parts[1]}" 75 | ) 76 | pipeline.display_name = None 77 | pipeline.compute = run_config.pipeline.default_compute_target 78 | if run_config.pipeline.tags: 79 | pipeline.tags.update(run_config.pipeline.tags) 80 | _logger.info("Pipeline created") 81 | 82 | return pipeline 83 | 84 | 85 | @hydra.main(config_path="configs", version_base="1.1") 86 | def main(config: PipelineConfig): 87 | version_string = str(int(time.time())) 88 | _logger.info(f"AzureML object version for this run: {version_string}") 89 | 90 | _logger.info(f"Azure Subscription: {config.azureml_config.subscription_id}") 91 | _logger.info(f"Resource Group: {config.azureml_config.resource_group}") 92 | _logger.info(f"Workspace : {config.azureml_config.workspace_name}") 93 | 94 | credential = DefaultAzureCredential(exclude_shared_token_cache_credential=True) 95 | 96 | ws_client = MLClient( 97 | credential=credential, 98 | subscription_id=config.azureml_config.subscription_id, 99 | resource_group_name=config.azureml_config.resource_group, 100 | workspace_name=config.azureml_config.workspace_name, 101 | logging_enable=False, 102 | ) 103 | 104 | pipeline = create_biosbias_simple_json_pipeline( 105 | ws_client, config.zeroshot_config, version_string 106 | ) 107 | _logger.info("Submitting pipeline") 108 | submitted_job = ws_client.jobs.create_or_update(pipeline) 109 | _logger.info(f"Submitted: {submitted_job.name}") 110 | 111 | 112 | if __name__ == "__main__": 113 | main() 114 | -------------------------------------------------------------------------------- /azureml/requirements.txt: -------------------------------------------------------------------------------- 1 | azure-ai-ml 2 | hydra-core -------------------------------------------------------------------------------- /guidance_programs/fewshot.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | import textwrap 4 | 5 | from typing import Any, Dict 6 | 7 | import guidance 8 | from guidance import gen, select, system, user, assistant 9 | 10 | 11 | _logger = logging.getLogger(__file__) 12 | _logger.setLevel(logging.INFO) 13 | _logger.addHandler(logging.StreamHandler(stream=sys.stdout)) 14 | 15 | 16 | @guidance 17 | def few_shot_multiple_choice( 18 | lm: guidance.models.Chat, 19 | question: str, 20 | choices: list[str], 21 | fewshot_examples: list[dict[str, Any]], 22 | ): 23 | # Some general instruction to the model 24 | with system(): 25 | lm += textwrap.dedent( 26 | """You are a student taking a multiple choice test. 27 | You will be shown a question, followed by numbered multiple choice answers. 28 | Respond with the number corresponding to the best answer. 29 | """ 30 | )
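    # Note: the few-shot examples below are appended to lm outside of any
    # system/user/assistant block; some chat backends may reject raw text
    # outside a role context, which is presumably why the
    # fewshot_as_conversation.py variant (next file) renders the examples as
    # explicit user/assistant turns instead.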
29 | """ 30 | ) 31 | 32 | _logger.debug("Adding few shot examples") 33 | lm += "\nHere are some examples to help you:\n\n" 34 | for i, example in enumerate(fewshot_examples): 35 | lm += f"Example {i}\n" 36 | lm += example["question"] + "\n" 37 | for j, choice in enumerate(example["choices"]): 38 | lm += f"{j} : {choice}\n" 39 | lm += f"Correct Answer: {example['correct_answer']}\n\n" 40 | 41 | lm += "The question you need to answer will be shown next.\n\n" 42 | 43 | with user(): 44 | lm += question + "\n" 45 | for i, choice in enumerate(choices): 46 | lm += f"{i} : {choice}\n" 47 | lm += "Correct Answer: " 48 | 49 | with assistant(): 50 | lm += select([str(i) for i in range(len(choices))], name="string_choice") 51 | 52 | return lm 53 | 54 | 55 | def guidance_generation( 56 | lm: guidance.models.Chat, 57 | input: Dict[str, Any], 58 | common: list[dict[str, Any]] | None = None, 59 | ) -> Dict[str, Any]: 60 | _logger.debug("Starting guidance_generation") 61 | assert common is None, "Unexpected common data" 62 | result = lm + few_shot_multiple_choice( 63 | question=input["question"], 64 | choices=input["choices"], 65 | fewshot_examples=input["fewshot_examples"], 66 | ) 67 | 68 | _logger.debug(f"Result: {result}") 69 | 70 | result = dict(fewshot_choice=int(result["string_choice"])) 71 | return result 72 | -------------------------------------------------------------------------------- /guidance_programs/fewshot_as_conversation.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | import textwrap 4 | 5 | from typing import Any, Dict 6 | 7 | import guidance 8 | from guidance import gen, select, system, user, assistant 9 | 10 | 11 | _logger = logging.getLogger(__file__) 12 | _logger.setLevel(logging.INFO) 13 | _logger.addHandler(logging.StreamHandler(stream=sys.stdout)) 14 | 15 | 16 | @guidance 17 | def few_shot_multiple_choice( 18 | lm: guidance.models.Chat, 19 | question: str, 20 | choices: list[str], 21 | fewshot_examples: list[dict[str, any]], 22 | ): 23 | # Some general instruction to the model 24 | with system(): 25 | lm += textwrap.dedent( 26 | """You are a student taking a multiple choice test. 27 | You will be shown a question, followed by numbered multiple choice answers. 28 | Response with the number corresponding to the best answer. 
29 | """ 30 | ) 31 | 32 | for example in fewshot_examples: 33 | with user(): 34 | lm += example["question"] + "\n" 35 | for i, choice in enumerate(example["choices"]): 36 | lm += f"{i} : {choice}\n" 37 | lm += f"Correct Answer: " 38 | 39 | with assistant(): 40 | lm += str(example["correct_answer"]) 41 | 42 | with user(): 43 | lm += question + "\n" 44 | for i, choice in enumerate(choices): 45 | lm += f"{i} : {choice}\n" 46 | lm += "Correct Answer: " 47 | 48 | with assistant(): 49 | lm += select([str(i) for i in range(len(choices))], name="string_choice") 50 | 51 | return lm 52 | 53 | 54 | def guidance_generation( 55 | lm: guidance.models.Chat, 56 | input: Dict[str, Any], 57 | common: list[dict[str, Any]] | None = None, 58 | ) -> Dict[str, Any]: 59 | _logger.debug("Starting guidance_generation") 60 | assert common is None, "Unexpected common data" 61 | result = lm + few_shot_multiple_choice( 62 | question=input["question"], 63 | choices=input["choices"], 64 | fewshot_examples=input["fewshot_examples"], 65 | ) 66 | 67 | _logger.debug(f"Result: {result}") 68 | 69 | result = dict(fewshot_choice=int(result["string_choice"])) 70 | return result 71 | -------------------------------------------------------------------------------- /guidance_programs/fewshot_cot_as_conversation.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | import textwrap 4 | 5 | from typing import Any, Dict 6 | 7 | import guidance 8 | from guidance import gen, select, system, user, assistant 9 | 10 | 11 | _logger = logging.getLogger(__file__) 12 | _logger.setLevel(logging.INFO) 13 | _logger.addHandler(logging.StreamHandler(stream=sys.stdout)) 14 | 15 | 16 | ANSWER_KEY = "string_choice" 17 | COT_KEY = "explanation" 18 | 19 | 20 | @guidance 21 | def few_shot_cot_multiple_choice( 22 | lm: guidance.models.Chat, 23 | question: str, 24 | choices: list[str], 25 | fewshot_examples: list[dict[str, any]], 26 | ): 27 | # Some general instruction to the model 28 | with system(): 29 | lm += textwrap.dedent( 30 | """Answer the following multiple choice **Question**. 31 | First, think step by step and write an **Explanation** for reasoning through the question. 32 | Then, when prompted by the user for a **Final Answer**, analyze your explanation and write just the number of the correct answer. 
33 | Do not say the final answer until the user asks for it.""" 34 | ) 35 | 36 | for example in fewshot_examples: 37 | with user(): 38 | lm += "**Question**\n" 39 | lm += example["question"] + "\n" 40 | for i, choice in enumerate(example["choices"]): 41 | lm += f"{i} : {choice}\n" 42 | lm += "**Explanation**" 43 | 44 | with assistant(): 45 | lm += example["chain_of_thought"] 46 | 47 | with user(): 48 | lm += f"**Final Answer**" 49 | 50 | with assistant(): 51 | lm += str(example["correct_answer"]) 52 | 53 | with user(): 54 | lm += question + "\n" 55 | for i, choice in enumerate(choices): 56 | lm += f"{i} : {choice}\n" 57 | lm += "**Explanation**" 58 | 59 | with assistant(): 60 | lm += gen(name=COT_KEY) 61 | 62 | with user(): 63 | lm += f"**Final Answer**" 64 | 65 | with assistant(): 66 | lm += select([str(i) for i in range(len(choices))], name=ANSWER_KEY) 67 | 68 | return lm 69 | 70 | 71 | def guidance_generation( 72 | lm: guidance.models.Chat, 73 | input: Dict[str, Any], 74 | common: list[dict[str, Any]] | None = None, 75 | ) -> Dict[str, Any]: 76 | _logger.debug("Starting guidance_generation") 77 | assert common is None, "Unexpected common data" 78 | result = lm + few_shot_cot_multiple_choice( 79 | question=input["question"], 80 | choices=input["choices"], 81 | fewshot_examples=input["fewshot_examples"], 82 | ) 83 | 84 | _logger.debug(f"Result: {result}") 85 | 86 | result = dict(fewshot_choice=int(result[ANSWER_KEY]), fewshot_cot=result[COT_KEY]) 87 | return result 88 | -------------------------------------------------------------------------------- /guidance_programs/fewshot_cot_as_conversation_ensemble.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | import textwrap 4 | 5 | from typing import Any, Dict, Iterator, TypeVar 6 | 7 | import guidance 8 | from guidance import gen, select, system, user, assistant 9 | 10 | 11 | _logger = logging.getLogger(__file__) 12 | _logger.setLevel(logging.INFO) 13 | _logger.addHandler(logging.StreamHandler(stream=sys.stdout)) 14 | 15 | 16 | ANSWER_KEY = "string_choice" 17 | COT_KEY = "explanation" 18 | 19 | 20 | def validate_and_sort_swaps(swaps: list[int], line_len: int) -> list[int]: 21 | swap_set = set(swaps) 22 | assert len(swap_set) == len(swaps), f"Swaps not unique: {swaps}" 23 | for s in swaps: 24 | assert s - 1 not in swap_set, f"Swaps too close: {s} {swaps}" 25 | assert s + 1 not in swap_set, f"Swaps too close: {s} {swaps}" 26 | assert s >= 0, f"Negative swap: {s}" 27 | assert s < (line_len - 1), f"Swap too large: {s}" 28 | return list(sorted(swaps)) 29 | 30 | 31 | T = TypeVar("T") 32 | 33 | 34 | def apply_swaps(line: list[T], swaps: list[int]) -> list[T]: 35 | sorted_swaps = validate_and_sort_swaps(swaps, len(line)) 36 | 37 | i_swap = 0 38 | result = [] 39 | for i in range(len(line)): 40 | if i_swap < len(sorted_swaps) and i == sorted_swaps[i_swap]: 41 | result.append(line[sorted_swaps[i_swap] + 1]) 42 | elif i_swap < len(sorted_swaps) and i == sorted_swaps[i_swap] + 1: 43 | result.append(line[sorted_swaps[i_swap]]) 44 | i_swap += 1 45 | else: 46 | result.append(line[i]) 47 | return result 48 | 49 | 50 | def plain_hunt_generator(starting_line: list[T]) -> Iterator[T]: 51 | first_element = starting_line[0] 52 | swaps_A = list(range(0, len(starting_line) - (len(starting_line) % 2), 2)) 53 | swaps_B = list(range(1, len(starting_line) - 1, 2)) 54 | all_swaps = [swaps_A, swaps_B] 55 | current = [x for x in starting_line] 56 | line_count = 0 57 | yield current 58 | while True: 59 
| current = apply_swaps(current, all_swaps[line_count % len(all_swaps)]) 60 | yield current 61 | line_count += 1 62 | if current[0] == first_element: 63 | break 64 | 65 | 66 | NUM_PERMUTATIONS = 5 67 | 68 | 69 | @guidance 70 | def few_shot_cot_multiple_choice( 71 | lm: guidance.models.Chat, 72 | question: str, 73 | choices: list[str], 74 | fewshot_examples: list[dict[str, any]], 75 | permutation: list[int], 76 | ): 77 | # Some general instruction to the model 78 | with system(): 79 | lm += textwrap.dedent( 80 | """Answer the following multiple choice **Question**. 81 | First, think step by step and write an **Explanation** for reasoning through the question. 82 | Then, when prompted by the user for a **Final Answer**, analyze your explanation and write just the number of the correct answer. 83 | Do not say the final answer until the user asks for it.""" 84 | ) 85 | 86 | for example in fewshot_examples: 87 | with user(): 88 | lm += "**Question**\n" 89 | lm += example["question"] + "\n" 90 | for i, choice in enumerate(example["choices"]): 91 | lm += f"{i} : {choice}\n" 92 | lm += "**Explanation**" 93 | 94 | with assistant(): 95 | lm += example["chain_of_thought"] 96 | 97 | with user(): 98 | lm += f"**Final Answer**" 99 | 100 | with assistant(): 101 | lm += str(example["correct_answer"]) 102 | 103 | with user(): 104 | lm += question + "\n" 105 | for i in range(len(choices)): 106 | lm += f"{i}: {choices[permutation[i]]}\n" 107 | lm += "**Explanation**" 108 | 109 | with assistant(): 110 | lm += gen(name=COT_KEY) 111 | 112 | with user(): 113 | lm += f"**Final Answer**" 114 | 115 | with assistant(): 116 | lm += select([str(i) for i in range(len(choices))], name=ANSWER_KEY) 117 | 118 | return lm 119 | 120 | 121 | def guidance_generation( 122 | lm: guidance.models.Chat, 123 | input: Dict[str, Any], 124 | common: list[dict[str, Any]] | None = None, 125 | ) -> Dict[str, Any]: 126 | _logger.debug("Starting guidance_generation") 127 | assert common is None, "Unexpected common data" 128 | 129 | num_choices = len(input["choices"]) 130 | 131 | votes = [0 for _ in range(num_choices)] 132 | cots = [] 133 | generator = plain_hunt_generator(list(range(num_choices))) 134 | for i in range(NUM_PERMUTATIONS): 135 | current_permutation = next(generator) 136 | result = lm + few_shot_cot_multiple_choice( 137 | question=input["question"], 138 | choices=input["choices"], 139 | fewshot_examples=input["fewshot_examples"], 140 | permutation=current_permutation, 141 | ) 142 | _logger.debug(f"Result: {result}") 143 | cots.append(result[COT_KEY]) 144 | selected = int(result[ANSWER_KEY]) 145 | actual = current_permutation[selected] 146 | votes[actual] += 1 147 | 148 | _logger.debug(f"Votes: {votes}") 149 | # Check the votes 150 | max_idx = -1 151 | curr_max = 0 152 | for i in range(len(votes)): 153 | if votes[i] > curr_max: 154 | curr_max = votes[i] 155 | max_idx = i 156 | 157 | final_result = dict(fewshot_choice=max_idx, fewshot_cot=cots) 158 | _logger.debug(f"final_result: {final_result}") 159 | return final_result 160 | -------------------------------------------------------------------------------- /guidance_programs/simple_biosbias_json.py: -------------------------------------------------------------------------------- 1 | # This is a very naive guidance program for working on the "produce JSON" task 2 | # described by PASTA for the BIASBIOS dataset 3 | 4 | import logging 5 | import json 6 | import sys 7 | 8 | from textwrap import dedent 9 | from typing import Any, Dict 10 | 11 | import guidance 12 | from guidance 
import gen, select, system, user, assistant 13 | 14 | 15 | _logger = logging.getLogger(__file__) 16 | _logger.setLevel(logging.INFO) 17 | _logger.addHandler(logging.StreamHandler(stream=sys.stdout)) 18 | 19 | 20 | NAME_KEY = "given_name" 21 | OCCUPATION_KEY = "occupation" 22 | 23 | 24 | @guidance 25 | def zeroshot_biosbias_json(lm: guidance.models.Chat, short_biography: str): 26 | # Some general instruction to the model 27 | with system(): 28 | lm += dedent( 29 | """You will be shown a short biography of a person by the user. Answer their questions""" 30 | ) 31 | 32 | with user(): 33 | lm += short_biography 34 | 35 | with assistant(): 36 | lm += "OK" 37 | 38 | with user(): 39 | lm += f"What is the given name of the person? Only reply with their name and nothing else." 40 | 41 | with assistant(): 42 | lm += gen(name=NAME_KEY) 43 | 44 | with user(): 45 | lm += dedent( 46 | """Simply state the occupation of the person in lower case. 47 | For example, if a person were an orthodontist, you should state that they are a dentist. 48 | If the person were a freighter pilot, you should state that they are a pilot. 49 | Only reply with their occupation and nothing else.""" 50 | ) 51 | 52 | with assistant(): 53 | lm += gen(name=OCCUPATION_KEY) 54 | 55 | return lm 56 | 57 | 58 | def guidance_generation( 59 | lm: guidance.models.Chat, input: Dict[str, Any], common: Any = None 60 | ) -> Dict[str, Any]: 61 | _logger.debug("Starting guidance_generation") 62 | if common is not None: 63 | _logger.warn("Got unexpected 'common' argument") 64 | result = lm + zeroshot_biosbias_json(short_biography=input["context"]) 65 | 66 | result = dict(name=result[NAME_KEY], occupation=result[OCCUPATION_KEY]) 67 | return dict(model_answer=json.dumps(result)) 68 | -------------------------------------------------------------------------------- /guidance_programs/simple_biosbias_json_completion.py: -------------------------------------------------------------------------------- 1 | # This is a very naive guidance program for working on the "produce JSON" task 2 | # described by PASTA for the BIASBIOS dataset 3 | # This version is for a completion model 4 | 5 | import logging 6 | import json 7 | import sys 8 | 9 | from textwrap import dedent 10 | from typing import Any, Dict 11 | 12 | import guidance 13 | from guidance import gen 14 | 15 | 16 | _logger = logging.getLogger(__file__) 17 | _logger.setLevel(logging.INFO) 18 | _logger.addHandler(logging.StreamHandler(stream=sys.stdout)) 19 | 20 | 21 | @guidance 22 | def zeroshot_biosbias_json(lm: guidance.models.Model, short_biography: str): 23 | lm += dedent( 24 | f"""Instruct: You will be shown a short biography of a person. Extract their name and occupation, and return 25 | a JSON object containing these two keys. 
26 | 27 | Output: {short_biography} 28 | """ 29 | ) 30 | lm += gen(name="model_answer") 31 | 32 | return lm 33 | 34 | 35 | def guidance_generation( 36 | lm: guidance.models.Chat, input: Dict[str, Any], common: Any = None 37 | ) -> Dict[str, Any]: 38 | _logger.debug("Starting guidance_generation") 39 | if common is not None: 40 | _logger.warn("Got unexpected 'common' argument") 41 | result = lm + zeroshot_biosbias_json(short_biography=input["context"]) 42 | 43 | result = dict(model_answer=result["model_answer"]) 44 | return result 45 | -------------------------------------------------------------------------------- /guidance_programs/simple_biosbias_json_completion_v2.py: -------------------------------------------------------------------------------- 1 | # This is a very naive guidance program for working on the "produce JSON" task 2 | # described by PASTA for the BIASBIOS dataset 3 | # This version is for a completion model 4 | 5 | import logging 6 | import json 7 | import sys 8 | 9 | from textwrap import dedent 10 | from typing import Any, Dict 11 | 12 | import guidance 13 | from guidance import gen 14 | 15 | 16 | _logger = logging.getLogger(__file__) 17 | _logger.setLevel(logging.INFO) 18 | _logger.addHandler(logging.StreamHandler(stream=sys.stdout)) 19 | 20 | 21 | @guidance 22 | def zeroshot_biosbias_json(lm: guidance.models.Model, short_biography: str): 23 | lm += dedent( 24 | f"""Answer the occupation of {short_biography} and generate the answer as json format. 25 | Here is an example: {{"name": , "occupation": ,}}. 26 | Now generate the answer: 27 | """ 28 | ) 29 | _logger.info(f"lm: {lm}") 30 | lm += gen(name="model_answer") 31 | 32 | return lm 33 | 34 | 35 | def guidance_generation( 36 | lm: guidance.models.Chat, input: Dict[str, Any], common: Any = None 37 | ) -> Dict[str, Any]: 38 | _logger.debug("Starting guidance_generation") 39 | if common is not None: 40 | _logger.warn("Got unexpected 'common' argument") 41 | result = lm + zeroshot_biosbias_json(short_biography=input["context"]) 42 | 43 | result = dict(model_answer=result["model_answer"]) 44 | return result 45 | -------------------------------------------------------------------------------- /guidance_programs/zero_or_few_shot.py: -------------------------------------------------------------------------------- 1 | # This is a very naive guidance program for doing zero shot multiple choice questions 2 | # It is not what generated the reported results 3 | 4 | import logging 5 | import sys 6 | 7 | from typing import Any, Dict 8 | 9 | import guidance 10 | from guidance import gen, select, system, user, assistant 11 | 12 | 13 | _logger = logging.getLogger(__file__) 14 | _logger.setLevel(logging.INFO) 15 | _logger.addHandler(logging.StreamHandler(stream=sys.stdout)) 16 | 17 | 18 | @guidance 19 | def zero_shot_multiple_choice( 20 | lm: guidance.models.Chat, 21 | question: str, 22 | choices: list[str], 23 | common: list[dict[str, Any]] | None, 24 | ): 25 | # Some general instruction to the model 26 | with system(): 27 | lm += """You are a student taking a multiple choice test. 28 | You will be shown a question, followed by numbered multiple choice answers. 29 | Response with the number corresponding to the best answer. 
30 | """ 31 | 32 | if common: 33 | _logger.debug("Adding few shot examples") 34 | lm += "\nHere are some examples to help you:\n\n" 35 | for i, example in enumerate(common): 36 | lm += f"Example {i}\n" 37 | lm += example["question"] + "\n" 38 | for j, choice in enumerate(example["choices"]): 39 | lm += f"{j} : {choice}\n" 40 | lm += f"Correct Answer: {example['correct_answer']}\n\n" 41 | 42 | lm += "The question you need to answer will be shown next.\n\n" 43 | 44 | with user(): 45 | lm += question + "\n" 46 | for i, choice in enumerate(choices): 47 | lm += f"{i} : {choice}\n" 48 | lm += "Correct Answer: " 49 | 50 | with assistant(): 51 | lm += select([str(i) for i in range(len(choices))], name="string_choice") 52 | 53 | return lm 54 | 55 | 56 | def guidance_generation( 57 | lm: guidance.models.Chat, 58 | input: Dict[str, Any], 59 | common: list[dict[str, Any]] | None = None, 60 | ) -> Dict[str, Any]: 61 | _logger.debug("Starting guidance_generation") 62 | result = lm + zero_shot_multiple_choice( 63 | question=input["question"], choices=input["choices"], common=common 64 | ) 65 | 66 | _logger.debug(f"Result: {result}") 67 | 68 | result = dict(zero_or_few_shot_choice=int(result["string_choice"])) 69 | return result 70 | -------------------------------------------------------------------------------- /guidance_programs/zero_or_few_shot_alpha.py: -------------------------------------------------------------------------------- 1 | # This is a very naive guidance program for doing zero shot multiple choice questions 2 | # It is not what generated the reported results 3 | 4 | import logging 5 | import sys 6 | 7 | from typing import Any, Dict 8 | 9 | import guidance 10 | from guidance import select, system, user, assistant 11 | 12 | 13 | _logger = logging.getLogger(__file__) 14 | _logger.setLevel(logging.INFO) 15 | _logger.addHandler(logging.StreamHandler(stream=sys.stdout)) 16 | 17 | ASCII_OFFSET = ord("a") 18 | 19 | 20 | @guidance 21 | def zero_shot_multiple_choice( 22 | lm: guidance.models.Chat, 23 | question: str, 24 | choices: list[str], 25 | common: list[dict[str, Any]] | None, 26 | ): 27 | # Some general instruction to the model 28 | with system(): 29 | lm += """You are a student taking a multiple choice test. 30 | You will be shown a question, followed by numbered multiple choice answers. 31 | Response with the number corresponding to the best answer. 
32 | """ 33 | 34 | if common: 35 | _logger.debug("Adding few shot examples") 36 | lm += "\nHere are some examples to help you:\n\n" 37 | for i, example in enumerate(common): 38 | lm += f"Example {i}\n" 39 | lm += example["question"] + "\n" 40 | for j, choice in enumerate(example["choices"]): 41 | lm += f"{chr(j+ASCII_OFFSET)} : {choice}\n" 42 | lm += ( 43 | f"Correct Answer: {chr(example['correct_answer']+ASCII_OFFSET)}\n\n" 44 | ) 45 | 46 | lm += "The question you need to answer will be shown next.\n\n" 47 | 48 | with user(): 49 | lm += question + "\n" 50 | for i, choice in enumerate(choices): 51 | lm += f"{chr(i+ASCII_OFFSET)} : {choice}\n" 52 | lm += "Correct Answer: " 53 | 54 | with assistant(): 55 | lm += select( 56 | [chr(i + ASCII_OFFSET) for i in range(len(choices))], name="string_choice" 57 | ) 58 | 59 | return lm 60 | 61 | 62 | def guidance_generation( 63 | lm: guidance.models.Chat, 64 | input: Dict[str, Any], 65 | common: list[dict[str, Any]] | None = None, 66 | ) -> Dict[str, Any]: 67 | _logger.debug("Starting guidance_generation") 68 | result = lm + zero_shot_multiple_choice( 69 | question=input["question"], choices=input["choices"], common=common 70 | ) 71 | 72 | _logger.debug(f"Result: {result}") 73 | int_result = ord(result["string_choice"]) - ASCII_OFFSET 74 | 75 | result = dict(zero_or_few_shot_choice=int_result) 76 | return result 77 | -------------------------------------------------------------------------------- /guidance_programs/zero_or_few_shot_expert.py: -------------------------------------------------------------------------------- 1 | # This is a very naive guidance program for doing zero shot multiple choice questions 2 | # It is not what generated the reported results 3 | 4 | import logging 5 | import sys 6 | 7 | from typing import Any, Dict 8 | 9 | import guidance 10 | from guidance import gen, select, system, user, assistant 11 | 12 | 13 | _logger = logging.getLogger(__file__) 14 | _logger.setLevel(logging.INFO) 15 | _logger.addHandler(logging.StreamHandler(stream=sys.stdout)) 16 | 17 | 18 | @guidance 19 | def zero_shot_multiple_choice( 20 | lm: guidance.models.Chat, 21 | question: str, 22 | choices: list[str], 23 | common: list[dict[str, Any]] | None, 24 | ): 25 | # Some general instruction to the model 26 | with system(): 27 | lm += """You are an expert validating a multiple choice test. 28 | You will be shown a question, followed by numbered multiple choice answers. 29 | Use your vast expertise to respond with the number corresponding to the best answer. 
30 | """ 31 | 32 | if common: 33 | _logger.debug("Adding few shot examples") 34 | lm += "\nHere are some examples to help you:\n\n" 35 | for i, example in enumerate(common): 36 | lm += f"Example {i}\n" 37 | lm += example["question"] + "\n" 38 | for j, choice in enumerate(example["choices"]): 39 | lm += f"{j} : {choice}\n" 40 | lm += f"Correct Answer: {example['correct_answer']}\n\n" 41 | 42 | lm += "The question you need to answer will be shown next.\n\n" 43 | 44 | with user(): 45 | lm += question + "\n" 46 | for i, choice in enumerate(choices): 47 | lm += f"{i} : {choice}\n" 48 | lm += "Correct Answer: " 49 | 50 | with assistant(): 51 | lm += select([str(i) for i in range(len(choices))], name="string_choice") 52 | 53 | return lm 54 | 55 | 56 | def guidance_generation( 57 | lm: guidance.models.Chat, 58 | input: Dict[str, Any], 59 | common: list[dict[str, Any]] | None = None, 60 | ) -> Dict[str, Any]: 61 | _logger.debug("Starting guidance_generation") 62 | result = lm + zero_shot_multiple_choice( 63 | question=input["question"], choices=input["choices"], common=common 64 | ) 65 | 66 | _logger.debug(f"Result: {result}") 67 | 68 | result = dict(zero_or_few_shot_choice=int(result["string_choice"])) 69 | return result 70 | -------------------------------------------------------------------------------- /guidance_programs/zero_or_few_shot_fortran.py: -------------------------------------------------------------------------------- 1 | # This is a very naive guidance program for doing zero shot multiple choice questions 2 | # It is not what generated the reported results 3 | 4 | import logging 5 | import sys 6 | 7 | from typing import Any, Dict 8 | 9 | import guidance 10 | from guidance import select, system, user, assistant 11 | 12 | 13 | _logger = logging.getLogger(__file__) 14 | _logger.setLevel(logging.INFO) 15 | _logger.addHandler(logging.StreamHandler(stream=sys.stdout)) 16 | 17 | 18 | @guidance 19 | def zero_shot_multiple_choice( 20 | lm: guidance.models.Chat, 21 | question: str, 22 | choices: list[str], 23 | common: list[dict[str, Any]] | None, 24 | ): 25 | # Some general instruction to the model 26 | with system(): 27 | lm += """You are a student taking a multiple choice test. 28 | You will be shown a question, followed by numbered multiple choice answers. 29 | Response with the number corresponding to the best answer. 
30 | """ 31 | 32 | if common: 33 | _logger.debug("Adding few shot examples") 34 | lm += "\nHere are some examples to help you:\n\n" 35 | for i, example in enumerate(common): 36 | lm += f"Example {i}\n" 37 | lm += example["question"] + "\n" 38 | for j, choice in enumerate(example["choices"]): 39 | lm += f"{j+1} : {choice}\n" 40 | lm += f"Correct Answer: {example['correct_answer']+1}\n\n" 41 | 42 | lm += "The question you need to answer will be shown next.\n\n" 43 | 44 | with user(): 45 | lm += question + "\n" 46 | for i, choice in enumerate(choices): 47 | lm += f"{i+1} : {choice}\n" 48 | lm += "Correct Answer: " 49 | 50 | with assistant(): 51 | lm += select([str(i + 1) for i in range(len(choices))], name="string_choice") 52 | 53 | return lm 54 | 55 | 56 | def guidance_generation( 57 | lm: guidance.models.Chat, 58 | input: Dict[str, Any], 59 | common: list[dict[str, Any]] | None = None, 60 | ) -> Dict[str, Any]: 61 | _logger.debug("Starting guidance_generation") 62 | result = lm + zero_shot_multiple_choice( 63 | question=input["question"], choices=input["choices"], common=common 64 | ) 65 | 66 | _logger.debug(f"Result: {result}") 67 | int_result = int(result["string_choice"]) 68 | 69 | result = dict(zero_or_few_shot_choice=int_result - 1) 70 | return result 71 | -------------------------------------------------------------------------------- /guidance_programs/zero_shot_cot.py: -------------------------------------------------------------------------------- 1 | # This is a very naive guidance program for doing zero shot multiple choice questions 2 | # with chain-of-thought prompting 3 | # It is not what generated the reported results 4 | 5 | import logging 6 | import sys 7 | 8 | from textwrap import dedent 9 | from typing import Any, Dict 10 | 11 | import guidance 12 | from guidance import gen, select, system, user, assistant 13 | 14 | 15 | _logger = logging.getLogger(__file__) 16 | _logger.setLevel(logging.INFO) 17 | _logger.addHandler(logging.StreamHandler(stream=sys.stdout)) 18 | 19 | 20 | ANSWER_KEY = "string_choice" 21 | COT_KEY = "explanation" 22 | 23 | 24 | @guidance 25 | def zero_shot_cot_multiple_choice( 26 | lm: guidance.models.Chat, question: str, choices: list[str] 27 | ): 28 | # Some general instruction to the model 29 | with system(): 30 | lm += dedent( 31 | """Answer the following multiple choice **Question**. 32 | First, think step by step and write an **Explanation** for reasoning through the question. 33 | Then, when prompted by the user for a **Final Answer**, analyze your explanation and write just the number of the correct answer. 
34 | Do not say the final answer until the user asks for it.""" 35 | ) 36 | 37 | with user(): 38 | lm += "**Question**\n" 39 | lm += question + "\n" 40 | for i, choice in enumerate(choices): 41 | lm += f"{i} : {choice}\n" 42 | lm += "**Explanation**" 43 | 44 | with assistant(): 45 | lm += gen(name=COT_KEY) 46 | 47 | response_choices = [str(i) for i in range(len(choices))] 48 | with user(): 49 | lm += "**Final Answer**" 50 | 51 | with assistant(): 52 | lm += select(response_choices, name=ANSWER_KEY) 53 | 54 | return lm 55 | 56 | 57 | def guidance_generation( 58 | lm: guidance.models.Chat, input: Dict[str, Any], common: Any = None 59 | ) -> Dict[str, Any]: 60 | _logger.debug("Starting guidance_generation") 61 | if common is not None: 62 | _logger.warning("Got unexpected 'common' argument") 63 | result = lm + zero_shot_cot_multiple_choice( 64 | question=input["question"], choices=input["choices"] 65 | ) 66 | 67 | result = dict( 68 | zeroshot_cot_choice=int(result[ANSWER_KEY]), zeroshot_cot=result[COT_KEY] 69 | ) 70 | return result 71 | -------------------------------------------------------------------------------- /images/medprompt_radar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/promptbase/bf5d0dcc7f92650e50f351bf3878efbeb6dae385/images/medprompt_radar.png -------------------------------------------------------------------------------- /images/medprompt_sa_graphic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/promptbase/bf5d0dcc7f92650e50f351bf3878efbeb6dae385/images/medprompt_sa_graphic.png -------------------------------------------------------------------------------- /images/mmlu_accuracy_ablation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/promptbase/bf5d0dcc7f92650e50f351bf3878efbeb6dae385/images/mmlu_accuracy_ablation.png -------------------------------------------------------------------------------- /src/promptbase/__init__.py: -------------------------------------------------------------------------------- 1 | from .
import utils, gsm8k 2 | -------------------------------------------------------------------------------- /src/promptbase/__main__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | 4 | from promptbase.gsm8k import gsm8k 5 | from promptbase.humaneval import humaneval 6 | from promptbase.math import math 7 | from promptbase.drop import drop 8 | from promptbase.bigbench import bigbench 9 | from promptbase.bigbench.consts import BIGBENCH_SUBJECTS 10 | 11 | import promptbase.mmlu as mmlu 12 | 13 | logging.basicConfig(level=logging.INFO) 14 | 15 | VALID_DATASETS = ["gsm8k", "humaneval", "math", "drop", "bigbench", "mmlu"] 16 | 17 | 18 | def parse_arguments(): 19 | p = argparse.ArgumentParser() 20 | p.add_argument( 21 | "dataset", type=str, choices=VALID_DATASETS, help="Name of dataset to test" 22 | ) 23 | p.add_argument("--subject", type=str, help="Specify the subject for the dataset") 24 | p.add_argument( 25 | "--mode", 26 | type=str, 27 | default="chat", 28 | choices=["chat", "completion"], 29 | help="Prompting mode for the model (chat or completion)", 30 | ) 31 | p.add_argument( 32 | "--list_subjects", 33 | action="store_true", 34 | help="Lists the subjects available for the dataset", 35 | ) 36 | p.add_argument( 37 | "--overwrite", 38 | action="store_true", 39 | help="Overwrites the results of a previous run", 40 | ) 41 | return p.parse_args() 42 | 43 | 44 | def main(): 45 | args = parse_arguments() 46 | 47 | if args.list_subjects: 48 | if args.dataset == "bigbench": 49 | print(BIGBENCH_SUBJECTS) 50 | elif args.dataset == "mmlu": 51 | print("MMLU subjects correspond to the data files prepared by format_mmlu.py") 52 | else: 53 | print(f"Dataset {args.dataset} does not have subjects") 54 | return 55 | 56 | mode = args.mode 57 | 58 | if args.dataset == "gsm8k": 59 | gsm8k.generate() 60 | gsm8k.evaluate() 61 | elif args.dataset == "humaneval": 62 | humaneval.generate() 63 | humaneval.evaluate() 64 | elif args.dataset == "math": 65 | math.generate() 66 | math.evaluate() 67 | elif args.dataset == "drop": 68 | drop.generate() 69 | drop.evaluate() 70 | elif args.dataset == "bigbench": 71 | subject = args.subject if args.subject else "all" 72 | overwrite = args.overwrite 73 | bigbench.generate(subject, overwrite, mode) 74 | bigbench.evaluate(mode) 75 | elif args.dataset == "mmlu": 76 | # Note that to run the MMLU tests, you will need to download the 77 | # data, and then use the 'format_mmlu.py' script 78 | mmlu.generate(args.subject) 79 | mmlu.evaluate_all(args.subject) 80 | else: 81 | raise ValueError(f"Bad dataset: {args.dataset}") 82 | 83 | 84 | if __name__ == "__main__": 85 | main() 86 | -------------------------------------------------------------------------------- /src/promptbase/bigbench/__init__.py: -------------------------------------------------------------------------------- 1 | from .bigbench import generate, evaluate 2 | from .consts import * -------------------------------------------------------------------------------- /src/promptbase/bigbench/bigbench.py: -------------------------------------------------------------------------------- 1 | from .bigbench_cot import process_cot 2 | from .bigbench_score import score 3 | from .bigbench_answer import process_answers 4 | from promptbase.bigbench.consts import BIGBENCH_SUBJECTS 5 | 6 | def generate(subject: str, overwrite: bool, mode="chat"): 7 | if subject != "all" and subject not in BIGBENCH_SUBJECTS: 8 | print(f"Invalid subject: {subject}") 9 | return 10 | print(f"Running BigBench generation for subject {subject}") 11 | 
process_cot(subject, overwrite, mode) 12 | process_answers(subject, overwrite, mode) 13 | 14 | def evaluate(mode="chat"): 15 | score(mode) -------------------------------------------------------------------------------- /src/promptbase/bigbench/bigbench_score.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | import os 4 | import pathlib 5 | 6 | from promptbase.utils.helpers import get_datasets_path, get_generations_path, get_standard_logger_for_file 7 | 8 | _logger = get_standard_logger_for_file(__file__) 9 | 10 | def score(api_type="chat"): 11 | ground_truth_dir = get_datasets_path() / "BigBench" / "bbh" 12 | if not ground_truth_dir.exists(): 13 | _logger.error(f"Ground truth directory {ground_truth_dir} does not exist") 14 | return 15 | answer_dir = get_generations_path() / "bigbench" / "answers" / api_type 16 | 17 | score_dict = {} 18 | 19 | # loop through json files in ground truth path 20 | for gt_filename in os.listdir(ground_truth_dir): 21 | if not gt_filename.endswith(".json"): 22 | _logger.warning("Skipping non-json file: " + gt_filename) 23 | continue 24 | _logger.info("Processing file: " + gt_filename) 25 | fname_base = gt_filename.split(".")[0] 26 | answer_path = answer_dir / f"{fname_base}_{api_type}_answers.json" 27 | if not os.path.exists(answer_path): 28 | _logger.warning("Answer file does not exist: %s", answer_path) 29 | continue 30 | with open(ground_truth_dir / gt_filename) as f: 31 | ground_truth_data = json.load(f) 32 | with open(answer_path) as f: 33 | answer_data = json.load(f) 34 | 35 | _logger.info("Number of ground truth examples: %s", str(len(ground_truth_data["examples"]))) 36 | _logger.info("Number of answer examples: %s", str(len(answer_data))) 37 | if len(ground_truth_data["examples"]) != len(answer_data): 38 | _logger.warning("Number of examples does not match for file: %s", gt_filename) 39 | continue 40 | 41 | correct_count = 0 42 | total_count = len(ground_truth_data["examples"]) 43 | 44 | for i, gt in enumerate(ground_truth_data["examples"]): 45 | if gt["target"] == answer_data[i]["completion"]: 46 | correct_count += 1 47 | 48 | score_dict[fname_base] = { 49 | "correct": correct_count, 50 | "total": total_count, 51 | "score": correct_count / total_count, 52 | } 53 | 54 | total_correct = 0 55 | total_overall = 0 56 | for k, v in score_dict.items(): 57 | total_correct += v["correct"] 58 | total_overall += v["total"] 59 | 60 | score_dict["overall"] = { 61 | "correct": total_correct, 62 | "total": total_overall, 63 | "score": total_correct / total_overall, 64 | } 65 | 66 | print("Final scores:", score_dict) 67 | 68 | # save as json file 69 | timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") 70 | score_dir = get_generations_path() / "bigbench" / "scores" 71 | score_dir.mkdir(parents=True, exist_ok=True) 72 | with open(score_dir / f"bigbench_scores_{api_type}_{timestamp}.json", "w") as f: 73 | json.dump(score_dict, f) 74 | -------------------------------------------------------------------------------- /src/promptbase/bigbench/consts.py: -------------------------------------------------------------------------------- 1 | BIGBENCH_SUBJECTS = [ 2 | "boolean_expressions", 3 | "causal_judgement", 4 | "date_understanding", 5 | "disambiguation_qa", 6 | "dyck_languages", 7 | "formal_fallacies", 8 | "geometric_shapes", 9 | "hyperbaton", 10 | "logical_deduction_five_objects", 11 | "logical_deduction_seven_objects", 12 | "logical_deduction_three_objects", 13 | "movie_recommendation", 14 | 
"multistep_arithmetic_two", 15 | "navigate", 16 | "object_counting", 17 | "penguins_in_a_table", 18 | "reasoning_about_colored_objects", 19 | "ruin_names", 20 | "salient_translation_error_detection", 21 | "snarks", 22 | "sports_understanding", 23 | "temporal_sequences", 24 | "tracking_shuffled_objects_five_objects", 25 | "tracking_shuffled_objects_seven_objects", 26 | "tracking_shuffled_objects_three_objects", 27 | "web_of_lies", 28 | "word_sorting", 29 | ] 30 | -------------------------------------------------------------------------------- /src/promptbase/datasets/put_datasets_here.txt: -------------------------------------------------------------------------------- 1 | Datasets will be loaded from this folder. Put your datasets here as instructed in the readme. -------------------------------------------------------------------------------- /src/promptbase/drop/__init__.py: -------------------------------------------------------------------------------- 1 | from .drop import generate, evaluate 2 | -------------------------------------------------------------------------------- /src/promptbase/format/format_hellaswag.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import csv 4 | import uuid 5 | 6 | 7 | train_path = "../datasets/hellaswag_train.jsonl" 8 | test_path = "../datasets/hellaswag_test.jsonl" 9 | val_path = "../datasets/hellaswag_val.jsonl" 10 | 11 | 12 | def process_jsonl_file(file_path, split_name): 13 | questions = [] 14 | with open(file_path, "r", encoding="utf-8") as file: 15 | lines = file.readlines() 16 | for i, json_line in enumerate(lines): 17 | question_data = json.loads(json_line) 18 | answer_choices = { 19 | chr(65 + i): answer for i, answer in enumerate(question_data["endings"]) 20 | } 21 | 22 | question_dict = { 23 | "question_number": f"{question_data['ind']}", 24 | "question": question_data["ctx"], 25 | "correct_answer": chr(65 + question_data["label"]), 26 | "has_media": False, # Assuming no media in MMLU dataset 27 | "dataset": "hellaswag", 28 | "id": f"{uuid.uuid4()}", 29 | "split": split_name, 30 | "extra": question_data[ 31 | "activity_label" 32 | ], # Any extra information, if needed 33 | "answer_choices": answer_choices, 34 | } 35 | questions.append(question_dict) 36 | return questions 37 | 38 | 39 | train_questions = process_jsonl_file(train_path, "train") 40 | # test_questions = process_jsonl_file(test_path, "test") 41 | val_questions = process_jsonl_file(val_path, "val") 42 | 43 | print("Train questions: ", len(train_questions)) 44 | # print("Test questions: ", len(test_questions)) 45 | print("Val questions: ", len(val_questions)) 46 | 47 | # all_questions = train_questions + test_questions + val_questions 48 | all_questions = train_questions + val_questions 49 | 50 | with open("hellaswag.json", "w", encoding="utf-8") as json_file: 51 | json.dump(all_questions, json_file, ensure_ascii=False, indent=4) 52 | -------------------------------------------------------------------------------- /src/promptbase/format/format_mmlu.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import csv 3 | import json 4 | import pathlib 5 | import uuid 6 | 7 | 8 | ALL_QUESTIONS = "all_questions.json" 9 | ALL_FILENAME_FORMAT = "mmlu_all_{0}.json" 10 | 11 | 12 | def parse_arguments(): 13 | parser = argparse.ArgumentParser() 14 | 15 | parser.add_argument("--mmlu_csv_dir", type=pathlib.Path, required=True) 16 | parser.add_argument("--output_path", 
type=pathlib.Path, required=True) 17 | 18 | args = parser.parse_args() 19 | 20 | return args 21 | 22 | 23 | # Function to process a single CSV file and return a list of question dictionaries 24 | def process_csv_file(file_path: pathlib.Path, split_name: str): 25 | questions = [] 26 | with open(file_path, "r", encoding="utf-8") as file: 27 | csv_reader = csv.reader(file) 28 | for i, row in enumerate(csv_reader): 29 | question_text, *answers, correct_answer = row 30 | answer_choices = {chr(65 + i): answer for i, answer in enumerate(answers)} 31 | test_name = file_path.stem 32 | 33 | question_dict = { 34 | "question_number": f"{test_name}_{i}", 35 | "question": question_text, 36 | "correct_answer": correct_answer, 37 | "has_media": False, # Assuming no media in MMLU dataset 38 | "dataset": "MMLU", 39 | "id": f"{uuid.uuid4()}", 40 | "split": split_name, 41 | "extra": test_name, # Any extra information, if needed 42 | "answer_choices": answer_choices, 43 | } 44 | questions.append(question_dict) 45 | return questions 46 | 47 | 48 | def main(mmlu_csv_dir: pathlib.Path, output_path: pathlib.Path): 49 | assert mmlu_csv_dir.is_dir() 50 | assert output_path.is_dir() 51 | all_questions = [] 52 | 53 | splits = dict( 54 | train=mmlu_csv_dir / "auxiliary_train", 55 | dev=mmlu_csv_dir / "dev", 56 | test=mmlu_csv_dir / "test", 57 | val=mmlu_csv_dir / "val", 58 | ) 59 | all_questions_split = dict(train=[], dev=[], test=[], val=[]) 60 | 61 | for split_name, split_path in splits.items(): 62 | for csv_file in split_path.iterdir(): 63 | questions = process_csv_file(csv_file, split_name) 64 | print(json.dumps(questions[3], indent=4, ensure_ascii=False)) 65 | file_path = output_path / f"mmlu_{csv_file.stem}.json" 66 | print(f"Writing {file_path}") 67 | with open( 68 | file_path, 69 | "w", 70 | encoding="utf-8", 71 | ) as json_file: 72 | json.dump(questions, json_file, ensure_ascii=False, indent=4) 73 | all_questions.extend(questions) 74 | all_questions_split[split_name].extend(questions) 75 | 76 | print("Writing all questions") 77 | with open(output_path / ALL_QUESTIONS, "w", encoding="utf-8") as json_file: 78 | json.dump(all_questions, json_file, ensure_ascii=False, indent=4) 79 | 80 | print("Writing all question splits") 81 | for split_name, split_questions in all_questions_split.items(): 82 | file_path = output_path / ALL_FILENAME_FORMAT.format(split_name) 83 | print(f"Writing out all questions for split {split_name} to {file_path}") 84 | with open(file_path, "w", encoding="utf-8") as json_file: 85 | json.dump(split_questions, json_file, ensure_ascii=False, indent=4) 86 | 87 | 88 | if __name__ == "__main__": 89 | args = parse_arguments() 90 | main(args.mmlu_csv_dir, args.output_path) 91 | -------------------------------------------------------------------------------- /src/promptbase/generations/README.md: -------------------------------------------------------------------------------- 1 | # About 2 | 3 | This directory is used to store any generated output from language models. For example, intermediate results from chain-of-thought prompting could be stored here. 
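4 | 5 | For example, the gsm8k runner appends one JSON record per solved problem to `gsm8k.jsonl` in this directory. A minimal sketch (not part of the pipeline; it assumes that file has already been produced by `gsm8k.generate()`) of loading the records back: 6 | 7 | ```python 8 | import json 9 | from pathlib import Path 10 | 11 | # Each line holds one record of the form {"idx": ..., "answer": ..., "proof": ...} 12 | gen_file = Path("src/promptbase/generations/gsm8k.jsonl") 13 | with gen_file.open() as f: 14 | rows = [json.loads(line) for line in f] 15 | print(f"Loaded {len(rows)} generations") 16 | ```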
-------------------------------------------------------------------------------- /src/promptbase/gsm8k/__init__.py: -------------------------------------------------------------------------------- 1 | from .gsm8k import generate, evaluate 2 | -------------------------------------------------------------------------------- /src/promptbase/gsm8k/gsm8k.py: -------------------------------------------------------------------------------- 1 | # generate.py 2 | import json 3 | import pathlib 4 | 5 | from promptbase.utils.helpers import text_completion, run_batch_jobs 6 | from datasets import load_dataset 7 | 8 | 9 | my_path = pathlib.Path(__file__).parent.resolve() 10 | 11 | 12 | def extract_substrings(text): 13 | parts = text.split(r"\boxed") 14 | matches = [] 15 | 16 | for part in parts[1:]: # Skip the first part as it does not start with \boxed 17 | if part.startswith("{"): 18 | brace_level = 0 19 | for i, char in enumerate(part): 20 | if char == "{": 21 | brace_level += 1 22 | elif char == "}": 23 | brace_level -= 1 24 | if brace_level == 0: 25 | matches.append( 26 | part[1:i] 27 | ) # Extract the content inside the braces 28 | break 29 | 30 | if len(matches) == 0: 31 | return None 32 | 33 | return matches[0] 34 | 35 | 36 | def solve(task): 37 | idx, prompt = task 38 | 39 | for retry in range(5): 40 | response = text_completion( 41 | prompt=prompt, 42 | max_tokens=1200 + retry * 500, 43 | log_file="gsm8k.log", 44 | max_trial=5, 45 | temperature=retry * 0.5, 46 | model="gpt-4-1106-preview", 47 | ) 48 | 49 | if not response["success"]: 50 | answer = None 51 | text = None 52 | else: 53 | text = response["text"] 54 | answer = extract_substrings(text) 55 | 56 | if answer: 57 | break 58 | 59 | if answer: 60 | with open(my_path.parent / "generations" / "gsm8k.jsonl", "a") as f: 61 | f.write(json.dumps({"idx": idx, "answer": answer, "proof": text}) + "\n") 62 | 63 | 64 | def generate(): 65 | ds = load_dataset("gsm8k", "main")["test"] 66 | tasks = [] 67 | for idx, row in enumerate(ds): 68 | prompt = ( 69 | row["question"] 70 | + "\nPlease end your solution with Answer: $\\boxed{number}$ where number is the numerical answer without unit.\nSolution:" 71 | ) 72 | tasks.append((idx, prompt)) 73 | run_batch_jobs(solve, tasks, max_thread=20) 74 | 75 | 76 | def evaluate(): 77 | rows = [] 78 | ds = load_dataset("gsm8k", "main")["test"] 79 | with open(my_path.parent / "generations" / "gsm8k.jsonl", "r") as f: 80 | for line in f: 81 | row = json.loads(line) 82 | row["answer"] = extract_substrings(row["proof"]) 83 | rows.append(row) 84 | 85 | def check_answer(official, student): 86 | return abs(official - student) < (abs(official) + 1e-6) * 1e-6 87 | 88 | n_correct = 0 89 | for i, row in enumerate(rows): 90 | idx = row["idx"] 91 | gpt_answer = None 92 | official_answer = None 93 | official_answer = ds[idx]["answer"].split("####")[1].replace(",", "") 94 | 95 | try: 96 | gpt_answer = ( 97 | row["answer"].replace(",", "").split("\n## ")[0].replace("\\%", "") 98 | ) 99 | 100 | if gpt_answer == official_answer: 101 | n_correct += 1 102 | continue 103 | 104 | official_float = float(official_answer) 105 | gpt_float = float(gpt_answer) 106 | n_correct += check_answer(official_float, gpt_float) 107 | continue 108 | except Exception: 109 | with open("parse.txt", "a") as f: 110 | f.write("=" * 80 + "\n") 111 | f.write(f"idx:{idx}\n") 112 | f.write("official_answer:" + str(official_answer) + "\n") 113 | f.write("gpt_answer:" + str(gpt_answer) + "\n") 114 | f.write("-" * 40 + "\n") 115 | f.write(ds[idx]["answer"] + "\n") 116 | 
f.write("-" * 40 + "\n") 117 | f.write(row["proof"] + "\n") 118 | 119 | print( 120 | "n_correct:", 121 | n_correct, 122 | "n_total:", 123 | len(rows), 124 | "accuracy:", 125 | n_correct / len(rows), 126 | ) 127 | -------------------------------------------------------------------------------- /src/promptbase/humaneval/__init__.py: -------------------------------------------------------------------------------- 1 | from .humaneval import generate, evaluate 2 | -------------------------------------------------------------------------------- /src/promptbase/humaneval/humaneval.py: -------------------------------------------------------------------------------- 1 | # Generate 2 | import hashlib 3 | import json 4 | import math 5 | import re 6 | import traceback 7 | from promptbase import utils 8 | from datasets import load_dataset 9 | from collections import Counter 10 | 11 | _logger = utils.helpers.get_standard_logger_for_file(__file__) 12 | 13 | prompts = [] 14 | chat_mode = False 15 | ds = None 16 | 17 | 18 | def fetch_data(): 19 | _logger.info("Starting fetch_data") 20 | global prompts 21 | global ds 22 | # data_file = utils.fetch_dataset_blob("humaneval") 23 | ds = load_dataset("openai_humaneval") # Dataset.from_file(data_file 24 | _logger.info("Dataset downloaded; starting processing of test split") 25 | for row in ds["test"]: 26 | if chat_mode: 27 | prompt = ( 28 | row["prompt"] 29 | + "\n\nPlease complete the function above together with the function header." 30 | ) 31 | else: 32 | prompt = ( 33 | "## Here is the official solution of one python exercise via only one function:\n" 34 | + row["prompt"] 35 | ) # 118 36 | # prompt = f"## Solution of the coding exercise `{row['entry_point']}`:\n" + row["prompt"] 37 | # prompt = f"## Official solution of the coding exercise `{row['entry_point']}`:\n" + row["prompt"] 38 | prompts.append(prompt) 39 | _logger.info("Completed fetch_data") 40 | 41 | 42 | def extract_substrings(text): 43 | return re.findall(r"```(.*?)```", text, re.DOTALL) 44 | 45 | 46 | def solve(idx): 47 | global prompts 48 | _logger.info(f"Starting solve for index {idx}") 49 | 50 | for retry in range(5): 51 | response = utils.helpers.text_completion( 52 | prompt=prompts[idx], 53 | max_tokens=600, 54 | log_file="human_eval.log", 55 | max_trial=5, 56 | temperature=retry * 0.05, 57 | model="gpt-4-1106-preview", 58 | stop=["##"], 59 | ) 60 | 61 | if not response["success"]: 62 | code = None 63 | else: 64 | if chat_mode: 65 | text = response["text"] 66 | substrings = extract_substrings(text) 67 | substrings = [s for s in substrings if "def " in s] 68 | code = max(substrings, key=len, default="") if substrings else None 69 | else: 70 | code = prompts[idx] + response["text"] 71 | 72 | if code: 73 | break 74 | 75 | if code: 76 | with open("gpt4.jsonl", "a") as f: 77 | f.write(json.dumps({"idx": idx, "code": code}) + "\n") 78 | 79 | 80 | def generate(): 81 | fetch_data() 82 | _logger.info("Running bach jobs") 83 | utils.helpers.run_batch_jobs(solve, range(len(prompts)), max_thread=20) 84 | 85 | 86 | def evaluate(): 87 | _logger.info("Starting evaluate") 88 | # open gpt4.jsonl 89 | rows = [] 90 | with open("gpt4.jsonl") as f: 91 | for line in f: 92 | rows.append(json.loads(line)) 93 | 94 | env = { 95 | "hashlib": hashlib, 96 | "re": re, 97 | "Counter": Counter, 98 | "factorial": math.factorial, 99 | } 100 | n_success = 0 101 | for row in rows: 102 | code = row["code"] 103 | if code.startswith("python"): 104 | code = code[6:] 105 | code = ( 106 | code.split("# Test")[0] 107 | .split("# 
test")[0] 108 | .split("\nprint")[0] 109 | .split("\nassert")[0] 110 | .split("# END")[0] 111 | .split("<|ipynb_marker|>")[0] 112 | .split("\n# Check your answer")[0] 113 | ) 114 | code += ( 115 | "\n" 116 | + ds["test"][row["idx"]]["test"] 117 | + "\ncheck(" 118 | + ds["test"][row["idx"]]["entry_point"] 119 | + ")" 120 | ) 121 | 122 | try: 123 | exec(code, env, env) 124 | n_success += 1 125 | except Exception as e: 126 | err = traceback.format_exc() 127 | if "AssertionError" not in err: 128 | print(traceback.format_exc()) 129 | print(code) 130 | print("=" * 100) 131 | n_success += 0 132 | 133 | _logger.info(f"Number of successes: {n_success}") 134 | _logger.info(f"Number of rows: {len(rows)}") 135 | _logger.info(f"Success rate: {n_success / len(rows)}") 136 | -------------------------------------------------------------------------------- /src/promptbase/math/__init__.py: -------------------------------------------------------------------------------- 1 | from .math import generate, evaluate 2 | -------------------------------------------------------------------------------- /src/promptbase/mmlu/__init__.py: -------------------------------------------------------------------------------- 1 | # from .problem_utils import * 2 | 3 | from .generate import generate 4 | from .eval import evaluate_all 5 | -------------------------------------------------------------------------------- /src/promptbase/mmlu/analyze.py: -------------------------------------------------------------------------------- 1 | from .problem_utils import * 2 | 3 | test_problem = "MMLU_test_chemistry" 4 | 5 | subjects = ( 6 | """Astronomy 7 | College Biology 8 | College Chemistry 9 | College Mathematics 10 | College Medicine 11 | College Physics 12 | Conceptual Physics 13 | Econometrics 14 | Electrical Engineering 15 | Elementary Mathematics 16 | High School Biology 17 | High School Chemistry 18 | High School Macroeconomics 19 | High School Mathematics 20 | High School Microeconomics 21 | High School Physics 22 | High School Statistics 23 | Machine Learning 24 | Professional Accounting 25 | Professional Medicine""".replace( 26 | " ", "_" 27 | ) 28 | .lower() 29 | .split("\n") 30 | ) 31 | 32 | 33 | # Load problems 34 | cot_rows_list = [ 35 | load_problems(f"expt/{test_problem}/cot_knn/result"), 36 | load_problems(f"expt/{test_problem}/cot_via_knn/result"), 37 | ] 38 | 39 | 40 | def merge_ds(dataset_list): 41 | cot_rows = {} 42 | for rows_set in dataset_list: 43 | for row in rows_set: 44 | if row["question_number"] not in cot_rows: 45 | cot_rows[row["question_number"]] = copy.copy(row) 46 | cot_rows[row["question_number"]]["expt"] = {} 47 | if "expt" in row and row["expt"]: 48 | for key in row["expt"]: 49 | cot_rows[row["question_number"]]["expt"][key] = row["expt"][key] 50 | return list(cot_rows.values()) 51 | 52 | 53 | cot_rows = merge_ds(cot_rows_list) 54 | logprobs_rows = load_problems(f"expt/{test_problem}/logprobs5/result") 55 | 56 | if cot_rows: 57 | print("Number of COT:", len(cot_rows[42]["expt"].keys())) 58 | if logprobs_rows: 59 | print("Number of logprobs:", len(logprobs_rows[42]["expt"].keys())) 60 | 61 | # Merge datasets 62 | rows = {} 63 | for row in cot_rows: 64 | key = row["question_number"] 65 | if key not in rows: 66 | rows[key] = {} 67 | rows[key]["question"] = row["question"] 68 | rows[key]["subject"] = row["extra"].replace("_test", "").replace("_dev", "") 69 | rows[key]["answer"] = row["correct_answer"] 70 | expts = row["expt"] 71 | rows[key]["cot"] = [ 72 | expts[expt]["answer"] 73 | for expt in expts 
74 | if expts[expt].get("answer", None) is not None 75 | ] 76 | 77 | for row in logprobs_rows: 78 | key = row["question_number"] 79 | if key not in rows: 80 | rows[key] = {} 81 | rows[key]["question"] = row["question"] 82 | rows[key]["subject"] = row["extra"].replace("_test", "").replace("_dev", "") 83 | rows[key]["answer"] = row["correct_answer"] 84 | expts = row["expt"] 85 | rows[key]["logprobs"] = [ 86 | expts[expt]["scores"] 87 | for expt in expts 88 | if expts[expt].get("scores", None) is not None 89 | ] 90 | 91 | rows = list(rows.values()) 92 | 93 | n_correct = 0 94 | for row in rows: 95 | if "cot" in row: 96 | x = Counter(row["cot"]) 97 | for k in x: 98 | x[k] /= len(row["cot"]) 99 | else: 100 | x = {} 101 | 102 | if "logprobs" in row: 103 | for e in row["logprobs"]: 104 | for k in e: 105 | if k not in x: 106 | x[k] = 0 107 | if row["subject"] in subjects: 108 | x[k] += 0.5 * e[k] / len(row["logprobs"]) 109 | else: 110 | x[k] += 2.0 * e[k] / len(row["logprobs"]) 111 | 112 | if x: 113 | selected_answer = max(x, key=x.get) 114 | if row["answer"] == selected_answer: 115 | n_correct += 1 116 | else: 117 | n_correct += 1 / 4 118 | 119 | print("Number of questions:", len(rows)) 120 | print("Number of correct answers:", n_correct) 121 | print("Accuracy:", n_correct / len(rows)) 122 | -------------------------------------------------------------------------------- /src/promptbase/mmlu/embed_problems.py: -------------------------------------------------------------------------------- 1 | import argparse, gzip, json 2 | from tqdm import tqdm 3 | from .eval import * 4 | from .utils import * 5 | 6 | 7 | def embed_file(file_name): 8 | ds = load_json_file(file_name) 9 | questions = [row["question"] for row in ds] 10 | embeddings = embed_batch(questions) 11 | for row, embedding in tqdm(zip(ds, embeddings)): 12 | row["embedding"] = embedding 13 | 14 | with gzip.open(file_name + ".gz", "wt") as f: 15 | json.dump(ds, f) 16 | 17 | 18 | if __name__ == "__main__": 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument("question_file", help="The JSON file containing user answers") 21 | args = parser.parse_args() 22 | 23 | embed_file(args.question_file) 24 | -------------------------------------------------------------------------------- /src/promptbase/mmlu/eval.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import json 3 | import pathlib 4 | from typing import Any 5 | import sklearn.metrics as skm 6 | 7 | from .mmlu_paths import mmlu_data_dir, mmlu_generations_dir 8 | 9 | API_DATA_KEYS = ["api_calls", "tokens_used_prompt", "tokens_used_completion"] 10 | 11 | 12 | def load_json_file(file_path): 13 | if type(file_path) is str: 14 | file_path = pathlib.Path(file_path) 15 | 16 | gz_path = file_path.with_suffix(file_path.suffix + ".gz") 17 | print(f"Looking for: {gz_path}") 18 | if gz_path.exists(): 19 | print("Found zip file") 20 | with gzip.open(gz_path, "rt") as f: 21 | return json.load(f) 22 | else: 23 | print("Found regular file") 24 | with open(file_path, "r", encoding="utf-8") as f: 25 | return json.load(f) 26 | 27 | 28 | def eval_answers(all_questions) -> dict[str, Any]: 29 | y_true = [] 30 | y_pred = [] 31 | answer_counts = [] 32 | skipped = 0 33 | for item in all_questions: 34 | answer_voting = dict() 35 | for response in item["expt"].values(): 36 | if response["answer"] in answer_voting: 37 | answer_voting[response["answer"]] += 1 38 | else: 39 | answer_voting[response["answer"]] = 1 40 | best_answer = "" 41 | best_count = 0 42 | for k, v in answer_voting.items(): 43 | 
if v > best_count: 44 | best_answer, best_count = k, v 45 | if not best_answer: 46 | skipped += 1 47 | continue 48 | y_true.append(item["correct_answer"]) 49 | answer_counts.append(len(answer_voting)) 50 | y_pred.append(best_answer) 51 | 52 | result = dict() 53 | result["count"] = len(y_true) 54 | result["accuracy"] = skm.accuracy_score(y_true, y_pred) 55 | result["skipped"] = skipped 56 | result["mean_different_answers"] = sum(answer_counts) / len(answer_counts) 57 | 58 | return result 59 | 60 | 61 | def evaluate_all(dataset_name: str): 62 | dev_problem = f"mmlu_{dataset_name}_val" 63 | test_problem = f"mmlu_{dataset_name}_test" 64 | 65 | print(f"Starting evaluation of {dataset_name}") 66 | 67 | variants = { 68 | "cot": dev_problem, 69 | "cot_knn": test_problem, 70 | "cot_via_knn": test_problem, 71 | } 72 | 73 | for k, v in variants.items(): 74 | print(f"Evaluating {v}") 75 | # Note that output we have in the directory appears to be a gzip 76 | all_generated_data = load_json_file( 77 | mmlu_generations_dir / "expt" / v / k / "result.json" 78 | ) 79 | stats = eval_answers(all_generated_data) 80 | print(f"{json.dumps(stats, indent=4)}") 81 | print("Evaluations complete") 82 | -------------------------------------------------------------------------------- /src/promptbase/mmlu/generate.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pathlib 3 | 4 | from . import MMLU 5 | from .embed_problems import embed_file 6 | from .mmlu_paths import mmlu_data_dir, mmlu_generations_dir 7 | 8 | model_name = "gpt-4-1106-preview" 9 | 10 | 11 | def generate(dataset_name: str): 12 | dev_problem = f"mmlu_{dataset_name}_val" 13 | test_problem = f"mmlu_{dataset_name}_test" 14 | 15 | if not os.path.exists(str(mmlu_data_dir / dev_problem) + ".json.gz"): 16 | embed_file(str(mmlu_data_dir / dev_problem) + ".json") 17 | 18 | if not os.path.exists(str(mmlu_data_dir / test_problem) + ".json.gz"): 19 | embed_file(str(mmlu_data_dir / test_problem) + ".json") 20 | 21 | MMLU.generate_solutions_without_rank( 22 | dev_problem, run_name=f"{dev_problem}/cot", model=model_name 23 | ) 24 | MMLU.run_cot_without_rank( 25 | test_problem, 26 | run_name=f"{test_problem}/cot_knn", 27 | examples=str( 28 | mmlu_generations_dir / "expt" / dev_problem / "cot" / "result" 29 | ), 30 | mode="knn", 31 | num_examples=5, 32 | num_repeat=5, 33 | max_thread=50, 34 | model=model_name, 35 | ) 36 | MMLU.run_cot_without_rank( 37 | test_problem, 38 | run_name=f"{test_problem}/cot_via_knn", 39 | examples=str( 40 | mmlu_generations_dir / "expt" / test_problem / "cot_knn" / "result" 41 | ), 42 | mode="knn", 43 | num_examples=5, 44 | num_repeat=15, 45 | max_thread=50, 46 | model=model_name, 47 | ) 48 | if False: 49 | # Logprobs not currently available in OpenAI API 50 | MMLU.run_logprobs( 51 | test_problem, 52 | run_name=f"{test_problem}/logprobs5", 53 | num_examples=5, 54 | num_repeat=10, 55 | max_thread=50, 56 | model=model_name, 57 | ) 58 | -------------------------------------------------------------------------------- /src/promptbase/mmlu/mmlu_paths.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | _my_path = pathlib.Path(__file__).parent.resolve() 4 | 5 | mmlu_data_dir = _my_path.parent / "datasets" / "mmlu" 6 | 7 | mmlu_generations_dir = _my_path.parent / "generations" 8 | -------------------------------------------------------------------------------- /src/promptbase/mmlu/print_results.py: 
-------------------------------------------------------------------------------- 1 | from .problem_utils import * 2 | import gzip 3 | 4 | 5 | def load_problems(file_name): 6 | with gzip.open(file_name + ".json.gz", "rt") as f: 7 | problems = json.loads(f.read()) 8 | return problems 9 | 10 | 11 | # Load problems from the file 12 | problems = load_problems(f"expt/final/MMLU_medical_genetics/logits0/result") 13 | 14 | # Compute statistics on the loaded problems 15 | summary = compute_statistics(problems) 16 | print(summary) 17 | -------------------------------------------------------------------------------- /src/promptbase/mmlu/test.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import os 3 | 4 | from . import MMLU 5 | from .embed_problems import * 6 | from .problem_utils import * 7 | 8 | dev_name = "MMLU_dev" 9 | test_name = "MMLU_test" 10 | dev_name = "MMLU_chemistry" 11 | test_name = "MMLU_chemistry" 12 | 13 | # embed questions 14 | if not os.path.exists(problem_files[dev_name] + ".json.gz"): 15 | embed_file(problem_files[dev_name] + ".json") 16 | 17 | if not os.path.exists(problem_files[test_name] + ".json.gz"): 18 | embed_file(problem_files[test_name] + ".json") 19 | 20 | # generate cot solutions on dev set 21 | if not os.path.exists(f"mmlu/expt/{dev_name}/cot/result.json.gz"): 22 | MMLU.run_cot(dev_name, example_selector="random", max_thread=50) 23 | 24 | # generate cot solutions on test set via dev set 25 | if not os.path.exists(f"mmlu/expt/{test_name}/cot_merged.json.gz"): 26 | 27 | def generate_test_cot_initial(index): 28 | MMLU.run_cot( 29 | test_name, 30 | run_name=f"{test_name}/cot_{index}", 31 | examples=f"expt/{dev_name}/cot/result", 32 | num_repeat=1, 33 | max_thread=30, 34 | num_examples=5, 35 | example_selector="knn", 36 | model="gpt-4-1106-preview", 37 | ) 38 | return "Done!" 39 | 40 | with multiprocessing.Pool(processes=5) as pool: 41 | results = pool.map(generate_test_cot_initial, range(5)) 42 | 43 | cot_rows1 = load_problems(f"expt/{test_name}/cot_0/result") 44 | cot_rows2 = load_problems(f"expt/{test_name}/cot_1/result") 45 | cot_rows3 = load_problems(f"expt/{test_name}/cot_2/result") 46 | cot_rows4 = load_problems(f"expt/{test_name}/cot_3/result") 47 | cot_rows5 = load_problems(f"expt/{test_name}/cot_4/result") 48 | 49 | def merge_ds(dataset_list): 50 | cot_rows = {} 51 | for rows_set in dataset_list: 52 | for row in rows_set: 53 | if row["question_number"] not in cot_rows: 54 | cot_rows[row["question_number"]] = copy.copy(row) 55 | cot_rows[row["question_number"]]["expt"] = {} 56 | for key in row["expt"]: 57 | cot_rows[row["question_number"]]["expt"][key] = row["expt"][key] 58 | return list(cot_rows.values()) 59 | 60 | cot_rows = merge_ds([cot_rows1, cot_rows2, cot_rows3, cot_rows4, cot_rows5]) 61 | save_problems(f"expt/{test_name}/cot_merged", cot_rows) 62 | 63 | 64 | # solutions on test set 65 | 66 | 67 | ## generate cot solutions on test set via test set 68 | def generate_test_cot(index): 69 | MMLU.run_cot_without_rank( 70 | test_name, 71 | run_name=f"{test_name}/cot_via_test_{index}_v8", 72 | examples=f"mmlu/expt/{test_name}/cot_merged", 73 | num_repeat=1, 74 | max_thread=30, 75 | num_examples=5, 76 | mode="knn", 77 | model="gpt-4-1106-preview", 78 | ) 79 | return "Done!" 
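# The pool below mirrors the earlier block: it runs five generate_test_cot passes in parallel, each writing an independent chain-of-thought run (cot_via_test_{index}_v8) over the test set; the per-run answers can then be combined by majority voting downstream.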
80 | 81 | 82 | with multiprocessing.Pool(processes=15) as pool: 83 | results = pool.map(generate_test_cot, range(5)) 84 | -------------------------------------------------------------------------------- /src/promptbase/mmlu/tune_parameter/analyze.py: -------------------------------------------------------------------------------- 1 | import json, copy 2 | import numpy as np 3 | from tqdm import tqdm 4 | from collections import Counter 5 | 6 | with open("summary.json") as f: 7 | data = json.load(f) 8 | 9 | 10 | def calculate_result(rows): 11 | best_weight = 0 12 | best_acc = 0 13 | for weight in np.arange(0, 2, 0.01): 14 | n_correct = 0 15 | n_cnt = 0 16 | for row in rows: 17 | x = copy.deepcopy(row["cot"]) 18 | for k in row["logprob"]: 19 | x[k] = x.get(k, 0) + weight * row["logprob"][k] 20 | 21 | selected_answer = max(x, key=x.get) 22 | n_cnt += 1 23 | if row["answer"] == selected_answer: 24 | n_correct += 1 25 | acc = n_correct / len(rows) 26 | if acc > best_acc: 27 | best_acc = acc 28 | best_weight = weight 29 | return best_acc, best_weight 30 | 31 | 32 | # 89.93 33 | subject_weight = 0.5 34 | non_subject_weight = 1.2 35 | subject_list = [] 36 | total_correct = 0 37 | total_count = 0 38 | if 1: 39 | for subject in data: 40 | print(subject) 41 | rows = data[subject] 42 | 43 | # use best threshold to process each row 44 | for i, row in tqdm(enumerate(rows)): 45 | rows_i = [item for index, item in enumerate(rows) if index != i] 46 | acc, weight = calculate_result(rows_i) 47 | x = row["cot"] 48 | 49 | for k in row["logprob"]: 50 | x[k] = x.get(k, 0) + weight * row["logprob"][k] 51 | selected_answer = max(x, key=x.get) 52 | total_count += 1 53 | if row["answer"] == selected_answer: 54 | total_correct += 1 55 | 56 | if 0: 57 | for subject in tqdm(data): 58 | rows = data[subject] 59 | subject_acc = calculate_result(rows, subject_weight) 60 | non_subject_acc = calculate_result(rows, non_subject_weight) 61 | if subject_acc > non_subject_acc: 62 | weight = subject_weight 63 | else: 64 | weight = non_subject_weight 65 | 66 | # use best threshold to process each row 67 | for i, row in enumerate(rows): 68 | x = row["cot"] 69 | for k in row["logprob"]: 70 | x[k] = x.get(k, 0) + weight * row["logprob"][k] 71 | selected_answer = max(x, key=x.get) 72 | total_count += 1 73 | if row["answer"] == selected_answer: 74 | total_correct += 1 75 | 76 | print(f"total_correct: {total_correct}") 77 | print(f"total_count: {total_count}") 78 | print(f"accuracy: {total_correct / total_count}") 79 | # save best_thresholds to best_thresholds.json 80 | with open("best_thresholds.json", "w") as f: 81 | json.dump(subject_list, f, indent=4) 82 | -------------------------------------------------------------------------------- /src/promptbase/mmlu/tune_parameter/summarize.py: -------------------------------------------------------------------------------- 1 | import json 2 | import copy 3 | 4 | from .problem_utils import * 5 | 6 | cot_rows1 = load_problems("mmlu/expt/final/MMLU_test/cot_without_rank_knn_5_v0/result") 7 | cot_rows2 = load_problems( 8 | "mmlu/expt/final/MMLU_test/cot_without_rank_knn_5_gpt-4-1106-preview/result" 9 | ) 10 | cot_rows3 = load_problems( 11 | "mmlu/expt/final/MMLU_test/cot_without_rank_knn_5_gpt-4-1106-preview/result" 12 | ) 13 | logprobs_rows1 = load_problems("mmlu/expt/final/MMLU_test/logprobs5_MMLU_dev/result") 14 | logprobs_rows2 = load_problems("mmlu/expt/final/MMLU_test/logprobs5_MMLU_test/result") 15 | 16 | 17 | 18 | 19 | def merge_ds(dataset_list): 20 | cot_rows
= {} 21 | for rows_set in dataset_list: 22 | for row in rows_set: 23 | if row["question_number"] not in cot_rows: 24 | cot_rows[row["question_number"]] = copy.deepcopy(row) 25 | else: 26 | cot_rows[row["question_number"]]["expt"].update(row["expt"]) 27 | return list(cot_rows.values()) 28 | 29 | 30 | cot_rows = merge_ds([cot_rows1, cot_rows2, cot_rows3]) 31 | logprobs_rows = merge_ds([logprobs_rows1, logprobs_rows2]) 32 | 33 | rows = {} 34 | for row in cot_rows: 35 | key = row["question_number"] 36 | if key not in rows: 37 | rows[key] = {} 38 | rows[key]["question"] = row["question"] 39 | rows[key]["subject"] = row["extra"].replace("_test", "") 40 | rows[key]["answer"] = row["correct_answer"] 41 | expts = row["expt"] 42 | rows[key]["cot"] = [ 43 | expts[expt]["answer"] 44 | for expt in expts 45 | if expts[expt].get("answer", None) is not None 46 | ] 47 | 48 | for row in logprobs_rows: 49 | key = row["question_number"] 50 | if key not in rows: 51 | rows[key] = {} 52 | rows[key]["question"], rows[key]["answer"] = row["question"], row["correct_answer"] 53 | rows[key]["subject"] = row["extra"].replace("_test", "") 54 | expts = row["expt"] 55 | rows[key]["logprobs"] = [ 56 | expts[expt]["scores"] 57 | for expt in expts 58 | if expts[expt].get("scores", None) is not None 59 | ] 60 | 61 | rows = list(rows.values()) 62 | 63 | data = {} 64 | for row in rows: 65 | if row["subject"] not in data: 66 | data[row["subject"]] = [] 67 | 68 | scores_logprob = {} 69 | for e in row.get("logprobs", []): 70 | for k in e: 71 | scores_logprob[k] = scores_logprob.get(k, 0) + e[k] / len(row["logprobs"]) 72 | 73 | scores_cot = Counter(row.get("cot", [])) 74 | for k in scores_cot: 75 | scores_cot[k] /= len(row["cot"]) 76 | 77 | data[row["subject"]].append( 78 | {"logprob": scores_logprob, "cot": scores_cot, "answer": row["answer"]} 79 | ) 80 | 81 | # save data to summary.json 82 | with open("summary.json", "w") as f: 83 | json.dump(data, f, indent=4) 84 | -------------------------------------------------------------------------------- /src/promptbase/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from . import helpers 2 | -------------------------------------------------------------------------------- /src/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name="promptbase", 5 | version="0.1.0", 6 | author="Microsoft", 7 | description="Advanced prompting for advanced intelligence", 8 | # url="https://github.com/repo", # Replace with the URL of your project 9 | packages=find_packages(), 10 | install_requires=[ 11 | "datasets", 12 | "tqdm", 13 | "openai", 14 | "python-liquid", 15 | "GitPython", 16 | "torch", 17 | "scikit-learn", 18 | ], 19 | python_requires=">=3.10", # The X | Y annotations in the guidance programs require 3.10 20 | ) 21 | --------------------------------------------------------------------------------
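As a closing usage sketch (hypothetical; it assumes the package has been installed from `src/` and that the OpenAI credentials used by `promptbase.utils.helpers` are configured), the command-line entry point in `__main__.py` is equivalent to:

```python
# Roughly what `python -m promptbase gsm8k` does:
from promptbase.gsm8k import gsm8k

gsm8k.generate()  # prompts gpt-4-1106-preview, appending records to generations/gsm8k.jsonl
gsm8k.evaluate()  # scores the extracted \boxed{...} answers against the GSM8K test split
```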