├── .github └── workflows │ └── python-publish.yml ├── .gitignore ├── README.md ├── agora-banner.png ├── pyproject.toml ├── setup.py └── src └── distiller ├── __init__.py ├── __main__.py ├── agents ├── abstract.py └── huggingface.py ├── base.py ├── cli.py ├── conversations.py ├── outputs.py └── texts.py /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | deploy: 20 | 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - uses: actions/checkout@v3 25 | - name: Set up Python 26 | uses: actions/setup-python@v3 27 | with: 28 | python-version: '3.x' 29 | - name: Install dependencies 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install build 33 | - name: Build package 34 | run: python -m build 35 | - name: Publish package 36 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 37 | with: 38 | user: __token__ 39 | password: ${{ secrets.PYPI_API_TOKEN }} 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 | 
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # macOS local attributes
156 | .DS_Store
157 | 
158 | # Local experimentation
159 | lab
160 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Agora
2 | Agora is a new open-source Multi-Modality AI Research Organization devoted to advancing Humanity!
3 | 
4 | ![Agora banner](agora-banner.png)
5 | 
6 | [Join the Agora Discord and contribute to this project or 40+ others!](https://discord.gg/qUtxnK2NMf)
7 | 
8 | 
9 | # The Distiller
10 | 
11 | `The Distiller` is a command-line interface and a Python library for running inference on Large Language Models (LLMs) to generate textual or multi-modal datasets.
12 | 
13 | Possible use cases include:
14 | 
15 | - Constructing textual corpora to train/fine-tune detectors for content written by AI.
16 | - Collecting datasets of LLM-produced conversations for research purposes, analysis of AI performance/impact/ethics, etc.
17 | - Automating a task that an LLM can handle over large amounts of input text. For example, using GPT-3 to summarize 1000 paragraphs with a single CLI command.
18 | - Leveraging the APIs of especially large LLMs to produce diverse texts for a specific task and then fine-tune a smaller model on them.
19 | 
20 | > This tool is distributed freely and doesn't imply any restrictions on the downstream use cases.
21 | > However, you should make sure to follow the **Terms of use** of the backend APIs (OpenAI, Cohere, Petals, etc.) in your specific context.
22 | 
23 | 
24 | # Agora
25 | The Distiller is brought to you by Agora, an open-source multi-modality AI research organization devoted to advancing Humanity!
26 | 27 | 28 | [Join our Discord and help work on this project or 30+ others!](https://discord.gg/qUtxnK2NMf) 29 | 30 | 31 | ## Installation 32 | 33 | ``` 34 | pip install distiller 35 | ``` 36 | 37 | Most of the generation features rely on third-party APIs. Install their respective packages: 38 | 39 | ``` 40 | pip install openai cohere petals 41 | ``` 42 | 43 | ## Usage examples 44 | 45 | ### Inference LLMs at scale 46 | 47 | ```bash 48 | export OPENAI_API_KEY="..." 49 | export COHERE_API_KEY="..." 50 | 51 | distiller texts \ 52 | --prompt "If {country} was a planet in the Star Wars universe it would be called" \ 53 | --backend "openai|text-davinci-003" \ 54 | --backend "cohere|medium" \ 55 | --temperature 0.9 \ 56 | --option country Germany \ 57 | --option country France \ 58 | --max-length 50 \ 59 | --num-samples 1 \ 60 | --single-file 61 | ``` 62 | 63 | The command above should produce a dataset file with 4 texts. Each possible combination of options is used for each of the backend LLMs. Check out the [CLI reference](#cli-reference) for more details. A dataset file looks like this: 64 | 65 | ```json 66 | [ 67 | { 68 | "sample_id": 0, 69 | "backend": "openai|text-davinci-003", 70 | "max_length": 50, 71 | "temperature": 0.9, 72 | "country": "Germany", 73 | "prompt": "If Germany was a planet in the Star Wars universe it would be called", 74 | "output": " Euron. The planet would be home to a powerful and diverse species of aliens, known as the Eurons, that have evolved to a higher level of understanding and technological advancement compared to many of the other planets in the galaxy. The planet would be" 75 | }, 76 | { 77 | "sample_id": 0, 78 | "backend": "openai|text-davinci-003", 79 | "max_length": 50, 80 | "temperature": 0.9, 81 | "country": "France", 82 | "prompt": "If France was a planet in the Star Wars universe it would be called", 83 | "output": " The Empire of Liberty. It would be a peaceful, democratic planet with a strong sense of justice and equality. The planet would be home to many different species of aliens but the majority of its population would be humans. It would have a strong military and" 84 | }, 85 | { 86 | "sample_id": 0, 87 | "backend": "cohere|medium", 88 | "max_length": 50, 89 | "temperature": 0.9, 90 | "country": "Germany", 91 | "prompt": "If Germany was a planet in the Star Wars universe it would be called", 92 | "output": " the Hoth of the universe.\nAfter the Soviet invasion of Eastern Europe and the subsequent Western anti-Soviet sentiment, Germany's arms manufacturers went into hyperdrive and the country churned out guns at a frightening pace. By the early 1930" 93 | }, 94 | ... 95 | ] 96 | ``` 97 | 98 | Alternatively, you can use our [`TextsGenerator`](https://github.com/radi-cho/distiller/blob/main/distiller/texts.py#L27) to produce texts in Python. 99 | 100 | ### Generate conversations with the ChatGPT API 101 | 102 | ```bash 103 | export OPENAI_API_KEY="..." 104 | distiller conversations \ 105 | --length 4 \ 106 | --length 5 \ 107 | --agent1 "You're a shop assistant in a pet store. Answer to customer questions politely." \ 108 | --agent2 "You're a customer in a pet store. You should behave like a human. You want to buy {n} pets. Ask questions about the pets in the store." \ 109 | --temperature 0.1 \ 110 | --temperature 0.2 \ 111 | --option n 2 \ 112 | --option n 3 \ 113 | --path dataset 114 | ``` 115 | 116 | The command above should produce a dataset directory with 16 conversations saved as JSON files. 
You can also choose to save all of them to a single file (see `--single-file`). But why 16? Because `num_samples` dialogues are generated for each possible combination of parameters (conversation length, LLM temperature, and custom prompt options). A dataset item looks like this:
117 | 
118 | ```json
119 | {
120 |     "length": 5,
121 |     "temperature": 0.1,
122 |     "n": "2",
123 |     "agent1": "You're a shop assistant in a pet store. Answer to customer questions politely. When the whole conversation is over end with \"Goodbye\".",
124 |     "agent2": "You're a customer in a pet store. You should behave like a human. You want to buy 2 pets. Ask questions about the pets in the store. When the whole conversation is over end with \"Goodbye\".",
125 |     "utterances": [
126 |         [
127 |             "agent1",
128 |             "Hello! How can I assist you today?"
129 |         ],
130 |         [
131 |             "agent2",
132 |             "Hi! I'm interested in buying two pets. Can you tell me what kind of pets you have available in the store?"
133 |         ],
134 |         [
135 |             "agent1",
136 |             "Certainly! We have a variety of pets available, including dogs, cats, birds, fish, hamsters, guinea pigs, rabbits, and reptiles. Is there a specific type of pet you're interested in?"
137 |         ],
138 |         [
139 |             "agent2",
140 |             "I'm not sure yet. Can you tell me more about the dogs and cats you have available? What breeds do you have?"
141 |         ],
142 |         ...
143 |     ]
144 | }
145 | ```
146 | 
147 | You can also use our [`ConversationsGenerator`](https://github.com/radi-cho/distiller/blob/main/distiller/conversations.py#L43) to produce conversations with the `gpt-3.5-turbo` API programmatically.
148 | 
149 | ```python
150 | from distiller import ConversationsGenerator, ConversationsGeneratorConfig, DatasetWriter
151 | 
152 | dataset_writer = DatasetWriter() # single_file=True
153 | 
154 | generator_config = ConversationsGeneratorConfig(agent1="You're a shop assistant in a pet store. Answer to customer questions politely.",
155 |                                                 agent2="You're a customer in a pet store. You should behave like a human. You want to buy {n} pets. Ask questions about the pets in the store.",
156 |                                                 num_samples=2,
157 |                                                 interruption="length",
158 |                                                 lengths=[4, 5],
159 |                                                 temperatures=[0.1, 0.2],
160 |                                                 options=[("n", "2"), ("n", "3")])
161 | 
162 | conversations_generator = ConversationsGenerator(generator_config)
163 | 
164 | for conversation in conversations_generator:
165 |     dataset_writer.save_intermediate_result(conversation)
166 | ```
167 | 
168 | ## Contributing
169 | 
170 | > Still under active development.
171 | 
172 | Contributions are highly appreciated. The following features are currently under development:
173 | - [x] `distiller conversations` - Make two ChatGPT agents talk with one another and record the conversation history.
174 | - [x] `distiller texts` - Inference different LLMs with a given input prompt and generate multiple outputs by varying parameters.
175 | - [ ] `distiller transformations` - Apply a list of transformations to a list of texts. For example, summarizing a list of texts for a {child | university student | PhD candidate} to understand.
176 | - [ ] Support more backend LLMs.
177 | 
178 | To set up a local development environment:
179 | 
180 | ```bash
181 | git clone -b Epoch2 https://github.com/kyegomez/The-Distiller.git
182 | cd The-Distiller
183 | pip install -e .
184 | ```
185 | 
186 | ## CLI Reference
187 | 
188 | ```
189 | distiller [OPTIONS] COMMAND [ARGS]...
190 | 
191 |   Command line interface that generates datasets with LLMs.
192 | 
193 | Options:
194 |   --help  Show this message and exit.
195 | 196 | Commands: 197 | conversations Produce conversations between two gpt-3.5-turbo agents... 198 | texts Inference multiple LLMs at scale. 199 | ``` 200 | 201 | ``` 202 | distiller texts [OPTIONS] 203 | 204 | Inference multiple LLMs at scale. 205 | 206 | Options: 207 | -p, --prompt TEXT Input prompt. [required] 208 | -b, --backend TEXT LLM APIs to use as backends. Use 209 | "backend|model_name" notation. For example: 210 | "openai|text-davinci-003". 211 | -l, --max-length INTEGER Maximum number of tokens to generate for each 212 | prompt. 213 | -t, --temperature FLOAT Possible temperature values for the backend 214 | language model. 215 | -n, --num-samples INTEGER Number of conversations for each configuration. 216 | -o, --option ... Values for additional options denoted in your 217 | prompts by {OPTION_NAME}. 218 | -f, --path PATH Where to save the dataset. Either a file or a 219 | directory (folder). 220 | -s, --single-file Either save the whole dataset to a single file 221 | or create multiple files. 222 | --help Show this message and exit. 223 | ``` 224 | 225 | - You can specify multiple variants for the following options: `--length`, `--temperature`, `--num-samples`, `--option`. A dataset item will be generated for each possible combination of the supplied values. 226 | - Each `--option` provided must be formatted as follows: `--option option_name "Some option value"`. 227 | - Currently supported backends: GPT-3 model variants by [OpenAI](https://openai.com/blog/openai-api), the language models by [Cohere](https://pypi.org/project/cohere/), BLOOM through the [Petals API](https://petals.ml/). 228 | 229 | ``` 230 | distiller conversations [OPTIONS] 231 | 232 | Produce conversations between two gpt-3.5-turbo agents with given roles. 233 | 234 | Options: 235 | 236 | -k, --openai-api-key TEXT OpenAI API key. 237 | -hf, --hf-api-token TEXT Hugging Face API token. 238 | -a, --agent1 TEXT Agent role description. [required] 239 | -b, --agent2 TEXT Agent role description. [required] 240 | -m, --model-agent [openai|hf] Backend to use for the AI agents. Can be 'openai' for OpenAI's gpt-3.5-turbo or 'hf' for a Hugging Face model. Defaults to 'openai'. 241 | -r, --repo-id TEXT Hugging Face model repo id, required if 'hf' is chosen as model-agent. 242 | -i, --interruption [length|end_phrase] 243 | Interruption mode. 244 | -e, --end-phrase TEXT Interrupt after this phrase is outputted by 245 | one of the agents. 246 | -d, --end-agent [agent1|agent2|both] 247 | In which agent's messages to look for the 248 | end phrase. 249 | -l, --length INTEGER Maximum number of utterances for each agent. 250 | A conversation sample will be generated for 251 | each length. 252 | -t, --temperature FLOAT Possible temperature values for the backend 253 | language model. 254 | -n, --num-samples INTEGER Number of conversations for each 255 | configuration. 256 | -o, --option ... Values for additional options denoted in 257 | your prompts by {OPTION_NAME}. 258 | -f, --path PATH Where to save the dataset. Either a file or 259 | a directory (folder). 260 | -s, --single-file Either save the whole dataset to a single 261 | file or create multiple files. 262 | --help Show this message and exit. 263 | ``` 264 | 265 | - The length parameter specifies how many utterances each agent should make. A length of 4 typically produces 8 utterances in total. 266 | - You can specify either `length` (default) or `end_phrase` as an interruption strategy. 
When using `end_phrase`, a conversation will be interrupted once the `--end-phrase` appears in the messages of the `--end-agent` (which can also be set to both agents). In this case, the lengths provided will be treated as maximum conversation lengths.
267 | 
268 | 
269 | # Roadmap
270 | 
271 | * Conversation transformations like chain-of-thought prompting, conversational summaries, step-by-step breakdowns of conversations, and explain-your-rationale prompts
272 | 
273 | * Dynamically scale up agent instances (2, 4, 100, 1000), all conversing with each other
274 | 
275 | * Add a polymorphic Hugging Face function to use any model on the Hugging Face Hub
276 | 
277 | * Add a CLI argument for which Hugging Face model to use
278 | 
279 | * Integrate tree-of-thoughts prompting
280 | 
281 | * Integrate multi-modality models from Hugging Face
282 | 
283 | * Integrate multi-modal conversations => store images in a folder, then agent2 looks up the image address
284 | 
285 | * Enable multi-modal dataset generation using generate and critique steps
--------------------------------------------------------------------------------
/agora-banner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Agora-Lab-AI/The-Distiller/0a4ad46c65e102e90696851c5dbcfa1923161f17/agora-banner.png
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ['setuptools>=61.0']
3 | build-backend = 'setuptools.build_meta'
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup
2 | from setuptools import find_packages
3 | 
4 | with open("README.md", "r", encoding = "utf-8") as readme:
5 |     long_description = readme.read()
6 | 
7 | setup(
8 |     name="The Distiller",
9 |     version="0.0.2",
10 |     description="Generate textual and conversational datasets with LLMs.",
11 |     long_description = long_description,
12 |     long_description_content_type = "text/markdown",
13 |     author="Kye Gomez",
14 |     author_email="Kye@apac.ai",
15 |     url="https://github.com/kyegomez/The-Distiller",
16 |     keywords=["dataset", "llm", "langchain", "openai"],
17 |     package_dir={"": "src"},
18 |     packages = find_packages(where="src"),
19 |     install_requires=[
20 |         "langchain>=0.0.113",
21 |         "click>=8.1"
22 |     ],
23 |     entry_points={
24 |         "console_scripts": [
25 |             "distiller=distiller:distiller"
26 |         ],
27 |     },
28 | )
29 | 
--------------------------------------------------------------------------------
/src/distiller/__init__.py:
--------------------------------------------------------------------------------
1 | from .cli import distiller
2 | from .conversations import *
3 | from .texts import *
4 | from .outputs import *
5 | 
--------------------------------------------------------------------------------
/src/distiller/__main__.py:
--------------------------------------------------------------------------------
1 | from .cli import main
2 | 
3 | main()
4 | 
--------------------------------------------------------------------------------
/src/distiller/agents/abstract.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | 
3 | class AbstractLanguageModel(ABC):
4 |     @abstractmethod
5 |     def generate_thoughts(self, state, k):
6 |         pass
7 | 
8 |     @abstractmethod
9 |     def evaluate_states(self, states):
10 |         pass
11 | 
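12 | 
13 | # Illustrative note (not part of the original file): a concrete backend only has
14 | # to implement the two abstract methods above. The class below is a hypothetical,
15 | # minimal sketch; its name and scoring scheme are placeholders.
16 | #
17 | # class DummyLanguageModel(AbstractLanguageModel):
18 | #     def generate_thoughts(self, state, k):
19 | #         # Produce k candidate "thoughts" derived from the current state.
20 | #         return [f"{state} -> candidate {i}" for i in range(k)]
21 | #
22 | #     def evaluate_states(self, states):
23 | #         # Assign every candidate state a neutral score of 0.5.
24 | #         return {state: 0.5 for state in states}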
--------------------------------------------------------------------------------
/src/distiller/agents/huggingface.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | 
3 | 
4 | from transformers import AutoModelForCausalLM, AutoTokenizer
5 | from .abstract import AbstractLanguageModel  # relative import so the module resolves inside the package
6 | 
7 | 
8 | class HuggingLanguageModel(AbstractLanguageModel):
9 |     def __init__(self, model_name, model_tokenizer=None, verbose=False):
10 |         self.model = AutoModelForCausalLM.from_pretrained(model_name)
11 |         self.tokenizer = AutoTokenizer.from_pretrained(model_tokenizer or model_name)
12 |         self.verbose = verbose
13 | 
14 |     def generate_thoughts(self, state, k, max_length=100):
15 |         state_text = ' '.join(state)
16 |         prompt = f"Write down your observations in format 'Observation:xxxx', then write down your thoughts in format 'Thoughts:xxxx'. Given the current state of reasoning: '{state_text}', generate {k} coherent solutions to achieve {state_text}"
17 | 
18 |         if self.verbose:
19 |             print(f"Generating thoughts for state: {state_text}")
20 | 
21 |         try:
22 |             inputs = self.tokenizer(prompt, return_tensors="pt")
23 |             outputs = self.model.generate(**inputs, max_length=max_length, num_return_sequences=k)
24 |             thoughts = [self.tokenizer.decode(output, skip_special_tokens=True) for output in outputs]
25 |         except Exception as e:
26 |             if self.verbose:
27 |                 print(f"Error generating thoughts for state: {state_text}")
28 |                 print(f"Error: {e}")
29 |             thoughts = []
30 | 
31 |         return thoughts
32 | 
33 |     def evaluate_states(self, states, initial_prompt, max_length=10):
34 |         state_values = {}
35 |         for state in states:
36 |             state_text = ' '.join(state)
37 |             prompt = f"Given the current state of reasoning: '{state_text}', pessimistically evaluate its value as a float between 0 and 1 based on its potential to achieve {initial_prompt}"
38 | 
39 |             if self.verbose:
40 |                 print(f"Evaluating state: {state_text}")
41 | 
42 |             try:
43 |                 inputs = self.tokenizer(prompt, return_tensors="pt")
44 |                 outputs = self.model.generate(**inputs, num_return_sequences=1, max_length=max_length)
45 |                 value_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
46 |                 value = float(value_text)
47 |             except ValueError:
48 |                 if self.verbose:
49 |                     print(f"Error converting value to float for state: {state_text}")
50 |                 value = 0  # Assign a default value if the conversion fails
51 |             except Exception as e:
52 |                 if self.verbose:
53 |                     print(f"Error evaluating state: {state_text}")
54 |                     print(f"Error: {e}")
55 |                 value = 0
56 | 
57 |             state_values[state] = value
58 | 
59 |         return state_values
--------------------------------------------------------------------------------
/src/distiller/base.py:
--------------------------------------------------------------------------------
1 | import itertools
2 | from typing import List, Any, Dict, Tuple, Generator, Iterator, Protocol
3 | 
4 | OPTIONS_CONFIG_KEYS = ["temperature"]
5 | GENERATOR_CONFIG_KEYS = ["temperatures"]
6 | 
7 | 
8 | class DatasetGeneratorConfig(Protocol):
9 |     """Base generator configuration protocol."""
10 |     openai_api_key: str
11 |     """OpenAI API key."""
12 |     num_samples: int
13 |     """Number of texts to generate for each options combination."""
14 |     options: List[Tuple[str, str]]
15 |     """Additional options defined in the text prompt with curly brackets."""
16 | 
17 | 
18 | class DatasetGenerator:
19 |     """Abstraction of a dataset generator."""
20 | 
21 |     config: DatasetGeneratorConfig
22 |     """Generator configuration."""
23 |     options_configs:
List[Dict[str, Any]] 24 | """Possible combinations of the provided options.""" 25 | generator_index: int = 0 26 | """Index of the next item to be returned by the generator.""" 27 | 28 | def __init__(self, config: DatasetGeneratorConfig) -> None: 29 | self.config = config 30 | self.initialize_options_configs() 31 | 32 | def initialize_options_configs( 33 | self, 34 | options_config_keys: List[str] = OPTIONS_CONFIG_KEYS, 35 | generator_config_keys: List[str] = GENERATOR_CONFIG_KEYS 36 | ) -> None: 37 | """Prepare options combinations.""" 38 | options_keys = ["sample_id", *options_config_keys] 39 | options_values = [range(self.config.num_samples)] 40 | options_values += [getattr(self.config, key) for key in generator_config_keys] 41 | 42 | for option in self.config.options: 43 | if option[0] not in options_keys: 44 | options_keys.append(option[0]) 45 | options_values.append([option[1]]) 46 | else: 47 | index = options_keys.index(option[0]) 48 | if option[1] not in options_values[index]: 49 | options_values[index].append(option[1]) 50 | 51 | self.options_configs = list(map(lambda x: dict(zip(options_keys, x)), 52 | itertools.product(*options_values))) 53 | 54 | def generate_item(self) -> Dict[str, Any]: 55 | """Produce a data item.""" 56 | return {} 57 | 58 | def __next__(self) -> Generator[Dict[str, Any], None, None]: 59 | return self.generate_item() 60 | 61 | def __iter__(self) -> Iterator: 62 | return self 63 | -------------------------------------------------------------------------------- /src/distiller/cli.py: -------------------------------------------------------------------------------- 1 | import click 2 | from typing import List, Tuple 3 | 4 | from .conversations import ConversationsGeneratorConfig, ConversationsGenerator 5 | from .texts import TextsGeneratorConfig, TextsGenerator 6 | from .outputs import DatasetWriter 7 | 8 | 9 | @click.group() 10 | def distiller() -> None: 11 | """Command line interface that generates datasets with LLMs.""" 12 | pass 13 | 14 | 15 | click_options = click.option("--option", 16 | "-o", 17 | "options", 18 | type=(str, str), 19 | multiple=True, 20 | help="Values for additional options denoted in your prompts by {OPTION_NAME}.") 21 | 22 | click_path = click.option("--path", 23 | "-f", 24 | "path", 25 | type=click.Path(), 26 | help="Where to save the dataset. Either a file or a directory (folder).") 27 | 28 | click_single_file = click.option("--single-file", 29 | "-s", 30 | "single_file", 31 | type=bool, 32 | is_flag=True, 33 | help="Either save the whole dataset to a single file or create multiple files.") 34 | 35 | click_num_samples = click.option("--num-samples", 36 | "-n", 37 | "num_samples", 38 | type=int, 39 | default=1, 40 | help="Number of conversations for each configuration.") 41 | 42 | click_temperatures = click.option("--temperature", 43 | "-t", 44 | "temperatures", 45 | type=float, 46 | multiple=True, 47 | default=[0.5], 48 | help="Possible temperature values for the backend language model.") 49 | 50 | 51 | @click.command() 52 | @click.option("--agents", 53 | "-a", 54 | "agents", 55 | type=str, 56 | multiple=True, 57 | required=True, 58 | help="Agent role descriptions.") 59 | 60 | @click.option("--hf-api-token", 61 | "-hf", 62 | type=str, 63 | envvar="HF_API_KEY", 64 | help="Hugging Face API key.") 65 | 66 | @click.option("--model-agent", 67 | "-m", 68 | type=click.Choice(["openai", "hf"]), 69 | default="openai", 70 | help="Backend to use for the AI agents. Can be 'openai' for OpenAI's gpt-3.5-turbo or 'hf' for a Hugging Face model. 
Defaults to 'openai'.") 71 | 72 | @click.option("--repo-id", 73 | "-r", 74 | type=str, 75 | help="Hugging Face model repo id, required if 'hf' is chosen as model-agent.") 76 | 77 | @click.option("--openai-api-key", 78 | "-k", 79 | "openai_api_key", 80 | type=str, 81 | envvar="OPENAI_API_KEY", 82 | help="OpenAI API key.") 83 | @click.option("--agent1", 84 | "-a", 85 | "agent1", 86 | type=str, 87 | required=True, 88 | help="Agent role description.") 89 | @click.option("--agent2", 90 | "-b", 91 | "agent2", 92 | type=str, 93 | required=True, 94 | help="Agent role description.") 95 | @click.option("--initial-utterance", 96 | "-u", 97 | "initial_utterances", 98 | type=str, 99 | default=["Hello."], 100 | multiple=True, 101 | help="Utterance to be provisioned to the first agent. For many use cases a \"Hello\" is enough.") 102 | @click.option("--interruption", 103 | "-i", 104 | "interruption", 105 | type=click.Choice(["length", "end_phrase"]), 106 | default="length", 107 | help="Interruption mode.") 108 | @click.option("--end-phrase", 109 | "-e", 110 | "end_phrase", 111 | type=str, 112 | default="Goodbye", 113 | help="Interrupt after this phrase is outputted by one of the agents.") 114 | @click.option("--end-agent", 115 | "-d", 116 | "end_agent", 117 | type=click.Choice(["agent1", "agent2", "both"]), 118 | default="both", 119 | help="In which agent's messages to look for the end phrase.") 120 | @click.option("--length", 121 | "-l", 122 | "lengths", 123 | type=int, 124 | multiple=True, 125 | default=[5], 126 | help="Maximum number of utterances for each agent. A conversation sample will be generated for each length.") 127 | @click_temperatures 128 | @click_num_samples 129 | @click_options 130 | @click_path 131 | @click_single_file 132 | def conversations( 133 | agents: List[str], 134 | repo_id: str, 135 | openai_api_key: str, 136 | agent1: str, 137 | agent2: str, 138 | initial_utterances: List[str], 139 | num_samples: int, 140 | interruption: str, 141 | end_phrase: str, 142 | end_agent: str, 143 | lengths: List[int], 144 | temperatures: List[int], 145 | options: List[Tuple[str, str]], 146 | path: str, 147 | single_file: bool 148 | ) -> None: 149 | """Produce conversations between two gpt-3.5-turbo agents with given roles.""" 150 | dataset_writer = DatasetWriter(path, single_file) 151 | 152 | generator_config = ConversationsGeneratorConfig(agents=agents, 153 | openai_api_key=openai_api_key, 154 | agent1=agent1, 155 | agent2=agent2, 156 | initial_utterances=initial_utterances, 157 | num_samples=num_samples, 158 | interruption=interruption, 159 | end_phrase=end_phrase, 160 | end_agent=end_agent, 161 | lengths=lengths, 162 | temperatures=temperatures, 163 | options=options) 164 | 165 | conversations_generator = ConversationsGenerator(generator_config) 166 | 167 | for conversation in conversations_generator: 168 | dataset_writer.save_intermediate_result(conversation) 169 | 170 | 171 | @click.command() 172 | @click.option("--prompt", 173 | "-p", 174 | "prompt", 175 | type=str, 176 | required=True, 177 | help="Input prompt.") 178 | @click.option("--backend", 179 | "-b", 180 | "backends", 181 | type=str, 182 | multiple=True, 183 | default=["openai|text-davinci-003"], 184 | help="LLM APIs to use as backends. Use \"backend|model_name\" notation. 
For example: \"openai|text-davinci-003\".") 185 | @click.option("--max-length", 186 | "-l", 187 | "max_lengths", 188 | type=int, 189 | multiple=True, 190 | default=[100], 191 | help="Maximum number of tokens to generate for each prompt.") 192 | @click_temperatures 193 | @click_num_samples 194 | @click_options 195 | @click_path 196 | @click_single_file 197 | def texts( 198 | agents: List[str], 199 | prompt: str, 200 | num_samples: int, 201 | max_lengths: List[int], 202 | temperatures: List[int], 203 | backends: List[str], 204 | options: List[Tuple[str, str]], 205 | path: str, 206 | single_file: bool 207 | ) -> None: 208 | """Inference multiple LLMs at scale.""" 209 | dataset_writer = DatasetWriter(path, single_file) 210 | 211 | generator_config = TextsGeneratorConfig(agents=agents, 212 | prompt=prompt, 213 | backends=backends, 214 | num_samples=num_samples, 215 | max_lengths=max_lengths, 216 | temperatures=temperatures, 217 | options=options) 218 | 219 | texts_generator = TextsGenerator(generator_config) 220 | 221 | for text_object in texts_generator: 222 | dataset_writer.save_intermediate_result(text_object) 223 | 224 | 225 | distiller.add_command(texts) 226 | distiller.add_command(conversations) 227 | 228 | 229 | def main() -> None: 230 | """Run the distiller CLI.""" 231 | distiller() 232 | -------------------------------------------------------------------------------- /src/distiller/conversations.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import List, Any, Dict, Tuple, Union 3 | 4 | from langchain.prompts import ( 5 | ChatPromptTemplate, 6 | MessagesPlaceholder, 7 | SystemMessagePromptTemplate, 8 | HumanMessagePromptTemplate 9 | ) 10 | 11 | from langchain.chains import ConversationChain 12 | from langchain.chat_models import ChatOpenAI 13 | from langchain.memory import ConversationBufferMemory 14 | from langchain.schema import SystemMessage 15 | from langchain import HuggingFaceHub 16 | from .base import DatasetGenerator 17 | 18 | OPTIONS_CONFIG_KEYS = ["length", "temperature", "initial_utterance"] 19 | GENERATOR_CONFIG_KEYS = ["lengths", "temperatures", "initial_utterances"] 20 | 21 | 22 | @dataclass 23 | class ConversationsGeneratorConfig: 24 | agents: List[str] 25 | """List of agent descriptions to construct their system message""" 26 | agent_type: str 27 | """type of language odel either openai or huggingface""" 28 | hf_id: str 29 | """repo id for the hf model""" 30 | openai_api_key: str 31 | """OpenAI API key.""" 32 | agent1: str 33 | """Description of the first agent used to construct its system message.""" 34 | agent2: str 35 | """Description of the second agent used to construct its system message.""" 36 | initial_utterances: List[str] = "Hello." 37 | """Utterances to be provisioned to the first agent.""" 38 | num_samples: int = 1 39 | """Number of conversations to generate for each options combination.""" 40 | interruption: str = "length" 41 | """Interruption mode.""" 42 | end_phrase: str = "Goodbye!" 43 | """Phrase to look for when checking whether to interrupt a conversation.""" 44 | end_agent: str = "both" 45 | """Agent whose messages to check for the interruption phrase.""" 46 | lengths: List[int] = field(default_factory=lambda: [5]) 47 | """Possible lengths of the conversations. 
If end_phrase interruption is enabled these will be used for maximum lengths.""" 48 | temperatures: List[float] = field(default_factory=lambda: [0]) 49 | """Possible temperatures for the backend LLM.""" 50 | options: List[Tuple[str, str]] = field(default_factory=lambda: []) 51 | """Additional options defined in the system prompts with curly brackets.""" 52 | 53 | 54 | class ConversationsGenerator(DatasetGenerator): 55 | """Generator producing conversations between two AI agents.""" 56 | 57 | config: ConversationsGeneratorConfig 58 | """Configuration for a ConversationsGenerator.""" 59 | 60 | def __init__(self, config: ConversationsGeneratorConfig) -> None: 61 | """Initialize ConversationsGenerator.""" 62 | super().__init__(config) 63 | 64 | def initialize_options_configs( 65 | self, 66 | options_config_keys: List[str] = OPTIONS_CONFIG_KEYS, 67 | generator_config_keys: List[str] = GENERATOR_CONFIG_KEYS 68 | ) -> None: 69 | """Prepare options combinations.""" 70 | super().initialize_options_configs(options_config_keys, generator_config_keys) 71 | 72 | def initialize_chain( 73 | self, 74 | agent: str, 75 | system_prompt: str, 76 | conversation_config: Dict[str, Any] 77 | ) -> Tuple[ConversationChain, str]: 78 | """Initialize a conversation and return a chain and a formatted system prompt.""" 79 | if self.config.interruption == "end_phrase": 80 | if self.config.end_agent == agent or self.config.end_agent == "both": 81 | system_prompt += f" When the whole conversation is over end with \"{self.config.end_phrase}\"." 82 | 83 | system_template = SystemMessagePromptTemplate.from_template( 84 | system_prompt) 85 | template_params = {key: conversation_config[key] 86 | for key in system_template.input_variables} 87 | system_message = system_template.format(**template_params).content 88 | 89 | prompt = ChatPromptTemplate.from_messages([ 90 | SystemMessage(content=system_message), 91 | MessagesPlaceholder(variable_name="history"), 92 | HumanMessagePromptTemplate.from_template("{input}") 93 | ]) 94 | 95 | memory = ConversationBufferMemory(return_messages=True) 96 | 97 | 98 | if self.config.agent_type == "openai": 99 | llm = ChatOpenAI(temperature=conversation_config["temperature"], 100 | openai_api_key=self.config.openai_api_key) 101 | elif self.config.agent_type == "huggingface": 102 | llm = HuggingFaceHub(repo_id=self.config.repo_id, model_kwargs={"temperature": conversation_config["temperature"], "max_length": 64}) 103 | 104 | 105 | chain = ConversationChain(memory=memory, prompt=prompt, llm=llm) 106 | 107 | return chain, system_message 108 | 109 | def initialize_chains( 110 | self, 111 | conversation_config: Dict[str, Any] 112 | ) -> Tuple[List[ConversationChain], List[str]]: 113 | chains = [] 114 | system_prompts = [] 115 | for agent in self.config.agents: 116 | chain, system_prompt = self.initialize_chain(agent, agent, conversation_config) 117 | chains.append(chain) 118 | system_prompts.append(system_prompt) 119 | return chains, system_prompts 120 | 121 | def end_phrase_interruption(self, agent: str, message: str) -> bool: 122 | """Check whether to interrupt conversation generation.""" 123 | if self.config.interruption == "end_phrase": 124 | if self.config.end_agent == agent or self.config.end_agent == "both": 125 | if self.config.end_phrase in message: 126 | return True 127 | 128 | return False 129 | 130 | def generate_item(self) -> Dict[str, Union[List[List[Any]], float, int]]: 131 | """Run two chains to talk with one another and record the chat history.""" 132 | if self.generator_index >= 
len(self.options_configs):
133 |             raise StopIteration()
134 | 
135 |         conversation_config = self.options_configs[self.generator_index]
136 |         self.generator_index += 1
137 | 
138 |         chains, system_prompts = self.initialize_chains(conversation_config)  # one chain per configured agent
139 | 
140 |         utterances = []
141 | 
142 | 
143 |         chain_inp = conversation_config["initial_utterance"]
144 |         for _ in range(conversation_config["length"]):
145 |             for i, chain in enumerate(chains):
146 |                 agent = f"agent{i + 1}"
147 |                 chain_out = chain.predict(input=chain_inp)
148 |                 utterances.append([agent, chain_out])
149 | 
150 |                 if self.end_phrase_interruption(agent, chain_out):
151 |                     break
152 | 
153 |                 chain_inp = chain_out
154 | 
155 | 
156 |         return {**conversation_config,
157 |                 **{f"agent{i + 1}": system_prompts[i] for i in range(len(system_prompts))},
158 |                 "utterances": utterances}
--------------------------------------------------------------------------------
/src/distiller/outputs.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | 
4 | from uuid import uuid4
5 | from typing import Dict, Any, List
6 | 
7 | 
8 | class DatasetWriter:
9 |     """Handle outputting dataset items."""
10 | 
11 |     single_file: bool
12 |     """Whether to save all dataset items in a single file."""
13 |     path: str
14 |     """Path of the output file or directory."""
15 |     dataset_items: List[Dict[str, Any]]
16 |     """Collection of all the items in the current dataset."""
17 | 
18 |     def __init__(self, path: str = None, single_file: bool = False) -> None:
19 |         """Initialize DatasetWriter."""
20 |         if path is None and single_file:
21 |             path = self.get_unique_filename(os.getcwd())
22 |         elif path is None and not single_file:
23 |             path = self.get_unique_dirname(os.getcwd())
24 |         elif os.path.isdir(path) and single_file:
25 |             path = self.get_unique_filename(path)
26 |         elif os.path.isfile(path) and not single_file:
27 |             raise ValueError(
28 |                 "Cannot write to a file with the single_file mode disabled.
Try setting --single-file.") 29 | 30 | self.single_file = single_file 31 | self.path = path 32 | self.dataset_items = [] 33 | 34 | def get_unique_dirname(self, base_path): 35 | """Get a unique dirname.""" 36 | return os.path.join(base_path, str(uuid4())) 37 | 38 | def get_unique_filename(self, base_path): 39 | """Get a unique filename.""" 40 | return os.path.join(base_path, f"{uuid4()}.json") 41 | 42 | def save_intermediate_result(self, result: Dict[str, Any]): 43 | """Either save an item to its own file or concatenate it with all dataset items in a single file.""" 44 | if self.single_file: 45 | self.dataset_items.append(result) 46 | 47 | current_directory = os.path.dirname(self.path) 48 | if current_directory != "" and current_directory != ".": 49 | os.makedirs(current_directory, exist_ok=True) 50 | 51 | with open(self.path, "w") as output_file: 52 | json.dump(self.dataset_items, output_file) 53 | else: 54 | current_filepath = self.get_unique_filename(self.path) 55 | 56 | os.makedirs(self.path, exist_ok=True) 57 | with open(current_filepath, "w") as output_file: 58 | json.dump(result, output_file) 59 | -------------------------------------------------------------------------------- /src/distiller/texts.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import List, Any, Dict, Tuple, Union 3 | 4 | from langchain.prompts import PromptTemplate 5 | from langchain.llms import BaseLLM 6 | from langchain.chains import LLMChain 7 | 8 | from .base import DatasetGenerator 9 | 10 | OPTIONS_CONFIG_KEYS = ["backend", "max_length", "temperature"] 11 | GENERATOR_CONFIG_KEYS = ["backends", "max_lengths", "temperatures"] 12 | 13 | 14 | @dataclass 15 | class TextsGeneratorConfig: 16 | agents: List[str] 17 | """An array that allows you to dynamically scale up agents""" 18 | prompt: str 19 | """Text prompt.""" 20 | backends: List[Tuple[str, str, str]] 21 | """LLM APIs to use as backends.""" 22 | num_samples: int = 1 23 | """Number of texts to generate for each options combination.""" 24 | max_lengths: List[int] = field(default_factory=lambda: [5]) 25 | """Maximum lengths in tokens for the output of each generation.""" 26 | temperatures: List[float] = field(default_factory=lambda: [0]) 27 | """Possible temperatures for the backend LLM.""" 28 | options: List[Tuple[str, str]] = field(default_factory=lambda: []) 29 | """Additional options defined in the system prompts with curly brackets.""" 30 | 31 | 32 | class TextsGenerator(DatasetGenerator): 33 | """Generator producing texts by varying model parameters and prompt options.""" 34 | 35 | config: TextsGeneratorConfig 36 | """Configuration for a TextsGenerator.""" 37 | 38 | def __init__(self, config: TextsGeneratorConfig) -> None: 39 | """Initialize TextsGenerator.""" 40 | super().__init__(config) 41 | 42 | def initialize_options_configs( 43 | self, 44 | options_config_keys: List[str] = OPTIONS_CONFIG_KEYS, 45 | generator_config_keys: List[str] = GENERATOR_CONFIG_KEYS 46 | ) -> None: 47 | """Prepare options combinations.""" 48 | super().initialize_options_configs(options_config_keys, generator_config_keys) 49 | 50 | def initialize_backend(self, text_config: Dict[str, Any]) -> BaseLLM: 51 | """Initialize a specific LLM.""" 52 | backend_str = text_config["backend"] 53 | temperature = text_config["temperature"] 54 | max_length = text_config["max_length"] 55 | 56 | backend, model = backend_str.split("|") 57 | 58 | if backend.lower() == "openai": 59 | from langchain.llms 
import OpenAI
60 |             llm = OpenAI(model_name=model,
61 |                          temperature=temperature,
62 |                          max_tokens=max_length)
63 |         elif backend.lower() == "cohere":
64 |             from langchain.llms import Cohere
65 |             llm = Cohere(model=model,
66 |                          temperature=temperature,
67 |                          max_tokens=max_length)
68 |         elif backend.lower() == "petals":
69 |             from langchain.llms import Petals
70 |             llm = Petals(model_name=model,
71 |                          temperature=temperature,
72 |                          max_new_tokens=max_length)
73 |         elif backend.lower() == "huggingface":
74 |             from langchain import HuggingFaceHub
75 |             llm = HuggingFaceHub(repo_id=model,
76 |                                  model_kwargs={"temperature": temperature,
77 |                                                "max_length": max_length})
78 |         else:
79 |             raise ValueError("Cannot use the specified backend.")
80 | 
81 |         return llm
82 | 
83 |     def initialize_backends(self, text_config: Dict[str, Any]) -> List[BaseLLM]:
84 |         backends = []
85 |         for _ in self.config.agents:
86 |             backend = self.initialize_backend(text_config)
87 |             backends.append(backend)
88 |         return backends
89 | 
90 |     def generate_item(self) -> Dict[str, Union[List[List[Any]], float, int]]:
91 |         """Produce text with an LLM chain."""
92 |         if self.generator_index >= len(self.options_configs):
93 |             raise StopIteration()
94 | 
95 |         text_config = self.options_configs[self.generator_index]
96 |         self.generator_index += 1
97 | 
98 |         input_variables = text_config.keys() - ["sample_id",
99 |                                                 "backend",
100 |                                                 "temperature",
101 |                                                 "max_length"]
102 | 
103 |         prompt_template = PromptTemplate(template=self.config.prompt,
104 |                                          input_variables=input_variables)
105 | 
106 |         prompt_params = {k: text_config[k] for k in input_variables}
107 |         input_prompt = prompt_template.format(**prompt_params)
108 | 
109 |         # One backend LLM is initialized per configured agent, and the same
110 |         # prompt is run through each of them.
111 |         backends = self.initialize_backends(text_config)
112 | 
113 |         outputs = []
114 |         for backend in backends:
115 |             chain = LLMChain(prompt=prompt_template, llm=backend)
116 |             output = chain.predict(**prompt_params)
117 |             outputs.append(output)
118 | 
119 |         return {**text_config,
120 |                 "prompt": input_prompt,
121 |                 "outputs": outputs}
122 | 
123 | 
124 | if __name__ == "__main__":  # run the example only when executed directly, not on package import
125 |     agents = [
126 |         "You're a shop assistant in a pet store. Answer to customer questions politely.",
127 |         "You're a customer in a pet store. You should behave like a human. You want to buy {n} pets. Ask questions about the pets in the store.",
128 |         "You're another customer in the pet store. You should behave like a human. You want to buy {n} pets. Ask questions about the pets in the store."
129 |     ]
130 | 
131 |     generator_config = TextsGeneratorConfig(prompt="Your prompt with an {n} placeholder",
132 |                                             agents=agents,
133 |                                             backends=["huggingface|distilgpt2"],
134 |                                             num_samples=2,
135 |                                             max_lengths=[49],
136 |                                             temperatures=[0.1, 0.2],
137 |                                             options=[("n", "2"), ("n", "3")])
138 | 
139 |     texts_generator = TextsGenerator(generator_config)
140 | 
141 |     for text in texts_generator:
142 |         print(text)
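143 | 
144 |     # Optional sketch (hypothetical path/values): instead of printing, the generated
145 |     # items could be persisted with the DatasetWriter from distiller.outputs:
146 |     #
147 |     # from distiller import DatasetWriter
148 |     # writer = DatasetWriter(path="texts-dataset.json", single_file=True)
149 |     # for item in texts_generator:
150 |     #     writer.save_intermediate_result(item)
--------------------------------------------------------------------------------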