├── quality_prompts ├── utils │ ├── __init__.py │ ├── prompt_postprocessing.py │ ├── llm.py │ └── prompting_techniques_system_prompts.py ├── __init__.py ├── exemplars.py └── prompt.py ├── assets └── big_dipper_design.jpg ├── .gitignore ├── setup.py ├── LICENSE ├── README.MD └── examples ├── math_science_problems_sample_exemplars.json ├── kg_creation_problem_sample_exemplars.json ├── few_shot_prompt_usage.ipynb ├── zero_shot_cot_usage.ipynb └── few_shot_cot_usage.ipynb /quality_prompts/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assets/big_dipper_design.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sarthakrastogi/quality-prompts/HEAD/assets/big_dipper_design.jpg -------------------------------------------------------------------------------- /quality_prompts/__init__.py: -------------------------------------------------------------------------------- 1 | from .prompt import QualityPrompt 2 | from .exemplars import ExemplarStore, Exemplar 3 | from .utils.llm import * 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .env 3 | utils/__pycache__ 4 | core/__pycache__ 5 | examples/__pycache__ 6 | quality_prompts/__pycache__ 7 | __pycache__ 8 | .gitmodules 9 | usage.ipynb 10 | build/ 11 | dist/ 12 | quality_prompts.egg-info/ -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | with open("README.MD", "r") as f: 4 | readme_content = f.read() 5 | 6 | setup( 7 | name="quality-prompts", 8 | version="0.0.5", 9 | packages=find_packages(), 10 | long_description=readme_content, 11 | long_description_content_type="text/markdown", 12 | install_requires=[ 13 | "litellm==1.41.8", 14 | ], 15 | ) 16 | -------------------------------------------------------------------------------- /quality_prompts/utils/prompt_postprocessing.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | 4 | def remove_extra_chars(prompt): 5 | # Remove leading tabs and spaces from lines 6 | processed_prompt = re.sub(r"^[ \t]+", "", prompt, flags=re.MULTILINE) 7 | 8 | # Replace occurrences of more than two consecutive new lines with exactly two new lines 9 | processed_prompt = re.sub(r"\n{3,}", "\n\n", processed_prompt) 10 | 11 | return processed_prompt 12 | -------------------------------------------------------------------------------- /quality_prompts/utils/llm.py: -------------------------------------------------------------------------------- 1 | from litellm import completion, embedding 2 | 3 | 4 | def llm_call(messages, model="gpt-3.5-turbo"): 5 | response = completion(model=model, messages=messages) 6 | return response.choices[0].message.content 7 | 8 | 9 | def llm_call_multiple_choices(messages, model="gpt-3.5-turbo", n=1, temperature=0): 10 | response = completion(model=model, messages=messages, n=n, temperature=temperature) 11 | return [choice.message.content for choice in response.choices] 12 | 13 | 14 | def get_embedding(input_text, model="text-embedding-ada-002"): 15 | response = embedding(model=model, input=[input_text]) 16 | return 
response.data[0]["embedding"] 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2024 Sarthak Rastogi 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. -------------------------------------------------------------------------------- /README.MD: -------------------------------------------------------------------------------- 1 | # Quality Prompts 2 | ## Use and evaluate prompting techniques quickly. 3 | 4 | Quality Prompts implements 58 prompting techniques explained in [this survey from the University of Maryland](https://arxiv.org/pdf/2406.06608) in collaboration with researchers from [Learn Prompting](https://learnprompting.org), OpenAI, Microsoft, etc. 5 | 6 | ![Quality Prompts](https://raw.githubusercontent.com/sarthakrastogi/quality-prompts/main/assets/big_dipper_design.jpg) 7 | 8 | ### 1. Install Quality Prompts: 9 | 10 | `pip install quality-prompts` 11 | 12 | ### 2. Write the components of your prompt 13 | 14 | ``` 15 | from quality_prompts.prompt import QualityPrompt 16 | 17 | directive = "You are given a document and your task..." 18 | additional_information = "In the knowledge graph, ..." 19 | output_formatting = "You will respond with a ..." 20 | 21 | prompt = QualityPrompt( 22 | directive=directive, 23 | additional_information=additional_information, 24 | output_formatting=output_formatting, 25 | exemplar_store=exemplar_store 26 | ) 27 | ``` 28 | 29 | ### 3. Quality Prompts searches and uses only the few-shot examples that are relevant to the user's query 30 | 31 | ``` 32 | input_text = "list the disorders included in cvd" 33 | prompt.few_shot(input_text=input_text, n_shots=1) 34 | ``` 35 | 36 | ### 4. Simply apply one of several prompting techniques to your prompt 37 | 38 | #### System2Attention 39 | Helps clarify the given context as an additional step before it's used to answer the question. 40 | 41 | ``` 42 | prompt.system2attention(input_text) 43 | ``` 44 | 45 | #### Tabular Chain of Thought 46 | Prompts the LLM to think step by step and write the step, process and result of each step in a markdown table. 47 | Significantly boosts accuracy in solving math problems. 48 | 49 | ``` 50 | prompt.tabular_chain_of_thought_prompting(input_text) 51 | ``` 52 | 53 | ### 5. 
Upcoming: Easily evaluate different prompting techniques 54 | 55 | ## Star History 56 | 57 | To stay updated on the latest evaluation features and prompting techniques added to the library, you can star this repo. 58 | 59 | [![Star History Chart](https://api.star-history.com/svg?repos=sarthakrastogi/quality-prompts&type=Date)](https://star-history.com/#sarthakrastogi/quality-prompts&Date) 60 | 61 | -------------------------------------------------------------------------------- /quality_prompts/exemplars.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | from typing import List 3 | from sklearn.neighbors import NearestNeighbors 4 | import numpy as np 5 | 6 | from .utils.llm import get_embedding 7 | 8 | 9 | class Exemplar(BaseModel): 10 | input: str 11 | label: str 12 | input_embedding: List[float] 13 | complexity_level: str = "medium" 14 | 15 | def format(self): 16 | return f"""Input: {self.input} 17 | Output: {self.label}""" 18 | 19 | 20 | class ExemplarStore(BaseModel): 21 | exemplars: List[Exemplar] 22 | 23 | def size(self): 24 | return len(self.exemplars) 25 | 26 | def get_similar_exemplars_to_test_sample( 27 | self, 28 | input_text, 29 | exemplar_selection_method="knn", 30 | k=3, 31 | prioritise_complex_exemplars=False, 32 | ): 33 | input_embedding = get_embedding(input_text) 34 | input_embedding = np.array(input_embedding).reshape(1, -1) 35 | 36 | # Extract embeddings of all exemplars 37 | example_embeddings = np.array( 38 | [example.input_embedding for example in self.exemplars] 39 | ) 40 | 41 | if exemplar_selection_method == "knn": 42 | difficult_exemplars = [ 43 | ex for ex in self.exemplars if ex.complexity_level == "high" 44 | ] 45 | medium_and_simple_exemplars = [ 46 | ex for ex in self.exemplars if ex.complexity_level in ["medium", "low"] 47 | ] 48 | 49 | if prioritise_complex_exemplars: 50 | if len(difficult_exemplars) >= k: 51 | # Only use difficult exemplars for KNN 52 | example_embeddings = np.array( 53 | [example.input_embedding for example in difficult_exemplars] 54 | ) 55 | exemplars_to_search = difficult_exemplars 56 | else: 57 | # Use all difficult exemplars and fill the rest with medium and simple ones 58 | difficult_embeddings = np.array( 59 | [example.input_embedding for example in difficult_exemplars] 60 | ) 61 | if difficult_embeddings.size == 0: 62 | raise ValueError("No difficult exemplars found.") 63 | 64 | medium_simple_embeddings = np.array( 65 | [ 66 | example.input_embedding 67 | for example in medium_and_simple_exemplars 68 | ] 69 | ) 70 | 71 | example_embeddings = np.vstack( 72 | (difficult_embeddings, medium_simple_embeddings) 73 | ) 74 | exemplars_to_search = ( 75 | difficult_exemplars + medium_and_simple_exemplars 76 | ) 77 | else: 78 | exemplars_to_search = self.exemplars 79 | example_embeddings = np.array( 80 | [example.input_embedding for example in exemplars_to_search] 81 | ) 82 | 83 | # Ensure example_embeddings is not empty 84 | if example_embeddings.size == 0: 85 | raise ValueError("No exemplars found for KNN search.") 86 | 87 | # Initialize and fit the NearestNeighbors model 88 | nbrs = NearestNeighbors(n_neighbors=k, metric="cosine") 89 | nbrs.fit(example_embeddings) 90 | distances, indices = nbrs.kneighbors(input_embedding) 91 | 92 | # Return the top k closest exemplars 93 | return [exemplars_to_search[i] for i in indices.flatten()] 94 | 95 | elif exemplar_selection_method == "vote-k": 96 | pass # TODO 97 | 98 | elif exemplar_selection_method == "sg-icl": 99 | pass # 
TODO 100 | -------------------------------------------------------------------------------- /examples/math_science_problems_sample_exemplars.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "input": "Solve the integral \\( \\int (3x^2 - 2x + 1) \\, dx \\).", 4 | "label": "To solve the integral \\( \\int (3x^2 - 2x + 1) \\, dx \\), we need to integrate each term separately.\n1. \\( \\int 3x^2 \\, dx = 3 \\int x^2 \\, dx = 3 \\cdot \\frac{x^3}{3} = x^3 \\)\n2. \\( \\int -2x \\, dx = -2 \\int x \\, dx = -2 \\cdot \\frac{x^2}{2} = -x^2 \\)\n3. \\( \\int 1 \\, dx = x \\)\nCombining these results, we get:\n\\( \\int (3x^2 - 2x + 1) \\, dx = x^3 - x^2 + x + C \\), where \\(C\\) is the constant of integration.", 5 | "complexity_level": "low" 6 | }, 7 | { 8 | "input": "Calculate the molarity of a solution prepared by dissolving 5 grams of NaCl in 250 mL of water.", 9 | "label": "To calculate the molarity, we first need to find the number of moles of NaCl. The molar mass of NaCl is approximately 58.44 g/mol.\nNumber of moles = \\( \\frac{5 \\text{ g}}{58.44 \\text{ g/mol}} = 0.0856 \\text{ mol} \\)\nNext, we convert the volume from mL to L: 250 mL = 0.250 L.\nMolarity (M) = \\( \\frac{0.0856 \\text{ mol}}{0.250 \\text{ L}} = 0.3424 \\text{ M} \\)\nTherefore, the molarity of the solution is 0.3424 M.", 10 | "complexity_level": "medium" 11 | }, 12 | { 13 | "input": "A block of mass 5 kg is pushed up a frictionless incline of 30 degrees with an initial velocity of 10 m/s. Calculate the distance the block travels up the incline before coming to a stop.", 14 | "label": "To solve this problem, we use the principles of energy conservation. Initially, the block has kinetic energy given by \\( KE = \\frac{1}{2} m v^2 \\).\n\\( KE = \\frac{1}{2} \\times 5 \\text{ kg} \\times (10 \\text{ m/s})^2 = 250 \\text{ J} \\)\nAs the block travels up the incline, it gains potential energy, which is given by \\( PE = mgh \\). The height \\( h \\) can be related to the distance \\( d \\) up the incline by \\( h = d \\sin(30^\\circ) = \\frac{d}{2} \\).\nSetting the initial kinetic energy equal to the potential energy at the stopping point:\n\\( 250 \\text{ J} = 5 \\text{ kg} \\times 9.8 \\text{ m/s}^2 \\times \\frac{d}{2} \\)\nSolving for \\( d \\):\n\\( 250 = 24.5d \\implies d = \\frac{250}{24.5} = 10.2 \\text{ m} \\)\nTherefore, the block travels 10.2 meters up the incline before coming to a stop.", 15 | "complexity_level": "high" 16 | }, 17 | { 18 | "input": "Find the roots of the quadratic equation \\( 2x^2 - 4x + 1 = 0 \\).", 19 | "label": "To find the roots of the quadratic equation \\( 2x^2 - 4x + 1 = 0 \\), we use the quadratic formula \\( x = \\frac{-b \\pm \\sqrt{b^2 - 4ac}}{2a} \\), where \\( a = 2 \\), \\( b = -4 \\), and \\( c = 1 \\).\n\\( x = \\frac{-(-4) \\pm \\sqrt{(-4)^2 - 4 \\cdot 2 \\cdot 1}}{2 \\cdot 2} \\)\n\\( x = \\frac{4 \\pm \\sqrt{16 - 8}}{4} \\)\n\\( x = \\frac{4 \\pm \\sqrt{8}}{4} \\)\n\\( x = \\frac{4 \\pm 2\\sqrt{2}}{4} \\)\n\\( x = 1 \\pm \\frac{\\sqrt{2}}{2} \\)\nTherefore, the roots of the equation are \\( x = 1 + \\frac{\\sqrt{2}}{2} \\) and \\( x = 1 - \\frac{\\sqrt{2}}{2} \\).", 20 | "complexity_level": "medium" 21 | }, 22 | { 23 | "input": "Determine the pH of a 0.01 M HCl solution.", 24 | "label": "HCl is a strong acid, which dissociates completely in water. 
Therefore, the concentration of hydrogen ions \\( [H^+] \\) in the solution is equal to the concentration of the HCl solution, which is 0.01 M.\nThe pH of a solution is calculated using the formula \\( \\text{pH} = -\\log[H^+] \\).\n\\( \\text{pH} = -\\log(0.01) = -\\log(10^{-2}) = 2 \\)\nTherefore, the pH of the 0.01 M HCl solution is 2.", 25 | "complexity_level": "low" 26 | }, 27 | { 28 | "input": "A particle is moving in a circle of radius 5 m with a constant speed of 10 m/s. Calculate the centripetal acceleration of the particle.", 29 | "label": "The centripetal acceleration \\( a_c \\) of a particle moving in a circle with radius \\( r \\) and speed \\( v \\) is given by the formula \\( a_c = \\frac{v^2}{r} \\).\nGiven: \\( v = 10 \\text{ m/s} \\) and \\( r = 5 \\text{ m} \\).\n\\( a_c = \\frac{(10 \\text{ m/s})^2}{5 \\text{ m}} = \\frac{100}{5} = 20 \\text{ m/s}^2 \\)\nTherefore, the centripetal acceleration of the particle is 20 \\text{ m/s}^2.", 30 | "complexity_level": "low" 31 | }, 32 | { 33 | "input": "Evaluate the limit \\( \\lim_{x \\to 2} \\frac{x^2 - 4}{x - 2} \\).", 34 | "label": "To evaluate the limit \\( \\lim_{x \\to 2} \\frac{x^2 - 4}{x - 2} \\), we first notice that direct substitution results in a 0/0 indeterminate form. We can factor the numerator:\n\\( \\frac{x^2 - 4}{x - 2} = \\frac{(x - 2)(x + 2)}{x - 2} \\).\nWe can then cancel the \\( x - 2 \\) term in the numerator and the denominator:\n\\( \\frac{(x - 2)(x + 2)}{x - 2} = x + 2 \\) (for \\( x \\neq 2 \\)).\nNow, we can directly substitute \\( x = 2 \\):\n\\( \\lim_{x \\to 2} (x + 2) = 2 + 2 = 4 \\).\nTherefore, the limit is 4.", 35 | "complexity_level": "medium" 36 | }, 37 | { 38 | "input": "Determine the empirical formula of a compound that contains 40% carbon, 6.7% hydrogen, and 53.3% oxygen by mass.", 39 | "label": "To determine the empirical formula, we convert the percentages to moles by dividing by the atomic masses of each element.\n1. Moles of C: \\( \\frac{40 \\text{ g}}{12.01 \\text{ g/mol}} = 3.33 \\text{ mol} \\).\n2. Moles of H: \\( \\frac{6.7 \\text{ g}}{1.008 \\text{ g/mol}} = 6.65 \\text{ mol} \\).\n3. Moles of O: \\( \\frac{53.3 \\text{ g}}{16.00 \\text{ g/mol}} = 3.33 \\text{ mol} \\).\nNext, we divide each mole value by the smallest number of moles:\n1. Moles of C: \\( \\frac{3.33}{3.33} = 1 \\).\n2. Moles of H: \\( \\frac{6.65}{3.33} = 2 \\).\n3. Moles of O: \\( \\frac{3.33}{3.33} = 1 \\).\nTherefore, the empirical formula is CH2O.", 40 | "complexity_level": "medium" 41 | }, 42 | { 43 | "input": "A 10 kg mass is attached to a spring with a spring constant of 200 N/m. If the mass is displaced 0.1 m from its equilibrium position and released, calculate the maximum velocity of the mass.", 44 | "label": "To find the maximum velocity, we use the conservation of mechanical energy. 
The potential energy stored in the spring at maximum displacement is converted into kinetic energy at the equilibrium position.\nThe potential energy in the spring is given by \\( PE = \\frac{1}{2} k x^2 \\), where \\( k \\) is the spring constant and \\( x \\) is the displacement.\n\\( PE = \\frac{1}{2} \\times 200 \\text{ N/m} \\times (0.1 \\text{ m})^2 = 1 \\text{ J} \\).\nAt the equilibrium position, this potential energy is converted to kinetic energy: \\( KE = \\frac{1}{2} mv^2 \\).\nSetting \\( KE = PE \\):\n\\( \\frac{1}{2} mv^2 = 1 \\text{ J} \\).\n\\( \\frac{1}{2} \\times 10 \\text{ kg} \\times v^2 = 1 \\text{ J} \\).\n\\( 5v^2 = 1 \\implies v^2 = 0.2 \\implies v = \\sqrt{0.2} \\approx 0.447 \\text{ m/s} \\).\nTherefore, the maximum velocity of the mass is approximately 0.447 m/s.", 45 | "complexity_level": "high" 46 | }, 47 | { 48 | "input": "Find the derivative of the function \\( f(x) = x^3 - 3x^2 + 2x \\).", 49 | "label": "To find the derivative of the function \\( f(x) = x^3 - 3x^2 + 2x \\), we apply the power rule to each term.\n1. \\( \\frac{d}{dx} (x^3) = 3x^2 \\).\n2. \\( \\frac{d}{dx} (-3x^2) = -6x \\).\n3. \\( \\frac{d}{dx} (2x) = 2 \\).\nCombining these results, we get:\n\\( f'(x) = 3x^2 - 6x + 2 \\).\nTherefore, the derivative of the function is \\( f'(x) = 3x^2 - 6x + 2 \\).", 50 | "complexity_level": "low" 51 | }, 52 | { 53 | "input": "What is the final temperature when 50 g of aluminum at 100°C is mixed with 200 g of water at 25°C? (Specific heat capacities: \\( c_{Al} = 0.897 \\text{ J/g°C} \\), \\( c_{H2O} = 4.18 \\text{ J/g°C} \\)).", 54 | "label": "To find the final temperature (\\( T_f \\)) when the aluminum and water reach thermal equilibrium, we use the principle of conservation of energy. The heat lost by the aluminum will be equal to the heat gained by the water.\nHeat lost by aluminum: \\( Q_{Al} = m_{Al} c_{Al} (T_{Al} - T_f) \\).\nHeat gained by water: \\( Q_{H2O} = m_{H2O} c_{H2O} (T_f - T_{H2O}) \\).\nSetting \\( Q_{Al} = Q_{H2O} \\):\n\\( 50 \\text{ g} \\times 0.897 \\text{ J/g°C} \\times (100 - T_f) = 200 \\text{ g} \\times 4.18 \\text{ J/g°C} \\times (T_f - 25) \\).\n\\( 44.85 \\times (100 - T_f) = 836 \\times (T_f - 25) \\).\n\\( 4485 - 44.85 T_f = 836 T_f - 20900 \\).\n\\( 4485 + 20900 = 836 T_f + 44.85 T_f \\).\n\\( 25385 = 880.85 T_f \\).\n\\( T_f = \\frac{25385}{880.85} \\approx 28.8°C \\).\nTherefore, the final temperature is approximately 28.8°C.", 55 | "complexity_level": "high" 56 | }, 57 | { 58 | "input": "Calculate the standard enthalpy change (ΔH°) for the reaction: \\( 2H_2 + O_2 \\rightarrow 2H_2O \\). Given the bond enthalpies: H-H = 436 kJ/mol, O=O = 498 kJ/mol, and O-H = 463 kJ/mol.", 59 | "label": "To calculate the standard enthalpy change (ΔH°) for the reaction \\( 2H_2 + O_2 \\rightarrow 2H_2O \\), we use the bond enthalpies of the reactants and products.\nThe reaction involves breaking the bonds in the reactants and forming new bonds in the products.\nBreaking bonds:\n1. 2 moles of H-H bonds: \\( 2 \\times 436 \\text{ kJ/mol} = 872 \\text{ kJ} \\).\n2. 1 mole of O=O bond: \\( 498 \\text{ kJ} \\).\nTotal energy to break bonds: \\( 872 + 498 = 1370 \\text{ kJ} \\).\nForming bonds:\n1. 
4 moles of O-H bonds: \\( 4 \\times 463 \\text{ kJ/mol} = 1852 \\text{ kJ} \\).\nThe standard enthalpy change is the difference between the energy required to break the bonds and the energy released when new bonds are formed:\n\\( ΔH° = 1370 \\text{ kJ} - 1852 \\text{ kJ} = -482 \\text{ kJ} \\).\nTherefore, the standard enthalpy change for the reaction is -482 kJ.", 60 | "complexity_level": "medium" 61 | } 62 | ] -------------------------------------------------------------------------------- /examples/kg_creation_problem_sample_exemplars.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "input": "Cardiovascular disease (CVD) encompasses a spectrum of disorders involving the heart and vasculature, prominently including atherosclerosis, characterized by endothelial dysfunction and the accumulation of lipid-laden plaques. These pathophysiological processes often precipitate myocardial infarction and cerebrovascular accidents, arising from the rupture of vulnerable plaques and subsequent thrombogenesis.", 4 | "complexity_level": "high", 5 | "label": [ 6 | { 7 | "entity": "cardiovascular disease (cvd)", 8 | "connections": [ 9 | { 10 | "entity": "heart", 11 | "relationship": "involves" 12 | }, 13 | { 14 | "entity": "vasculature", 15 | "relationship": "involves" 16 | }, 17 | { 18 | "entity": "atherosclerosis", 19 | "relationship": "associated disorder" 20 | }, 21 | { 22 | "entity": "endothelial dysfunction", 23 | "relationship": "characteristic feature" 24 | }, 25 | { 26 | "entity": "lipid-laden plaques", 27 | "relationship": "associated feature" 28 | }, 29 | { 30 | "entity": "myocardial infarction", 31 | "relationship": "common complication" 32 | }, 33 | { 34 | "entity": "cerebrovascular accidents", 35 | "relationship": "common complication" 36 | }, 37 | { 38 | "entity": "plaque rupture", 39 | "relationship": "cause of complications" 40 | }, 41 | { 42 | "entity": "thrombogenesis", 43 | "relationship": "subsequent process" 44 | } 45 | ] 46 | }, 47 | { 48 | "entity": "atherosclerosis", 49 | "connections": [ 50 | { 51 | "entity": "endothelial dysfunction", 52 | "relationship": "causes" 53 | }, 54 | { 55 | "entity": "lipid-laden plaques", 56 | "relationship": "result of" 57 | }, 58 | { 59 | "entity": "plaque rupture", 60 | "relationship": "leads to complications" 61 | } 62 | ] 63 | } 64 | ] 65 | }, 66 | { 67 | "input": "Management of cardiovascular disease necessitates a multifaceted approach involving antihypertensive agents, statins to modulate dyslipidemia, and antiplatelet therapy to mitigate thrombosis risk.", 68 | "complexity_level": "low", 69 | "label": [ 70 | { 71 | "entity": "cardiovascular disease", 72 | "connections": [ 73 | { 74 | "entity": "antihypertensive agents", 75 | "relationship": "involves management" 76 | }, 77 | { 78 | "entity": "statins", 79 | "relationship": "modulates dyslipidemia" 80 | }, 81 | { 82 | "entity": "antiplatelet therapy", 83 | "relationship": "mitigates thrombosis risk" 84 | } 85 | ] 86 | } 87 | ] 88 | }, 89 | { 90 | "input": "Emerging therapeutic interventions targeting molecular pathways, including PCSK9 inhibitors and SGLT2 inhibitors, show promise in reducing cardiovascular morbidity and mortality.", 91 | "complexity_level": "medium", 92 | "label": [ 93 | { 94 | "entity": "therapeutic interventions", 95 | "connections": [ 96 | { 97 | "entity": "pcsk9 inhibitors", 98 | "relationship": "targeting molecular pathways" 99 | }, 100 | { 101 | "entity": "sglt2 inhibitors", 102 | "relationship": "targeting 
molecular pathways" 103 | } 104 | ] 105 | }, 106 | { 107 | "entity": "therapeutic interventions", 108 | "connections": [ 109 | { 110 | "entity": "reducing cardiovascular morbidity", 111 | "relationship": "show promise" 112 | }, 113 | { 114 | "entity": "reducing mortality", 115 | "relationship": "show promise" 116 | } 117 | ] 118 | } 119 | ] 120 | }, 121 | { 122 | "input": "The epidemiological burden of cardiovascular disease underscores the imperative for ongoing research into genetic predispositions and the optimization of primary and secondary prevention strategies.Cardiovascular disease also significantly intersects with metabolic syndrome, wherein insulin resistance and visceral adiposity contribute to endothelial dysfunction and systemic inflammation, further accelerating atherogenic processes.", 123 | "complexity_level": "medium", 124 | "label": [ 125 | { 126 | "entity": "cardiovascular disease", 127 | "connections": [ 128 | { 129 | "entity": "research", 130 | "relationship": "ongoing research into genetic predispositions and prevention strategies" 131 | }, 132 | { 133 | "entity": "metabolic syndrome", 134 | "relationship": "significantly intersects with" 135 | } 136 | ] 137 | }, 138 | { 139 | "entity": "metabolic syndrome", 140 | "connections": [ 141 | { 142 | "entity": "insulin resistance", 143 | "relationship": "contributes to endothelial dysfunction and inflammation" 144 | }, 145 | { 146 | "entity": "visceral adiposity", 147 | "relationship": "contributes to endothelial dysfunction and inflammation" 148 | } 149 | ] 150 | } 151 | ] 152 | }, 153 | { 154 | "input": "Advanced imaging techniques, such as coronary artery calcium scoring and carotid intima-media thickness measurement, enhance the stratification of cardiovascular risk, enabling more tailored therapeutic interventions.", 155 | "complexity_level": "medium", 156 | "label": [ 157 | { 158 | "entity": "advanced imaging techniques", 159 | "connections": [ 160 | { 161 | "entity": "coronary artery calcium scoring", 162 | "relationship": "enhances cardiovascular risk stratification" 163 | }, 164 | { 165 | "entity": "carotid intima-media thickness measurement", 166 | "relationship": "enhances cardiovascular risk stratification" 167 | } 168 | ] 169 | }, 170 | { 171 | "entity": "cardiovascular risk", 172 | "connections": [ 173 | { 174 | "entity": "therapeutic interventions", 175 | "relationship": "tailored based on imaging results" 176 | } 177 | ] 178 | } 179 | ] 180 | }, 181 | { 182 | "input": "Role of novel biomarkers, including high-sensitivity troponins and natriuretic peptides, is pivotal in the early detection and prognostication of acute coronary syndromes and heart failure within the broader spectrum of cardiovascular disease.", 183 | "complexity_level": "high", 184 | "label": [ 185 | { 186 | "entity": "novel biomarkers", 187 | "connections": [ 188 | { 189 | "entity": "high-sensitivity troponins", 190 | "relationship": "associated with" 191 | }, 192 | { 193 | "entity": "natriuretic peptides", 194 | "relationship": "associated with" 195 | } 196 | ] 197 | }, 198 | { 199 | "entity": "acute coronary syndromes", 200 | "connections": [ 201 | { 202 | "entity": "novel biomarkers", 203 | "relationship": "detected by" 204 | }, 205 | { 206 | "entity": "heart failure", 207 | "relationship": "prognostication for" 208 | } 209 | ] 210 | }, 211 | { 212 | "entity": "heart failure", 213 | "connections": [ 214 | { 215 | "entity": "novel biomarkers", 216 | "relationship": "detected by" 217 | }, 218 | { 219 | "entity": "acute coronary syndromes", 
220 | "relationship": "prognostication for" 221 | } 222 | ] 223 | }, 224 | { 225 | "entity": "cardiovascular disease", 226 | "connections": [ 227 | { 228 | "entity": "acute coronary syndromes", 229 | "relationship": "part of" 230 | }, 231 | { 232 | "entity": "heart failure", 233 | "relationship": "part of" 234 | } 235 | ] 236 | } 237 | ] 238 | } 239 | ] -------------------------------------------------------------------------------- /quality_prompts/utils/prompting_techniques_system_prompts.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | from typing import List, Dict 3 | 4 | from ..exemplars import Exemplar 5 | 6 | 7 | class System2AttentionSystemPrompt(BaseModel): 8 | # Source: Page 4 of https://arxiv.org/pdf/2311.11829 -- only paragraph 1 of their prompt is used 9 | additional_information: str 10 | input_text: str 11 | 12 | @property 13 | def system_prompt(self) -> str: 14 | return f"""Given the following text by a user, extract the part that is unbiased and not their opinion, 15 | so that using that text alone would be good context for providing an unbiased answer to 16 | the question portion of the text. 17 | Text sent by User: 18 | {self.additional_information} 19 | """ 20 | 21 | @property 22 | def messages(self) -> List[Dict[str, str]]: 23 | return [ 24 | { 25 | "role": "system", 26 | "content": self.system_prompt, 27 | }, 28 | {"role": "user", "content": self.input_text}, 29 | ] 30 | 31 | 32 | class SimtoMCharacterExtractionSystemPrompt(BaseModel): 33 | input_text: str 34 | 35 | @property 36 | def system_prompt(self) -> str: 37 | return """Which character's perspective is relevant to answer this user's question.""" 38 | 39 | @property 40 | def messages(self) -> List[Dict[str, str]]: 41 | return [ 42 | { 43 | "role": "system", 44 | "content": self.system_prompt, 45 | }, 46 | { 47 | "role": "user", 48 | "content": self.input_text, 49 | }, 50 | ] 51 | 52 | 53 | class SimtoMSystemPrompt(BaseModel): 54 | # Source: Page 4 of https://arxiv.org/pdf/2311.10227 55 | additional_information: str 56 | character_name: str 57 | 58 | @property 59 | def system_prompt(self) -> str: 60 | return f"""The following is a sequence of events: 61 | {self.additional_information} 62 | Which events does {self.character_name} know about?""" 63 | 64 | @property 65 | def messages(self) -> List[Dict[str, str]]: 66 | return [ 67 | { 68 | "role": "system", 69 | "content": self.system_prompt, 70 | }, 71 | ] 72 | 73 | 74 | class SelfAskSystemPrompt(BaseModel): 75 | # Source: Written by @sarthakrastogi 76 | input_text: str 77 | additional_information: str 78 | 79 | @property 80 | def system_prompt(self) -> str: 81 | return f"""Given the below information and the user's question, decide whether follow-up questions are required to answer the question. 82 | Only if follow-up questions are absolutely required before answering the present questions, return the questions as a Python list: 83 | # Example response: 84 | ["follow_up_question_1", "follow_up_question_2"] 85 | 86 | If the user's question can be answered without follow up questions, simply respond with "FALSE". 
87 | 88 | # Provided information: 89 | {self.additional_information}""" 90 | 91 | @property 92 | def messages(self) -> List[Dict[str, str]]: 93 | return [ 94 | { 95 | "role": "system", 96 | "content": self.system_prompt, 97 | }, 98 | { 99 | "role": "user", 100 | "content": self.input_text, 101 | }, 102 | ] 103 | 104 | 105 | class ChainOfThoughtSystemPrompt(BaseModel): 106 | # Source: https://arxiv.org/abs/2211.01910 107 | system_prompt: str = ( 108 | """Let's work this out it a step by step to be sure we have the right answer.""" 109 | ) 110 | 111 | 112 | class StepBackPromptingSystemPrompt(BaseModel): 113 | # Source: Written by @sarthakrastogi 114 | input_text: str 115 | additional_information: str 116 | 117 | @property 118 | def system_prompt(self) -> str: 119 | return f"""Given the below information and the user's question, write a generic, high-level question about relevant concepts or facts that are required for answering the user's question. 120 | # Provided information: 121 | {self.additional_information}""" 122 | 123 | @property 124 | def messages(self) -> List[Dict[str, str]]: 125 | return [ 126 | { 127 | "role": "system", 128 | "content": self.system_prompt, 129 | }, 130 | { 131 | "role": "user", 132 | "content": self.input_text, 133 | }, 134 | ] 135 | 136 | 137 | class AnalogicalPromptingSystemPrompt(BaseModel): 138 | # Source: Improvised from page 5, section 5.1 of https://arxiv.org/pdf/2310.01714 139 | directive: str 140 | input_text: str 141 | output_formatting: str 142 | 143 | @property 144 | def updated_directive(self) -> str: 145 | return f"""{self.directive} 146 | When presented with a problem, recall relevant problems as examples. Afterward, 147 | proceed to solve the initial problem.""" 148 | 149 | @property 150 | def updated_output_formatting(self) -> str: 151 | return f"""{self.output_formatting} 152 | # Problem: 153 | {self.input_text} 154 | # Instructions: 155 | ## Relevant Problems: 156 | Recall three examples of problems that are relevant to the initial problem. Your problems should be distinct from each other and from the initial problem. For each problem: 157 | - After "Q: ", describe the problem 158 | - After "A: ", explain the solution and provide the ultimate answer. 159 | ## Solve the Initial Problem: 160 | Q: Copy and paste the initial problem here. 161 | A: Explain the solution and provide the ultimate answer. 162 | """ 163 | 164 | 165 | class ThreadOfThoughtPromptingSystemPrompt(BaseModel): 166 | # Source: Page 4 of https://arxiv.org/pdf/2311.08734 167 | additional_information: str 168 | 169 | @property 170 | def context_summarisation_messages(self) -> str: 171 | context_summarisation_system_prompt = f"""{self.additional_information} 172 | Walk me through this context in manageable parts step by step, summarizing and analyzing as we go.""" 173 | return [{"role": "system", "content": context_summarisation_system_prompt}] 174 | 175 | 176 | class TabularChainOfThoughtPrompingSystemPrompt(BaseModel): 177 | # Source: Written by @sarthakrastogi, output formatting taken from page 5, table 2 of https://arxiv.org/pdf/2305.17812 178 | directive: str 179 | input_text: str 180 | output_formatting: str 181 | 182 | @property 183 | def updated_directive(self) -> str: 184 | return f"""{self.directive} 185 | Think through the problem step by step to solve it. 
186 | At each step, you have to figure out: 187 | - the step number, 188 | - the sub-question to be answered in that step, 189 | - the thought process of solving that step, and 190 | - the result of solving that step. 191 | """ 192 | 193 | @property 194 | def updated_output_formatting(self) -> str: 195 | return f"""{self.output_formatting} 196 | Respond in the following markdown table format for each step: 197 | |step|subquestion|process|result| 198 | """ 199 | 200 | 201 | class ContrastiveCoTSystemPrompt(BaseModel): 202 | # Source: improvised by @sarthakrastogi 203 | directive: str 204 | additional_information: str 205 | exemplar: Exemplar 206 | 207 | @property 208 | def updated_directive(self) -> str: 209 | return f"""{self.directive} 210 | You are given examples of both valid and invalid reasoning for solving the problem. Observe these examples to understand how to and how not to reason about the problem. 211 | """ 212 | 213 | @property 214 | def valid_and_invalid_exemplar_pair_generation_messages(self) -> str: 215 | valid_and_invalid_exemplar_pair_generation_system_prompt = f""" 216 | You will be given an example used by them in an LLM prompt. 217 | Your task is to generate two examples for how the LLM should reason about solving the example. 218 | You will generate: 219 | - one valid example showing the correct reasoning for the LLM to solve that example prompt, and 220 | - one invalid example showing the incorrect reasoning that the LLM might mistakenly use to arrive at an incorrect answer. 221 | These two examples will be used to teach the LLM how to and how not to answer a given question. 222 | These are the directive of the problem and the given example. 223 | """ 224 | 225 | directive_and_exemplar_user_prompt = f"""Directive: {self.directive} 226 | Example: 227 | {self.exemplar.label} 228 | """ 229 | 230 | return [ 231 | { 232 | "role": "system", 233 | "content": valid_and_invalid_exemplar_pair_generation_system_prompt, 234 | }, 235 | { 236 | "role": "user", 237 | "content": directive_and_exemplar_user_prompt, 238 | }, 239 | ] 240 | 241 | 242 | class SearchMajorityReasoningPathSystemPrompt(BaseModel): 243 | # Source: improvised by @sarthakrastogi 244 | directive: str 245 | additional_information: str 246 | cot_reasoning_paths: List[str] 247 | exemplars: List[Exemplar] 248 | 249 | @property 250 | def messages(self) -> str: 251 | search_majority_reasoning_path_system_prompt = f""" 252 | You will be given a list of reasoning paths taken by an LLM to answer a given directive. 253 | Your task is to read all reasoning paths carefully and perform majority voting to identify the path used most often. 254 | In your respond, you have to explain the winning reasoning path in detail. 
255 | """ 256 | directive_and_reasoning_paths_user_prompt = f"""Directive: {self.directive} 257 | Additional Information: {self.additional_information} 258 | """ 259 | for i, exemplar in enumerate(self.exemplars): 260 | search_majority_reasoning_path_system_prompt += f"""Example {str(i+1)}: 261 | {exemplar.format()} 262 | """ 263 | for i, reasoning_path in enumerate(self.cot_reasoning_paths): 264 | directive_and_reasoning_paths_user_prompt += f"""Reasoning path {str(i+1)}: 265 | {reasoning_path} 266 | """ 267 | return [ 268 | { 269 | "role": "system", 270 | "content": search_majority_reasoning_path_system_prompt, 271 | }, 272 | { 273 | "role": "user", 274 | "content": directive_and_reasoning_paths_user_prompt, 275 | }, 276 | ] 277 | 278 | 279 | class ConstrainedChainOfThoughtSystemPrompt(BaseModel): 280 | # Source: https://arxiv.org/pdf/2407.19825 281 | max_words: int 282 | 283 | @property 284 | def system_prompt(self): 285 | return f"""Let’s think a bit step by step and limit the answer length to {str(self.max_words)} words.""" 286 | -------------------------------------------------------------------------------- /examples/few_shot_prompt_usage.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | " \"Open\n", 9 | "" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 10, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "%%capture\n", 19 | "!pip install quality-prompts" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 11, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import requests\n", 29 | "import json\n", 30 | "import os\n", 31 | "\n", 32 | "from quality_prompts.prompt import QualityPrompt\n", 33 | "from quality_prompts.exemplars import ExemplarStore, Exemplar\n", 34 | "from quality_prompts.utils.llm import get_embedding" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 12, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "os.environ['OPENAI_API_KEY'] = \"YOUR_API_KEY_HERE\"" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "### Initialise sample exemplars for use in few-shot prompt" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 13, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "url = \"https://github.com/sarthakrastogi/quality-prompts/raw/main/examples/kg_creation_problem_sample_exemplars.json\"\n", 60 | "response = requests.get(url)\n", 61 | "kg_creation_problem_sample_exemplars = json.loads(response.text)\n", 62 | "\n", 63 | "exemplars = [Exemplar(input=e['input'], label=str(e['label']), input_embedding=get_embedding(e['input'])) for e in kg_creation_problem_sample_exemplars]\n", 64 | "exemplar_store = ExemplarStore(exemplars=exemplars)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "### Define your prompt's components and initialise it" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 14, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "directive = \"\"\"You are given a document and your task is to create a knowledge graph from it.\"\"\"\n", 81 | "\n", 82 | "additional_information = \"\"\"\n", 83 | "In the knowledge graph, entities such as people, places, objects, institutions, topics, ideas, etc. 
are represented as nodes.\n", 84 | "Whereas the relationships and actions between them are represented as edges.\n", 85 | "\"\"\"\n", 86 | "\n", 87 | "output_formatting = \"\"\"\n", 88 | "You will respond with a knowledge graph in the given JSON format:\n", 89 | "\n", 90 | "[\n", 91 | " {\"entity\" : \"Entity_name\", \"connections\" : [\n", 92 | " {\"entity\" : \"Connected_entity_1\", \"relationship\" : \"Relationship_with_connected_entity_1},\n", 93 | " {\"entity\" : \"Connected_entity_2\", \"relationship\" : \"Relationship_with_connected_entity_2},\n", 94 | " ]\n", 95 | " },\n", 96 | " {\"entity\" : \"Entity_name\", \"connections\" : [\n", 97 | " {\"entity\" : \"Connected_entity_1\", \"relationship\" : \"Relationship_with_connected_entity_1},\n", 98 | " {\"entity\" : \"Connected_entity_2\", \"relationship\" : \"Relationship_with_connected_entity_2},\n", 99 | " ]\n", 100 | " },\n", 101 | "]\n", 102 | "\n", 103 | "You must strictly respond in the given JSON format or your response will not be parsed correctly!\n", 104 | "\"\"\"\n", 105 | "\n", 106 | "prompt = QualityPrompt(\n", 107 | " directive=directive,\n", 108 | " additional_information=additional_information,\n", 109 | " output_formatting=output_formatting,\n", 110 | " exemplar_store=exemplar_store\n", 111 | " )" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 15, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "input_text = \"list the disorders included in cvd\"" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "### Apply few-shot prompting\n", 128 | "\n", 129 | "This searches through your set of exemplars and uses kNN to search the most relevant exemplars to be included in context." 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 16, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "prompt.few_shot(input_text=input_text, n_shots=2)" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 17, 144 | "metadata": {}, 145 | "outputs": [ 146 | { 147 | "name": "stdout", 148 | "output_type": "stream", 149 | "text": [ 150 | "Exemplar #1: exemplar.input, exemplar.label\n", 151 | "Exemplar #2: exemplar.input, exemplar.label\n" 152 | ] 153 | } 154 | ], 155 | "source": [ 156 | "# check the relevant exemplars searched for the user's query\n", 157 | "\n", 158 | "for i, exemplar in enumerate(prompt.few_shot_examples):\n", 159 | " print(f\"Exemplar #{i+1}: exemplar.input, exemplar.label\")" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "### Apply one of several Chain of Thought Techniques" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "prompt.system2attention(input_text=input_text)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "### Finally compile your prompt for use in an LLM" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 19, 188 | "metadata": {}, 189 | "outputs": [ 190 | { 191 | "name": "stdout", 192 | "output_type": "stream", 193 | "text": [ 194 | "You are given a document and your task is to create a knowledge graph from it.\n", 195 | " In the context provided, the unbiased information that can be extracted is:\n", 196 | "- In the knowledge graph, entities such as people, places, objects, institutions, topics, ideas, etc. 
are represented as nodes.\n", 197 | "- Whereas the relationships and actions between them are represented as edges.\n", 198 | " Example input: Cardiovascular disease (CVD) encompasses a spectrum of disorders involving the heart and vasculature, prominently including atherosclerosis, characterized by endothelial dysfunction and the accumulation of lipid-laden plaques. These pathophysiological processes often precipitate myocardial infarction and cerebrovascular accidents, arising from the rupture of vulnerable plaques and subsequent thrombogenesis.\n", 199 | "Example output: [{'entity': 'cardiovascular disease (cvd)', 'connections': [{'entity': 'heart', 'relationship': 'involves'}, {'entity': 'vasculature', 'relationship': 'involves'}, {'entity': 'atherosclerosis', 'relationship': 'associated disorder'}, {'entity': 'endothelial dysfunction', 'relationship': 'characteristic feature'}, {'entity': 'lipid-laden plaques', 'relationship': 'associated feature'}, {'entity': 'myocardial infarction', 'relationship': 'common complication'}, {'entity': 'cerebrovascular accidents', 'relationship': 'common complication'}, {'entity': 'plaque rupture', 'relationship': 'cause of complications'}, {'entity': 'thrombogenesis', 'relationship': 'subsequent process'}]}, {'entity': 'atherosclerosis', 'connections': [{'entity': 'endothelial dysfunction', 'relationship': 'causes'}, {'entity': 'lipid-laden plaques', 'relationship': 'result of'}, {'entity': 'plaque rupture', 'relationship': 'leads to complications'}]}]\n", 200 | "\n", 201 | "Example input: The epidemiological burden of cardiovascular disease underscores the imperative for ongoing research into genetic predispositions and the optimization of primary and secondary prevention strategies.Cardiovascular disease also significantly intersects with metabolic syndrome, wherein insulin resistance and visceral adiposity contribute to endothelial dysfunction and systemic inflammation, further accelerating atherogenic processes.\n", 202 | "Example output: [{'entity': 'cardiovascular disease', 'connections': [{'entity': 'research', 'relationship': 'ongoing research into genetic predispositions and prevention strategies'}, {'entity': 'metabolic syndrome', 'relationship': 'significantly intersects with'}]}, {'entity': 'metabolic syndrome', 'connections': [{'entity': 'insulin resistance', 'relationship': 'contributes to endothelial dysfunction and inflammation'}, {'entity': 'visceral adiposity', 'relationship': 'contributes to endothelial dysfunction and inflammation'}]}]\n", 203 | "\n", 204 | " \n", 205 | "You will respond with a knowledge graph in the given JSON format:\n", 206 | "\n", 207 | "[\n", 208 | " {\"entity\" : \"Entity_name\", \"connections\" : [\n", 209 | " {\"entity\" : \"Connected_entity_1\", \"relationship\" : \"Relationship_with_connected_entity_1},\n", 210 | " {\"entity\" : \"Connected_entity_2\", \"relationship\" : \"Relationship_with_connected_entity_2},\n", 211 | " ]\n", 212 | " },\n", 213 | " {\"entity\" : \"Entity_name\", \"connections\" : [\n", 214 | " {\"entity\" : \"Connected_entity_1\", \"relationship\" : \"Relationship_with_connected_entity_1},\n", 215 | " {\"entity\" : \"Connected_entity_2\", \"relationship\" : \"Relationship_with_connected_entity_2},\n", 216 | " ]\n", 217 | " },\n", 218 | "]\n", 219 | "\n", 220 | "You must strictly respond in the given JSON format or your response will not be parsed correctly!\n", 221 | "\n", 222 | " \n" 223 | ] 224 | } 225 | ], 226 | "source": [ 227 | "print(prompt.compile())" 228 | ] 229 | }, 230 | { 
231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [] 236 | } 237 | ], 238 | "metadata": { 239 | "kernelspec": { 240 | "display_name": "base", 241 | "language": "python", 242 | "name": "python3" 243 | }, 244 | "language_info": { 245 | "codemirror_mode": { 246 | "name": "ipython", 247 | "version": 3 248 | }, 249 | "file_extension": ".py", 250 | "mimetype": "text/x-python", 251 | "name": "python", 252 | "nbconvert_exporter": "python", 253 | "pygments_lexer": "ipython3", 254 | "version": "3.10.9" 255 | } 256 | }, 257 | "nbformat": 4, 258 | "nbformat_minor": 2 259 | } 260 | -------------------------------------------------------------------------------- /quality_prompts/prompt.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | import warnings 3 | from typing import List 4 | import json 5 | 6 | from .exemplars import ExemplarStore, Exemplar 7 | from .utils.llm import llm_call, llm_call_multiple_choices, get_embedding 8 | from .utils.prompting_techniques_system_prompts import * 9 | from .utils.prompt_postprocessing import * 10 | 11 | 12 | class QualityPrompt(BaseModel): 13 | directive: str # Core intent of the prompt 14 | output_formatting: str = "" 15 | additional_information: str = "" 16 | style_instructions: str = "" 17 | role_instructions: str = "" 18 | emotion_instructions: str = "" 19 | exemplar_store: ExemplarStore = ExemplarStore(exemplars=[]) 20 | few_shot_examples: List[Exemplar] = [] 21 | 22 | def compile(self): 23 | formatted_examples = "\n".join( 24 | [ 25 | f"Example input: {e.input}\nExample output: {e.label}\n" 26 | for e in self.few_shot_examples 27 | ] 28 | ) 29 | compiled_prompt = f"""{self.directive} 30 | {self.additional_information} 31 | {formatted_examples} 32 | {self.output_formatting} 33 | """ 34 | return remove_extra_chars(compiled_prompt) 35 | 36 | def few_shot(self, input_text, n_shots=3, prioritise_complex_exemplars=False): 37 | if len(self.exemplar_store.exemplars) > n_shots: 38 | self.few_shot_examples = ( 39 | self.exemplar_store.get_similar_exemplars_to_test_sample( 40 | input_text=input_text, 41 | k=n_shots, 42 | prioritise_complex_exemplars=prioritise_complex_exemplars, 43 | ) 44 | ) 45 | else: 46 | self.few_shot_examples = self.exemplar_store.exemplars 47 | 48 | # ZERO-SHOT PROMPTING TECHNIQUES 49 | def system2attention(self, input_text): 50 | """ 51 | Makes an LLM rewrite the prompt by removing any info unrelated to the user's question. 52 | https://arxiv.org/abs/2311.11829 53 | """ 54 | messages = System2AttentionSystemPrompt( 55 | additional_information=self.additional_information, input_text=input_text 56 | ).messages 57 | self.additional_information = llm_call(messages=messages) 58 | 59 | def sim_to_M(self, input_text): 60 | """ 61 | Establishes the known facts 62 | https://arxiv.org/abs/2311.10227 63 | """ 64 | messages = SimtoMCharacterExtractionSystemPrompt(input_text=input_text).messages 65 | character_name = llm_call(messages=messages) 66 | 67 | messages = SimtoMSystemPrompt( 68 | additional_information=self.additional_information, 69 | character_name=character_name, 70 | ).messages 71 | self.additional_information = llm_call(messages=messages) 72 | 73 | def rephrase_and_respond(self, input_text, perform_in="same_pass"): 74 | """ 75 | http://arxiv.org/abs/2311.04205 76 | """ 77 | RaR_instruction = "Rephrase and expand the question, and respond." 
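# The two modes below differ in where the rephrasing happens: "same_shot" appends the rephrase-and-respond instruction so the model rephrases and answers within a single call, while "separate_llm_call" first asks the model to rephrase and expand the question in a separate call, then appends that rephrasing to the user's input.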
78 | if perform_in == "same_shot": 79 | input_text += RaR_instruction 80 | elif perform_in == "separate_llm_call": 81 | messages = [ 82 | {"role": "system", "content": RaR_instruction}, 83 | {"role": "user", "content": input_text}, 84 | ] 85 | input_text += llm_call(messages=messages) 86 | 87 | def rereading(self, input_text): 88 | """ 89 | http://arxiv.org/abs/2309.06275 90 | """ 91 | input_text += "Read the question again:" + input_text 92 | 93 | def self_ask(self, input_text, allow_search_engine=False): 94 | """ 95 | Prompts the LLM to first ask any follow-up questions if needed 96 | http://arxiv.org/abs/2210.03350 97 | """ 98 | messages = SelfAskSystemPrompt( 99 | input_text=input_text, additional_information=self.additional_information 100 | ).messages 101 | response = llm_call(messages=messages) 102 | if "FALSE" in response: 103 | pass 104 | else: 105 | follow_up_questions = json.loads(response) 106 | for follow_up_question in follow_up_questions: 107 | if allow_search_engine: 108 | pass # TODO 109 | else: 110 | messages = [ 111 | {"role": "system", "content": self.additional_information}, 112 | {"role": "user", "content": follow_up_question}, 113 | ] 114 | follow_up_question_answer = llm_call(messages=messages) 115 | 116 | self.additional_information += f"""Question: {follow_up_question} 117 | Answer: {follow_up_question_answer} 118 | """ 119 | 120 | # THOUGHT GENERATION 121 | def chain_of_thought_prompting(self): 122 | """ 123 | https://arxiv.org/pdf/2201.11903 124 | """ 125 | chain_of_thought_system_prompt = ChainOfThoughtSystemPrompt().system_prompt 126 | self.output_formatting = f"""{chain_of_thought_system_prompt} 127 | {self.output_formatting}""" 128 | 129 | # ZERO-SHOT CoT 130 | def step_back_prompting(self, input_text): 131 | """ 132 | Prompts the LLM to first generate generic questions about facts/concepts used to answer the question, before answering. 133 | https://arxiv.org/pdf/2310.06117 134 | """ 135 | messages = StepBackPromptingSystemPrompt( 136 | input_text=input_text, additional_information=self.additional_information 137 | ).messages 138 | step_back_question = llm_call(messages=messages) 139 | 140 | messages = [ 141 | {"role": "system", "content": self.additional_information}, 142 | {"role": "user", "content": step_back_question}, 143 | ] 144 | step_back_answer = llm_call(messages=messages) 145 | 146 | self.additional_information += f"""Question: {step_back_question} 147 | Answer: {step_back_answer} 148 | """ 149 | 150 | def analogical_prompting(self, input_text): 151 | """ 152 | Prompts the LLM to generate three distinct questions (along with solutions) with are similar to the user's query, and then finally solve the user's query. 153 | https://arxiv.org/pdf/2310.01714 154 | """ 155 | analogical_prompting_system_prompt = AnalogicalPromptingSystemPrompt( 156 | input_text=input_text, directive=self.directive 157 | ) 158 | self.directive, self.output_formatting = ( 159 | analogical_prompting_system_prompt.updated_directive, 160 | analogical_prompting_system_prompt.updated_output_formatting, 161 | ) 162 | 163 | def thread_of_thought_prompting(self, input_text): 164 | """ 165 | Prompts the LLM to first analyse and summarise and additional information / context step by step, before answering. 
166 | https://arxiv.org/pdf/2311.08734 167 | """ 168 | thread_of_thought_context_summarisation_messages = ( 169 | ThreadOfThoughtPromptingSystemPrompt( 170 | additional_information=self.additional_information 171 | ).context_summarisation_system_prompt 172 | ).context_summarisation_messages 173 | 174 | self.additional_information = llm_call( 175 | messages=thread_of_thought_context_summarisation_messages 176 | ) 177 | 178 | def tabular_chain_of_thought_prompting(self, input_text): 179 | """ 180 | Prompts the LLM to think step by step and write the step, process and result of each step in a markdown table 181 | https://arxiv.org/pdf/2305.17812 182 | """ 183 | tabcot_prompting_system_prompt = TabularChainOfThoughtPrompingSystemPrompt( 184 | input_text=input_text, 185 | directive=self.directive, 186 | output_formatting=self.output_formatting, 187 | ) 188 | self.directive, self.output_formatting = ( 189 | tabcot_prompting_system_prompt.updated_directive, 190 | tabcot_prompting_system_prompt.updated_output_formatting, 191 | ) 192 | 193 | # FEW-SHOT CoT 194 | def contrastive_cot_prompting(self, input_text): 195 | """ 196 | Adds exemplars with both valid and invalid reasoning paths to show the LLM both how to and how not to reason about the problem. 197 | https://arxiv.org/pdf/2311.09277 198 | """ 199 | # Select the best matching exemplar 200 | self.few_shot(input_text=input_text, n_shots=1) 201 | selected_few_shot_example = self.few_shot_examples[0] 202 | 203 | # Generate valid and invalid exemplar pair 204 | contrastive_cot_system_prompt = ContrastiveCoTSystemPrompt( 205 | directive=self.directive, 206 | additional_information=self.additional_information, 207 | exemplar=selected_few_shot_example, 208 | ) 209 | 210 | valid_and_invalid_exemplar_pair_generation_messages = ( 211 | contrastive_cot_system_prompt.valid_and_invalid_exemplar_pair_generation_messages 212 | ) 213 | self.directive = contrastive_cot_system_prompt.updated_directive 214 | 215 | valid_and_invalid_exemplar_pair = llm_call( 216 | messages=valid_and_invalid_exemplar_pair_generation_messages 217 | ) 218 | exemplar = Exemplar( 219 | input=selected_few_shot_example.input, 220 | label=valid_and_invalid_exemplar_pair, 221 | input_embedding=selected_few_shot_example.input_embedding, 222 | ) 223 | self.few_shot_examples = [exemplar] 224 | 225 | def uncertainty_routed_cot_prompting( 226 | self, input_text, n_reasoning_paths=5, temperature=0.4 227 | ): 228 | """ 229 | Samples multiple CoT reasoning paths, then selects the majority if it is above a certain threshold (calculated based on validation data). 
If not, it samples greedily and selects that response 230 | https://storage.googleapis.com/deepmind-media/gemini/gemini_1_report.pdf 231 | """ 232 | # Step 1: Generate n reasoning paths using an LLM 233 | self.chain_of_thought_prompting() 234 | prompt_with_cot = self.compile() 235 | messages = [ 236 | {"role": "system", "content": prompt_with_cot}, 237 | {"role": "user", "content": input_text}, 238 | ] 239 | cot_reasoning_paths = llm_call_multiple_choices( 240 | messages=messages, n=n_reasoning_paths, temperature=temperature 241 | ) 242 | 243 | # Step 2: Do majority voting on these reasoning paths 244 | search_majority_reasoning_path_messages = ( 245 | SearchMajorityReasoningPathSystemPrompt( 246 | directive=self.directive, 247 | additional_information=self.additional_information, 248 | cot_reasoning_paths=cot_reasoning_paths, 249 | exemplars=[], 250 | ).messages 251 | ) 252 | majority_reasoning_path = llm_call( 253 | messages=search_majority_reasoning_path_messages 254 | ) 255 | exemplar = Exemplar( 256 | input=input_text, 257 | label=majority_reasoning_path, 258 | input_embedding=get_embedding(input_text), 259 | ) 260 | self.few_shot_examples = [exemplar] 261 | 262 | def complexity_based_prompting( 263 | self, input_text, n_reasoning_paths=5, temperature=0.4, n_exemplars=3 264 | ): 265 | """ 266 | First searches the most complex exemplars for use in context. 267 | Then samples multiple CoT reasoning paths, then selects the majority if it is above a certain threshold (calculated based on validation data). If not, it samples greedily and selects that response 268 | https://openreview.net/pdf?id=yf1icZHC-l9 269 | """ 270 | # Step 1: Search complex exemplars 271 | self.few_shot( 272 | input_text=input_text, 273 | n_shots=n_exemplars, 274 | prioritise_complex_exemplars=True, 275 | ) 276 | # Step 2: Generate n reasoning paths using an LLM 277 | self.chain_of_thought_prompting() 278 | prompt_with_cot = self.compile() 279 | messages = [ 280 | {"role": "system", "content": prompt_with_cot}, 281 | {"role": "user", "content": input_text}, 282 | ] 283 | cot_reasoning_paths = llm_call_multiple_choices( 284 | messages=messages, n=n_reasoning_paths, temperature=temperature 285 | ) 286 | 287 | # Step 3: Do majority voting on these reasoning paths 288 | search_majority_reasoning_path_messages = ( 289 | SearchMajorityReasoningPathSystemPrompt( 290 | directive=self.directive, 291 | additional_information=self.additional_information, 292 | cot_reasoning_paths=cot_reasoning_paths, 293 | exemplars=[], 294 | ).messages 295 | ) 296 | majority_reasoning_path = llm_call( 297 | messages=search_majority_reasoning_path_messages 298 | ) 299 | exemplar = Exemplar( 300 | input=input_text, 301 | label=majority_reasoning_path, 302 | input_embedding=get_embedding(input_text), 303 | ) 304 | self.few_shot_examples = [exemplar] 305 | 306 | def constrained_chain_of_thought_prompting(self, max_words: int = 45): 307 | """ 308 | Adds length constraints to reasoning steps, as an instruction to the prompt. 309 | So it's able to maintain both accuracy AND conciseness. 
310 | https://arxiv.org/pdf/2407.19825 311 | """ 312 | constrained_chain_of_thought_system_prompt = ( 313 | ConstrainedChainOfThoughtSystemPrompt(max_words=max_words).system_prompt 314 | ) 315 | self.output_formatting = f"""{constrained_chain_of_thought_system_prompt} 316 | {self.output_formatting}""" 317 | -------------------------------------------------------------------------------- /examples/zero_shot_cot_usage.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | " \"Open\n", 9 | "" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "%%capture\n", 19 | "!pip install quality-prompts" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import os\n", 29 | "\n", 30 | "from quality_prompts.prompt import QualityPrompt\n", 31 | "from quality_prompts.utils.llm import llm_call" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 3, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "os.environ['OPENAI_API_KEY'] = \"YOUR_API_KEY_HERE\"" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "# Example 1: Tabular Chain of Thought Prompting" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "### Define your prompt's components and initialise it" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 4, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "directive = \"\"\"Solve the given math problem\"\"\"\n", 64 | "prompt = QualityPrompt(directive=directive)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 5, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "input_text = \"\"\"Jackson is planting tulips. He can fit 6 red tulips in a row and 8 blue\n", 74 | "tulips in a row. 
If Jackson buys 36 red tulips and 24 blue tulips, how\n", 75 | "many rows of flowers will he plant?\"\"\"" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "### Apply Tabular Chain of Thought prompting" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 6, 88 | "metadata": {}, 89 | "outputs": [ 90 | { 91 | "name": "stdout", 92 | "output_type": "stream", 93 | "text": [ 94 | "Solve the given math problem\n", 95 | " Think through the problem step by step to solve it.\n", 96 | " At each step, you have to figure out:\n", 97 | " - the step number,\n", 98 | " - the sub-question to be answered in that step,\n", 99 | " - the thought process of solving that step, and\n", 100 | " - the result of solving that step.\n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " Respond in the following markdown table format for each step:\n", 106 | " |step|subquestion|process|result|\n", 107 | " \n", 108 | " \n" 109 | ] 110 | } 111 | ], 112 | "source": [ 113 | "prompt.tabular_chain_of_thought_prompting(input_text=input_text)\n", 114 | "compiled_quality_prompt = prompt.compile()\n", 115 | "print(compiled_quality_prompt)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "### Test the compiled prompt" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 7, 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "name": "stdout", 132 | "output_type": "stream", 133 | "text": [ 134 | "|step|subquestion|process|result|\n", 135 | "|-|-|-|-|\n", 136 | "|1|Calculate the number of rows of red tulips that Jackson can plant.|Divide the total number of red tulips (36) by the number of red tulips in a row (6).|6 rows|\n", 137 | "|2|Calculate the number of rows of blue tulips that Jackson can plant.|Divide the total number of blue tulips (24) by the number of blue tulips in a row (8).|3 rows|\n", 138 | "|3|Calculate the total number of rows of flowers that Jackson will plant.|Add the rows of red tulips and blue tulips.|6 rows (red) + 3 rows (blue) = 9 rows| \n", 139 | "\n", 140 | "Therefore, Jackson will plant a total of 9 rows of flowers.\n" 141 | ] 142 | } 143 | ], 144 | "source": [ 145 | "messages = [{\"role\" : \"system\", \"content\" : compiled_quality_prompt},\n", 146 | " {\"role\" : \"user\", \"content\" : input_text}]\n", 147 | "response = llm_call(messages)\n", 148 | "print(response)" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "# Example 2: Step back prompting" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 8, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "directive = \"\"\"Solve the given math problem\"\"\"\n", 165 | "prompt = QualityPrompt(\n", 166 | " directive=directive,\n", 167 | " additional_information=\"\",\n", 168 | " )" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 9, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "input_text = \"\"\"What happens to the pressure, P, of an ideal gas if\n", 178 | "the temperature is increased by a factor of 2 and the \n", 179 | "volume is increased by a factor of 8 ?\"\"\"" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "### Apply Step Back Prompting" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 10, 192 | "metadata": {}, 193 | "outputs": [ 194 | { 195 | "name": "stdout", 196 | 
"output_type": "stream", 197 | "text": [ 198 | "Solve the given math problem\n", 199 | " Question: How does the pressure of an ideal gas change when the temperature and volume are altered according to specific factors?\n", 200 | " Answer: According to the ideal gas law, the pressure of an ideal gas is directly proportional to its temperature and inversely proportional to its volume. This relationship can be expressed by the formula:\n", 201 | "\n", 202 | "\\[ PV = nRT \\]\n", 203 | "\n", 204 | "where:\n", 205 | "- \\( P \\) is the pressure of the gas\n", 206 | "- \\( V \\) is the volume of the gas\n", 207 | "- \\( n \\) is the number of moles of gas\n", 208 | "- \\( R \\) is the ideal gas constant\n", 209 | "- \\( T \\) is the temperature of the gas in kelvin\n", 210 | "\n", 211 | "If we alter the temperature and volume of the gas according to specific factors, the pressure will change as follows:\n", 212 | "\n", 213 | "1. If the volume of the gas is decreased while keeping the temperature constant, the pressure will increase. This is known as Boyle's Law, which states that pressure and volume are inversely proportional when temperature is constant.\n", 214 | "\n", 215 | "2. If the temperature of the gas is increased while keeping the volume constant, the pressure will also increase. This is known as Charles's Law, which states that pressure and temperature are directly proportional when volume is constant.\n", 216 | "\n", 217 | "3. If both the temperature and volume of the gas are changed, the overall effect on pressure will depend on the specific changes made. However, in general, increasing temperature and decreasing volume will lead to a greater increase in pressure.\n", 218 | "\n", 219 | "In summary, the pressure of an ideal gas will change in response to alterations in temperature and volume according to the relationships described by Boyle's Law and Charles's Law.\n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n" 224 | ] 225 | } 226 | ], 227 | "source": [ 228 | "prompt.step_back_prompting(input_text=input_text)\n", 229 | "compiled_quality_prompt = prompt.compile()\n", 230 | "print(compiled_quality_prompt)" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "### Test the compiled prompt" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 11, 243 | "metadata": {}, 244 | "outputs": [ 245 | { 246 | "name": "stdout", 247 | "output_type": "stream", 248 | "text": [ 249 | "To determine the change in pressure of an ideal gas when the temperature is increased by a factor of 2 and the volume is increased by a factor of 8, we can refer to the ideal gas law:\n", 250 | "\n", 251 | "\\[ PV = nRT \\]\n", 252 | "\n", 253 | "Given that the initial pressure is \\( P \\), the initial temperature is \\( T \\), and the initial volume is \\( V \\), we can express the initial situation as:\n", 254 | "\n", 255 | "\\[ PV = nRT \\]\n", 256 | "\n", 257 | "When the temperature is increased by a factor of 2, the new temperature becomes \\( 2T \\).\n", 258 | "\n", 259 | "When the volume is increased by a factor of 8, the new volume becomes \\( 8V \\).\n", 260 | "\n", 261 | "Substitute the new temperature and volume into the ideal gas law equation:\n", 262 | "\n", 263 | "\\[ P(8V) = nR(2T) \\]\n", 264 | "\n", 265 | "\\[ 8PV = 2nRT \\]\n", 266 | "\n", 267 | "Divide both sides by 8:\n", 268 | "\n", 269 | "\\[ P = \\frac{2nRT}{8V} \\]\n", 270 | "\n", 271 | "\\[ P = \\frac{nRT}{4V} \\]\n", 272 | "\n", 273 | "Since the number of moles of 
gas and the gas constant remain constant, the new pressure can be expressed as:\n", 274 | "\n", 275 | "\\[ P_{new} = \\frac{P_{initial}}{4} \\]\n", 276 | "\n", 277 | "Therefore, if the temperature is increased by a factor of 2 and the volume is increased by a factor of 8, the pressure of the ideal gas will decrease to one-fourth of its initial value.\n" 278 | ] 279 | } 280 | ], 281 | "source": [ 282 | "messages = [{\"role\" : \"system\", \"content\" : compiled_quality_prompt},\n", 283 | " {\"role\" : \"user\", \"content\" : input_text}]\n", 284 | "response = llm_call(messages)\n", 285 | "print(response)" 286 | ] 287 | } 288 | ], 289 | "metadata": { 290 | "kernelspec": { 291 | "display_name": "base", 292 | "language": "python", 293 | "name": "python3" 294 | }, 295 | "language_info": { 296 | "codemirror_mode": { 297 | "name": "ipython", 298 | "version": 3 299 | }, 300 | "file_extension": ".py", 301 | "mimetype": "text/x-python", 302 | "name": "python", 303 | "nbconvert_exporter": "python", 304 | "pygments_lexer": "ipython3", 305 | "version": "3.10.9" 306 | } 307 | }, 308 | "nbformat": 4, 309 | "nbformat_minor": 2 310 | } 311 | -------------------------------------------------------------------------------- /examples/few_shot_cot_usage.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | " \"Open\n", 9 | "" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "%%capture\n", 19 | "!pip install quality-prompts" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import requests\n", 29 | "import json\n", 30 | "import os\n", 31 | "import copy\n", 32 | "\n", 33 | "from quality_prompts.prompt import QualityPrompt\n", 34 | "from quality_prompts.exemplars import ExemplarStore, Exemplar\n", 35 | "from quality_prompts.utils.llm import get_embedding" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "os.environ['OPENAI_API_KEY'] = \"YOUR_API_KEY_HERE\"" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "### Initialise sample exemplars for use in few-shot prompt" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 4, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "url = \"https://github.com/sarthakrastogi/quality-prompts/raw/main/examples/math_science_problems_sample_exemplars.json\"\n", 61 | "response = requests.get(url)\n", 62 | "math_science_problems_sample_exemplars = response.json()\n", 63 | "\n", 64 | "exemplars = [Exemplar(input=e['input'], label=str(e['label']), input_embedding=get_embedding(e['input']), complexity_level=e['complexity_level']) for e in math_science_problems_sample_exemplars]\n", 65 | "exemplar_store = ExemplarStore(exemplars=exemplars)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "### Define your prompt's components and initialise it" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 5, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "directive = \"\"\"Solve the given problem step by step.\"\"\"\n", 82 | "\n", 83 | "additional_information = \"\"\n", 84 | "\n", 85 | "output_formatting = \"\"\n", 86 | "\n", 87 | "# 
Creating three separate prompts for this example\n", 88 | "prompt1 = QualityPrompt(\n", 89 | " directive=directive,\n", 90 | " additional_information=additional_information,\n", 91 | " output_formatting=output_formatting,\n", 92 | " exemplar_store=exemplar_store\n", 93 | " )\n", 94 | "\n", 95 | "prompt2 = copy.deepcopy(prompt1)\n", 96 | "prompt3 = copy.deepcopy(prompt1)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 6, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "phys_problem = \"A car accelerates from rest at a constant rate of \\( 3 \\, \\text{m/s}^2 \\). Calculate the distance it covers in 10 seconds.\"\n", 106 | "\n", 107 | "chem_problem = \"Calculate the pH of a 0.05 M solution of acetic acid (CH3COOH), given that the \\( K_a \\) of acetic acid is \\( 1.8 \\times 10^{-5} \\).\"" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "### Contrastive CoT prompting\n", 115 | "\n", 116 | "Adds exemplars with both correct and incorrect thoughts to show both how to and how not to think.\n", 117 | "\n", 118 | "https://arxiv.org/pdf/2311.09277" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 7, 124 | "metadata": {}, 125 | "outputs": [ 126 | { 127 | "name": "stdout", 128 | "output_type": "stream", 129 | "text": [ 130 | "Solve the given problem step by step.\n", 131 | "You are given examples of both valid and invalid reasoning for solving the problem. Observe these examples to understand how to and how not to reason about the problem.\n", 132 | "\n", 133 | "Example input: Determine the pH of a 0.01 M HCl solution.\n", 134 | "Example output: Valid Example:\n", 135 | "Given that HCl is a strong acid that dissociates completely in water, we can determine the concentration of hydrogen ions \\( [H^+] \\) in the solution to be 0.01 M.\n", 136 | "To find the pH of the solution, we use the formula \\( \\text{pH} = -\\log[H^+] \\).\n", 137 | "Substitute the value of \\( [H^+] \\):\n", 138 | "\\( \\text{pH} = -\\log(0.01) = -\\log(10^{-2}) = 2 \\)\n", 139 | "Therefore, the pH of the 0.01 M HCl solution is 2.\n", 140 | "\n", 141 | "Invalid Example:\n", 142 | "Since HCl is a strong acid, it means that the concentration of the resulting hydrogen ions is 1 M.\n", 143 | "Calculating the pH using the formula \\( \\text{pH} = -\\log[H^+] \\), we get:\n", 144 | "\\( \\text{pH} = -\\log(1) = -0 = 0 \\)\n", 145 | "Therefore, the pH of the 0.01 M HCl solution is 0.\n", 146 | "This is incorrect because the concentration of hydrogen ions in the solution is not 1 M; it is 0.01 M as given. The incorrect reasoning led to an invalid pH calculation.\n", 147 | "\n", 148 | "\n" 149 | ] 150 | } 151 | ], 152 | "source": [ 153 | "prompt1.contrastive_cot_prompting(input_text=chem_problem)\n", 154 | "print(prompt1.compile())" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "### Uncertainty routed CoT\n", 162 | "Samples multiple CoT reasoning paths, then selects the majority if it is above a certain threshold (calculated based on validation data). 
If not, it samples greedily and selects that response\n", 163 | "\n", 164 | "https://storage.googleapis.com/deepmind-media/gemini/gemini_1_report.pdf" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 8, 170 | "metadata": {}, 171 | "outputs": [ 172 | { 173 | "name": "stdout", 174 | "output_type": "stream", 175 | "text": [ 176 | "Solve the given problem step by step.\n", 177 | "\n", 178 | "Example input: Calculate the pH of a 0.05 M solution of acetic acid (CH3COOH), given that the \\( K_a \\) of acetic acid is \\( 1.8 \times 10^{-5} \\).\n", 179 | "Example output: After analyzing all the reasoning paths provided, it is clear that the majority of paths (3 out of 4 paths) follow the same logical steps to determine the pH of a 0.05 M solution of acetic acid. The winning reasoning path, which is used by the majority of LLMs, is Reasoning path 3. \n", 180 | "\n", 181 | "In Reasoning path 3:\n", 182 | "1. The path starts by writing the dissociation equation of acetic acid and the equilibrium expression for the dissociation reaction.\n", 183 | "2. The assumption that the concentrations of \\(CH_3COO^-\\) and \\(H^+\\) at equilibrium are considered to be equal leads to the setup of the equilibrium expression with the given \\(K_a\\) value.\n", 184 | "3. By simplifying the equation based on the assumption that \\(x\\) is much smaller than 0.05, the path calculates the concentration of \\(H^+\\) ions (\\(x\\)) at equilibrium.\n", 185 | "4. This concentration is then used to calculate the pH of the solution through the formula pH = -log[H^+], resulting in a pH value of 3.52 for the 0.05 M acetic acid solution.\n", 186 | "\n", 187 | "Therefore, the winning reasoning path provides a systematic and accurate approach to solving the problem step by step, taking into account the principles of weak acid dissociation and equilibrium chemistry.\n", 188 | "\n", 189 | "Let's work this out it a step by step to be sure we have the right answer.\n", 190 | "\n", 191 | "\n" 192 | ] 193 | } 194 | ], 195 | "source": [ 196 | "prompt2.uncertainty_routed_cot_prompting(\n", 197 | " input_text=chem_problem,\n", 198 | " n_reasoning_paths=4,\n", 199 | " temperature=0.4\n", 200 | " )\n", 201 | "print(prompt2.compile())" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "### Complexity based prompting\n", 209 | "\n", 210 | "First searches the most complex exemplars for use in context.\n", 211 | "Then samples multiple CoT reasoning paths, then selects the majority if it is above a certain threshold (calculated based on validation data). If not, it samples greedily and selects that response\n", 212 | "\n", 213 | "https://openreview.net/pdf?id=yf1icZHC-l9\n", 214 | " " 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 9, 220 | "metadata": {}, 221 | "outputs": [ 222 | { 223 | "name": "stdout", 224 | "output_type": "stream", 225 | "text": [ 226 | "Solve the given problem step by step.\n", 227 | "\n", 228 | "Example input: Calculate the pH of a 0.05 M solution of acetic acid (CH3COOH), given that the \\( K_a \\) of acetic acid is \\( 1.8 \times 10^{-5} \\).\n", 229 | "Example output: Based on the reasoning paths provided, majority voting indicates that Reasoning path 2 is the most commonly used approach in solving the problem step by step. We will now explain the winning reasoning path in detail:\n", 230 | "\n", 231 | "### Winning Reasoning Path (Reasoning path 2):\n", 232 | "**1. 
Concept Introduction:**\n", 233 | "- The reasoning path begins by introducing the concept of weak acid dissociation and the equilibrium constant expression for the dissociation reaction.\n", 234 | "\n", 235 | "**2. Equilibrium Reaction:**\n", 236 | "- It correctly writes the equilibrium reaction for the dissociation of acetic acid and presents the equilibrium constant expression.\n", 237 | "\n", 238 | "**3. Equilibrium Concentrations at Equilibrium:**\n", 239 | "- It correctly determines the concentrations of acetic acid, acetate ions, and hydrogen ions at equilibrium based on the assumption that the dissociation of acetic acid is minimal compared to its initial concentration.\n", 240 | "\n", 241 | "**4. Equilibrium Constant Equation:**\n", 242 | "- It substitutes the equilibrium concentrations into the equilibrium constant expression and sets up the equation to solve for the concentration of hydrogen ions (\\( H^+ \\)).\n", 243 | "\n", 244 | "**5. Concentration Calculation:**\n", 245 | "- It simplifies the equation by approximating \\( 0.05 - x \\) to be approximately 0.05 and solves for \\( x \\) to find the concentration of \\( H^+ \\) ions accurately.\n", 246 | "\n", 247 | "**6. pH Calculation:**\n", 248 | "- It accurately calculates the pH of the solution by applying the formula \\( pH = -\\log[H^+] \\) and simplifying the logarithmic calculations step by step to determine the final pH value.\n", 249 | "\n", 250 | "**7. Conclusion:**\n", 251 | "- It concludes by providing the final pH value for a 0.05 M solution of acetic acid, which is approximately 3.52, based on the calculated concentration of \\( H^+ \\) ions.\n", 252 | "\n", 253 | "In summary, Reasoning path 2 provides a clear and accurate step-by-step approach to calculating the pH of a weak acid solution like acetic acid, emphasizing the correct equilibrium concentrations, simplifications, and mathematical calculations involved in determining the pH value.\n", 254 | "\n", 255 | "Let's work this out it a step by step to be sure we have the right answer.\n", 256 | "\n", 257 | "\n" 258 | ] 259 | } 260 | ], 261 | "source": [ 262 | "prompt3.complexity_based_prompting(\n", 263 | " input_text=chem_problem,\n", 264 | " n_reasoning_paths=4,\n", 265 | " temperature=0.4,\n", 266 | " n_exemplars=3\n", 267 | " )\n", 268 | "print(prompt3.compile())" 269 | ] 270 | } 271 | ], 272 | "metadata": { 273 | "kernelspec": { 274 | "display_name": "base", 275 | "language": "python", 276 | "name": "python3" 277 | }, 278 | "language_info": { 279 | "codemirror_mode": { 280 | "name": "ipython", 281 | "version": 3 282 | }, 283 | "file_extension": ".py", 284 | "mimetype": "text/x-python", 285 | "name": "python", 286 | "nbconvert_exporter": "python", 287 | "pygments_lexer": "ipython3", 288 | "version": "3.10.9" 289 | } 290 | }, 291 | "nbformat": 4, 292 | "nbformat_minor": 2 293 | } 294 | --------------------------------------------------------------------------------
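
Of the techniques defined in `quality_prompts/prompt.py` above, `constrained_chain_of_thought_prompting` is not demonstrated in either of the example notebooks shown here. Below is a minimal, untested usage sketch in the style of `examples/zero_shot_cot_usage.ipynb`; the directive, input text, and word limit are illustrative placeholders, and it assumes the package is installed and `OPENAI_API_KEY` is set as in the notebooks.

```
import os

from quality_prompts.prompt import QualityPrompt
from quality_prompts.utils.llm import llm_call

os.environ['OPENAI_API_KEY'] = "YOUR_API_KEY_HERE"

# Illustrative directive and problem, mirroring the zero-shot CoT notebook.
directive = """Solve the given math problem"""
input_text = """A cyclist rides 36 km in 1.5 hours. What is her average speed in km/h?"""

prompt = QualityPrompt(directive=directive)

# Prepends a length-constraint instruction (here, at most 45 words of reasoning)
# to the prompt's output_formatting component.
prompt.constrained_chain_of_thought_prompting(max_words=45)
compiled_quality_prompt = prompt.compile()

# As in the notebooks, the compiled prompt goes in the system message
# and the problem itself in the user message.
messages = [{"role": "system", "content": compiled_quality_prompt},
            {"role": "user", "content": input_text}]
response = llm_call(messages)
print(response)
```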