├── .github └── workflows │ └── code-reviewer.yaml ├── README.md ├── anthropic-citations ├── app.py └── data │ ├── info.txt │ └── relianceEarning.pdf ├── billion-persona ├── conversation-simulation.py ├── persona.jsonl └── prompt_template.py ├── code-analyzer-reflection ├── Readme.md └── app.py ├── code-test-analyzer ├── app.py ├── code_executor.py └── readme.md ├── diagram-of-thoughts ├── app.py └── readme.md ├── evals └── moverscore.py ├── gatr ├── app.py ├── readme.md └── requirements.txt ├── multi-stream-processor ├── app.py └── requirements.txt ├── nexusflow ├── app.py └── readme.md ├── planner ├── Readme.md └── heterogenous-planner.py ├── python-code-execution-agent ├── Readme.md └── app,py ├── query-optimizer ├── config.py ├── data_loader.py ├── embedding_generator.py ├── evaluator.py ├── main.py ├── prompt_optimizer.py ├── readme.md ├── requirements.txt ├── response_generator.py ├── similarity_search.py └── utils.py ├── reasoning-mode ├── app.py └── requirements.txt ├── requirements.txt ├── reverse-reasoning ├── Readme.md └── app.py ├── sandbox-rag ├── Readme.md ├── assets │ ├── book │ └── situationalawareness.docx ├── chunking.py ├── rag-system.py ├── requirements.txt └── streamlit-visualizer.py ├── self-correction ├── Readme.md └── app.py └── self-taught-reasoners ├── Readme.md └── app.py /.github/workflows/code-reviewer.yaml: -------------------------------------------------------------------------------- 1 | name: AI CODE Review 2 | 3 | on: 4 | pull_request: 5 | types: [opened, synchronize] 6 | 7 | jobs: 8 | analyze: 9 | runs-on: ubuntu-latest 10 | permissions: 11 | contents: read 12 | pull-requests: write 13 | 14 | steps: 15 | - name: Checkout repository 16 | uses: actions/checkout@v3 17 | with: 18 | fetch-depth: 0 19 | 20 | - name: Get changed files 21 | id: changed-files 22 | run: | 23 | git diff --name-only origin/${{ github.base_ref }} origin/${{ github.head_ref }} > changed_files.txt 24 | 25 | - name: Setup Python 26 | uses: actions/setup-python@v3 27 | with: 28 | python-version: '3.x' 29 | 30 | - name: Install dependencies 31 | run: | 32 | python -m pip install --upgrade pip 33 | pip install openai 34 | 35 | - name: Analyze code changes 36 | env: 37 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} 38 | run: | 39 | python - < B[Input Code Snippet] 23 | B --> C[Initial Analysis LLM] 24 | C --> D[Generate Initial Analysis] 25 | D --> E[Validator LLM] 26 | E --> F{Confidence >= 90%?} 27 | F -->|Yes| G[Return Final Analysis] 28 | F -->|No| H[Corrector LLM] 29 | H --> I[Generate Improved Analysis] 30 | I --> J[Validator LLM] 31 | J --> K{Confidence >= 90%?} 32 | K -->|Yes| L[Return Improved Analysis] 33 | K -->|No| M[Return Best Available Analysis] 34 | G --> N[End] 35 | L --> N 36 | M --> N 37 | 38 | ``` 39 | 40 | ## Usage 41 | 42 | Here's a basic example of how to use the Reflection-Tuned Code Analyzer: 43 | 44 | ```python 45 | from code_analyzer import ReflectionTunedAnalyzer 46 | 47 | analyzer = ReflectionTunedAnalyzer() 48 | 49 | code_snippet = """ 50 | def factorial(n): 51 | if n == 0: 52 | return 1 53 | else: 54 | return n * factorial(n-1) 55 | 56 | def main(): 57 | num = input("Enter a number: ") 58 | result = factorial(num) 59 | print(f"The factorial of {num} is {result}") 60 | 61 | if __name__ == "__main__": 62 | main() 63 | """ 64 | 65 | analysis_result = analyzer.analyze(code_snippet) 66 | print(analysis_result) 67 | ``` 68 | 69 | ## How It Works 70 | 71 | 1. **Initial Analysis**: An LLM performs the first pass of code analysis. 72 | 2. 
**Validation**: A separate validator LLM assesses the analysis and assigns a confidence score. 73 | 3. **Correction (if needed)**: If the confidence score is below 90%, a corrector LLM improves the analysis. 74 | 4. **Final Validation**: The improved analysis is re-validated to ensure high-quality output. 75 | 76 | ## Contributing 77 | 78 | We welcome contributions to the Reflection-Tuned Code Analyzer! Please read our [CONTRIBUTING.md](CONTRIBUTING.md) for details on our code of conduct and the process for submitting pull requests. 79 | 80 | ## License 81 | 82 | This project is licensed under the MIT License - see the [LICENSE.md](LICENSE.md) file for details. 83 | 84 | ## Acknowledgments 85 | 86 | - This project builds upon the reflection prompting technique, extending it for specialized code analysis. 87 | - Special thanks to the open-source AI community for their ongoing contributions to language model advancements. 88 | 89 | 90 | -------------------------------------------------------------------------------- /code-analyzer-reflection/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | from openai import OpenAI 3 | from tenacity import retry, wait_random_exponential, stop_after_attempt 4 | 5 | # Initialize OpenAI client 6 | client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 7 | 8 | # Retry decorators for API calls 9 | @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) 10 | def completion_with_backoff(**kwargs): 11 | return client.chat.completions.create(**kwargs) 12 | 13 | REFLECTION_PROMPT_TEMPLATE = """ 14 | You are an AI assistant specialized in code analysis. Your task is to analyze the given code for potential bugs, inefficiencies, and suggest improvements. Follow this exact format: 15 | 16 | 17 | Begin by outlining your approach to analyzing the code. Break down your process into steps. 18 | 19 | 20 | 21 | If you need to clarify or correct any part of your initial thinking, do so here. This step is optional if no correction is needed. 22 | 23 | 24 | 25 | Continue your analysis, incorporating any reflections or corrections. Identify potential issues and areas for improvement. 26 | 27 | 28 | 29 | Provide your final analysis, including: 30 | 1. Identified bugs or potential issues 31 | 2. Suggestions for improvements 32 | 3. A brief explanation of the changes and their benefits 33 | 34 | 35 | Now, please analyze the following code: 36 | {code_snippet} 37 | 38 | Remember to use the exact format with , (if needed), and tags as shown above. 39 | """ 40 | 41 | VALIDATOR_PROMPT = """ 42 | You are a validator AI specialized in code review. Your job is to check if the given code analysis is thorough, accurate, and helpful. Provide a confidence score between 0 and 100. 43 | 44 | Original code snippet: 45 | {code_snippet} 46 | 47 | Generated analysis: 48 | {generated_analysis} 49 | 50 | Please evaluate the analysis and provide: 51 | 1. A brief explanation of whether the analysis is thorough, accurate, and helpful. 52 | 2. A confidence score between 0 and 100. 53 | 54 | Your response should be in the format: 55 | 56 | Your explanation here 57 | 58 | 59 | Your confidence score here (just the number) 60 | 61 | """ 62 | 63 | CORRECTOR_PROMPT = """ 64 | You are a corrector AI specialized in improving code analysis. Your job is to enhance the given analysis to ensure it's thorough, accurate, and helpful. 
65 | 66 | Original code snippet: 67 | {code_snippet} 68 | 69 | Generated analysis: 70 | {generated_analysis} 71 | 72 | Validator's explanation: 73 | {validator_explanation} 74 | 75 | Please provide an improved analysis that addresses any issues mentioned by the validator. Your goal is to achieve a confidence score above 90%. 76 | 77 | Use the same format as the original analysis, with , , and tags. 78 | """ 79 | 80 | def get_reflection_response(code_snippet): 81 | try: 82 | response = completion_with_backoff( 83 | model="gpt-4o-mini", 84 | messages=[ 85 | {"role": "system", "content": REFLECTION_PROMPT_TEMPLATE}, 86 | {"role": "user", "content": code_snippet} 87 | ], 88 | temperature=0.7, 89 | max_tokens=1000 90 | ) 91 | return response.choices[0].message.content 92 | except Exception as e: 93 | print(f"An error occurred: {e}") 94 | return None 95 | 96 | def validate_response(code_snippet, generated_analysis): 97 | try: 98 | response = completion_with_backoff( 99 | model="gpt-4o-mini", 100 | messages=[ 101 | {"role": "system", "content": VALIDATOR_PROMPT.format( 102 | code_snippet=code_snippet, 103 | generated_analysis=generated_analysis 104 | )}, 105 | ], 106 | temperature=0.3, 107 | max_tokens=500 108 | ) 109 | return response.choices[0].message.content 110 | except Exception as e: 111 | print(f"An error occurred during validation: {e}") 112 | return None 113 | 114 | def correct_response(code_snippet, generated_analysis, validator_explanation): 115 | try: 116 | response = completion_with_backoff( 117 | model="gpt-4o-mini", 118 | messages=[ 119 | {"role": "system", "content": CORRECTOR_PROMPT.format( 120 | code_snippet=code_snippet, 121 | generated_analysis=generated_analysis, 122 | validator_explanation=validator_explanation 123 | )}, 124 | ], 125 | temperature=0.7, 126 | max_tokens=1000 127 | ) 128 | return response.choices[0].message.content 129 | except Exception as e: 130 | print(f"An error occurred during correction: {e}") 131 | return None 132 | 133 | def extract_confidence_score(validator_response): 134 | start = validator_response.find("") + len("") 135 | end = validator_response.find("") 136 | return float(validator_response[start:end].strip()) 137 | 138 | def main(code_snippet): 139 | # Get initial response 140 | initial_response = get_reflection_response(code_snippet) 141 | if not initial_response: 142 | return "Failed to generate initial analysis." 143 | 144 | # Validate the response 145 | validation_result = validate_response(code_snippet, initial_response) 146 | if not validation_result: 147 | return "Failed to validate the analysis." 148 | 149 | confidence_score = extract_confidence_score(validation_result) 150 | 151 | if confidence_score >= 90: 152 | return f"Final analysis (confidence: {confidence_score}%):\n\n{initial_response}" 153 | else: 154 | # Extract explanation from validator's response 155 | start = validation_result.find("") + len("") 156 | end = validation_result.find("") 157 | validator_explanation = validation_result[start:end].strip() 158 | 159 | # Correct the response 160 | corrected_response = correct_response(code_snippet, initial_response, validator_explanation) 161 | if not corrected_response: 162 | return "Failed to correct the analysis." 163 | 164 | # Validate the corrected response 165 | final_validation = validate_response(code_snippet, corrected_response) 166 | if not final_validation: 167 | return "Failed to validate the corrected analysis." 
168 | 169 | final_confidence_score = extract_confidence_score(final_validation) 170 | 171 | return f"Final analysis (confidence: {final_confidence_score}%):\n\n{corrected_response}" 172 | 173 | # Example usage 174 | if __name__ == "__main__": 175 | code_snippet = """ 176 | def factorial(n): 177 | if n == 0: 178 | return 1 179 | else: 180 | return n * factorial(n-1) 181 | 182 | def main(): 183 | num = input("Enter a number: ") 184 | result = factorial(num) 185 | print(f"The factorial of {num} is {result}") 186 | 187 | if __name__ == "__main__": 188 | main() 189 | """ 190 | result = main(code_snippet) 191 | print(result) 192 | -------------------------------------------------------------------------------- /code-test-analyzer/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dataclasses import dataclass 3 | from typing import Optional, List, Tuple, Dict 4 | import time 5 | from tenacity import retry, stop_after_attempt, wait_random_exponential 6 | from openai import OpenAI 7 | from dotenv import load_dotenv 8 | from code_executor import CodeExecutor 9 | 10 | # Load environment variables and initialize OpenAI client 11 | load_dotenv() 12 | client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 13 | 14 | @dataclass 15 | class TestCase: 16 | """Represents a single test case for code evaluation""" 17 | input_data: Dict 18 | expected_output: str 19 | code_template: str 20 | 21 | def format_test_code(self, solution_code: str) -> str: 22 | """Formats the complete test code including the solution""" 23 | return f""" 24 | {solution_code} 25 | 26 | # Test case execution 27 | def run_test(): 28 | input_data = {repr(self.input_data)} 29 | expected = {repr(self.expected_output)} 30 | 31 | {self.code_template} 32 | 33 | if __name__ == "__main__": 34 | run_test() 35 | """ 36 | 37 | @dataclass 38 | class CodeSolution: 39 | """Class to store code solutions and their metadata""" 40 | code: str 41 | is_correct: bool = False 42 | execution_time: float = float('inf') 43 | test_results: Optional[str] = None 44 | error_message: Optional[str] = None 45 | 46 | class TestEnvironment: 47 | """Test environment that uses CodeExecutor for secure code execution""" 48 | 49 | def __init__(self, timeout: int = 30): 50 | self.executor = CodeExecutor(timeout=timeout) 51 | # Define test cases for the problem 52 | self.test_cases = [ 53 | TestCase( 54 | input_data={"n": 5}, 55 | expected_output="5", 56 | code_template=""" 57 | # Execute test 58 | result = fibonacci(input_data["n"]) 59 | assert str(result) == expected, f"Test failed: got {result}, expected {expected}" 60 | print(f"Test passed! Input: {input_data}, Output: {result}") 61 | """ 62 | ), 63 | TestCase( 64 | input_data={"n": 10}, 65 | expected_output="55", 66 | code_template=""" 67 | # Execute test 68 | result = fibonacci(input_data["n"]) 69 | assert str(result) == expected, f"Test failed: got {result}, expected {expected}" 70 | print(f"Test passed! 
Input: {input_data}, Output: {result}") 71 | """ 72 | ) 73 | ] 74 | 75 | def execute_tests(self, code: str) -> Tuple[bool, str, List[float]]: 76 | """Execute test cases using the CodeExecutor""" 77 | all_passed = True 78 | feedback = [] 79 | execution_times = [] 80 | 81 | for i, test_case in enumerate(self.test_cases, 1): 82 | # Format the complete test code 83 | test_code = test_case.format_test_code(code) 84 | 85 | # Measure execution time 86 | start_time = time.time() 87 | output, error = self.executor.execute(test_code, install_libraries=True) 88 | execution_time = time.time() - start_time 89 | 90 | execution_times.append(execution_time) 91 | 92 | if error: 93 | all_passed = False 94 | feedback.append(f"Test {i} failed with error:\n{error}") 95 | elif "Test passed!" not in output: 96 | all_passed = False 97 | feedback.append(f"Test {i} failed: Unexpected output\n{output}") 98 | else: 99 | feedback.append(f"Test {i} passed in {execution_time:.3f} seconds") 100 | 101 | return all_passed, "\n".join(feedback), execution_times 102 | 103 | class CodeGenerator: 104 | def __init__(self, model_name="gpt-4"): 105 | self.model_name = model_name 106 | 107 | @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) 108 | def completion_with_backoff(self, messages: List[dict]) -> str: 109 | """Make an OpenAI API call with exponential backoff retry""" 110 | response = client.chat.completions.create( 111 | model=self.model_name, 112 | messages=messages, 113 | temperature=0 114 | ) 115 | return response.choices[0].message.content 116 | 117 | def generate_initial_solution(self, problem_description: str) -> str: 118 | """Generate initial solution based on problem description""" 119 | prompt = f""" 120 | Generate a Python solution for the following problem: 121 | {problem_description} 122 | 123 | Important requirements: 124 | 1. The solution should be a complete, self-contained function 125 | 2. Include any necessary import statements 126 | 3. Use efficient algorithms and data structures 127 | 4. Include brief comments explaining key parts of the code 128 | 129 | Provide only the code without any explanation. 130 | """ 131 | messages = [{"role": "user", "content": prompt}] 132 | return self.completion_with_backoff(messages) 133 | 134 | def reflect_and_refine(self, problem_description: str, current_code: str, 135 | feedback: str) -> str: 136 | """Generate refined solution based on feedback""" 137 | prompt = f""" 138 | Problem: {problem_description} 139 | 140 | Current implementation: 141 | {current_code} 142 | 143 | Test feedback: 144 | {feedback} 145 | 146 | Please analyze the test feedback and generate an improved version of the code. 147 | Focus on: 148 | 1. Fixing any identified errors or failures 149 | 2. Maintaining proper function signatures 150 | 3. Ensuring correct handling of edge cases 151 | 4. Following Python best practices 152 | 153 | Provide only the code without any explanation. 154 | """ 155 | messages = [{"role": "user", "content": prompt}] 156 | return self.completion_with_backoff(messages) 157 | 158 | def optimize_for_performance(self, problem_description: str, correct_code: str, 159 | performance_feedback: str) -> str: 160 | """Optimize correct solution for better performance""" 161 | prompt = f""" 162 | Problem: {problem_description} 163 | 164 | Current correct implementation: 165 | {correct_code} 166 | 167 | Performance feedback: 168 | {performance_feedback} 169 | 170 | Please optimize this code for better performance while maintaining correctness. 
171 | Focus on: 172 | 1. Algorithmic improvements 173 | 2. Data structure optimization 174 | 3. Removing unnecessary operations 175 | 4. Using built-in functions where applicable 176 | 177 | Provide only the code without any explanation. 178 | """ 179 | messages = [{"role": "user", "content": prompt}] 180 | return self.completion_with_backoff(messages) 181 | 182 | class CodeGenerationPipeline: 183 | def __init__(self, model_name="gpt-4", timeout: int = 30): 184 | self.code_generator = CodeGenerator(model_name) 185 | self.test_environment = TestEnvironment(timeout) 186 | 187 | def run(self, problem_description: str, max_iterations: int = 5) -> CodeSolution: 188 | """Execute the complete code generation and optimization pipeline""" 189 | 190 | # Phase 1: Generate correct solution 191 | current_solution = CodeSolution( 192 | code=self.code_generator.generate_initial_solution(problem_description) 193 | ) 194 | 195 | # Iterate until solution is correct or max iterations reached 196 | for iteration in range(max_iterations): 197 | print(f"\nIteration {iteration + 1}/{max_iterations}") 198 | 199 | # Test current solution 200 | is_correct, feedback, test_times = self.test_environment.execute_tests( 201 | current_solution.code 202 | ) 203 | 204 | print("Test Results:") 205 | print(feedback) 206 | 207 | if is_correct: 208 | current_solution.is_correct = True 209 | current_solution.test_results = feedback 210 | current_solution.execution_time = max(test_times) 211 | print("Found correct solution!") 212 | break 213 | 214 | if iteration < max_iterations - 1: 215 | print("Generating refined solution...") 216 | # Generate refined solution based on feedback 217 | refined_code = self.code_generator.reflect_and_refine( 218 | problem_description, current_solution.code, feedback 219 | ) 220 | current_solution.code = refined_code 221 | 222 | if not current_solution.is_correct: 223 | print("Failed to generate correct solution within iteration limit") 224 | return current_solution 225 | 226 | # Phase 2: Optimize for performance 227 | print("\nOptimizing for performance...") 228 | 229 | # Identify slow test cases 230 | _, _, test_times = self.test_environment.execute_tests(current_solution.code) 231 | slowest_test_idx = test_times.index(max(test_times)) 232 | performance_feedback = f"Slowest test case: #{slowest_test_idx + 1}, " \ 233 | f"execution time: {max(test_times):.3f}s" 234 | 235 | # Generate optimized solution 236 | optimized_code = self.code_generator.optimize_for_performance( 237 | problem_description, current_solution.code, performance_feedback 238 | ) 239 | 240 | # Verify optimized solution correctness 241 | is_correct, feedback, test_times = self.test_environment.execute_tests( 242 | optimized_code 243 | ) 244 | 245 | if is_correct and max(test_times) < current_solution.execution_time: 246 | print("Successfully optimized solution!") 247 | current_solution.code = optimized_code 248 | current_solution.execution_time = max(test_times) 249 | current_solution.test_results = feedback 250 | else: 251 | print("Optimization failed or didn't improve performance. Keeping original solution.") 252 | 253 | return current_solution 254 | 255 | # Example usage 256 | if __name__ == "__main__": 257 | # Example problem description 258 | problem_desc = """ 259 | Write a function called fibonacci that finds the nth Fibonacci number using dynamic programming. 260 | The function should take an integer n as input and return the nth Fibonacci number. 261 | The first two numbers in the sequence are 0 and 1. 
262 | """ 263 | 264 | # Initialize and run the pipeline 265 | print("Initializing Code Generation Pipeline...") 266 | pipeline = CodeGenerationPipeline() 267 | 268 | print("\nGenerating solution for Fibonacci problem...") 269 | solution = pipeline.run(problem_desc) 270 | 271 | print("\nFinal Results:") 272 | print(f"Correct: {solution.is_correct}") 273 | print(f"Execution Time: {solution.execution_time:.3f}s") 274 | print("\nFinal Code:") 275 | print(solution.code) 276 | -------------------------------------------------------------------------------- /code-test-analyzer/code_executor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | import subprocess 4 | import tempfile 5 | import logging 6 | import re 7 | import sys 8 | import importlib 9 | from typing import Tuple, List 10 | 11 | from openai import OpenAI 12 | from tenacity import retry, stop_after_attempt, wait_random_exponential 13 | 14 | # Set up logging to track execution flow and debug issues 15 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') 16 | 17 | class CodeExecutor: 18 | """ 19 | A class that safely executes Python code snippets in a temporary environment, 20 | handles library dependencies, and captures output and errors. 21 | """ 22 | 23 | def __init__(self, timeout: int = 30): 24 | """ 25 | Initialize the CodeExecutor with a configurable timeout. 26 | 27 | Args: 28 | timeout (int): Maximum execution time in seconds before terminating the code 29 | """ 30 | self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 31 | self.timeout = timeout 32 | 33 | @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) 34 | def completion_with_backoff(self, **kwargs): 35 | return self.client.chat.completions.create(**kwargs) 36 | 37 | @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) 38 | def embedding_with_backoff(self, **kwargs): 39 | return self.client.embeddings.create(**kwargs) 40 | 41 | def _extract_required_libraries(self, code: str) -> List[str]: 42 | """ 43 | Extract required library names from code comments and import statements. 44 | 45 | Args: 46 | code (str): The Python code to analyze 47 | 48 | Returns: 49 | List[str]: List of required library names 50 | """ 51 | libraries = set() 52 | 53 | # Look for pip install comments 54 | pip_matches = re.findall(r'#\s*(?:Required pip installations:|pip install)\s*((?:[\w-]+(?:\s*,\s*)?)+)', code) 55 | if pip_matches: 56 | for match in pip_matches: 57 | libraries.update(lib.strip() for lib in match.split(',')) 58 | 59 | # Look for import statements 60 | import_matches = re.findall(r'^(?:from|import)\s+(\w+)', code, re.MULTILINE) 61 | for match in import_matches: 62 | # Skip standard library modules 63 | if match not in sys.stdlib_module_names: 64 | libraries.add(match) 65 | 66 | return list(libraries) 67 | 68 | def _install_libraries(self, libraries: List[str]) -> Tuple[bool, str]: 69 | """ 70 | Install the required libraries using pip. 
71 | 72 | Args: 73 | libraries (List[str]): List of library names to install 74 | 75 | Returns: 76 | Tuple[bool, str]: Success status and error message if any 77 | """ 78 | if not libraries: 79 | return True, "" 80 | 81 | logging.info(f"Installing libraries: {', '.join(libraries)}") 82 | 83 | for lib in libraries: 84 | try: 85 | # Check if library is already installed 86 | importlib.import_module(lib.replace('-', '_')) 87 | logging.info(f"{lib} is already installed") 88 | continue 89 | except ImportError: 90 | pass 91 | 92 | try: 93 | # Install the library using pip 94 | subprocess.check_call( 95 | [sys.executable, "-m", "pip", "install", "--quiet", lib], 96 | stdout=subprocess.PIPE, 97 | stderr=subprocess.PIPE 98 | ) 99 | logging.info(f"Successfully installed {lib}") 100 | except subprocess.CalledProcessError as e: 101 | error_msg = f"Failed to install {lib}: {e.stderr.decode() if e.stderr else str(e)}" 102 | logging.error(error_msg) 103 | return False, error_msg 104 | 105 | return True, "" 106 | def _extract_clean_code(self, code: str) -> str: 107 | """ 108 | Extract clean Python code using regex pattern matching and OpenAI as fallback. 109 | First attempts to find code between ```python and ``` markers. 110 | If that fails, uses OpenAI to extract the code. 111 | 112 | Args: 113 | code (str): The input string containing Python code and possibly other text 114 | 115 | Returns: 116 | str: Clean Python code with all formatting and non-code text removed 117 | """ 118 | # First, try to extract code using regex pattern matching 119 | try: 120 | # Look for code between ```python and ``` markers 121 | # Using re.DOTALL to make . match newlines as well 122 | pattern = r"```python\s*(.*?)\s*```" 123 | matches = re.findall(pattern, code, re.DOTALL) 124 | 125 | if matches: 126 | # If we found matches, use the longest one (most likely the main code block) 127 | clean_code = max(matches, key=len).strip() 128 | logging.info("Successfully extracted code using regex pattern matching") 129 | return clean_code 130 | 131 | # If no ```python markers, check for just ``` markers 132 | pattern = r"```\s*(.*?)\s*```" 133 | matches = re.findall(pattern, code, re.DOTALL) 134 | 135 | if matches: 136 | clean_code = max(matches, key=len).strip() 137 | logging.info("Successfully extracted code from generic code block") 138 | return clean_code 139 | 140 | # If no code blocks found, try OpenAI approach 141 | logging.info("No code blocks found with regex, attempting OpenAI extraction") 142 | 143 | # Attempt OpenAI extraction 144 | try: 145 | response = self.completion_with_backoff( 146 | model="gpt-4o", 147 | messages=[{ 148 | "role": "user", 149 | "content": f""" 150 | Extract only the Python code from the following text, removing any formatting 151 | or non-code text. 
Return only executable Python code: 152 | 153 | {code} 154 | """ 155 | }], 156 | temperature=0 157 | ) 158 | clean_code = response.choices[0].message.content.strip() 159 | logging.info("Successfully extracted clean code using OpenAI") 160 | return clean_code 161 | 162 | except Exception as e: 163 | logging.warning(f"OpenAI extraction failed: {str(e)}") 164 | # If both regex and OpenAI fail, return the original code 165 | # but remove any obvious markdown markers 166 | clean_code = re.sub(r"```.*?\n", "", code) # Remove opening markers 167 | clean_code = re.sub(r"```\s*$", "", clean_code) # Remove closing markers 168 | clean_code = clean_code.strip() 169 | logging.info("Returning cleaned original code after all extraction attempts") 170 | return clean_code 171 | 172 | except Exception as e: 173 | logging.error(f"Error during code extraction: {str(e)}") 174 | # If everything fails, return the original code 175 | return code.strip() 176 | def execute(self, code: str, install_libraries: bool = True) -> Tuple[str, str]: 177 | """ 178 | Execute a given Python code snippet and return its output and any errors. 179 | Optionally handles library installation before execution. 180 | 181 | Args: 182 | code (str): The Python code to execute 183 | install_libraries (bool): Whether to attempt installing required libraries 184 | 185 | Returns: 186 | Tuple[str, str]: A tuple containing (output, error) 187 | """ 188 | # First, clean the code using OpenAI 189 | clean_code = self._extract_clean_code(code) 190 | 191 | if install_libraries: 192 | # Extract and install required libraries 193 | libraries = self._extract_required_libraries(clean_code) 194 | success, error = self._install_libraries(libraries) 195 | if not success: 196 | return "", error 197 | 198 | # Create a temporary file in the current directory 199 | current_dir = os.getcwd() 200 | with tempfile.NamedTemporaryFile(mode='w', suffix='.py', 201 | dir=current_dir, delete=False, 202 | encoding='utf-8') as temp_file: 203 | temp_file.write(clean_code) 204 | temp_file_path = temp_file.name 205 | 206 | try: 207 | # Prepare the execution command based on the operating system 208 | if platform.system() == "Windows": 209 | activate_cmd = r"venv\Scripts\activate.bat" if os.path.exists(r"venv\Scripts\activate.bat") else "" 210 | else: 211 | activate_cmd = "source venv/bin/activate" if os.path.exists("venv/bin/activate") else "" 212 | 213 | # Construct the command to run the Python script 214 | run_script = f"python {os.path.basename(temp_file_path)}" 215 | full_command = f"{activate_cmd} && {run_script}" if activate_cmd else run_script 216 | 217 | # Execute the code with appropriate command based on the OS 218 | if platform.system() == "Windows": 219 | result = subprocess.run( 220 | full_command, 221 | shell=True, 222 | cwd=current_dir, 223 | capture_output=True, 224 | text=True, 225 | timeout=self.timeout 226 | ) 227 | else: 228 | result = subprocess.run( 229 | ['/bin/bash', '-c', full_command], 230 | cwd=current_dir, 231 | capture_output=True, 232 | text=True, 233 | timeout=self.timeout 234 | ) 235 | 236 | # Capture the output and any errors 237 | output = result.stdout 238 | error = result.stderr 239 | 240 | except subprocess.TimeoutExpired: 241 | output = "" 242 | error = f"Execution timed out after {self.timeout} seconds." 
243 | logging.error(error) 244 | 245 | except Exception as e: 246 | output = "" 247 | error = f"An error occurred during execution: {str(e)}" 248 | logging.error(error) 249 | 250 | finally: 251 | # Clean up the temporary file 252 | try: 253 | os.unlink(temp_file_path) 254 | except Exception as e: 255 | logging.error(f"Failed to delete temporary file: {str(e)}") 256 | 257 | return output, error 258 | 259 | # Example usage demonstration 260 | def main(): 261 | executor = CodeExecutor(timeout=30) 262 | 263 | # Example code that requires an external library 264 | test_code = """ 265 | # pip install requests 266 | import requests 267 | 268 | response = requests.get('https://api.github.com') 269 | print(f'GitHub API Status Code: {response.status_code}') 270 | """ 271 | 272 | print("Executing test code...") 273 | output, error = executor.execute(test_code, install_libraries=True) 274 | 275 | print("\nOutput:") 276 | print(output if output else "No output") 277 | 278 | if error: 279 | print("\nError:") 280 | print(error) 281 | 282 | if __name__ == "__main__": 283 | main() 284 | -------------------------------------------------------------------------------- /code-test-analyzer/readme.md: -------------------------------------------------------------------------------- 1 | # LLM-Powered Code Generation & Optimization System 2 | 3 | This system leverages Large Language Models (LLMs) to automatically generate, test, and optimize Python code solutions. By combining the power of GPT-4 with a secure execution environment, it creates an end-to-end pipeline for producing high-quality, efficient code solutions from natural language problem descriptions. 4 | 5 | ## Overview 6 | 7 | The system follows an iterative approach to code generation and optimization: 8 | 9 | 1. A problem description is provided as input 10 | 2. The LLM generates an initial solution 11 | 3. The solution is tested in a secure execution environment 12 | 4. Based on test results, the code is either refined for correctness or optimized for performance 13 | 5. The process continues until an optimal solution is found 14 | 15 | ## Architecture Diagram 16 | 17 | ```mermaid 18 | graph TB 19 | subgraph User["User Interface"] 20 | PD[Problem Description] 21 | end 22 | 23 | subgraph Pipeline["Code Generation Pipeline"] 24 | CG[Code Generator] 25 | TE[Test Environment] 26 | CO[Code Optimizer] 27 | end 28 | 29 | subgraph LLM["Language Model Service"] 30 | OAI[OpenAI GPT-4 API] 31 | subgraph Prompts["Specialized Prompts"] 32 | IP[Initial Solution Prompt] 33 | RP[Refinement Prompt] 34 | OP[Optimization Prompt] 35 | end 36 | end 37 | 38 | subgraph Execution["Secure Execution Environment"] 39 | CE[Code Executor] 40 | subgraph Components["Executor Components"] 41 | TC[Test Cases] 42 | LD[Library Detector] 43 | PI[Package Installer] 44 | TF[Temp File Manager] 45 | end 46 | end 47 | 48 | subgraph Processing["Code Processing"] 49 | CR[Code Refinement] 50 | PE[Performance Evaluation] 51 | subgraph Extraction["Code Extraction"] 52 | RX[Regex Extractor] 53 | LX[LLM Extractor] 54 | end 55 | end 56 | 57 | %% Flow connections 58 | PD --> CG 59 | CG --> |1. 
Generate Initial Solution| OAI 60 | OAI --> |Raw Solution| CR 61 | CR --> |Clean Code| CE 62 | CE --> |Execution Results| TE 63 | TE --> |Test Feedback| CO 64 | CO --> |Optimization Request| OAI 65 | OAI --> |Optimized Code| CR 66 | 67 | %% Component relationships 68 | CE --> |Uses| TC 69 | CE --> |Uses| LD 70 | LD --> |Triggers| PI 71 | CE --> |Manages| TF 72 | 73 | %% Processing relationships 74 | CR --> |Uses| RX 75 | CR --> |Fallback| LX 76 | PE --> |Feeds into| CO 77 | 78 | %% Prompt usage 79 | CG --> |Uses| IP 80 | CO --> |Uses| RP 81 | CO --> |Uses| OP 82 | 83 | classDef primary fill:#f9f,stroke:#333,stroke-width:2px 84 | classDef secondary fill:#bbf,stroke:#333,stroke-width:1px 85 | classDef tertiary fill:#ddd,stroke:#333,stroke-width:1px 86 | 87 | class CG,TE,CO primary 88 | class CE,OAI secondary 89 | class RX,LX,TC,LD,PI,TF tertiary 90 | ``` 91 | 92 | ## Features 93 | 94 | Our system provides several advanced capabilities: 95 | 96 | - **Secure Code Execution**: All generated code runs in an isolated environment with proper security measures 97 | - **Automatic Dependency Management**: The system detects and installs required Python packages 98 | - **Intelligent Code Extraction**: Uses both regex pattern matching and LLM-based approaches to clean and prepare code 99 | - **Performance Optimization**: Automatically identifies and optimizes performance bottlenecks 100 | - **Test-Driven Development**: Validates solutions against predefined test cases 101 | - **Error Recovery**: Maintains previous working solutions while attempting optimizations 102 | 103 | ## Installation 104 | 105 | First, clone the repository and set up a Python virtual environment: 106 | 107 | ```bash 108 | git clone https://github.com/yourusername/llm-code-generator.git 109 | cd llm-code-generator 110 | python -m venv venv 111 | source venv/bin/activate # On Windows, use: venv\Scripts\activate 112 | pip install -r requirements.txt 113 | ``` 114 | 115 | Create a `.env` file in the project root and add your OpenAI API key: 116 | 117 | ```env 118 | OPENAI_API_KEY=your_api_key_here 119 | ``` 120 | 121 | 122 | 123 | ## Usage 124 | 125 | Here's a basic example of how to use the system: 126 | 127 | ```python 128 | from src.code_generation_pipeline import CodeGenerationPipeline 129 | 130 | # Initialize the pipeline 131 | pipeline = CodeGenerationPipeline() 132 | 133 | # Define your problem 134 | problem_description = """ 135 | Write a function that finds the nth Fibonacci number using dynamic programming. 136 | The function should take an integer n as input and return the nth Fibonacci number. 
137 | """ 138 | 139 | # Generate and optimize the solution 140 | solution = pipeline.run(problem_description) 141 | 142 | # Print results 143 | print(f"Solution is correct: {solution.is_correct}") 144 | print(f"Execution time: {solution.execution_time:.3f}s") 145 | print("\nGenerated Code:") 146 | print(solution.code) 147 | ``` 148 | 149 | ## Configuration 150 | 151 | The system can be configured through several parameters: 152 | 153 | - `timeout`: Maximum execution time for generated code (default: 30 seconds) 154 | - `max_iterations`: Maximum number of refinement attempts (default: 5) 155 | - `model_name`: GPT model to use (default: "gpt-4") 156 | 157 | You can adjust these in the pipeline initialization: 158 | 159 | ```python 160 | pipeline = CodeGenerationPipeline( 161 | model_name="gpt-4", 162 | timeout=60, 163 | max_iterations=10 164 | ) 165 | ``` 166 | 167 | ## Components 168 | 169 | ### Code Generator 170 | 171 | The code generator uses OpenAI's GPT-4 to create Python code solutions. It employs carefully crafted prompts for: 172 | - Initial solution generation 173 | - Code refinement based on test feedback 174 | - Performance optimization 175 | 176 | ### Code Executor 177 | 178 | The secure execution environment provides: 179 | - Isolation of executed code 180 | - Automatic dependency detection and installation 181 | - Resource usage monitoring 182 | - Timeout enforcement 183 | - Output and error capturing 184 | 185 | ### Test Environment 186 | 187 | The testing system: 188 | - Manages test case definitions 189 | - Validates solution correctness 190 | - Measures performance metrics 191 | - Provides detailed feedback for refinement 192 | 193 | ### Performance Optimizer 194 | 195 | The optimization component: 196 | - Identifies performance bottlenecks 197 | - Suggests algorithmic improvements 198 | - Validates optimizations maintain correctness 199 | - Tracks execution time improvements 200 | 201 | 202 | 203 | 204 | ## Acknowledgments 205 | 206 | This project builds upon several open-source technologies: 207 | - OpenAI's GPT-4 API 208 | - Python's subprocess and tempfile modules 209 | - Various testing and security frameworks 210 | 211 | For more information or support, please open an issue on the GitHub repository. 212 | -------------------------------------------------------------------------------- /diagram-of-thoughts/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | from openai import OpenAI 3 | from dotenv import load_dotenv 4 | from tenacity import retry, stop_after_attempt, wait_random_exponential 5 | import json 6 | import graphviz 7 | 8 | load_dotenv() 9 | 10 | client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 11 | 12 | @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) 13 | def completion_with_backoff(**kwargs): 14 | return client.chat.completions.create(**kwargs) 15 | 16 | class DiagramOfThought: 17 | def __init__(self, model="gpt-4o-mini"): 18 | self.model = model 19 | self.conversation_history = [] 20 | self.roles = ["proposer", "critic", "summarizer"] 21 | self.graph = graphviz.Digraph(comment='Diagram of Thought') 22 | self.graph.attr(rankdir='TB', size='8,8') 23 | self.node_count = 0 24 | 25 | def generate_response(self, role, prompt): 26 | messages = self.conversation_history + [ 27 | {"role": "system", "content": f"You are now acting as the {role} in the Diagram of Thought process. 
Always enclose your response in <{role}> tags."}, 28 | {"role": "user", "content": prompt} 29 | ] 30 | 31 | response = completion_with_backoff( 32 | model=self.model, 33 | messages=messages, 34 | max_tokens=300 35 | ) 36 | 37 | content = response.choices[0].message.content 38 | self.conversation_history.append({"role": "assistant", "content": content}) 39 | return content 40 | 41 | def extract_role_content(self, response, role): 42 | start_tag = f"<{role}>" 43 | end_tag = f"" 44 | start = response.find(start_tag) 45 | end = response.find(end_tag) 46 | if start != -1 and end != -1: 47 | return response[start + len(start_tag):end].strip() 48 | print(f"Warning: Could not extract {role} content. Using full response.") 49 | return response 50 | 51 | def add_node(self, role, content): 52 | self.node_count += 1 53 | node_id = f"{role}_{self.node_count}" 54 | label = f"{role.capitalize()}:\n{content[:50]}..." # Truncate long content for readability 55 | color = {"proposer": "lightblue", "critic": "lightcoral", "summarizer": "lightgreen"}[role] 56 | self.graph.node(node_id, label, style="filled", fillcolor=color) 57 | return node_id 58 | 59 | def add_edge(self, from_node, to_node): 60 | self.graph.edge(from_node, to_node) 61 | 62 | def run(self, problem): 63 | print(f"Problem: {problem}\n") 64 | self.graph.attr(label=f'Problem: {problem}') 65 | iteration = 1 66 | prev_node = None 67 | while True: 68 | print(f"Iteration {iteration}") 69 | 70 | # Proposer 71 | proposer_prompt = f"Based on the current state of reasoning, propose the next step(s) to solve the problem: {problem}" 72 | proposer_response = self.generate_response("proposer", proposer_prompt) 73 | proposition = self.extract_role_content(proposer_response, "proposer") 74 | print(f"Proposer: {proposition}\n") 75 | proposer_node = self.add_node("proposer", proposition) 76 | if prev_node: 77 | self.add_edge(prev_node, proposer_node) 78 | 79 | # Critic 80 | critic_prompt = f"Critically evaluate the following proposition: {proposition}" 81 | critic_response = self.generate_response("critic", critic_prompt) 82 | critique = self.extract_role_content(critic_response, "critic") 83 | print(f"Critic: {critique}\n") 84 | critic_node = self.add_node("critic", critique) 85 | self.add_edge(proposer_node, critic_node) 86 | 87 | # Summarizer 88 | summarizer_prompt = "Review the current state of reasoning. Synthesize the validated propositions and determine if a final answer can be reached. If not, indicate what aspects still need to be addressed." 89 | summarizer_response = self.generate_response("summarizer", summarizer_prompt) 90 | summary = self.extract_role_content(summarizer_response, "summarizer") 91 | print(f"Summarizer: {summary}\n") 92 | summarizer_node = self.add_node("summarizer", summary) 93 | self.add_edge(critic_node, summarizer_node) 94 | 95 | if "final answer" in summary.lower(): 96 | print("Final Answer:", summary) 97 | break 98 | 99 | iteration += 1 100 | prev_node = summarizer_node 101 | 102 | if __name__ == "__main__": 103 | dot = DiagramOfThought() 104 | problem = "Imagine a perfect cube of solid gold. If the cube's volume is 1,000 cubic centimeters, and the price of gold is $50 per gram, what is the approximate value of the cube in US dollars? 
(Assume the density of gold is 19.3 grams per cubic centimeter, and round your answer to the nearest million dollars.)" 105 | dot.run(problem) 106 | -------------------------------------------------------------------------------- /diagram-of-thoughts/readme.md: -------------------------------------------------------------------------------- 1 | # Diagram of Thought (DoT) Implementation 2 | 3 | ## Overview 4 | 5 | This project implements the Diagram of Thought (DoT) framework, a novel approach to modeling iterative reasoning in large language models (LLMs). DoT organizes propositions, critiques, refinements, and verifications into a cohesive Directed Acyclic Graph (DAG) structure, allowing for complex reasoning pathways while maintaining logical consistency. 6 | 7 | The implementation uses the OpenAI API to generate responses for different roles in the reasoning process and visualizes the reasoning chain using Graphviz. 8 | 9 | ## Features 10 | 11 | - Iterative reasoning process with three distinct roles: Proposer, Critic, and Summarizer 12 | - Integration with OpenAI's GPT models for generating responses 13 | - Visualization of the reasoning process as a directed graph 14 | - Error handling and retry mechanisms for robust operation 15 | 16 | ## Requirements 17 | 18 | - Python 3.7+ 19 | - OpenAI API key 20 | - Graphviz (for visualization) 21 | 22 | ## Installation 23 | 24 | 25 | 26 | 1. Install the required Python packages: 27 | ``` 28 | pip install openai python-dotenv tenacity graphviz 29 | ``` 30 | 31 | 2. Install Graphviz on your system: 32 | - On Ubuntu or Debian: `sudo apt-get install graphviz` 33 | - On macOS with Homebrew: `brew install graphviz` 34 | - On Windows: Download and install from [Graphviz's official website](https://graphviz.org/download/) 35 | 36 | 3. Create a `.env` file in the project root and add your OpenAI API key: 37 | ``` 38 | OPENAI_API_KEY=your_api_key_here 39 | ``` 40 | 41 | ## Usage 42 | 43 | 1. Import the `DiagramOfThought` class from the main script: 44 | 45 | ```python 46 | from diagram_of_thought import DiagramOfThought 47 | ``` 48 | 49 | 2. Create an instance of the `DiagramOfThought` class: 50 | 51 | ```python 52 | dot = DiagramOfThought() 53 | ``` 54 | 55 | 3. Define your problem and run the reasoning process: 56 | 57 | ```python 58 | problem = "Your complex problem statement here" 59 | dot.run(problem) 60 | ``` 61 | 62 | 4. Visualize the reasoning process: 63 | 64 | ```python 65 | dot.visualize() 66 | ``` 67 | 68 | ## Example 69 | 70 | ```python 71 | dot = DiagramOfThought() 72 | problem = "Imagine a perfect cube of solid gold. If the cube's volume is 1,000 cubic centimeters, and the price of gold is $50 per gram, what is the approximate value of the cube in US dollars? (Assume the density of gold is 19.3 grams per cubic centimeter, and round your answer to the nearest million dollars.)" 73 | dot.run(problem) 74 | dot.visualize() 75 | ``` 76 | 77 | This will generate a PNG file named "diagram_of_thought.png" in your working directory, displaying the reasoning process as a directed graph. 78 | 79 | ## Customization 80 | 81 | You can customize the behavior of the DoT implementation by modifying the following parameters in the `DiagramOfThought` class: 82 | 83 | - `model`: Change the OpenAI model used (default is "gpt-3.5-turbo") 84 | - `max_tokens`: Adjust the maximum number of tokens in the API responses 85 | 86 | ## Contributing 87 | 88 | Contributions to improve the Diagram of Thought implementation are welcome. 
Please feel free to submit issues or pull requests. 89 | 90 | ## License 91 | 92 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 93 | 94 | -------------------------------------------------------------------------------- /evals/moverscore.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | from scipy.spatial.distance import cosine 5 | from scipy.stats import wasserstein_distance 6 | from openai import OpenAI 7 | from dotenv import load_dotenv 8 | from tenacity import retry, stop_after_attempt, wait_random_exponential 9 | 10 | class MoverScoreEvaluator: 11 | def __init__(self, api_key=None): 12 | """ 13 | Initialize the MoverScore evaluator with GPT-4o API access. 14 | 15 | Args: 16 | api_key (str, optional): OpenAI API key for accessing GPT-4o. If None, loads from environment. 17 | """ 18 | load_dotenv() 19 | self.api_key = api_key or os.getenv("OPENAI_API_KEY") 20 | self.client = OpenAI(api_key=self.api_key) 21 | 22 | 23 | 24 | @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) 25 | def embedding_with_backoff(self, **kwargs): 26 | """Wrapper for embeddings API with exponential backoff retry logic""" 27 | return self.client.embeddings.create(**kwargs) 28 | 29 | @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) 30 | def completion_with_backoff(self, **kwargs): 31 | """Wrapper for chat completions API with exponential backoff retry logic""" 32 | return self.client.chat.completions.create(**kwargs) 33 | 34 | def get_embeddings_from_gpt4o(self, texts): 35 | """ 36 | Get text embeddings using GPT-4o API with retry logic. 37 | 38 | Args: 39 | texts (list): List of text strings to embed 40 | 41 | Returns: 42 | list: List of embedding vectors 43 | """ 44 | try: 45 | # Use text-embedding-3-large for higher quality embeddings 46 | embeddings_list = [] 47 | 48 | for text in texts: 49 | response = self.embedding_with_backoff( 50 | model="text-embedding-3-large", 51 | input=text, 52 | dimensions=1536 # High dimensionality for better semantic capture 53 | ) 54 | 55 | embeddings_list.append(np.array(response.data[0].embedding)) 56 | 57 | return embeddings_list 58 | 59 | except Exception as e: 60 | print(f"Error getting embeddings from OpenAI API: {e}") 61 | # Fall back to local model 62 | return self.get_local_embeddings(texts) 63 | 64 | def get_local_embeddings(self, texts): 65 | """ 66 | Get text embeddings using local GPT-2 model as fallback. 67 | 68 | Args: 69 | texts (list): List of text strings to embed 70 | 71 | Returns: 72 | list: List of embedding vectors 73 | """ 74 | embeddings = [] 75 | 76 | for text in texts: 77 | inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True) 78 | with torch.no_grad(): 79 | outputs = self.model(**inputs) 80 | 81 | # Use the last hidden state's mean as the embedding 82 | embedding = outputs.last_hidden_state.mean(dim=1).squeeze().numpy() 83 | embeddings.append(embedding) 84 | 85 | return embeddings 86 | 87 | def calculate_word_importance(self, text): 88 | """ 89 | Calculate importance weights for words in text. 
90 | 91 | Args: 92 | text (str): Input text 93 | 94 | Returns: 95 | dict: Dictionary mapping words to importance scores 96 | """ 97 | # Simple TF-IDF like approach 98 | words = text.lower().split() 99 | word_counts = {} 100 | 101 | for word in words: 102 | if word in word_counts: 103 | word_counts[word] += 1 104 | else: 105 | word_counts[word] = 1 106 | 107 | # Calculate importance based on inverse frequency 108 | total_words = len(words) 109 | word_importance = {} 110 | 111 | for word, count in word_counts.items(): 112 | # Words that appear less frequently are more important 113 | word_importance[word] = 1.0 / (count / total_words) 114 | 115 | return word_importance 116 | 117 | def calculate_moverscore(self, hypothesis, reference): 118 | """ 119 | Calculate MoverScore between hypothesis and reference texts. 120 | 121 | Args: 122 | hypothesis (str): Generated text to evaluate 123 | reference (str): Ground truth text 124 | 125 | Returns: 126 | float: MoverScore value (higher means more similar) 127 | """ 128 | # Get word-level embeddings 129 | hyp_words = hypothesis.lower().split() 130 | ref_words = reference.lower().split() 131 | 132 | if not hyp_words or not ref_words: 133 | return 0.0 134 | # Get embeddings 135 | embeddings = self.get_embeddings_from_gpt4o([hypothesis, reference]) 136 | if len(embeddings) < 2: 137 | # Fallback: use word-by-word cosine distance if embeddings fail 138 | return self.calculate_fallback_score(hyp_words, ref_words) 139 | 140 | hyp_embedding, ref_embedding = embeddings 141 | 142 | # Calculate Earth Mover's Distance (using Wasserstein) 143 | # Normalize embeddings for better results 144 | hyp_embedding_norm = hyp_embedding / np.linalg.norm(hyp_embedding) 145 | ref_embedding_norm = ref_embedding / np.linalg.norm(ref_embedding) 146 | 147 | # Calculate Wasserstein distance between distributions 148 | distance = wasserstein_distance(hyp_embedding_norm, ref_embedding_norm) 149 | 150 | # Convert distance to similarity score (1 - normalized distance) 151 | # Lower distance = higher similarity 152 | score = 1.0 - min(distance, 1.0) 153 | 154 | return score 155 | 156 | def calculate_fallback_score(self, hyp_words, ref_words): 157 | """ 158 | Calculate a fallback similarity score when embeddings aren't available. 159 | 160 | Args: 161 | hyp_words (list): Words in hypothesis 162 | ref_words (list): Words in reference 163 | 164 | Returns: 165 | float: Similarity score 166 | """ 167 | # Simple Jaccard similarity as fallback 168 | hyp_set = set(hyp_words) 169 | ref_set = set(ref_words) 170 | 171 | intersection = len(hyp_set.intersection(ref_set)) 172 | union = len(hyp_set.union(ref_set)) 173 | 174 | if union == 0: 175 | return 0.0 176 | 177 | return intersection / union 178 | 179 | def evaluate_batch(self, hypotheses, references): 180 | """ 181 | Evaluate a batch of hypotheses against references. 
182 | 183 | Args: 184 | hypotheses (list): List of generated texts 185 | references (list): List of ground truth texts 186 | 187 | Returns: 188 | list: List of MoverScore values 189 | """ 190 | if len(hypotheses) != len(references): 191 | raise ValueError("Number of hypotheses and references must match") 192 | 193 | scores = [] 194 | for hyp, ref in zip(hypotheses, references): 195 | score = self.calculate_moverscore(hyp, ref) 196 | scores.append(score) 197 | 198 | return scores 199 | 200 | 201 | if __name__ == "__main__": 202 | # Mock data - customer feedback summaries 203 | human_summaries = [ 204 | "The user interface is confusing and difficult to navigate, especially on mobile devices.", 205 | "Customer service was responsive but unable to resolve my billing issue completely.", 206 | "The product exceeded expectations with its durability and high-quality materials." 207 | ] 208 | 209 | ai_generated_summaries = [ 210 | "Users find the interface challenging to use, particularly when accessing via smartphones.", 211 | "Support team replied quickly but didn't fully fix the payment problem.", 212 | "The item surpassed anticipated quality levels with excellent construction and premium components." 213 | ] 214 | 215 | # Initialize evaluator (API key loaded from .env file) 216 | evaluator = MoverScoreEvaluator() 217 | 218 | # Calculate scores 219 | scores = evaluator.evaluate_batch(ai_generated_summaries, human_summaries) 220 | 221 | # Print results 222 | print("MoverScore Evaluation Results:") 223 | for i, (human, ai, score) in enumerate(zip(human_summaries, ai_generated_summaries, scores)): 224 | print(f"\nExample {i+1}:") 225 | print(f"Human: {human}") 226 | print(f"AI: {ai}") 227 | print(f"MoverScore: {score:.4f}") 228 | 229 | # Calculate average score 230 | avg_score = sum(scores) / len(scores) if scores else 0 231 | print(f"\nAverage MoverScore: {avg_score:.4f}") 232 | -------------------------------------------------------------------------------- /gatr/app.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timedelta 2 | import json 3 | import os 4 | import random 5 | import re 6 | from typing import List, Dict, Tuple 7 | from neo4j import GraphDatabase 8 | import numpy as np 9 | from dotenv import load_dotenv 10 | from tenacity import retry, stop_after_attempt, wait_random_exponential 11 | import logging 12 | # OpenAI handler setup 13 | from openai import OpenAI 14 | 15 | 16 | # Load environment variables 17 | load_dotenv() 18 | 19 | # Neo4j connection 20 | uri = os.getenv("NEO4J_URI") 21 | user = os.getenv("NEO4J_USERNAME") 22 | password = os.getenv("NEO4J_PASSWORD") 23 | 24 | 25 | 26 | client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 27 | 28 | logging.basicConfig(level=logging.INFO) 29 | logger = logging.getLogger(__name__) 30 | 31 | @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) 32 | def completion_with_backoff(**kwargs): 33 | return client.chat.completions.create(**kwargs) 34 | 35 | class DisasterResponseGRATR: 36 | def __init__(self, num_agencies: int): 37 | self.num_agencies = num_agencies 38 | self.driver = GraphDatabase.driver(uri, auth=(user, password)) 39 | self.initialize_graph() 40 | 41 | def initialize_graph(self): 42 | with self.driver.session() as session: 43 | session.run("MATCH (n) DETACH DELETE n") # Clear existing graph 44 | for i in range(self.num_agencies): 45 | session.run( 46 | "CREATE (:Agency {id: $id, name: $name})", 47 | id=i, name=f"Agency_{i}" 48 | ) 49 
| session.run( 50 | """ 51 | MATCH (a1:Agency), (a2:Agency) 52 | WHERE a1.id < a2.id 53 | CREATE (a1)-[:TRUST {weight: 0, evidence: []}]->(a2), 54 | (a2)-[:TRUST {weight: 0, evidence: []}]->(a1) 55 | """ 56 | ) 57 | 58 | def update_graph(self, agency_id: int, report: str): 59 | evidence = self.extract_evidence(report) 60 | logger.info(f"Extracted evidence: {evidence}") 61 | 62 | with self.driver.session() as session: 63 | for target_id, action, score in evidence: 64 | evidence_str = json.dumps({ 65 | "action": action, 66 | "score": score, 67 | "report": report 68 | }) 69 | print(evidence_str) 70 | try: 71 | result = session.run( 72 | """ 73 | MATCH (a1:Agency {id: $agency_id})-[t:TRUST]->(a2:Agency {id: $target_id}) 74 | SET t.weight = t.weight + $score, 75 | t.evidence = CASE 76 | WHEN t.evidence IS NULL THEN [$evidence] 77 | ELSE t.evidence + $evidence 78 | END 79 | RETURN t.weight as new_weight, t.evidence as new_evidence 80 | """, 81 | agency_id=agency_id, 82 | target_id=target_id, 83 | score=score, 84 | evidence=evidence_str 85 | ) 86 | 87 | record = result.single() 88 | if record: 89 | logger.info(f"Updated relationship between Agency {agency_id} and Agency {target_id}:") 90 | logger.info(f"New weight: {record['new_weight']}") 91 | logger.info(f"New evidence: {record['new_evidence']}") 92 | else: 93 | logger.warning(f"No relationship found between Agency {agency_id} and Agency {target_id}") 94 | 95 | except e: 96 | logger.error(f"Neo4j error occurred: {str(e)}") 97 | except Exception as e: 98 | logger.error(f"An unexpected error occurred: {str(e)}") 99 | 100 | # Verify the updates 101 | self.verify_graph_updates(agency_id) 102 | 103 | def verify_graph_updates(self, agency_id: int): 104 | with self.driver.session() as session: 105 | result = session.run( 106 | """ 107 | MATCH (a:Agency {id: $agency_id})-[t:TRUST]->(other:Agency) 108 | RETURN other.id as target_id, t.weight as weight, t.evidence as evidence 109 | """, 110 | agency_id=agency_id 111 | ) 112 | 113 | for record in result: 114 | logger.info(f"Verification - Relationship from Agency {agency_id} to Agency {record['target_id']}:") 115 | logger.info(f"Weight: {record['weight']}") 116 | logger.info(f"Evidence: {record['evidence']}") 117 | 118 | def extract_evidence(self, report: str) -> List[Tuple[int, str, float]]: 119 | prompt = f""" 120 | Analyze the following disaster response report: 121 | {report} 122 | 123 | Identify the agencies mentioned, their actions (Collaborate, Assist, Conflict), 124 | and assign a score from -10 to 10 indicating the impact on trust. 125 | 126 | Return the results strictly in the format and do not return anything else: 127 | agency_id,action,score 128 | """ 129 | 130 | response = completion_with_backoff( 131 | model="gpt-4o-mini", 132 | messages=[{"role": "user", "content": prompt}] 133 | ) 134 | 135 | evidence = [] 136 | print(response.choices[0].message.content) 137 | matches = re.findall(r'(\d+),(\w+),([-]?\d+(?:\.\d+)?)', response.choices[0].message.content) 138 | 139 | for match in matches: 140 | try: 141 | agency_id, action, score = match 142 | evidence.append((int(agency_id), action.strip(), float(score))) 143 | except ValueError as e: 144 | print(f"Warning: Could not parse match: {match}. 
Error: {e}") 145 | continue 146 | 147 | if evidence == []: 148 | raise KeyError 149 | return evidence 150 | 151 | def assess_coordination(self, agency_id: int) -> Dict[int, str]: 152 | with self.driver.session() as session: 153 | agency_info = session.run( 154 | """ 155 | MATCH (a:Agency {id: $agency_id})-[t:TRUST]->(other:Agency) 156 | RETURN other.id as id, other.name as name, t.weight as trust, t.evidence as evidence 157 | """, 158 | agency_id=agency_id 159 | ) 160 | 161 | assessments = {} 162 | for record in agency_info: 163 | target_id = record['id'] 164 | target_name = record['name'] 165 | trust = record['trust'] 166 | evidence = [json.loads(e) for e in record['evidence']] if record['evidence'] else [] 167 | 168 | prompt = f""" 169 | Agency {target_name} (ID: {target_id}): 170 | - Current trust level: {trust} 171 | - Evidence of past interactions: {evidence} 172 | 173 | Based on this information, provide a brief assessment of the coordination potential 174 | with Agency {target_name}. Consider their past actions and reliability in the disaster response context. 175 | """ 176 | 177 | response = completion_with_backoff( 178 | model="gpt-4o-mini", 179 | messages=[{"role": "user", "content": prompt}] 180 | ) 181 | 182 | assessment = response.choices[0].message.content 183 | assessments[target_id] = assessment 184 | 185 | return assessments 186 | 187 | def simulate_disaster_response(self, num_rounds: int): 188 | print("\nEnhancing graph with detailed information...") 189 | self.enhance_graph() # Call this before starting the simulation 190 | print("Graph enhancement complete.") 191 | disaster_types = ["Flood", "Earthquake", "Wildfire", "Hurricane"] 192 | actions = ["deployed resources", "shared information", "requested assistance", "coordinated efforts"] 193 | 194 | for round in range(num_rounds): 195 | print(f"\n--- Round {round + 1} ---") 196 | disaster = random.choice(disaster_types) 197 | print(f"Current Disaster: {disaster}") 198 | 199 | for agency_id in range(self.num_agencies): 200 | action = random.choice(actions) 201 | target_id = random.choice([i for i in range(self.num_agencies) if i != agency_id]) 202 | report = f"Agency_{agency_id} {action} with Agency_{target_id} during the {disaster} response." 
203 | print(f"\nReport: {report}") 204 | 205 | self.update_graph(agency_id, report) 206 | 207 | if round == num_rounds - 1: # Only perform assessment in the last round 208 | assessments = self.assess_coordination(agency_id) 209 | print(f"\nAgency_{agency_id} Coordination Assessments:") 210 | for target_id, assessment in assessments.items(): 211 | print(f" Agency_{target_id}: {assessment[:100]}...") # Truncate long assessments 212 | 213 | # Verify all updates after each round 214 | print("\nVerifying all graph updates:") 215 | for agency_id in range(self.num_agencies): 216 | self.verify_graph_updates(agency_id) 217 | def enhance_graph(self): 218 | with self.driver.session() as session: 219 | for agency_id in range(self.num_agencies): 220 | self._enhance_agency(session, agency_id) 221 | 222 | # Create additional nodes for disasters, resources, and news channels 223 | self._create_disaster_nodes(session) 224 | self._create_resource_nodes(session) 225 | self._create_news_channel_nodes(session) 226 | 227 | def _enhance_agency(self, session, agency_id): 228 | agency_types = ["Fire Department", "Police Department", "Medical Services", "Red Cross", "FEMA"] 229 | agency_type = random.choice(agency_types) 230 | city = random.choice(["New York", "Los Angeles", "Chicago", "Houston", "Phoenix"]) 231 | state = random.choice(["NY", "CA", "IL", "TX", "AZ"]) 232 | budget = random.randint(1000000, 100000000) 233 | staff_count = random.randint(50, 5000) 234 | founding_date = datetime.now() - timedelta(days=random.randint(365*10, 365*100)) 235 | 236 | session.run(""" 237 | MATCH (a:Agency {id: $agency_id}) 238 | SET a.type = $type, 239 | a.city = $city, 240 | a.state = $state, 241 | a.budget = $budget, 242 | a.staff_count = $staff_count, 243 | a.founding_date = $founding_date, 244 | a.response_rate = $response_rate, 245 | a.success_rate = $success_rate 246 | """, agency_id=agency_id, type=agency_type, city=city, state=state, 247 | budget=budget, staff_count=staff_count, founding_date=founding_date.isoformat(), 248 | response_rate=random.uniform(0.7, 0.99), success_rate=random.uniform(0.6, 0.95)) 249 | 250 | # Create and connect specialized units 251 | specialized_units = ["Rescue Team", "Hazmat Unit", "Emergency Medical Team", "Logistics Support"] 252 | for unit in random.sample(specialized_units, random.randint(1, len(specialized_units))): 253 | session.run(""" 254 | MATCH (a:Agency {id: $agency_id}) 255 | CREATE (u:SpecializedUnit {name: $unit_name, agency_id: $agency_id}) 256 | CREATE (a)-[:HAS_UNIT]->(u) 257 | """, agency_id=agency_id, unit_name=f"{unit} of {agency_type} {agency_id}") 258 | 259 | def _create_disaster_nodes(self, session): 260 | disasters = [ 261 | ("Hurricane Zeta", "Hurricane", "2023-09-15"), 262 | ("California Wildfire", "Wildfire", "2023-07-20"), 263 | ("Midwest Floods", "Flood", "2023-04-10"), 264 | ("New Madrid Earthquake", "Earthquake", "2023-11-03") 265 | ] 266 | for name, type, date in disasters: 267 | session.run(""" 268 | CREATE (d:Disaster {name: $name, type: $type, date: $date}) 269 | """, name=name, type=type, date=date) 270 | 271 | # Connect agencies to disasters they responded to 272 | agencies_responded = random.sample(range(self.num_agencies), random.randint(2, self.num_agencies)) 273 | for agency_id in agencies_responded: 274 | session.run(""" 275 | MATCH (a:Agency {id: $agency_id}), (d:Disaster {name: $disaster_name}) 276 | CREATE (a)-[:RESPONDED_TO {effectiveness: $effectiveness}]->(d) 277 | """, agency_id=agency_id, disaster_name=name, 
effectiveness=random.uniform(0.5, 1.0)) 278 | 279 | def _create_resource_nodes(self, session): 280 | resources = ["Emergency Vehicles", "Medical Supplies", "Food and Water", "Temporary Shelters"] 281 | for resource in resources: 282 | session.run(""" 283 | CREATE (r:Resource {name: $name, total_quantity: $quantity}) 284 | """, name=resource, quantity=random.randint(100, 10000)) 285 | 286 | # Distribute resources to agencies 287 | for agency_id in range(self.num_agencies): 288 | session.run(""" 289 | MATCH (a:Agency {id: $agency_id}), (r:Resource {name: $resource_name}) 290 | CREATE (a)-[:HAS_RESOURCE {quantity: $quantity}]->(r) 291 | """, agency_id=agency_id, resource_name=resource, quantity=random.randint(10, 1000)) 292 | 293 | def _create_news_channel_nodes(self, session): 294 | news_channels = ["CNN", "Fox News", "MSNBC", "ABC News", "CBS News"] 295 | for channel in news_channels: 296 | session.run(""" 297 | CREATE (n:NewsChannel {name: $name}) 298 | """, name=channel) 299 | 300 | # Create coverage relationships between news channels and disasters 301 | session.run(""" 302 | MATCH (n:NewsChannel {name: $channel_name}), (d:Disaster) 303 | WITH n, d, rand() AS r 304 | WHERE r < 0.7 305 | CREATE (n)-[:COVERED {hours_of_coverage: $hours}]->(d) 306 | """, channel_name=channel, hours=random.randint(1, 100)) 307 | 308 | if __name__ == "__main__": 309 | num_agencies = 8 310 | num_rounds = 1 311 | 312 | print("Initializing Disaster Response Coordination Simulation") 313 | print(f"Number of agencies: {num_agencies}") 314 | print(f"Number of rounds: {num_rounds}") 315 | 316 | gratr = DisasterResponseGRATR(num_agencies) 317 | gratr.simulate_disaster_response(num_rounds) 318 | 319 | print("\nSimulation complete.") 320 | -------------------------------------------------------------------------------- /gatr/readme.md: -------------------------------------------------------------------------------- 1 | # Disaster Response GRATR 2 | 3 | ## Graph Retrieval Augmented Trustworthiness Reasoning for Disaster Response Coordination 4 | 5 | ### Table of Contents 6 | 1. [Introduction](#introduction) 7 | 2. [Features](#features) 8 | 3. [System Architecture](#system-architecture) 9 | 4. [Installation](#installation) 10 | 5. [Usage](#usage) 11 | 6. [Components](#components) 12 | 7. [Simulation Process](#simulation-process) 13 | 8. [Customization](#customization) 14 | 9. [Contributing](#contributing) 15 | 10. [License](#license) 16 | 17 | ## Introduction 18 | 19 | Disaster Response GRATR is an innovative system that applies Graph Retrieval Augmented Trustworthiness Reasoning (GRATR) to enhance coordination and decision-making in disaster response scenarios. By leveraging graph-based trust modeling and machine learning techniques, this system provides a dynamic and adaptive approach to assessing trustworthiness and potential for coordination among multiple agencies involved in disaster response efforts. 
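In practice the core loop is small: a free-text situation report goes in, trust edges are updated, and coordination potential can be queried per agency. The sketch below is a minimal example of that loop, assuming Neo4j and the OpenAI key are configured as described under Installation and using the `DisasterResponseGRATR` class defined in `app.py` (the Usage section further down imports the same class from `disaster_response_gratr`; use whichever module name matches your checkout):

```python
# Minimal sketch of the GRATR loop on a single report (not a full simulation).
from app import DisasterResponseGRATR  # or: from disaster_response_gratr import DisasterResponseGRATR

# Clears the graph, then creates Agency nodes with zero-weight TRUST edges between every pair.
gratr = DisasterResponseGRATR(num_agencies=3)

# The LLM extracts (target agency, action, trust score) evidence from the report text.
report = "Agency_0 shared information with Agency_1 during the Flood response."
gratr.update_graph(agency_id=0, report=report)

# LLM-written assessment of how well Agency_0 could coordinate with each other agency.
for target_id, assessment in gratr.assess_coordination(agency_id=0).items():
    print(f"Agency_{target_id}: {assessment[:120]}...")
```

These are the same calls `simulate_disaster_response` repeats for each agency in every round, with coordination assessments generated in the final round.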
20 | 21 | ## Features 22 | 23 | - Dynamic trust graph representation of agencies and their relationships 24 | - Real-time updates based on agency actions and interactions 25 | - Integration with Large Language Models (LLMs) for natural language processing of reports and assessments 26 | - Comprehensive simulation of disaster response scenarios 27 | - Detailed agency profiles including resources, specialized units, and historical performance 28 | - Integration of external factors such as disaster events and media coverage 29 | 30 | ## System Architecture 31 | 32 | The Disaster Response GRATR system consists of the following key components: 33 | 34 | 1. **Graph Database (Neo4j)**: Stores the trust network, agency information, and disaster-related data. 35 | 2. **GRATR Algorithm**: Implements the core logic for trust assessment and evidence processing. 36 | 3. **Large Language Model Integration**: Utilizes OpenAI's GPT models for natural language understanding and generation. 37 | 4. **Simulation Engine**: Generates and runs disaster response scenarios. 38 | 5. **Analysis and Visualization Tools**: Provides insights into trust dynamics and agency performance. 39 | 40 | ## Installation 41 | 42 | 1. Clone the repository: 43 | ``` 44 | git clone https://github.com/yourusername/disaster-response-gratr.git 45 | cd disaster-response-gratr 46 | ``` 47 | 48 | 2. Install required dependencies: 49 | ``` 50 | pip install -r requirements.txt 51 | ``` 52 | 53 | 3. Set up Neo4j: 54 | - [Download and install Neo4j](https://neo4j.com/download/) 55 | - Create a new database and note down the URI, username, and password 56 | 57 | 4. Set up environment variables: 58 | - Create a `.env` file in the project root 59 | - Add the following variables: 60 | ``` 61 | NEO4J_URI=bolt://localhost:7687 62 | NEO4J_USERNAME=your_username 63 | NEO4J_PASSWORD=your_password 64 | OPENAI_API_KEY=your_openai_api_key 65 | ``` 66 | 67 | ## Usage 68 | 69 | To run a simulation: 70 | 71 | ```python 72 | from disaster_response_gratr import DisasterResponseGRATR 73 | 74 | # Initialize the system 75 | gratr = DisasterResponseGRATR(num_agencies=5) 76 | 77 | # Run a simulation 78 | gratr.simulate_disaster_response(num_rounds=3) 79 | ``` 80 | 81 | ## Components 82 | 83 | ### 1. Agency Nodes 84 | Represent disaster response agencies with properties such as type, location, budget, staff count, and performance metrics. 85 | 86 | ### 2. Disaster Nodes 87 | Represent specific disaster events with properties like type, date, and severity. 88 | 89 | ### 3. Resource Nodes 90 | Represent various resources available for disaster response, linked to agencies that possess them. 91 | 92 | ### 4. News Channel Nodes 93 | Represent media entities covering disaster events, providing an additional layer of information flow. 94 | 95 | ### 5. Specialized Unit Nodes 96 | Represent specific capabilities of agencies, such as search and rescue teams or hazmat units. 97 | 98 | ### 6. Trust Relationships 99 | Dynamic edges between agency nodes, updated based on interactions and performance during disaster response. 100 | 101 | ## Simulation Process 102 | 103 | 1. **Graph Enhancement**: Populates the graph with detailed agency and disaster information. 104 | 2. **Disaster Generation**: Randomly selects a disaster type for each round of the simulation. 105 | 3. **Agency Actions**: Simulates actions taken by each agency in response to the disaster. 106 | 4. **Graph Updates**: Modifies trust relationships based on agency actions and interactions. 107 | 5. 
**Evidence Extraction**: Processes action reports to extract structured evidence. 108 | 6. **Trust Assessment**: Updates trust scores between agencies based on extracted evidence. 109 | 7. **Coordination Assessment**: Evaluates potential for coordination between agencies based on trust scores and historical data. 110 | 111 | ## Customization 112 | 113 | The system is designed to be flexible and customizable: 114 | 115 | - Modify the `enhance_graph` method to add or change agency properties. 116 | - Extend the `extract_evidence` method to handle more complex action reports. 117 | - Adjust the trust update logic in `update_graph` to reflect different trust dynamics. 118 | - Add new node types or relationships to represent additional aspects of disaster response. 119 | 120 | ## Contributing 121 | 122 | Contributions to the Disaster Response GRATR project are welcome! Please refer to the `CONTRIBUTING.md` file for guidelines on how to submit issues, feature requests, and pull requests. 123 | 124 | ## License 125 | 126 | This project is licensed under the MIT License - see the `LICENSE` file for details. 127 | 128 | --- 129 | 130 | For more information or support, please open an issue in the GitHub repository or contact the project maintainers. 131 | 132 | 133 | ## Architecture Diagram 134 | 135 | ```mermaid 136 | graph TD 137 | A[Start] --> B[Initialize GRATR] 138 | B --> C[Initialize Graph] 139 | C --> D[Run Game Simulation] 140 | D --> E{For each round} 141 | E --> F{For each player} 142 | F --> G[Generate Synthetic Observation] 143 | G --> H[Update Graph] 144 | H --> I[Extract Evidence] 145 | I --> J[Update Trust Relationships] 146 | J --> K[Perform Reasoning Process] 147 | K --> L[Forward Retrieval] 148 | L --> M[Evidence Merging] 149 | M --> N[LLM-based Assessment] 150 | N --> O[Backward Update] 151 | O --> P{More players?} 152 | P -->|Yes| F 153 | P -->|No| Q{More rounds?} 154 | Q -->|Yes| E 155 | Q -->|No| R[End] 156 | 157 | subgraph "Neo4j Graph Database" 158 | S[Player Nodes] 159 | T[TRUST Relationships] 160 | S --- T 161 | end 162 | 163 | subgraph "OpenAI API" 164 | U[LLM] 165 | end 166 | 167 | H --> S 168 | H --> T 169 | L --> S 170 | L --> T 171 | M --> T 172 | O --> T 173 | K --> U 174 | N --> U 175 | 176 | subgraph "GRATR Class Methods" 177 | V[initialize_graph] 178 | W[update_graph] 179 | X[evidence_merging] 180 | Y[forward_retrieval] 181 | Z[backward_update] 182 | AA[reasoning_process] 183 | AB[extract_evidence] 184 | end 185 | 186 | C --> V 187 | H --> W 188 | M --> X 189 | L --> Y 190 | O --> Z 191 | K --> AA 192 | I --> AB 193 | ``` 194 | -------------------------------------------------------------------------------- /gatr/requirements.txt: -------------------------------------------------------------------------------- 1 | annotated-types==0.7.0 2 | anyio==4.4.0 3 | black==24.8.0 4 | certifi==2024.7.4 5 | click==8.1.7 6 | colorama==0.4.6 7 | distro==1.9.0 8 | h11==0.14.0 9 | httpcore==1.0.5 10 | httpx==0.27.0 11 | idna==3.7 12 | jiter==0.5.0 13 | joblib==1.4.2 14 | mypy-extensions==1.0.0 15 | neo4j==5.23.1 16 | numpy==2.1.0 17 | openai==1.42.0 18 | packaging==24.1 19 | pathspec==0.12.1 20 | platformdirs==4.2.2 21 | pydantic==2.8.2 22 | pydantic_core==2.20.1 23 | python-dotenv==1.0.1 24 | pytz==2024.1 25 | scikit-learn==1.5.1 26 | scipy==1.14.1 27 | sniffio==1.3.1 28 | tenacity==9.0.0 29 | threadpoolctl==3.5.0 30 | tqdm==4.66.5 31 | typing_extensions==4.12.2 32 | -------------------------------------------------------------------------------- /multi-stream-processor/app.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | from openai import OpenAI 3 | from dotenv import load_dotenv 4 | from tenacity import retry, stop_after_attempt, wait_random_exponential 5 | import numpy as np 6 | from sklearn.metrics.pairwise import cosine_similarity 7 | import csv 8 | 9 | load_dotenv() 10 | 11 | client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 12 | 13 | @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) 14 | def completion_with_backoff(**kwargs): 15 | return client.chat.completions.create(**kwargs) 16 | 17 | @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) 18 | def embedding_with_backoff(**kwargs): 19 | return client.embeddings.create(**kwargs) 20 | 21 | def get_embedding(text): 22 | print(text) 23 | result = embedding_with_backoff(model="text-embedding-ada-002", input=text) 24 | return result.data[0].embedding 25 | 26 | def generate_code(prompt): 27 | response = completion_with_backoff( 28 | model="gpt-3.5-turbo", 29 | messages=[{"role": "user", "content": f"Generate Python code for: {prompt}"}] 30 | ) 31 | return response.choices[0].message.content 32 | 33 | def cosine_sim(a, b): 34 | return cosine_similarity([a], [b])[0][0] 35 | 36 | def multi_stream_code_search(query, functions, classes): 37 | augmented_query = f"Python code for: {query}" 38 | query_embedding = get_embedding(augmented_query) 39 | function_embeddings = [get_embedding(func) for func in functions] 40 | class_embeddings = [get_embedding(cls) for cls in classes] 41 | 42 | # Stream 1 43 | stream1_results = sorted(range(len(function_embeddings)), 44 | key=lambda i: cosine_sim(query_embedding, function_embeddings[i]), 45 | reverse=True)[:3] 46 | 47 | # Stream 2 48 | generated_code = generate_code(augmented_query) 49 | code_embedding = get_embedding(generated_code) 50 | stream2_func_results = sorted(range(len(function_embeddings)), 51 | key=lambda i: cosine_sim(code_embedding, function_embeddings[i]), 52 | reverse=True)[:3] 53 | stream2_class_results = sorted(range(len(class_embeddings)), 54 | key=lambda i: cosine_sim(code_embedding, class_embeddings[i]), 55 | reverse=True)[:3] 56 | 57 | # Stream 3 (simplified) 58 | component_embeddings = [get_embedding(generated_code)] 59 | stream3_results = [max(range(len(function_embeddings)), 60 | key=lambda i: cosine_sim(comp_emb, function_embeddings[i])) 61 | for comp_emb in component_embeddings] 62 | 63 | final_set = set(stream1_results + stream2_func_results + [i + len(functions) for i in stream2_class_results] + stream3_results) 64 | return list(final_set) 65 | 66 | def direct_embedding_search(query, functions, classes): 67 | query_embedding = get_embedding(query) 68 | all_code = functions + classes 69 | all_embeddings = [get_embedding(code) for code in all_code] 70 | 71 | similarities = [cosine_sim(query_embedding, emb) for emb in all_embeddings] 72 | top_5 = sorted(range(len(similarities)), key=lambda i: similarities[i], reverse=True)[:5] 73 | return top_5 74 | 75 | # Complex, real-life code snippets 76 | functions = [ 77 | """ 78 | def quick_sort(arr): 79 | if len(arr) <= 1: 80 | return arr 81 | pivot = arr[len(arr) // 2] 82 | left = [x for x in arr if x < pivot] 83 | middle = [x for x in arr if x == pivot] 84 | right = [x for x in arr if x > pivot] 85 | return quick_sort(left) + middle + quick_sort(right) 86 | """, 87 | """ 88 | def fibonacci(n, memo={}): 89 | if n in memo: 90 | return memo[n] 91 | if n <= 2: 92 | return 1 93 | memo[n] = fibonacci(n-1, 
memo) + fibonacci(n-2, memo) 94 | return memo[n] 95 | """, 96 | """ 97 | import pandas as pd 98 | import matplotlib.pyplot as plt 99 | 100 | def analyze_stock_data(file_path): 101 | df = pd.read_csv(file_path) 102 | df['Date'] = pd.to_datetime(df['Date']) 103 | df.set_index('Date', inplace=True) 104 | 105 | # Calculate moving averages 106 | df['MA50'] = df['Close'].rolling(window=50).mean() 107 | df['MA200'] = df['Close'].rolling(window=200).mean() 108 | 109 | # Plot the data 110 | plt.figure(figsize=(12,6)) 111 | plt.plot(df.index, df['Close'], label='Close Price') 112 | plt.plot(df.index, df['MA50'], label='50-day MA') 113 | plt.plot(df.index, df['MA200'], label='200-day MA') 114 | plt.title('Stock Price Analysis') 115 | plt.xlabel('Date') 116 | plt.ylabel('Price') 117 | plt.legend() 118 | plt.show() 119 | 120 | return df 121 | """, 122 | """ 123 | from sklearn.model_selection import train_test_split 124 | from sklearn.ensemble import RandomForestClassifier 125 | from sklearn.metrics import accuracy_score, classification_report 126 | 127 | def train_random_forest(X, y): 128 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) 129 | 130 | rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42) 131 | rf_classifier.fit(X_train, y_train) 132 | 133 | y_pred = rf_classifier.predict(X_test) 134 | accuracy = accuracy_score(y_test, y_pred) 135 | report = classification_report(y_test, y_pred) 136 | 137 | return rf_classifier, accuracy, report 138 | """, 139 | """ 140 | import requests 141 | from bs4 import BeautifulSoup 142 | 143 | def scrape_news_headlines(url): 144 | response = requests.get(url) 145 | soup = BeautifulSoup(response.text, 'html.parser') 146 | headlines = soup.find_all('h2', class_='headline') 147 | return [headline.text.strip() for headline in headlines] 148 | """ 149 | ] 150 | 151 | classes = [ 152 | """ 153 | class BinarySearchTree: 154 | def __init__(self, value): 155 | self.value = value 156 | self.left = None 157 | self.right = None 158 | 159 | def insert(self, value): 160 | if value < self.value: 161 | if self.left is None: 162 | self.left = BinarySearchTree(value) 163 | else: 164 | self.left.insert(value) 165 | else: 166 | if self.right is None: 167 | self.right = BinarySearchTree(value) 168 | else: 169 | self.right.insert(value) 170 | 171 | def search(self, value): 172 | if value == self.value: 173 | return True 174 | elif value < self.value and self.left: 175 | return self.left.search(value) 176 | elif value > self.value and self.right: 177 | return self.right.search(value) 178 | return False 179 | """, 180 | """ 181 | import threading 182 | 183 | class ThreadSafeCounter: 184 | def __init__(self): 185 | self._value = 0 186 | self._lock = threading.Lock() 187 | 188 | def increment(self): 189 | with self._lock: 190 | self._value += 1 191 | 192 | def decrement(self): 193 | with self._lock: 194 | self._value -= 1 195 | 196 | @property 197 | def value(self): 198 | with self._lock: 199 | return self._value 200 | """ 201 | ] 202 | 203 | # Evaluation 204 | queries = [ 205 | "implement quicksort algorithm", 206 | "create a function for fibonacci sequence with memoization", 207 | "analyze stock market data using pandas and matplotlib", 208 | "train a random forest classifier", 209 | "web scraping to get news headlines", 210 | "implement a binary search tree", 211 | "create a thread-safe counter class" 212 | ] 213 | 214 | # Function to get snippet names 215 | def get_snippet_names(indices, functions, classes): 216 | 
all_snippets = functions + classes 217 | return [all_snippets[i].split('\n')[1].strip() for i in indices] 218 | 219 | # CSV generation 220 | with open('code_search_comparison.csv', 'w', newline='') as file: 221 | writer = csv.writer(file) 222 | writer.writerow(["Query", "Multi-Stream Architecture Results", "Direct Embedding Results"]) 223 | 224 | for query in queries: 225 | multi_stream_results = multi_stream_code_search(query, functions, classes) 226 | direct_results = direct_embedding_search(query, functions, classes) 227 | 228 | multi_stream_names = get_snippet_names(multi_stream_results, functions, classes) 229 | direct_names = get_snippet_names(direct_results, functions, classes) 230 | 231 | writer.writerow([query, ", ".join(multi_stream_names[:2]), ", ".join(direct_names[:2])]) 232 | 233 | print("CSV file 'code_search_comparison.csv' has been created with the comparison results.") 234 | 235 | # Print results 236 | print("\nMulti-Stream Architecture Results:") 237 | for query in queries: 238 | results = multi_stream_code_search(query, functions, classes) 239 | print(f"\nQuery: {query}") 240 | print(f"Relevant snippet indices: {results}") 241 | print(f"Snippet names: {get_snippet_names(results, functions, classes)}") 242 | 243 | print("\nDirect Embedding Approach Results:") 244 | for query in queries: 245 | results = direct_embedding_search(query, functions, classes) 246 | print(f"\nQuery: {query}") 247 | print(f"Relevant snippet indices: {results}") 248 | print(f"Snippet names: {get_snippet_names(results, functions, classes)}") 249 | -------------------------------------------------------------------------------- /multi-stream-processor/requirements.txt: -------------------------------------------------------------------------------- 1 | annotated-types==0.7.0 2 | anyio==4.4.0 3 | black==24.8.0 4 | certifi==2024.7.4 5 | click==8.1.7 6 | colorama==0.4.6 7 | distro==1.9.0 8 | h11==0.14.0 9 | httpcore==1.0.5 10 | httpx==0.27.0 11 | idna==3.7 12 | jiter==0.5.0 13 | joblib==1.4.2 14 | mypy-extensions==1.0.0 15 | numpy==2.1.0 16 | openai==1.42.0 17 | packaging==24.1 18 | pathspec==0.12.1 19 | platformdirs==4.2.2 20 | pydantic==2.8.2 21 | pydantic_core==2.20.1 22 | python-dotenv==1.0.1 23 | scikit-learn==1.5.1 24 | scipy==1.14.1 25 | sniffio==1.3.1 26 | tenacity==9.0.0 27 | threadpoolctl==3.5.0 28 | tqdm==4.66.5 29 | typing_extensions==4.12.2 30 | -------------------------------------------------------------------------------- /nexusflow/readme.md: -------------------------------------------------------------------------------- 1 | # NexusFlow: AI Pipeline Generator 2 | 3 | ## Overview 4 | 5 | NexusFlow is an intelligent assistant designed to help users create AI pipelines using natural language descriptions. It leverages the power of large language models (LLMs) to interpret user requirements, plan tasks, and generate a complete workflow for AI-driven data processing and analysis. 
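To make this concrete, the toy sketch below walks the same requirement → plan → nodes → flow chain with plain OpenAI calls. It illustrates the pattern only and is not the NexusFlow implementation; every prompt, name, and model choice in it is invented for the example:

```python
# Toy illustration of the requirement -> plan -> nodes -> flow chain (not the actual NexusFlow code).
import os
import json
from openai import OpenAI

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

def ask(prompt: str, model: str = "gpt-4o-mini") -> str:
    response = client.chat.completions.create(model=model, messages=[{"role": "user", "content": prompt}])
    return response.choices[0].message.content.strip()

user_query = "Ingest weekly sales CSVs, clean them, and email a forecast chart to the team."

goal = ask(f"State the user's goal in one sentence: {user_query}")                # RequirementAgent role
plan = ask(f"List 3-5 ordered pipeline tasks, one per line, to achieve: {goal}")  # PlannerAgent role
nodes = [ask(f"Name a pipeline node and the tool it uses for this task: {task}")  # TaskAgent role
         for task in plan.splitlines() if task.strip()]
flow = {"goal": goal, "nodes": nodes}                                             # ConnectionAgent role, kept trivial here
print(json.dumps(flow, indent=2))
```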
6 | 7 | ## Architecture Diagram 8 | ```mermaid 9 | graph TD 10 | A[User] -->|Input| B[Streamlit Interface] 11 | B -->|User Query| C[NexusFlow] 12 | C -->|Extract Requirements| D[RequirementAgent] 13 | D -->|User Goal| E[PlannerAgent] 14 | E -->|Task List| F[TaskAgent] 15 | F -->|Nodes| G[ConnectionAgent] 16 | G -->|Connected Flow| H[ReviewerAgent] 17 | H -->|Review| C 18 | C -->|Generated Flow| B 19 | B -->|Display Results| A 20 | 21 | subgraph "OpenAI API" 22 | I[GPT-4o] 23 | J[GPT-4o-mini] 24 | end 25 | 26 | D -.->|API Call| I 27 | E -.->|API Call| I 28 | F -.->|API Call| J 29 | G -.->|API Call| J 30 | H -.->|API Call| I 31 | 32 | style A fill:#f9f,stroke:#333,stroke-width:2px 33 | style B fill:#bbf,stroke:#333,stroke-width:2px 34 | style C fill:#bfb,stroke:#333,stroke-width:2px 35 | style D fill:#fbb,stroke:#333,stroke-width:2px 36 | style E fill:#fbb,stroke:#333,stroke-width:2px 37 | style F fill:#fbb,stroke:#333,stroke-width:2px 38 | style G fill:#fbb,stroke:#333,stroke-width:2px 39 | style H fill:#fbb,stroke:#333,stroke-width:2px 40 | style I fill:#ff9,stroke:#333,stroke-width:2px 41 | style J fill:#ff9,stroke:#333,stroke-width:2px 42 | ``` 43 | ## Features 44 | 45 | - **Natural Language Interface**: Describe your AI pipeline needs in plain English. 46 | - **Automated Pipeline Generation**: Converts user requirements into a structured flow. 47 | - **Modular Architecture**: Utilizes specialized agents for each step of the pipeline creation process. 48 | - **Extensible Design**: Easy to add new capabilities or modify existing ones. 49 | - **Interactive Web Interface**: Built with Streamlit for easy use and visualization. 50 | 51 | ## Use Cases 52 | 53 | NexusFlow can be used in various scenarios, including: 54 | 55 | 1. **Rapid Prototyping**: Quickly generate initial AI pipeline designs for proof-of-concept projects. 56 | 2. **Educational Tool**: Help students and beginners understand AI pipeline construction. 57 | 3. **Workflow Optimization**: Suggest improvements or alternatives to existing AI pipelines. 58 | 4. **Cross-domain Application**: Assist domain experts in creating AI pipelines without deep technical knowledge. 59 | 60 | ## Components 61 | 62 | NexusFlow consists of several key components: 63 | 64 | 1. **RequirementAgent**: Interprets user input and extracts key requirements. 65 | 2. **PlannerAgent**: Creates a detailed plan based on the extracted requirements. 66 | 3. **TaskAgent**: Executes individual tasks in the plan, generating pipeline nodes. 67 | 4. **ConnectionAgent**: Connects the nodes to form a coherent flow. 68 | 5. **ReviewerAgent**: Evaluates the generated pipeline against the original requirements. 69 | 6. **NexusFlow**: The main class that orchestrates the entire process. 70 | 71 | ## Installation 72 | 73 | 1. Clone the repository: 74 | ``` 75 | git clone https://github.com/yourusername/nexusflow.git 76 | cd nexusflow 77 | ``` 78 | 79 | 2. Install the required dependencies: 80 | ``` 81 | pip install -r requirements.txt 82 | ``` 83 | 84 | 3. Set up your OpenAI API key in a `.env` file: 85 | ``` 86 | OPENAI_API_KEY=your_api_key_here 87 | ``` 88 | 89 | ## Usage 90 | 91 | Run the Streamlit app: 92 | 93 | ``` 94 | streamlit run nexusflow_app.py 95 | ``` 96 | 97 | Then, follow these steps: 98 | 99 | 1. Enter a description of your desired AI pipeline in the text area. 100 | 2. Click the "Generate Flow" button. 101 | 3. Wait for NexusFlow to process your request (this may take a few moments). 102 | 4. 
Review the generated pipeline, including the interpreted user goal, the flow structure, and the review feedback. 103 | 5. Download the generated JSON file for use with your preferred pipeline execution environment or for further modification. 104 | 105 | ## Limitations and Future Work 106 | 107 | - Currently relies on the OpenAI API, which may have usage costs. 108 | - The generated pipelines may require further refinement by domain experts. 109 | - Future versions could include more specialized agents for specific AI tasks or domains. 110 | - Integration with actual execution environments for the generated pipelines is a potential next step. 111 | 112 | ## Contributing 113 | 114 | Contributions to NexusFlow are welcome! Please feel free to submit pull requests, create issues, or suggest new features. 115 | 116 | ## License 117 | 118 | This project is licensed under the MIT License - see the LICENSE file for details. 119 | 120 | -------------------------------------------------------------------------------- /planner/Readme.md: -------------------------------------------------------------------------------- 1 | ## Architecture 2 | 3 | ```mermaid 4 | flowchart TD 5 | Start([Start]) --> Input[/Input Project Task/] 6 | Input --> Orchestrator[Planning Orchestrator Initialize] 7 | 8 | subgraph Orchestration 9 | Orchestrator --> SD[Subtask Decomposition Planning] 10 | 11 | subgraph "Subtask Planning" 12 | SD --> |Call LLM| SD1[Generate Subtasks] 13 | SD1 --> SD2[Calculate Effort] 14 | SD2 --> SD3[Create Schedule] 15 | end 16 | 17 | SD3 --> KF[Kalman Filter Planning] 18 | 19 | subgraph "Kalman Planning" 20 | KF --> |Initial Plan| KF1[State Estimation] 21 | KF1 --> KF2{Has Previous State?} 22 | KF2 --> |Yes| KF3[Update State & Uncertainty] 23 | KF2 --> |No| KF4[Initialize State] 24 | KF3 --> KF5[Calculate Confidence] 25 | KF4 --> KF5 26 | end 27 | 28 | KF5 --> MP[Model Predictive Planning] 29 | 30 | subgraph "Predictive Planning" 31 | MP --> MP1[Analyze Current State] 32 | MP1 --> MP2[Generate Predictions] 33 | MP2 --> MP3[Identify Risks] 34 | end 35 | end 36 | 37 | MP3 --> Result[Combine Results] 38 | Result --> Output[/Final Comprehensive Plan/] 39 | Output --> Save[Save to JSON] 40 | Save --> End([End]) 41 | 42 | %% Error handling 43 | SD1 --x E1[Error Handler] 44 | KF1 --x E1 45 | MP1 --x E1 46 | E1 --> |Retry| Orchestrator 47 | 48 | %% LLM calls with retry 49 | subgraph "LLM Interaction" 50 | direction TB 51 | L1[Prepare Prompt] --> L2{Call LLM} 52 | L2 --> |Success| L3[Parse JSON] 53 | L2 --> |Failure| L4[Retry Logic] 54 | L4 --> |Max Retries| E1 55 | L4 --> |Retry| L2 56 | end 57 | ``` 58 | -------------------------------------------------------------------------------- /planner/heterogenous-planner.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | from typing import List, Dict, Any, Optional, Tuple 4 | from dataclasses import dataclass 5 | from enum import Enum 6 | import numpy as np 7 | from openai import AsyncOpenAI 8 | import logging 9 | import json 10 | from datetime import datetime, timedelta 11 | import asyncio 12 | from tenacity import retry, stop_after_attempt, wait_exponential 13 | 14 | # Configure logging 15 | logging.basicConfig( 16 | level=logging.INFO, 17 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' 18 | ) 19 | logger = logging.getLogger(__name__) 20 | 21 | class PlanningStrategy(Enum): 22 | PSEUDO_KALMAN = "pseudo_kalman" 23 | SUBTASK_DECOMPOSITION = "subtask_decomposition" 24 | MODEL_PREDICTIVE = 
"model_predictive" 25 | EXPLICIT_CRITERIA = "explicit_criteria" 26 | 27 | @dataclass 28 | class PlanningConfig: 29 | max_iterations: int = 5 30 | confidence_threshold: float = 0.85 31 | temperature: float = 0.7 32 | model: str = "gpt-4o" 33 | default_criteria_weights: Dict[str, float] = None 34 | 35 | def __post_init__(self): 36 | if self.default_criteria_weights is None: 37 | self.default_criteria_weights = { 38 | "feasibility": 0.3, 39 | "efficiency": 0.2, 40 | "reliability": 0.2, 41 | "maintainability": 0.15, 42 | "scalability": 0.15 43 | } 44 | 45 | class PromptTemplates: 46 | SYSTEM_PROMPT = "You are an AI system specialized in project planning and task decomposition. Always provide responses in valid JSON format." 47 | 48 | SUBTASK_DECOMPOSITION = """Break down the following task into clear, manageable subtasks. 49 | Task: {task} 50 | 51 | Provide your response in the following JSON format: 52 | {{ 53 | "subtasks": [ 54 | {{ 55 | "id": "subtask-1", 56 | "objective": "Description of the first subtask", 57 | "expected_output": "Expected deliverable", 58 | "dependencies": [], 59 | "effort_hours": 4 60 | }} 61 | ] 62 | }} 63 | """ 64 | 65 | KALMAN_REFINEMENT = """Review and refine the following plan with new information. 66 | Current Plan: {current_plan} 67 | New Information: {new_data} 68 | 69 | Provide your response in JSON format with confidence scores for each component. 70 | """ 71 | 72 | MODEL_PREDICTIVE = """Analyze the path from current state to goal state. 73 | Current State: {current_state} 74 | Goal State: {goal_state} 75 | Prediction Horizon: {horizon} steps 76 | 77 | Provide your response in the following JSON format: 78 | {{ 79 | "next_steps": [], 80 | "estimated_completion": "YYYY-MM-DD", 81 | "risk_factors": [] 82 | }} 83 | """ 84 | 85 | CRITERIA_EVALUATION = """Evaluate the following plan against specified criteria. 86 | Plan: {plan} 87 | Criteria: {criteria} 88 | 89 | Provide your evaluation in JSON format with scores and justification. 
90 | """ 91 | 92 | class BasePlanner: 93 | def __init__(self, config: PlanningConfig): 94 | self.config = config 95 | self.client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) 96 | 97 | @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)) 98 | async def _call_llm(self, prompt: str, temperature: Optional[float] = None) -> str: 99 | try: 100 | response = await self.client.chat.completions.create( 101 | model=self.config.model, 102 | messages=[ 103 | {"role": "system", "content": PromptTemplates.SYSTEM_PROMPT}, 104 | {"role": "user", "content": prompt} 105 | ], 106 | temperature=temperature or self.config.temperature 107 | ) 108 | return response.choices[0].message.content 109 | except Exception as e: 110 | logger.error(f"Error calling LLM: {str(e)}") 111 | raise 112 | 113 | def _parse_json_response(self, response: str) -> Dict: 114 | try: 115 | # Clean the response string 116 | response = response.strip() 117 | # Handle potential markdown code blocks 118 | if "```json" in response: 119 | response = response.split("```json")[1].split("```")[0] 120 | elif "```" in response: 121 | response = response.split("```")[1].split("```")[0] 122 | response = response.strip() 123 | 124 | return json.loads(response) 125 | except json.JSONDecodeError as e: 126 | logger.error(f"Failed to parse JSON response: {e}") 127 | logger.debug(f"Response content: {response}") 128 | return {"error": "Failed to parse response", "raw_response": response} 129 | 130 | class PseudoKalmanPlanner(BasePlanner): 131 | def __init__(self, config: PlanningConfig): 132 | super().__init__(config) 133 | self.state_estimate = None 134 | self.uncertainty = 1.0 135 | self.process_noise = 0.1 136 | self.measurement_noise = 0.2 137 | 138 | async def plan(self, context: Dict[str, Any]) -> Dict[str, Any]: 139 | new_data = context.get('initial_plan', {}) 140 | current_plan = self.state_estimate or {"steps": [], "confidence": 0} 141 | 142 | prompt = PromptTemplates.KALMAN_REFINEMENT.format( 143 | current_plan=json.dumps(current_plan), 144 | new_data=json.dumps(new_data) 145 | ) 146 | 147 | response = await self._call_llm(prompt, temperature=0.3) 148 | new_estimate = self._parse_json_response(response) 149 | 150 | if self.state_estimate: 151 | kalman_gain = self.uncertainty / (self.uncertainty + self.measurement_noise) 152 | self.uncertainty = (1 - kalman_gain) * self.uncertainty + self.process_noise 153 | else: 154 | self.state_estimate = new_estimate 155 | 156 | confidence = 1 - self.uncertainty 157 | 158 | return { 159 | "plan": new_estimate, 160 | "confidence": confidence, 161 | "uncertainty": self.uncertainty 162 | } 163 | 164 | class SubtaskDecompositionPlanner(BasePlanner): 165 | async def plan(self, task: str) -> Dict[str, Any]: 166 | prompt = PromptTemplates.SUBTASK_DECOMPOSITION.format(task=task) 167 | response = await self._call_llm(prompt) 168 | result = self._parse_json_response(response) 169 | 170 | if "error" in result: 171 | logger.error(f"Error in subtask decomposition: {result['error']}") 172 | return { 173 | "subtasks": [], 174 | "detailed_plans": {}, 175 | "total_effort": 0, 176 | "schedule": [] 177 | } 178 | 179 | subtasks = result.get("subtasks", []) 180 | detailed_plans = { 181 | subtask["id"]: subtask 182 | for subtask in subtasks 183 | } 184 | 185 | total_effort = sum( 186 | subtask.get("effort_hours", 0) 187 | for subtask in subtasks 188 | ) 189 | 190 | return { 191 | "subtasks": subtasks, 192 | "detailed_plans": detailed_plans, 193 | "total_effort": total_effort, 194 | 
"schedule": self._create_schedule(detailed_plans) 195 | } 196 | 197 | def _create_schedule(self, plans: Dict[str, Dict]) -> List[Dict[str, Any]]: 198 | schedule = [] 199 | start_date = datetime.now() 200 | 201 | for task_id, plan in plans.items(): 202 | effort = plan.get("effort_hours", 8) 203 | schedule.append({ 204 | "task_id": task_id, 205 | "start_date": start_date.isoformat(), 206 | "end_date": (start_date + timedelta(hours=effort)).isoformat(), 207 | "effort": effort 208 | }) 209 | start_date += timedelta(hours=effort) 210 | 211 | return schedule 212 | 213 | class ModelPredictivePlanner(BasePlanner): 214 | def __init__(self, config: PlanningConfig): 215 | super().__init__(config) 216 | self.prediction_horizon = 3 217 | 218 | async def plan(self, current_state: Dict[str, Any], goal_state: Dict[str, Any]) -> Dict[str, Any]: 219 | prompt = PromptTemplates.MODEL_PREDICTIVE.format( 220 | current_state=json.dumps(current_state), 221 | goal_state=json.dumps(goal_state), 222 | horizon=self.prediction_horizon 223 | ) 224 | 225 | response = await self._call_llm(prompt) 226 | predictions = self._parse_json_response(response) 227 | 228 | return { 229 | "predictions": predictions, 230 | "next_steps": predictions.get("next_steps", []), 231 | "estimated_completion": predictions.get("estimated_completion"), 232 | "risk_factors": predictions.get("risk_factors", []) 233 | } 234 | 235 | class PlanningOrchestrator: 236 | def __init__(self): 237 | self.config = PlanningConfig() 238 | self.planners = { 239 | PlanningStrategy.PSEUDO_KALMAN: PseudoKalmanPlanner(self.config), 240 | PlanningStrategy.SUBTASK_DECOMPOSITION: SubtaskDecompositionPlanner(self.config), 241 | PlanningStrategy.MODEL_PREDICTIVE: ModelPredictivePlanner(self.config), 242 | } 243 | 244 | async def create_comprehensive_plan(self, task: str) -> Dict[str, Any]: 245 | try: 246 | logger.info("Starting decomposition planning...") 247 | subtask_planner = self.planners[PlanningStrategy.SUBTASK_DECOMPOSITION] 248 | decomposition = await subtask_planner.plan(task) 249 | 250 | logger.info("Refining plan with Kalman filter...") 251 | kalman_planner = self.planners[PlanningStrategy.PSEUDO_KALMAN] 252 | refined_plan = await kalman_planner.plan({"task": task, "initial_plan": decomposition}) 253 | 254 | logger.info("Generating predictions...") 255 | predictive_planner = self.planners[PlanningStrategy.MODEL_PREDICTIVE] 256 | predictions = await predictive_planner.plan( 257 | current_state={"phase": "planning", "progress": 0}, 258 | goal_state={"phase": "completed", "progress": 100} 259 | ) 260 | 261 | return { 262 | "decomposition": decomposition, 263 | "refined_plan": refined_plan, 264 | "predictions": predictions, 265 | "total_effort": decomposition["total_effort"] 266 | } 267 | 268 | except Exception as e: 269 | logger.error(f"Error in comprehensive planning: {str(e)}") 270 | raise 271 | 272 | async def main(): 273 | task = """ 274 | Build a new e-commerce feature with: 275 | 1. Product recommendation system 276 | 2. Shopping cart optimization 277 | 3. Real-time inventory tracking 278 | 4. Performance monitoring 279 | """ 280 | 281 | orchestrator = PlanningOrchestrator() 282 | 283 | try: 284 | logger.info("Starting planning process...") 285 | result = await orchestrator.create_comprehensive_plan(task) 286 | print("Testing") 287 | logger.info("\n=== Planning Results ===") 288 | 289 | logger.info("\n1. 
Task Decomposition:") 290 | for subtask in result["decomposition"]["subtasks"]: 291 | logger.info(f"- {subtask['objective']} (Effort: {subtask.get('effort_hours', 'N/A')}h)") 292 | 293 | logger.info(f"\n2. Refined Plan Confidence: {result['refined_plan']['confidence']:.2%}") 294 | 295 | logger.info("\n3. Predictions:") 296 | for step in result["predictions"]["next_steps"]: 297 | logger.info(f"- {step}") 298 | 299 | logger.info(f"\nTotal Estimated Effort: {result['total_effort']} hours") 300 | 301 | # Save results 302 | with open("planning_results.json", "w") as f: 303 | json.dump(result, f, indent=2) 304 | 305 | logger.info("\nResults saved to 'planning_results.json'") 306 | 307 | except Exception as e: 308 | logger.error(f"Error in planning process: {str(e)}") 309 | raise 310 | 311 | if __name__ == "__main__": 312 | asyncio.run(main()) 313 | -------------------------------------------------------------------------------- /python-code-execution-agent/Readme.md: -------------------------------------------------------------------------------- 1 | # AgentPro 2 | 3 | AgentPro is an intelligent Python code generator and execution system that leverages OpenAI's GPT models to create and run Python scripts based on user prompts. 4 | 5 | ## Table of Contents 6 | 1. [Features](#features) 7 | 2. [Requirements](#requirements) 8 | 3. [Installation](#installation) 9 | 4. [Usage](#usage) 10 | 5. [How It Works](#how-it-works) 11 | 6. [Configuration](#configuration) 12 | 7. [Error Handling](#error-handling) 13 | 14 | 15 | ## Features 16 | 17 | - Generate Python code from natural language prompts 18 | - Automatically install required libraries 19 | - Execute generated code in a safe environment 20 | - Handle errors and retry code generation 21 | - Logging for debugging and monitoring 22 | 23 | ## Architecture Diagram 24 | 25 | ```mermaid 26 | graph TD 27 | A[User] -->|Provides Prompt| B[AgentPro] 28 | B -->|Sends API Request| C[OpenAI API] 29 | C -->|Returns Generated Code| B 30 | B -->|Analyzes Code| D[Library Detector] 31 | D -->|Identifies Required Libraries| E[Library Installer] 32 | E -->|Installs Libraries| F[Python Environment] 33 | B -->|Executes Code| G[Code Executor] 34 | G -->|Runs in| F 35 | G -->|Captures Output/Errors| B 36 | B -->|Displays Results| A 37 | H[.env File] -->|Provides API Key| B 38 | 39 | subgraph AgentPro System 40 | B 41 | D 42 | E 43 | G 44 | end 45 | 46 | subgraph External Services 47 | C 48 | F 49 | end 50 | 51 | style A fill:#f9f,stroke:#333,stroke-width:2px 52 | style B fill:#bbf,stroke:#333,stroke-width:2px 53 | style C fill:#bfb,stroke:#333,stroke-width:2px 54 | style F fill:#bfb,stroke:#333,stroke-width:2px 55 | style H fill:#ff9,stroke:#333,stroke-width:2px 56 | ``` 57 | 58 | ## Requirements 59 | 60 | - Python 3.6+ 61 | - OpenAI API key 62 | 63 | ## Installation 64 | 65 | 1. Create a virtual environment: 66 | ``` 67 | python -m venv venv 68 | source venv/bin/activate # On Windows, use `venv\Scripts\activate` 69 | ``` 70 | 71 | 3. Install the required packages: 72 | ``` 73 | pip install -r requirements.txt 74 | ``` 75 | 76 | 4. Set up your OpenAI API key: 77 | Create a `.env` file in the project root and add your API key: 78 | ``` 79 | OPENAI_API_KEY=your_api_key_here 80 | ``` 81 | 82 | ## Usage 83 | 84 | Run the main script: 85 | 86 | ``` 87 | python app.py 88 | ``` 89 | 90 | Follow the prompts to enter your code generation requests. Type 'exit' to quit the program. 91 | 92 | ## How It Works 93 | 94 | 1. The user provides a prompt describing the desired Python code. 
95 | 2. AgentPro uses OpenAI's GPT model to generate Python code based on the prompt. 96 | 3. The system analyzes the generated code to identify required libraries. 97 | 4. Required libraries are automatically installed using pip. 98 | 5. The code is executed in a temporary file within the current working directory. 99 | 6. Output and errors are captured and displayed to the user. 100 | 7. If errors occur, the system can retry code generation with error information. 101 | 102 | ## Configuration 103 | 104 | - Modify the `OpenAI` client initialization in the `AgentPro` class to change the API key source or other settings. 105 | - Adjust the `max_retries` parameter in the `run` method to change the number of retry attempts for code generation. 106 | 107 | ## Error Handling 108 | 109 | AgentPro implements robust error handling: 110 | - Retries API calls with exponential backoff 111 | - Catches and logs exceptions during code generation and execution 112 | - Provides informative error messages to the user 113 | 114 | -------------------------------------------------------------------------------- /python-code-execution-agent/app,py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | from openai import OpenAI 4 | from dotenv import load_dotenv 5 | from tenacity import retry, stop_after_attempt, wait_random_exponential 6 | import subprocess 7 | import tempfile 8 | import re 9 | import importlib 10 | import sys 11 | import logging 12 | 13 | # Load environment variables 14 | load_dotenv() 15 | 16 | # Set up logging 17 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') 18 | 19 | class AgentPro: 20 | def __init__(self): 21 | self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 22 | if not self.client.api_key: 23 | raise ValueError("OpenAI API key not found. Please set the OPENAI_API_KEY environment variable.") 24 | 25 | @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) 26 | def completion_with_backoff(self, **kwargs): 27 | return self.client.chat.completions.create(**kwargs) 28 | 29 | @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) 30 | def embedding_with_backoff(self, **kwargs): 31 | return self.client.embeddings.create(**kwargs) 32 | 33 | def generate_code(self, prompt, error_info=None): 34 | try: 35 | messages = [ 36 | {"role": "system", "content": "You are a Python code generator. Respond only with executable Python code, no explanations or comments except for required pip installations at the top."}, 37 | {"role": "user", "content": f"Generate Python code to {prompt}. If you need to use any external libraries, include a comment at the top of the code listing the required pip installations."} 38 | ] 39 | 40 | if error_info: 41 | messages.append({"role": "user", "content": f"The previous code generated an error. 
Please fix the following error and regenerate the code: {error_info}"}) 42 | 43 | response = self.completion_with_backoff( 44 | model="gpt-4o", 45 | messages=messages, 46 | max_tokens=4000, 47 | temperature=0.7, 48 | top_p=1, 49 | frequency_penalty=0, 50 | presence_penalty=0 51 | ) 52 | code = response.choices[0].message.content.strip() 53 | code = re.sub(r'^```python\n|^```\n|```$', '', code, flags=re.MULTILINE) 54 | code_lines = code.split('\n') 55 | while code_lines and not (code_lines[0].startswith('import') or code_lines[0].startswith('from') or code_lines[0].startswith('#')): 56 | code_lines.pop(0) 57 | return '\n'.join(code_lines) 58 | except Exception as e: 59 | logging.error(f"Error generating code: {str(e)}") 60 | raise 61 | 62 | def get_required_libraries(self, code): 63 | # Improved regex pattern to handle multiple libraries in a single line 64 | libraries = re.findall(r'#\s*(?:Required pip installations:|pip install)\s*((?:[\w-]+(?:\s*,\s*)?)+)', code) 65 | if libraries: 66 | # Flatten the list and split by commas if present 67 | libraries = [lib.strip() for sublist in libraries for lib in sublist.split(',')] 68 | 69 | if not libraries: 70 | # If regex fails, use LLM to extract libraries 71 | response = self.completion_with_backoff( 72 | model="gpt-4o", 73 | messages=[ 74 | {"role": "system", "content": "You are a helpful assistant that identifies required Python libraries from code."}, 75 | {"role": "user", "content": f"Please list all the external libraries that need to be installed for this code to run. Respond with only the library names, separated by commas:\n\n{code}"} 76 | ], 77 | max_tokens=100, 78 | temperature=0.3 79 | ) 80 | libraries = [lib.strip() for lib in response.choices[0].message.content.split(',')] 81 | logging.info("Libraries",libraries) 82 | return libraries 83 | 84 | def install_libraries(self, code): 85 | libraries = self.get_required_libraries(code) 86 | if libraries: 87 | logging.info("Installing required libraries...") 88 | for lib in libraries: 89 | try: 90 | importlib.import_module(lib.replace('-', '_')) 91 | logging.info(f"{lib} is already installed.") 92 | except ImportError: 93 | logging.info(f"Installing {lib}...") 94 | try: 95 | subprocess.check_call([sys.executable, "-m", "pip", "install", lib]) 96 | except subprocess.CalledProcessError as e: 97 | logging.error(f"Failed to install {lib}: {str(e)}") 98 | raise 99 | logging.info("Libraries installed successfully.") 100 | 101 | def execute_code(self, code): 102 | current_dir = os.getcwd() 103 | with tempfile.NamedTemporaryFile(mode='w', suffix='.py', dir=current_dir, delete=False, encoding='utf-8') as temp_file: 104 | temp_file.write(code) 105 | temp_file_path = temp_file.name 106 | 107 | try: 108 | if platform.system() == "Windows": 109 | activate_cmd = r"venv\Scripts\activate.bat" 110 | else: 111 | activate_cmd = "source venv/bin/activate" 112 | 113 | run_script = f"python {os.path.basename(temp_file_path)}" 114 | full_command = f"{activate_cmd} && {run_script}" 115 | 116 | if platform.system() == "Windows": 117 | result = subprocess.run(full_command, 118 | shell=True, 119 | cwd=current_dir, 120 | capture_output=True, 121 | text=True, 122 | timeout=30) 123 | else: 124 | result = subprocess.run(['/bin/bash', '-c', full_command], 125 | cwd=current_dir, 126 | capture_output=True, 127 | text=True, 128 | timeout=30) 129 | 130 | output = result.stdout 131 | error = result.stderr 132 | except subprocess.TimeoutExpired: 133 | output = "" 134 | error = "Execution timed out after 30 seconds." 
135 | except Exception as e: 136 | output = "" 137 | error = f"An error occurred during execution: {str(e)}" 138 | finally: 139 | os.unlink(temp_file_path) 140 | 141 | return output, error 142 | 143 | def run(self, prompt, max_retries=3): 144 | for attempt in range(max_retries): 145 | try: 146 | logging.info(f"Generating code for: {prompt}") 147 | code = self.generate_code(prompt) 148 | logging.info("Generated code:\n%s", code) 149 | 150 | self.install_libraries(code) 151 | 152 | logging.info("Executing code...") 153 | output, error = self.execute_code(code) 154 | 155 | if output: 156 | logging.info("Output:\n%s", output) 157 | if error: 158 | logging.error("Error:\n%s", error) 159 | if attempt < max_retries - 1: 160 | logging.info(f"Retrying... (Attempt {attempt + 2}/{max_retries})") 161 | continue 162 | 163 | return output, error 164 | except Exception as e: 165 | logging.error(f"An error occurred during execution: {str(e)}") 166 | if attempt < max_retries - 1: 167 | logging.info(f"Retrying... (Attempt {attempt + 2}/{max_retries})") 168 | continue 169 | raise 170 | 171 | return None, f"Failed to generate valid code after {max_retries} attempts." 172 | 173 | def main(): 174 | agent = AgentPro() 175 | 176 | print("Welcome to AgentPro!") 177 | print("Enter your prompt below. Type 'exit' to quit.") 178 | 179 | while True: 180 | prompt = input("\nEnter your prompt: ") 181 | 182 | if prompt.lower() == 'exit': 183 | print("Thank you for using AgentPro. Goodbye!") 184 | break 185 | 186 | output, error = agent.run(prompt) 187 | 188 | print("\nOutput:") 189 | print(output) 190 | 191 | if error: 192 | print("\nError:") 193 | print(error) 194 | 195 | if __name__ == "__main__": 196 | main() 197 | -------------------------------------------------------------------------------- /query-optimizer/config.py: -------------------------------------------------------------------------------- 1 | # ==================== 2 | # config.py 3 | # ==================== 4 | import os 5 | from dotenv import load_dotenv 6 | 7 | load_dotenv() 8 | 9 | class Config: 10 | # OpenAI Configuration 11 | OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") 12 | 13 | # Model Configuration 14 | EMBEDDING_MODEL = "text-embedding-ada-003" # ada-3 15 | RESPONSE_MODEL = "gpt-4.1-nano" 16 | JUDGE_MODEL = "gpt-4.1-mini" 17 | 18 | # Algorithm Parameters 19 | TRAINING_SAMPLE_SIZE = 6000 20 | TEST_SAMPLE_SIZE = 100 21 | SIMILARITY_K = 10 22 | NUM_TRIALS = 20 23 | 24 | # Response Parameters 25 | MAX_TOKENS = 1000 26 | TEMPERATURE = 0.7 27 | JUDGE_TEMPERATURE = 0.1 28 | 29 | # Dataset Configuration 30 | DATASET_NAME = "llm-blender/Unified-Feedback" 31 | 32 | # File Paths 33 | DATA_DIR = "data" 34 | RESULTS_DIR = "results" 35 | 36 | @classmethod 37 | def validate(cls): 38 | """Validate configuration""" 39 | if not cls.OPENAI_API_KEY: 40 | raise ValueError("OPENAI_API_KEY environment variable is required") 41 | 42 | # Create directories if they don't exist 43 | os.makedirs(cls.DATA_DIR, exist_ok=True) 44 | os.makedirs(cls.RESULTS_DIR, exist_ok=True) -------------------------------------------------------------------------------- /query-optimizer/data_loader.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from datasets import load_dataset 3 | from typing import Dict, List, Tuple 4 | import pickle 5 | import os 6 | from config import Config 7 | 8 | class DataLoader: 9 | def __init__(self): 10 | self.config = Config() 11 | self.dataset = None 12 | self.training_data = None 13 | 
self.test_data = None 14 | 15 | def load_dataset(self) -> None: 16 | """Load the Unified Feedback Dataset from Hugging Face""" 17 | print("Loading dataset from Hugging Face...") 18 | try: 19 | self.dataset = load_dataset(self.config.DATASET_NAME) 20 | print(f"Dataset loaded successfully. Total samples: {len(self.dataset['train'])}") 21 | except Exception as e: 22 | print(f"Error loading dataset: {e}") 23 | raise 24 | 25 | def sample_data(self) -> Tuple[pd.DataFrame, pd.DataFrame]: 26 | """Sample training and test data from the dataset""" 27 | if self.dataset is None: 28 | self.load_dataset() 29 | 30 | print(f"Sampling {self.config.TRAINING_SAMPLE_SIZE} training samples...") 31 | print(f"Sampling {self.config.TEST_SAMPLE_SIZE} test samples...") 32 | 33 | # Convert to pandas DataFrame for easier manipulation 34 | df_full = pd.DataFrame(self.dataset['train']) 35 | 36 | # Sample data 37 | df_shuffled = df_full.sample(frac=1, random_state=42).reset_index(drop=True) 38 | 39 | self.training_data = df_shuffled.head(self.config.TRAINING_SAMPLE_SIZE) 40 | self.test_data = df_shuffled.tail(self.config.TEST_SAMPLE_SIZE) 41 | 42 | print(f"Training data shape: {self.training_data.shape}") 43 | print(f"Test data shape: {self.test_data.shape}") 44 | 45 | return self.training_data, self.test_data 46 | 47 | def save_data(self, training_data: pd.DataFrame, test_data: pd.DataFrame) -> None: 48 | """Save processed data to files""" 49 | training_path = os.path.join(self.config.DATA_DIR, "training_data.pkl") 50 | test_path = os.path.join(self.config.DATA_DIR, "test_data.pkl") 51 | 52 | with open(training_path, 'wb') as f: 53 | pickle.dump(training_data, f) 54 | 55 | with open(test_path, 'wb') as f: 56 | pickle.dump(test_data, f) 57 | 58 | print(f"Data saved to {training_path} and {test_path}") 59 | 60 | def load_data(self) -> Tuple[pd.DataFrame, pd.DataFrame]: 61 | """Load processed data from files""" 62 | training_path = os.path.join(self.config.DATA_DIR, "training_data.pkl") 63 | test_path = os.path.join(self.config.DATA_DIR, "test_data.pkl") 64 | 65 | if os.path.exists(training_path) and os.path.exists(test_path): 66 | with open(training_path, 'rb') as f: 67 | training_data = pickle.load(f) 68 | 69 | with open(test_path, 'rb') as f: 70 | test_data = pickle.load(f) 71 | 72 | print("Data loaded from saved files") 73 | return training_data, test_data 74 | else: 75 | print("No saved data found. 
Loading fresh data...") 76 | return self.sample_data() -------------------------------------------------------------------------------- /query-optimizer/embedding_generator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from openai import OpenAI 3 | from typing import List, Union 4 | import time 5 | import pickle 6 | import os 7 | from config import Config 8 | from tqdm import tqdm 9 | 10 | class EmbeddingGenerator: 11 | def __init__(self): 12 | self.config = Config() 13 | self.client = OpenAI(api_key=self.config.OPENAI_API_KEY) 14 | 15 | def get_embedding(self, text: str) -> List[float]: 16 | """Generate embedding for a single text using OpenAI ada-3""" 17 | try: 18 | response = self.client.embeddings.create( 19 | model=self.config.EMBEDDING_MODEL, 20 | input=text 21 | ) 22 | return response.data[0].embedding 23 | except Exception as e: 24 | print(f"Error generating embedding: {e}") 25 | time.sleep(1) # Rate limiting 26 | return self.get_embedding(text) # Retry 27 | 28 | def get_embeddings_batch(self, texts: List[str], batch_size: int = 100) -> List[List[float]]: 29 | """Generate embeddings for a list of texts in batches""" 30 | embeddings = [] 31 | 32 | print(f"Generating embeddings for {len(texts)} texts...") 33 | 34 | for i in tqdm(range(0, len(texts), batch_size)): 35 | batch = texts[i:i + batch_size] 36 | batch_embeddings = [] 37 | 38 | for text in batch: 39 | embedding = self.get_embedding(text) 40 | batch_embeddings.append(embedding) 41 | time.sleep(0.1) # Rate limiting 42 | 43 | embeddings.extend(batch_embeddings) 44 | 45 | return embeddings 46 | 47 | def save_embeddings(self, embeddings: List[List[float]], filename: str) -> None: 48 | """Save embeddings to file""" 49 | filepath = os.path.join(self.config.DATA_DIR, filename) 50 | with open(filepath, 'wb') as f: 51 | pickle.dump(embeddings, f) 52 | print(f"Embeddings saved to {filepath}") 53 | 54 | def load_embeddings(self, filename: str) -> List[List[float]]: 55 | """Load embeddings from file""" 56 | filepath = os.path.join(self.config.DATA_DIR, filename) 57 | if os.path.exists(filepath): 58 | with open(filepath, 'rb') as f: 59 | embeddings = pickle.load(f) 60 | print(f"Embeddings loaded from {filepath}") 61 | return embeddings 62 | else: 63 | raise FileNotFoundError(f"Embeddings file not found: {filepath}") -------------------------------------------------------------------------------- /query-optimizer/evaluator.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | import pandas as pd 3 | from typing import Dict, List, Any 4 | from pydantic import BaseModel 5 | import json 6 | from config import Config 7 | 8 | class JudgeResponse(BaseModel): 9 | score: int # 0 or 1 10 | reasoning: str = "" 11 | 12 | class Evaluator: 13 | def __init__(self): 14 | self.config = Config() 15 | self.client = OpenAI(api_key=self.config.OPENAI_API_KEY) 16 | 17 | def build_judge_prompt(self, user_query: str, response: str, few_shot_examples: pd.DataFrame) -> str: 18 | """Build prompt for LLM-as-a-judge evaluation""" 19 | 20 | judge_prompt = ( 21 | "You are an expert evaluator. 
Based on the examples provided, " 22 | "evaluate if the response satisfies the user query and would likely " 23 | "receive positive feedback from the user.\n\n" 24 | "Look at the patterns in the examples below to understand what makes " 25 | "a good vs bad response:\n\n" 26 | ) 27 | 28 | # Add few-shot examples for context 29 | for i, row in few_shot_examples.iterrows(): 30 | judge_prompt += f"User Query: {row['conv_A_user']}\n" 31 | judge_prompt += f"Response: {row['conv_A_assistant']}\n" 32 | judge_prompt += f"User Feedback: {'👍 (Liked)' if row['conv_A_rating'] == 1 else '👎 (Disliked)'}\n\n" 33 | 34 | # Add current evaluation 35 | judge_prompt += ( 36 | "Now, evaluate the following response based on the patterns above:\n\n" 37 | f"User Query: {user_query}\n" 38 | f"Response: {response}\n\n" 39 | "Return a JSON with:\n" 40 | "- 'score': 1 if the response is good (likely to be liked), 0 if bad (likely to be disliked)\n" 41 | "- 'reasoning': Brief explanation of your evaluation\n\n" 42 | "JSON Response:" 43 | ) 44 | 45 | return judge_prompt 46 | 47 | def judge_response(self, user_query: str, response: str, few_shot_examples: pd.DataFrame) -> Dict[str, Any]: 48 | """Use LLM as judge to evaluate response quality""" 49 | 50 | judge_prompt = self.build_judge_prompt(user_query, response, few_shot_examples) 51 | 52 | try: 53 | # Call judge model 54 | judge_response = self.client.responses.create( 55 | model=self.config.JUDGE_MODEL, 56 | input=judge_prompt 57 | ) 58 | 59 | response_text = judge_response.output_text.strip() 60 | 61 | # Try to parse JSON response 62 | if response_text.startswith('{') and response_text.endswith('}'): 63 | parsed_response = json.loads(response_text) 64 | score = int(parsed_response.get('score', 0)) 65 | reasoning = parsed_response.get('reasoning', '') 66 | else: 67 | # Fallback: try to extract score from text 68 | if '1' in response_text and 'good' in response_text.lower(): 69 | score = 1 70 | reasoning = "Extracted from text analysis" 71 | else: 72 | score = 0 73 | reasoning = "Extracted from text analysis" 74 | 75 | return { 76 | 'score': max(0, min(1, score)), # Ensure score is 0 or 1 77 | 'reasoning': reasoning 78 | } 79 | 80 | except Exception as e: 81 | print(f"Error in judge evaluation: {e}") 82 | return {'score': 0, 'reasoning': f"Error in evaluation: {str(e)}"} 83 | 84 | def evaluate_responses(self, 85 | user_query: str, 86 | optimized_response: str, 87 | baseline_response: str, 88 | few_shot_examples: pd.DataFrame) -> Dict[str, Any]: 89 | """Evaluate both optimized and baseline responses""" 90 | 91 | optimized_eval = self.judge_response(user_query, optimized_response, few_shot_examples) 92 | baseline_eval = self.judge_response(user_query, baseline_response, few_shot_examples) 93 | 94 | return { 95 | 'optimized_score': optimized_eval['score'], 96 | 'optimized_reasoning': optimized_eval['reasoning'], 97 | 'baseline_score': baseline_eval['score'], 98 | 'baseline_reasoning': baseline_eval['reasoning'] 99 | } -------------------------------------------------------------------------------- /query-optimizer/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | from typing import List, Dict, Any 4 | from tqdm import tqdm 5 | 6 | from config import Config 7 | from data_loader import DataLoader 8 | from embedding_generator import EmbeddingGenerator 9 | from similarity_search import SimilaritySearch 10 | from prompt_optimizer import PromptOptimizer 11 | from response_generator import 
ResponseGenerator 12 | from evaluator import Evaluator 13 | from utils import Utils 14 | 15 | class QueryOptimizer: 16 | def __init__(self): 17 | self.config = Config() 18 | self.config.validate() 19 | 20 | # Initialize components 21 | self.data_loader = DataLoader() 22 | self.embedding_generator = EmbeddingGenerator() 23 | self.similarity_search = SimilaritySearch() 24 | self.prompt_optimizer = PromptOptimizer() 25 | self.response_generator = ResponseGenerator() 26 | self.evaluator = Evaluator() 27 | self.utils = Utils() 28 | 29 | # Data storage 30 | self.training_data = None 31 | self.test_data = None 32 | self.training_embeddings = None 33 | 34 | def prepare_data(self) -> None: 35 | """Prepare training and test data with embeddings""" 36 | 37 | print("Step 1: Loading and preparing data...") 38 | 39 | # Load or sample data 40 | self.training_data, self.test_data = self.data_loader.load_data() 41 | 42 | # Generate or load embeddings 43 | embeddings_file = "training_embeddings.pkl" 44 | 45 | try: 46 | self.training_embeddings = self.embedding_generator.load_embeddings(embeddings_file) 47 | except FileNotFoundError: 48 | print("Generating new embeddings...") 49 | queries = self.training_data['conv_A_user'].tolist() 50 | self.training_embeddings = self.embedding_generator.get_embeddings_batch(queries) 51 | self.embedding_generator.save_embeddings(self.training_embeddings, embeddings_file) 52 | 53 | # Add embeddings to training data 54 | self.training_data['embedding'] = self.training_embeddings 55 | 56 | print(f"Data preparation complete. Training samples: {len(self.training_data)}") 57 | 58 | def optimize_single_query(self, user_query: str) -> Dict[str, Any]: 59 | """Optimize response for a single query""" 60 | 61 | # Generate embedding for the query 62 | query_embedding = self.embedding_generator.get_embedding(user_query) 63 | 64 | # Find similar examples 65 | similar_examples = self.similarity_search.get_matched_conversations( 66 | user_query, query_embedding, self.training_data 67 | ) 68 | 69 | # Generate optimized prompt 70 | optimization_result = self.prompt_optimizer.get_optimized_prompt( 71 | user_query, similar_examples 72 | ) 73 | 74 | # Generate both responses 75 | responses = self.response_generator.generate_both_responses( 76 | user_query, optimization_result['optimized_prompt'] 77 | ) 78 | 79 | # Evaluate responses 80 | evaluation = self.evaluator.evaluate_responses( 81 | user_query, 82 | responses['optimized_response'], 83 | responses['baseline_response'], 84 | similar_examples 85 | ) 86 | 87 | return { 88 | 'query': user_query, 89 | 'optimized_prompt': optimization_result['optimized_prompt'], 90 | 'optimization_reasoning': optimization_result['reasoning'], 91 | 'optimized_response': responses['optimized_response'], 92 | 'baseline_response': responses['baseline_response'], 93 | 'optimized_score': evaluation['optimized_score'], 94 | 'baseline_score': evaluation['baseline_score'], 95 | 'optimized_reasoning': evaluation['optimized_reasoning'], 96 | 'baseline_reasoning': evaluation['baseline_reasoning'], 97 | 'similar_examples_count': len(similar_examples) 98 | } 99 | 100 | def run_evaluation(self) -> None: 101 | """Run complete evaluation on test dataset""" 102 | 103 | print("\nStep 2: Running evaluation on test dataset...") 104 | 105 | results = [] 106 | test_queries = self.test_data['conv_A_user'].tolist() 107 | 108 | # Process each test query 109 | for query in tqdm(test_queries, desc="Processing queries"): 110 | try: 111 | result = self.optimize_single_query(query) 112 | 
results.append(result) 113 | except Exception as e: 114 | print(f"Error processing query '{query[:50]}...': {e}") 115 | continue 116 | 117 | # Calculate statistics 118 | optimized_scores = [r['optimized_score'] for r in results] 119 | baseline_scores = [r['baseline_score'] for r in results] 120 | 121 | stats = self.utils.calculate_statistics(optimized_scores, baseline_scores) 122 | 123 | # Print results 124 | self.utils.print_statistics(stats) 125 | 126 | # Save results 127 | self.utils.save_results(results, stats, "evaluation_results.json") 128 | 129 | return results, stats 130 | 131 | def run_demo(self, demo_queries: List[str] = None) -> None: 132 | """Run demo with sample queries""" 133 | 134 | if demo_queries is None: 135 | demo_queries = [ 136 | "How do I reset my password?", 137 | "What is the company's vacation policy?", 138 | "How to improve team productivity?", 139 | "Explain machine learning in simple terms", 140 | "What are the best practices for remote work?" 141 | ] 142 | 143 | print("\nStep 3: Running demo with sample queries...") 144 | 145 | for query in demo_queries: 146 | print(f"\n{'='*60}") 147 | print(f"Query: {query}") 148 | print('='*60) 149 | 150 | try: 151 | result = self.optimize_single_query(query) 152 | 153 | print(f"Optimized Response Score: {result['optimized_score']}") 154 | print(f"Baseline Response Score: {result['baseline_score']}") 155 | print(f"\nOptimized Response:\n{result['optimized_response']}") 156 | print(f"\nBaseline Response:\n{result['baseline_response']}") 157 | 158 | except Exception as e: 159 | print(f"Error processing query: {e}") 160 | 161 | def main(): 162 | """Main execution function""" 163 | 164 | print("OpenAI Query Optimizer - Starting...") 165 | 166 | # Initialize optimizer 167 | optimizer = QueryOptimizer() 168 | 169 | # Prepare data 170 | optimizer.prepare_data() 171 | 172 | # Run evaluation 173 | results, stats = optimizer.run_evaluation() 174 | 175 | # Run demo 176 | optimizer.run_demo() 177 | 178 | print("\nOptimization complete!") -------------------------------------------------------------------------------- /query-optimizer/prompt_optimizer.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | import pandas as pd 3 | from typing import Dict, Any 4 | from pydantic import BaseModel 5 | import json 6 | from config import Config 7 | 8 | class OptimizedPromptResponse(BaseModel): 9 | optimized_prompt: str 10 | reasoning: str = "" 11 | 12 | class PromptOptimizer: 13 | def __init__(self): 14 | self.config = Config() 15 | self.client = OpenAI(api_key=self.config.OPENAI_API_KEY) 16 | 17 | def build_few_shot_prompt(self, user_query: str, df_examples: pd.DataFrame) -> str: 18 | """Build few-shot prompt using similar examples""" 19 | 20 | few_shot_prompt = ( 21 | "You are an expert prompt optimizer. 
Given a user query and examples of " 22 | "similar queries with user feedback, create an optimized prompt that will " 23 | "generate better responses based on the patterns you observe.\n\n" 24 | "Analyze the examples where users gave positive feedback (👍) vs negative feedback (👎) " 25 | "and create a prompt that incorporates the successful patterns.\n\n" 26 | ) 27 | 28 | few_shot_prompt += "Examples of user queries, responses, and feedback:\n\n" 29 | 30 | # Add few-shot examples 31 | for i, row in df_examples.iterrows(): 32 | few_shot_prompt += f"User Query: {row['conv_A_user']}\n" 33 | few_shot_prompt += f"Response: {row['conv_A_assistant']}\n" 34 | few_shot_prompt += f"User Feedback: {'👍 (Liked)' if row['conv_A_rating'] == 1 else '👎 (Disliked)'}\n" 35 | few_shot_prompt += f"Similarity Score: {row['similarity']:.3f}\n\n" 36 | 37 | # Add the current query 38 | few_shot_prompt += ( 39 | "Based on the patterns above, create an optimized prompt that will generate " 40 | "a better response for the following user query. Focus on the characteristics " 41 | "that led to positive feedback in the examples.\n\n" 42 | f"Target User Query: {user_query}\n\n" 43 | "Please provide:\n" 44 | "1. An optimized prompt that incorporates successful patterns\n" 45 | "2. Brief reasoning for your optimization choices\n\n" 46 | "Format your response as JSON with 'optimized_prompt' and 'reasoning' fields." 47 | ) 48 | 49 | return few_shot_prompt 50 | 51 | def get_optimized_prompt(self, user_query: str, similar_examples: pd.DataFrame) -> Dict[str, str]: 52 | """Generate optimized prompt using few-shot examples""" 53 | 54 | # Build few-shot prompt 55 | few_shot_prompt = self.build_few_shot_prompt(user_query, similar_examples) 56 | 57 | try: 58 | # Call OpenAI to generate optimized prompt 59 | response = self.client.responses.create( 60 | model=self.config.RESPONSE_MODEL, 61 | input=few_shot_prompt 62 | ) 63 | 64 | # Parse the JSON response 65 | response_text = response.output_text.strip() 66 | 67 | # Try to extract JSON from the response 68 | if response_text.startswith('{') and response_text.endswith('}'): 69 | parsed_response = json.loads(response_text) 70 | return { 71 | 'optimized_prompt': parsed_response.get('optimized_prompt', ''), 72 | 'reasoning': parsed_response.get('reasoning', '') 73 | } 74 | else: 75 | # Fallback: treat entire response as optimized prompt 76 | return { 77 | 'optimized_prompt': response_text, 78 | 'reasoning': 'Generated from few-shot examples' 79 | } 80 | 81 | except Exception as e: 82 | print(f"Error generating optimized prompt: {e}") 83 | # Fallback to a basic prompt 84 | return { 85 | 'optimized_prompt': f"Please provide a helpful and accurate response to: {user_query}", 86 | 'reasoning': 'Fallback prompt due to generation error' 87 | } -------------------------------------------------------------------------------- /query-optimizer/requirements.txt: -------------------------------------------------------------------------------- 1 | openai 2 | datasets 3 | scikit-learn 4 | numpy 5 | pandas 6 | pydantic 7 | scipy 8 | python-dotenv 9 | tqdm 10 | -------------------------------------------------------------------------------- /query-optimizer/response_generator.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | from typing import Dict, Any 3 | from config import Config 4 | 5 | class ResponseGenerator: 6 | def __init__(self): 7 | self.config = Config() 8 | self.client = OpenAI(api_key=self.config.OPENAI_API_KEY) 9 | 10 | 
def generate_response_with_optimized_prompt(self, optimized_prompt: str, user_query: str) -> str: 11 | """Generate response using optimized prompt""" 12 | 13 | final_prompt = f"{optimized_prompt}\n\nUser Query: {user_query}\n\nResponse:" 14 | 15 | try: 16 | response = self.client.responses.create( 17 | model=self.config.RESPONSE_MODEL, 18 | input=final_prompt 19 | ) 20 | return response.output_text.strip() 21 | 22 | except Exception as e: 23 | print(f"Error generating optimized response: {e}") 24 | return f"Error generating response: {str(e)}" 25 | 26 | def generate_baseline_response(self, user_query: str) -> str: 27 | """Generate baseline response without optimization""" 28 | 29 | baseline_prompt = f"Please provide a helpful response to the following query:\n\nUser Query: {user_query}\n\nResponse:" 30 | 31 | try: 32 | response = self.client.responses.create( 33 | model=self.config.RESPONSE_MODEL, 34 | input=baseline_prompt 35 | ) 36 | return response.output_text.strip() 37 | 38 | except Exception as e: 39 | print(f"Error generating baseline response: {e}") 40 | return f"Error generating response: {str(e)}" 41 | 42 | def generate_both_responses(self, user_query: str, optimized_prompt: str) -> Dict[str, str]: 43 | """Generate both optimized and baseline responses""" 44 | 45 | return { 46 | 'optimized_response': self.generate_response_with_optimized_prompt(optimized_prompt, user_query), 47 | 'baseline_response': self.generate_baseline_response(user_query) 48 | } -------------------------------------------------------------------------------- /query-optimizer/similarity_search.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn.metrics.pairwise import cosine_similarity 4 | from typing import List, Tuple 5 | from config import Config 6 | 7 | class SimilaritySearch: 8 | def __init__(self): 9 | self.config = Config() 10 | 11 | def compute_cosine_similarity(self, embedding1: List[float], embedding2: List[float]) -> float: 12 | """Compute cosine similarity between two embeddings""" 13 | embedding1 = np.array(embedding1).reshape(1, -1) 14 | embedding2 = np.array(embedding2).reshape(1, -1) 15 | return cosine_similarity(embedding1, embedding2)[0][0] 16 | 17 | def find_similar_examples(self, 18 | query_embedding: List[float], 19 | df: pd.DataFrame, 20 | k: int = None) -> pd.DataFrame: 21 | """Find K most similar examples for a given query embedding""" 22 | if k is None: 23 | k = self.config.SIMILARITY_K 24 | 25 | # Compute similarity with each row in the DataFrame 26 | similarities = [] 27 | for _, row in df.iterrows(): 28 | similarity = self.compute_cosine_similarity(query_embedding, row['embedding']) 29 | similarities.append(similarity) 30 | 31 | df_copy = df.copy() 32 | df_copy['similarity'] = similarities 33 | 34 | # Sort by similarity score (descending order) 35 | df_sorted = df_copy.sort_values(by='similarity', ascending=False) 36 | 37 | # Return top K matches 38 | top_matches = df_sorted.head(k) 39 | return top_matches 40 | 41 | def get_matched_conversations(self, 42 | query: str, 43 | query_embedding: List[float], 44 | df: pd.DataFrame, 45 | k: int = None) -> pd.DataFrame: 46 | """Get similar conversations for a given query""" 47 | similar_examples = self.find_similar_examples(query_embedding, df, k) 48 | 49 | return similar_examples[['conv_A_user', 'conv_A_assistant', 'conv_A_rating', 'similarity']] 50 | -------------------------------------------------------------------------------- 
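A quick way to sanity-check the retrieval step implemented in similarity_search.py is to run it against a tiny hand-built DataFrame instead of the sampled Unified-Feedback data. The sketch below is illustrative only and not part of the repository; it assumes it is run from the query-optimizer directory, reuses the column names the module expects (`conv_A_user`, `conv_A_assistant`, `conv_A_rating`, `embedding`), and substitutes short hand-written vectors for real OpenAI embeddings, so no API key or network call is needed.

```python
# Minimal, hypothetical sketch: exercise SimilaritySearch on toy data (not part of the repo).
import pandas as pd
from similarity_search import SimilaritySearch

# Three fake "training" rows with 3-dimensional stand-in embeddings.
df = pd.DataFrame({
    "conv_A_user": [
        "How do I reset my password?",
        "What is the vacation policy?",
        "Explain machine learning in simple terms",
    ],
    "conv_A_assistant": [
        "Open Settings > Security and choose 'Reset password'.",
        "Full-time employees accrue 1.5 vacation days per month.",
        "Machine learning finds patterns in data to make predictions.",
    ],
    "conv_A_rating": [1, 0, 1],
    "embedding": [[0.9, 0.1, 0.0], [0.1, 0.8, 0.1], [0.0, 0.2, 0.9]],
})

searcher = SimilaritySearch()
# Pretend this vector came from EmbeddingGenerator.get_embedding("I forgot my login password").
query_embedding = [0.85, 0.15, 0.05]

matches = searcher.get_matched_conversations(
    "I forgot my login password", query_embedding, df, k=2
)
print(matches[["conv_A_user", "similarity"]])
```

In the full pipeline, `QueryOptimizer.optimize_single_query` makes the same call with real embedding vectors and `k` defaulting to `Config.SIMILARITY_K`.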
/query-optimizer/utils.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from scipy import stats 4 | from typing import List, Dict, Any 5 | import json 6 | import os 7 | from config import Config 8 | 9 | class Utils: 10 | def __init__(self): 11 | self.config = Config() 12 | 13 | @staticmethod 14 | def calculate_statistics(optimized_scores: List[int], baseline_scores: List[int]) -> Dict[str, float]: 15 | """Calculate statistical metrics for the evaluation""" 16 | 17 | optimized_mean = np.mean(optimized_scores) 18 | baseline_mean = np.mean(baseline_scores) 19 | improvement = optimized_mean - baseline_mean 20 | improvement_percent = (improvement / baseline_mean) * 100 if baseline_mean > 0 else 0 21 | 22 | # Paired t-test 23 | t_stat, p_value = stats.ttest_rel(optimized_scores, baseline_scores) 24 | 25 | return { 26 | 'optimized_mean': optimized_mean, 27 | 'baseline_mean': baseline_mean, 28 | 'improvement': improvement, 29 | 'improvement_percent': improvement_percent, 30 | 't_statistic': t_stat, 31 | 'p_value': p_value, 32 | 'is_significant': p_value < 0.05 33 | } 34 | 35 | def save_results(self, results: List[Dict[str, Any]], stats: Dict[str, float], filename: str) -> None: 36 | """Save evaluation results to file""" 37 | 38 | output = { 39 | 'statistics': stats, 40 | 'detailed_results': results, 41 | 'config': { 42 | 'training_sample_size': self.config.TRAINING_SAMPLE_SIZE, 43 | 'test_sample_size': self.config.TEST_SAMPLE_SIZE, 44 | 'similarity_k': self.config.SIMILARITY_K, 45 | 'embedding_model': self.config.EMBEDDING_MODEL, 46 | 'response_model': self.config.RESPONSE_MODEL 47 | } 48 | } 49 | 50 | filepath = os.path.join(self.config.RESULTS_DIR, filename) 51 | with open(filepath, 'w') as f: 52 | json.dump(output, f, indent=2) 53 | 54 | print(f"Results saved to {filepath}") 55 | 56 | @staticmethod 57 | def print_statistics(stats: Dict[str, float]) -> None: 58 | """Print statistical results in a formatted way""" 59 | 60 | print("\n" + "="*50) 61 | print("EVALUATION RESULTS") 62 | print("="*50) 63 | print(f"Optimized Mean Score: {stats['optimized_mean']:.4f}") 64 | print(f"Baseline Mean Score: {stats['baseline_mean']:.4f}") 65 | print(f"Absolute Improvement: {stats['improvement']:.4f}") 66 | print(f"Percentage Improvement: {stats['improvement_percent']:.2f}%") 67 | print(f"T-statistic: {stats['t_statistic']:.4f}") 68 | print(f"P-value: {stats['p_value']:.6f}") 69 | print(f"Statistically Significant: {'Yes' if stats['is_significant'] else 'No'}") 70 | print("="*50) -------------------------------------------------------------------------------- /reasoning-mode/requirements.txt: -------------------------------------------------------------------------------- 1 | openai 2 | tenacity 3 | python-dotenv 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | annotated-types==0.7.0 2 | anyio==4.4.0 3 | certifi==2024.7.4 4 | colorama==0.4.6 5 | distro==1.9.0 6 | h11==0.14.0 7 | httpcore==1.0.5 8 | httpx==0.27.0 9 | idna==3.7 10 | jiter==0.5.0 11 | openai==1.40.6 12 | pydantic==2.8.2 13 | pydantic_core==2.20.1 14 | python-dotenv==1.0.1 15 | sniffio==1.3.1 16 | tqdm==4.66.5 17 | typing_extensions==4.12.2 18 | -------------------------------------------------------------------------------- /reverse-reasoning/Readme.md: 
-------------------------------------------------------------------------------- 1 | # Bidirectional Reasoning in LLMs 2 | 3 | This repository implements bidirectional reasoning for Large Language Models, inspired by the research paper "Reverse Thinking Makes LLMs Stronger Reasoners" (UNC Chapel Hill & Google Research). The implementation demonstrates how LLMs can validate their reasoning by thinking both forward and backward, similar to human problem-solving processes. 4 | 5 | ## Extended Architecture Diagram 6 | 7 | ```mermaid 8 | flowchart TB 9 | subgraph Client["Client Layer"] 10 | UI[User Interface] 11 | API[API Endpoints] 12 | end 13 | 14 | subgraph Core["Core Processing Layer"] 15 | direction TB 16 | QP[Question Processor] 17 | ES[Embedding Service] 18 | RS[Reasoning Service] 19 | VS[Verification Service] 20 | end 21 | 22 | subgraph Models["Model Layer"] 23 | GPT4[GPT-4 Teacher Model] 24 | ADA[ADA Embeddings Model] 25 | GPT4T[GPT-4 Turbo Student Model] 26 | end 27 | 28 | subgraph Storage["Storage Layer"] 29 | VStore[Vector Store] 30 | QStore[Question Store] 31 | RStore[Reasoning Chain Store] 32 | end 33 | 34 | subgraph External["External Services"] 35 | OpenAI[OpenAI API] 36 | end 37 | 38 | %% Client Layer Connections 39 | UI -->|HTTP Request| API 40 | API -->|Forward Request| QP 41 | 42 | %% Core Layer Internal Connections 43 | QP -->|Get Embeddings| ES 44 | QP -->|Process Question| RS 45 | RS -->|Verify Reasoning| VS 46 | ES -->|Store Vectors| VStore 47 | 48 | %% Model Layer Connections 49 | ES -->|Embedding Request| ADA 50 | RS -->|Forward Reasoning| GPT4 51 | RS -->|Generate Answer| GPT4T 52 | VS -->|Backward Reasoning| GPT4 53 | 54 | %% Storage Layer Connections 55 | VStore -->|Retrieve Similar| RS 56 | QStore -->|Store Questions| QP 57 | RStore -->|Store Chains| RS 58 | 59 | %% External Service Connections 60 | ADA -->|API Call| OpenAI 61 | GPT4 -->|API Call| OpenAI 62 | GPT4T -->|API Call| OpenAI 63 | 64 | %% Data Flow Annotations 65 | QP -->|"1. Process Input"| ES 66 | ES -->|"2. Get Embeddings"| VStore 67 | VStore -->|"3. Find Similar"| RS 68 | RS -->|"4. Forward Logic"| VS 69 | VS -->|"5. Verify & Store"| RStore 70 | 71 | classDef primary fill:#2563eb,stroke:#1e40af,stroke-width:2px,color:#fff 72 | classDef secondary fill:#10b981,stroke:#059669,stroke-width:2px,color:#fff 73 | classDef storage fill:#6366f1,stroke:#4f46e5,stroke-width:2px,color:#fff 74 | classDef external fill:#f59e0b,stroke:#d97706,stroke-width:2px,color:#fff 75 | 76 | class UI,API primary 77 | class QP,ES,RS,VS secondary 78 | class VStore,QStore,RStore storage 79 | class OpenAI external 80 | class GPT4,ADA,GPT4T external 81 | ``` 82 | 83 | ## 🚀 Key Features 84 | 85 | - Forward and backward reasoning paths 86 | - Similarity-based case retrieval 87 | - Consistency verification 88 | - In-memory vector storage 89 | - Configurable model selection (GPT-4, other compatible models) 90 | 91 | ## 💡 How It Works 92 | 93 | The system implements a four-stage process for enhanced reasoning: 94 | 95 | ### 1. Input Processing 96 | - Takes a question as input 97 | - Generates embeddings using OpenAI's embedding model 98 | - Finds similar cases from the knowledge base 99 | 100 | ### 2. Forward Reasoning 101 | - Retrieves context from similar cases 102 | - Generates step-by-step forward reasoning 103 | - Produces an initial answer 104 | 105 | ### 3. 
Backward Verification 106 | - Generates a reverse question from the answer 107 | - Performs backward reasoning 108 | - Verifies consistency between forward and backward paths 109 | 110 | ### 4. Result Processing 111 | - Stores verified results in the knowledge base 112 | - Updates similar cases database 113 | - Discards inconsistent results 114 | 115 | ## 🛠️ Installation 116 | 117 | ```bash 118 | # Clone the repository 119 | git clone https://github.com/yourusername/bidirectional-reasoning.git 120 | 121 | # Install dependencies 122 | pip install -r requirements.txt 123 | 124 | # Set up environment variables 125 | cp .env.example .env 126 | # Add your OpenAI API key to .env 127 | ``` 128 | 129 | ## 📋 Requirements 130 | 131 | - Python 3.8+ 132 | - OpenAI API key 133 | - Dependencies: 134 | - openai 135 | - python-dotenv 136 | - numpy 137 | - scipy 138 | - tenacity 139 | 140 | ## 🔧 Usage 141 | 142 | ```python 143 | from reasoning_service import HealthcareReasoningService 144 | 145 | # Initialize the service 146 | service = HealthcareReasoningService() 147 | 148 | # Process a question 149 | async def ask_question(): 150 | question = "Your medical question here" 151 | response = await service.answer_question(question) 152 | print(response) 153 | ``` 154 | 155 | ## 🌟 Example 156 | 157 | ```python 158 | # Example medical diagnosis question 159 | question = "A 50-year-old patient presents with chest tightness and arm pain after exercise. What should be the immediate assessment?" 160 | 161 | # Get response with both forward and backward reasoning 162 | response = await service.answer_question(question) 163 | ``` 164 | 165 | ## 🔄 Process Flow 166 | 167 | 1. **Question Input** 168 | - User submits a question 169 | - System generates embeddings 170 | - Similar cases are retrieved 171 | 172 | 2. **Forward Analysis** 173 | - Context compilation from similar cases 174 | - Step-by-step forward reasoning 175 | - Initial answer generation 176 | 177 | 3. **Backward Verification** 178 | - Generate reverse question 179 | - Perform backward reasoning 180 | - Verify consistency 181 | 182 | 4. **Result Handling** 183 | - Store verified results 184 | - Update knowledge base 185 | - Handle inconsistencies 186 | 187 | ## 🔍 Current Limitations 188 | 189 | - In-memory storage (can be extended to vector databases) 190 | - Limited to the context window of the underlying LLM 191 | - Requires API calls for each reasoning step 192 | - Performance depends on the quality of similar cases 193 | 194 | ## 🚀 Future Improvements 195 | 196 | 1. Integration with dedicated vector databases 197 | 2. Model fine-tuning for specific domains 198 | 3. Batch processing capabilities 199 | 4. Caching mechanism for frequent queries 200 | 5. 
Extended validation metrics 201 | -------------------------------------------------------------------------------- /reverse-reasoning/app.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Optional 2 | from dataclasses import dataclass 3 | from enum import Enum 4 | import os 5 | from openai import OpenAI 6 | from dotenv import load_dotenv 7 | from tenacity import retry, stop_after_attempt, wait_random_exponential 8 | import numpy as np 9 | from scipy.spatial.distance import cosine 10 | 11 | # Load environment variables 12 | load_dotenv() 13 | 14 | # Initialize OpenAI client 15 | client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 16 | 17 | class QuestionType(str, Enum): 18 | DIAGNOSIS = "diagnosis" 19 | TREATMENT = "treatment" 20 | MEDICATION = "medication" 21 | PROCEDURE = "procedure" 22 | PREVENTIVE_CARE = "preventive_care" 23 | 24 | @dataclass 25 | class ReasoningChain: 26 | forward_reasoning: str 27 | backward_question: str 28 | backward_reasoning: str 29 | 30 | @dataclass 31 | class Question: 32 | question: str 33 | answer: str 34 | question_type: QuestionType 35 | reasoning_chain: ReasoningChain 36 | embedding: Optional[List[float]] = None 37 | 38 | class HealthcareReasoningService: 39 | def __init__(self): 40 | self.questions: List[Question] = [] 41 | # Initialize with example healthcare questions 42 | self._initialize_examples() 43 | 44 | def _initialize_examples(self): 45 | """Initialize with example healthcare questions and their reasoning chains.""" 46 | examples = [ 47 | { 48 | "question": "A 45-year-old patient presents with sudden chest pain, shortness of breath, and sweating. What is the most likely diagnosis?", 49 | "answer": "Acute Myocardial Infarction (Heart Attack)", 50 | "type": QuestionType.DIAGNOSIS, 51 | "forward_reasoning": """1. Key symptoms analysis: 52 | - Sudden chest pain (classic heart attack symptom) 53 | - Shortness of breath (indicates reduced oxygen) 54 | - Sweating (common autonomic response) 55 | 2. Age consideration: 45 years old is within risk group 56 | 3. Symptom combination strongly suggests cardiac origin 57 | 4. Given the acute onset and classic triad of symptoms, acute MI is most likely""", 58 | "backward_question": "What symptoms and patient characteristics would typically lead to a diagnosis of Acute Myocardial Infarction?", 59 | "backward_reasoning": """1. Start with diagnosis of MI 60 | 2. Typical presentation includes: 61 | - Age usually >40 years 62 | - Classic triad: chest pain, dyspnea, diaphoresis 63 | 3. These match our patient's presentation 64 | 4. Therefore, working backward confirms the diagnostic reasoning""" 65 | }, 66 | { 67 | "question": "A patient with type 2 diabetes has consistent blood glucose readings above 200 mg/dL despite lifestyle changes. What is the appropriate next step in treatment?", 68 | "answer": "Initiate Metformin therapy", 69 | "type": QuestionType.TREATMENT, 70 | "forward_reasoning": """1. Assessment of current situation: 71 | - Type 2 diabetes confirmed 72 | - Blood glucose consistently elevated 73 | - Lifestyle changes already attempted 74 | 2. Treatment guidelines analysis: 75 | - Metformin is first-line medication 76 | - Well-established safety profile 77 | - Effective at lowering blood glucose 78 | 3. Therefore, Metformin is the appropriate next step""", 79 | "backward_question": "When would Metformin be the most appropriate treatment choice for a diabetic patient?", 80 | "backward_reasoning": """1. 
Consider Metformin's role as first-line therapy 81 | 2. Work backward to patient conditions: 82 | - Need for medication beyond lifestyle changes 83 | - Type 2 diabetes 84 | - Elevated blood glucose 85 | 3. These match our patient's scenario 86 | 4. Confirms Metformin as appropriate choice""" 87 | } 88 | ] 89 | 90 | for example in examples: 91 | chain = ReasoningChain( 92 | forward_reasoning=example["forward_reasoning"], 93 | backward_question=example["backward_question"], 94 | backward_reasoning=example["backward_reasoning"] 95 | ) 96 | question = Question( 97 | question=example["question"], 98 | answer=example["answer"], 99 | question_type=example["type"], 100 | reasoning_chain=chain 101 | ) 102 | self._add_question(question) 103 | 104 | @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(3)) 105 | async def _get_embedding(self, text: str) -> List[float]: 106 | """Get embedding for text using OpenAI's embedding model.""" 107 | response = client.embeddings.create( 108 | model="text-embedding-ada-002", 109 | input=text 110 | ) 111 | return response.data[0].embedding 112 | 113 | async def _add_question(self, question: Question): 114 | """Add a question to the in-memory storage with its embedding.""" 115 | question.embedding = await self._get_embedding(question.question) 116 | self.questions.append(question) 117 | 118 | def _find_similar_questions(self, embedding: List[float], limit: int = 2) -> List[Question]: 119 | """Find similar questions using cosine similarity.""" 120 | similarities = [] 121 | for q in self.questions: 122 | if q.embedding is not None: 123 | similarity = 1 - cosine(embedding, q.embedding) 124 | similarities.append((similarity, q)) 125 | 126 | # Sort by similarity and return top matches 127 | similarities.sort(reverse=True, key=lambda x: x[0]) 128 | return [q for _, q in similarities[:limit]] 129 | 130 | @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(3)) 131 | async def answer_question(self, question: str) -> dict: 132 | """Process a new question using the REVTHINK approach.""" 133 | # Get embedding for the question 134 | question_embedding = await self._get_embedding(question) 135 | 136 | # Find similar questions 137 | similar_questions = self._find_similar_questions(question_embedding) 138 | 139 | # Prepare context from similar questions 140 | context = "" 141 | for q in similar_questions: 142 | context += f"\nSimilar Question: {q.question}\n" 143 | context += f"Answer: {q.answer}\n" 144 | context += f"Forward Reasoning: {q.reasoning_chain.forward_reasoning}\n" 145 | context += f"Backward Question: {q.reasoning_chain.backward_question}\n" 146 | context += f"Backward Reasoning: {q.reasoning_chain.backward_reasoning}\n" 147 | 148 | # Generate response using the student model 149 | completion = client.chat.completions.create( 150 | model="gpt-4o", # Replace with your desired model 151 | messages=[ 152 | {"role": "user", "content": f"""Use these similar healthcare scenarios as reference: 153 | {context} 154 | 155 | Now, answer this question with detailed reasoning: 156 | {question} 157 | 158 | Provide: 159 | 1. Your step-by-step reasoning 160 | 2. The final answer 161 | 3. 
A verification approach (backward reasoning)"""} 162 | ], 163 | temperature=0.7 164 | ) 165 | 166 | return { 167 | "response": completion.choices[0].message.content, 168 | "similar_questions": [ 169 | { 170 | "question": q.question, 171 | "answer": q.answer, 172 | "reasoning_chain": { 173 | "forward_reasoning": q.reasoning_chain.forward_reasoning, 174 | "backward_question": q.reasoning_chain.backward_question, 175 | "backward_reasoning": q.reasoning_chain.backward_reasoning 176 | } 177 | } 178 | for q in similar_questions 179 | ] 180 | } 181 | 182 | # Example usage 183 | async def main(): 184 | # Initialize the service 185 | service = HealthcareReasoningService() 186 | 187 | # Example question 188 | question = "A 50-year-old patient presents with chest tightness and arm pain after exercise. What should be the immediate assessment?" 189 | 190 | # Get response 191 | response = await service.answer_question(question) 192 | 193 | print("Response:", response["response"]) 194 | print("\nSimilar Questions Used:") 195 | for q in response["similar_questions"]: 196 | print(f"\nQuestion: {q['question']}") 197 | print(f"Answer: {q['answer']}") 198 | 199 | if __name__ == "__main__": 200 | import asyncio 201 | asyncio.run(main()) 202 | -------------------------------------------------------------------------------- /sandbox-rag/Readme.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Sandbox RAG System 4 | 5 | This README provides step-by-step instructions to set up and run a Qdrant-based Retrieval Augmented Generation (RAG) system. 6 | 7 | ## Prerequisites 8 | 9 | 1. **Docker:** Ensure Docker is installed and running on your system. You can download Docker from [here](https://www.docker.com/products/docker-desktop). 10 | 2. **Python Environment:** Make sure you have Python installed along with `pip`. 11 | 12 | ## Setup 13 | 14 | ### 1. Pull the Qdrant Docker Image 15 | 16 | To get started, you need to pull the Qdrant Docker image from Docker Hub: 17 | 18 | ```bash 19 | docker pull qdrant/qdrant 20 | ``` 21 | 22 | ### 2. Run the Qdrant Docker Container 23 | 24 | Run the following command to start the Qdrant service in a Docker container: 25 | 26 | ```bash 27 | docker run -p 6333:6333 -p 6334:6334 \ 28 | -v $(pwd)/qdrant_storage:/qdrant/storage \ 29 | qdrant/qdrant 30 | ``` 31 | 32 | This command will: 33 | - Expose Qdrant on ports `6333` (for HTTP) and `6334` (for gRPC). 34 | - Mount a local directory (`qdrant_storage`) to persist data. 35 | 36 | ### 3. Install Python Dependencies 37 | 38 | Ensure you have all required Python packages by installing them via `pip`: 39 | 40 | ```bash 41 | pip install -r requirements.txt 42 | ``` 43 | 44 | ## Running the System 45 | 46 | ### 1. Run the Chunking Script 47 | 48 | Before running the main RAG system, execute the `chunking.py` script to preprocess data: 49 | 50 | ```bash 51 | python chunking.py 52 | ``` 53 | 54 | ### 2. Run the RAG System 55 | 56 | After chunking is complete, run the `rag-system.py` script to interact with the RAG system and ask questions: 57 | 58 | ```bash 59 | python rag-system.py 60 | ``` 61 | 62 | ## Usage 63 | 64 | - Start by running the `chunking.py` script to prepare the data. 65 | - Then, use the `rag-system.py` script to perform queries and retrieve augmented results. 66 | 67 | 68 | ### Chunk Visualizer 69 | 70 | It is a web-based application that allows you to visualize and analyze data stored in a Qdrant vector database. 
This tool is particularly useful for exploring embeddings, chunks, and associated metadata in your Qdrant collection. 71 | #### Features 72 | 73 | 1. **Raw Data Display**: View the raw data fetched from your Qdrant collection, including vectors, chunks, and margins. 74 | 75 | 2. **Data Overview**: Displays a DataFrame with chunks and margins for a quick summary of your data. 76 | 77 | 3. **Embedding Visualization**: Creates a 2D scatter plot of your embeddings using t-SNE dimensionality reduction, allowing you to visualize the relationships between your vectors. 78 | 79 | 4. **Chunk and Margin Analysis**: 80 | - Histogram of chunk lengths 81 | - Histogram of margin values 82 | 83 | 5. **Search Functionality**: Allows you to search for specific terms within the chunks. 84 | 85 | 6. **Vector Statistics**: Provides basic statistics about the vector lengths and their distribution. 86 | 87 | 7. **Similarity Search**: Performs a random similarity search to demonstrate how similar chunks can be found in your collection. 88 | 89 | ## Additional Information 90 | 91 | - Make sure the Qdrant service is running before executing any scripts. 92 | - All data will be stored in the `qdrant_storage` directory. 93 | -------------------------------------------------------------------------------- /sandbox-rag/assets/book: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /sandbox-rag/assets/situationalawareness.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cenrax/AdvancedRAGTechniques/e84090852bc95dc1f9eeb8854f623cce22e18195/sandbox-rag/assets/situationalawareness.docx -------------------------------------------------------------------------------- /sandbox-rag/chunking.py: -------------------------------------------------------------------------------- 1 | import os 2 | from openai import OpenAI 3 | from dotenv import load_dotenv 4 | from tenacity import retry, stop_after_attempt, wait_random_exponential 5 | from docx import Document 6 | import nltk 7 | from nltk.tokenize import sent_tokenize 8 | from qdrant_client import QdrantClient 9 | from qdrant_client.http.models import Distance, VectorParams 10 | import uuid 11 | 12 | nltk.download('punkt_tab') 13 | # Download necessary NLTK data 14 | #nltk.download('punkt',download_dir="C:\\UsersSubham\\Desktop\\amentities\\code\\asyncCalls\\venv\\nltk_data") 15 | 16 | # Load environment variables 17 | load_dotenv() 18 | 19 | # Initialize OpenAI client 20 | client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 21 | 22 | # Initialize Qdrant client 23 | qdrant_client = QdrantClient("localhost", port=6333) 24 | 25 | # Retry decorators 26 | @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) 27 | def completion_with_backoff(**kwargs): 28 | return client.chat.completions.create(**kwargs) 29 | 30 | @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) 31 | def embedding_with_backoff(**kwargs): 32 | return client.embeddings.create(**kwargs) 33 | 34 | def read_docx(file_path): 35 | """ 36 | Read content from a docx file. 37 | """ 38 | doc = Document(file_path) 39 | return "\n".join([paragraph.text for paragraph in doc.paragraphs]) 40 | 41 | def chunk_text(text, max_chunk_size=4000): 42 | """ 43 | Split the text into chunks of approximately max_chunk_size tokens. 
44 | """ 45 | sentences = sent_tokenize(text) 46 | chunks = [] 47 | current_chunk = [] 48 | current_size = 0 49 | 50 | for sentence in sentences: 51 | sentence_size = len(sentence.split()) 52 | if current_size + sentence_size > max_chunk_size: 53 | chunks.append(" ".join(current_chunk)) 54 | current_chunk = [sentence] 55 | current_size = sentence_size 56 | else: 57 | current_chunk.append(sentence) 58 | current_size += sentence_size 59 | 60 | if current_chunk: 61 | chunks.append(" ".join(current_chunk)) 62 | 63 | return chunks 64 | 65 | def generate_margin(chunk): 66 | """ 67 | Generate a margin (summary) for a given chunk of text. 68 | """ 69 | response = completion_with_backoff( 70 | model="gpt-4o-mini", 71 | messages=[ 72 | {"role": "system", "content": "You are a helpful assistant that summarizes text."}, 73 | {"role": "user", "content": f"Summarize the following text in 2-3 sentences, focusing on key information:\n\n{chunk}"} 74 | ] 75 | ) 76 | return response.choices[0].message.content 77 | 78 | def get_embedding(text): 79 | """ 80 | Get the embedding for a given text. 81 | """ 82 | response = embedding_with_backoff( 83 | model="text-embedding-ada-002", 84 | input=text 85 | ) 86 | return response.data[0].embedding 87 | 88 | def store_in_qdrant(chunk, margin, embedding): 89 | """ 90 | Store the chunk, margin, and embedding in Qdrant. 91 | """ 92 | qdrant_client.upsert( 93 | collection_name="situational_awareness", 94 | points=[ 95 | { 96 | "id": str(uuid.uuid4()), 97 | "vector": embedding, 98 | "payload": { 99 | "chunk": chunk, 100 | "margin": margin 101 | } 102 | } 103 | ] 104 | ) 105 | 106 | def process_book(file_path): 107 | """ 108 | Process the book: read, chunk, generate margins, create embeddings, and store in Qdrant. 109 | """ 110 | # Read the document 111 | print("Reading the document...") 112 | text = read_docx(file_path) 113 | 114 | # Create Qdrant collection if it doesn't exist 115 | collections = qdrant_client.get_collections().collections 116 | if not any(collection.name == "situational_awareness" for collection in collections): 117 | qdrant_client.create_collection( 118 | collection_name="situational_awareness", 119 | vectors_config=VectorParams(size=1536, distance=Distance.COSINE), 120 | ) 121 | print("Created 'situational_awareness' collection in Qdrant.") 122 | 123 | # Process the text 124 | chunks = chunk_text(text) 125 | print(f"Document split into {len(chunks)} chunks.") 126 | 127 | for i, chunk in enumerate(chunks): 128 | print(f"Processing chunk {i+1}/{len(chunks)}...") 129 | 130 | # Generate margin 131 | margin = generate_margin(chunk) 132 | 133 | # Generate embedding for the chunk 134 | embedding = get_embedding(chunk) 135 | 136 | # Store in Qdrant 137 | store_in_qdrant(chunk, margin, embedding) 138 | 139 | if i+1 == 50: 140 | break 141 | 142 | print("Processing complete. 
All chunks stored in Qdrant.") 143 | 144 | # Main execution 145 | if __name__ == "__main__": 146 | book_path = "assets/situationalawareness.docx" 147 | process_book(book_path) 148 | -------------------------------------------------------------------------------- /sandbox-rag/rag-system.py: -------------------------------------------------------------------------------- 1 | import os 2 | from openai import OpenAI 3 | from dotenv import load_dotenv 4 | from qdrant_client import QdrantClient 5 | from tenacity import retry, stop_after_attempt, wait_random_exponential 6 | 7 | # Load environment variables 8 | load_dotenv() 9 | 10 | # Initialize OpenAI client 11 | client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 12 | 13 | # Initialize Qdrant client 14 | qdrant_client = QdrantClient("localhost", port=6333) 15 | 16 | # Retry decorators 17 | @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) 18 | def completion_with_backoff(**kwargs): 19 | return client.chat.completions.create(**kwargs) 20 | 21 | @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) 22 | def embedding_with_backoff(**kwargs): 23 | return client.embeddings.create(**kwargs) 24 | 25 | def get_embedding(text): 26 | """ 27 | Get the embedding for a given text. 28 | """ 29 | response = embedding_with_backoff( 30 | model="text-embedding-ada-002", 31 | input=text 32 | ) 33 | return response.data[0].embedding 34 | 35 | def search_similar_chunks(query, top_k=5): 36 | """ 37 | Search for similar chunks in the Qdrant database. 38 | """ 39 | query_vector = get_embedding(query) 40 | search_result = qdrant_client.search( 41 | collection_name="situational_awareness", 42 | query_vector=query_vector, 43 | limit=top_k 44 | ) 45 | return search_result 46 | 47 | def format_context(similar_chunks): 48 | """ 49 | Format the similar chunks and their margins into a context string. 50 | """ 51 | context = "" 52 | for i, chunk in enumerate(similar_chunks): 53 | context += f"Chunk {i+1}:\n" 54 | context += f"Content: {chunk.payload['chunk']}\n" 55 | context += f"Summary: {chunk.payload['margin']}\n\n" 56 | return context 57 | 58 | def generate_answer(query, context): 59 | """ 60 | Generate an answer using the GPT model with the given context. 61 | """ 62 | response = completion_with_backoff( 63 | model="gpt-4o-mini", 64 | messages=[ 65 | {"role": "system", "content": "You are a helpful assistant that answers questions based on the provided context. If the answer cannot be found in the context, say so."}, 66 | {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {query}\n\nAnswer:"} 67 | ] 68 | ) 69 | return response.choices[0].message.content 70 | 71 | def rag_query(query): 72 | """ 73 | Perform a RAG query: retrieve similar chunks, format context, and generate an answer. 74 | """ 75 | print(f"Query: {query}") 76 | print("Searching for relevant chunks...") 77 | similar_chunks = search_similar_chunks(query) 78 | 79 | print("Formatting context...") 80 | context = format_context(similar_chunks) 81 | 82 | print("Generating answer...") 83 | answer = generate_answer(query, context) 84 | 85 | return answer 86 | 87 | def interactive_rag(): 88 | """ 89 | Run an interactive RAG session. 90 | """ 91 | print("Welcome to the Situational Awareness RAG system!") 92 | print("You can ask questions about the book, or type 'quit' to exit.") 93 | 94 | while True: 95 | query = input("\nEnter your question: ") 96 | if query.lower() == 'quit': 97 | print("Thank you for using the RAG system. 
Goodbye!") 98 | break 99 | 100 | answer = rag_query(query) 101 | print("\nAnswer:", answer) 102 | 103 | if __name__ == "__main__": 104 | interactive_rag() 105 | -------------------------------------------------------------------------------- /sandbox-rag/requirements.txt: -------------------------------------------------------------------------------- 1 | altair==5.4.1 2 | annotated-types==0.7.0 3 | anyio==4.4.0 4 | attrs==24.2.0 5 | black==24.8.0 6 | blinker==1.8.2 7 | cachetools==5.5.0 8 | certifi==2024.7.4 9 | charset-normalizer==3.3.2 10 | click==8.1.7 11 | colorama==0.4.6 12 | distro==1.9.0 13 | gitdb==4.0.11 14 | GitPython==3.1.43 15 | grpcio==1.66.1 16 | grpcio-tools==1.66.1 17 | h11==0.14.0 18 | h2==4.1.0 19 | hpack==4.0.0 20 | httpcore==1.0.5 21 | httpx==0.27.0 22 | hyperframe==6.0.1 23 | idna==3.7 24 | Jinja2==3.1.4 25 | jiter==0.5.0 26 | joblib==1.4.2 27 | jsonschema==4.23.0 28 | jsonschema-specifications==2023.12.1 29 | lxml==5.3.0 30 | markdown-it-py==3.0.0 31 | MarkupSafe==2.1.5 32 | mdurl==0.1.2 33 | mypy-extensions==1.0.0 34 | narwhals==1.6.0 35 | neo4j==5.23.1 36 | nltk==3.9.1 37 | numpy==2.1.0 38 | openai==1.42.0 39 | packaging==24.1 40 | pandas==2.2.2 41 | pathspec==0.12.1 42 | pillow==10.4.0 43 | platformdirs==4.2.2 44 | plotly==5.24.0 45 | portalocker==2.10.1 46 | protobuf==5.28.0 47 | pyarrow==17.0.0 48 | pydantic==2.8.2 49 | pydantic_core==2.20.1 50 | pydeck==0.9.1 51 | Pygments==2.18.0 52 | python-dateutil==2.9.0.post0 53 | python-docx==1.1.2 54 | python-dotenv==1.0.1 55 | pytz==2024.1 56 | pywin32==306 57 | qdrant-client==1.11.1 58 | referencing==0.35.1 59 | regex==2024.7.24 60 | requests==2.32.3 61 | rich==13.8.0 62 | rpds-py==0.20.0 63 | scikit-learn==1.5.1 64 | scipy==1.14.1 65 | six==1.16.0 66 | smmap==5.0.1 67 | sniffio==1.3.1 68 | streamlit==1.38.0 69 | tenacity==8.5.0 70 | threadpoolctl==3.5.0 71 | toml==0.10.2 72 | tornado==6.4.1 73 | tqdm==4.66.5 74 | typing_extensions==4.12.2 75 | tzdata==2024.1 76 | urllib3==2.2.2 77 | watchdog==4.0.2 78 | -------------------------------------------------------------------------------- /sandbox-rag/streamlit-visualizer.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import numpy as np 3 | import pandas as pd 4 | from qdrant_client import QdrantClient 5 | from sklearn.manifold import TSNE 6 | import plotly.express as px 7 | 8 | # Initialize Qdrant client 9 | @st.cache_resource 10 | def init_qdrant_client(): 11 | return QdrantClient("localhost", port=6333) 12 | 13 | qdrant_client = init_qdrant_client() 14 | 15 | st.title("Qdrant Data Visualizer") 16 | 17 | # Fetch data from Qdrant 18 | @st.cache_data 19 | def fetch_qdrant_data(limit=1000): 20 | response = qdrant_client.scroll( 21 | collection_name="situational_awareness", 22 | limit=limit, 23 | with_vectors=True, 24 | with_payload=True 25 | ) 26 | points = response[0] 27 | if not points: 28 | st.warning("No data found in the Qdrant collection.") 29 | return np.array([]), [], [] 30 | vectors = np.array([point.vector for point in points]) 31 | chunks = [point.payload['chunk'] for point in points] 32 | margins = [point.payload['margin'] for point in points] 33 | return vectors, chunks, margins 34 | 35 | # Load data 36 | vectors, chunks, margins = fetch_qdrant_data() 37 | 38 | if len(vectors) == 0: 39 | st.warning("No data available. 
Please check your Qdrant collection.") 40 | st.stop() 41 | 42 | # Display raw data 43 | st.subheader("Raw Data") 44 | if st.checkbox("Show raw data"): 45 | raw_data = [{"vector": v.tolist(), "chunk": c, "margin": m} for v, c, m in zip(vectors, chunks, margins)] 46 | st.write(raw_data) 47 | 48 | # Create a DataFrame 49 | df = pd.DataFrame({ 50 | 'chunk': chunks, 51 | 'margin': margins 52 | }) 53 | 54 | # Display DataFrame 55 | st.subheader("Data Overview") 56 | st.dataframe(df) 57 | 58 | # Visualize embeddings 59 | st.subheader("Embedding Visualization") 60 | 61 | # Dimensionality reduction 62 | @st.cache_data 63 | def reduce_dimensions(vectors): 64 | n_samples = vectors.shape[0] 65 | perplexity = min(30, n_samples - 1) # Ensure perplexity is valid 66 | if n_samples >= 5: # t-SNE requires at least 5 samples 67 | tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity) 68 | return tsne.fit_transform(vectors) 69 | else: 70 | st.warning("Not enough data points for t-SNE. Displaying raw 2D projection.") 71 | return vectors[:, :2] # Just take the first two dimensions 72 | 73 | if vectors.shape[0] > 1: # Check if we have more than one vector 74 | vectors_2d = reduce_dimensions(vectors) 75 | 76 | # Create a DataFrame for plotting 77 | plot_df = pd.DataFrame({ 78 | 'x': vectors_2d[:, 0], 79 | 'y': vectors_2d[:, 1], 80 | 'chunk': chunks, 81 | 'margin': margins 82 | }) 83 | 84 | # Create an interactive scatter plot 85 | fig = px.scatter( 86 | plot_df, 87 | x='x', 88 | y='y', 89 | hover_data=['chunk', 'margin'], 90 | title='2D Visualization of Embeddings' 91 | ) 92 | st.plotly_chart(fig) 93 | else: 94 | st.warning("Not enough data points to create a meaningful visualization.") 95 | 96 | # Chunk and Margin Analysis 97 | st.subheader("Chunk and Margin Analysis") 98 | 99 | if len(chunks) > 0: 100 | # Chunk length distribution 101 | st.write("Chunk Length Distribution") 102 | chunk_lengths = [len(chunk) for chunk in chunks] 103 | fig_chunk_len = px.histogram(chunk_lengths, nbins=20, title="Chunk Length Distribution") 104 | st.plotly_chart(fig_chunk_len) 105 | 106 | # Margin distribution 107 | st.write("Margin Distribution") 108 | fig_margin = px.histogram(margins, nbins=20, title="Margin Distribution") 109 | st.plotly_chart(fig_margin) 110 | else: 111 | st.warning("Not enough data for chunk and margin analysis.") 112 | 113 | # Search functionality 114 | st.subheader("Search Chunks") 115 | search_query = st.text_input("Enter a search term") 116 | if search_query: 117 | filtered_df = df[df['chunk'].str.contains(search_query, case=False)] 118 | st.write(f"Found {len(filtered_df)} matches:") 119 | st.dataframe(filtered_df) 120 | 121 | # Vector statistics 122 | st.subheader("Vector Statistics") 123 | if len(vectors) > 0: 124 | vector_lengths = np.linalg.norm(vectors, axis=1) 125 | st.write(f"Average vector length: {np.mean(vector_lengths):.2f}") 126 | st.write(f"Min vector length: {np.min(vector_lengths):.2f}") 127 | st.write(f"Max vector length: {np.max(vector_lengths):.2f}") 128 | 129 | fig_vector_len = px.histogram(vector_lengths, nbins=20, title="Vector Length Distribution") 130 | st.plotly_chart(fig_vector_len) 131 | else: 132 | st.warning("Not enough data for vector statistics.") 133 | 134 | # Similarity Search 135 | st.subheader("Similarity Search") 136 | if len(vectors) > 0: 137 | if st.button("Perform Random Similarity Search"): 138 | # Select a random vector 139 | random_index = np.random.randint(0, len(vectors)) 140 | query_vector = vectors[random_index] 141 | 142 | # Perform similarity 
search 143 | search_results = qdrant_client.search( 144 | collection_name="situational_awareness", 145 | query_vector=query_vector.tolist(), 146 | limit=min(5, len(vectors)) 147 | ) 148 | 149 | st.write("Query chunk:") 150 | st.write(chunks[random_index]) 151 | 152 | st.write("Similar chunks:") 153 | for result in search_results: 154 | st.write(f"Chunk: {result.payload['chunk']}") 155 | st.write(f"Similarity: {result.score:.4f}") 156 | st.write("---") 157 | else: 158 | st.warning("Not enough data for similarity search.") 159 | -------------------------------------------------------------------------------- /self-correction/Readme.md: -------------------------------------------------------------------------------- 1 | # Reasoning Enhancement System 2 | 3 | ## Overview 4 | The Reasoning Enhancement System is an AI-powered question-answering pipeline that utilizes multiple language models to produce high-quality responses to complex questions. The system combines DeepSeek Reasoner's step-by-step reasoning capabilities with OpenAI's models for verification and refinement. 5 | 6 | ## Features 7 | - Step-by-step reasoning for complex questions across multiple domains 8 | - Self-correction mechanism to identify and fix logical errors 9 | - Final response generation that is clear and concise 10 | - Support for a variety of question types: 11 | - Complex mathematical reasoning 12 | - Logical reasoning and paradoxes 13 | - Systems thinking 14 | - Ethical dilemmas 15 | 16 | ## Architecture 17 | The system follows a three-stage pipeline: 18 | 1. **Initial Reasoning**: DeepSeek Reasoner generates detailed step-by-step reasoning 19 | 2. **Self-Correction**: GPT-4o Mini reviews the reasoning for errors and inconsistencies 20 | 3. **Final Response**: GPT-4o Mini synthesizes a clear, concise answer based on the corrected reasoning 21 | 22 | ```mermaid 23 | flowchart TD 24 | A[Load Questions] --> B[Initialize Clients] 25 | B --> C{Process Each Question} 26 | C --> D[Generate Initial Reasoning] 27 | D --> E[Self-Correct Reasoning] 28 | E --> F[Generate Final Response] 29 | F --> G[Store Results] 30 | C --> H[Next Question] 31 | H --> |More Questions| C 32 | H --> |All Done| I[Save to CSV] 33 | 34 | subgraph "DeepSeek API" 35 | D 36 | end 37 | 38 | subgraph "OpenAI API" 39 | E 40 | F 41 | end 42 | 43 | classDef api fill:#f9f,stroke:#333,stroke-width:2px 44 | classDef storage fill:#bbf,stroke:#333,stroke-width:2px 45 | classDef process fill:#dfd,stroke:#333,stroke-width:2px 46 | 47 | class D,E,F api 48 | class A,G,I storage 49 | class B,C,H process 50 | ``` 51 | 52 | ## Flow Diagram 53 | ```mermaid 54 | sequenceDiagram 55 | participant Main as Main Process 56 | participant DSR as DeepSeek Reasoner 57 | participant GPT1 as GPT-4o Mini (Correction) 58 | participant GPT2 as GPT-4o Mini (Response) 59 | participant CSV as CSV File 60 | 61 | Main->>+DSR: generate_reasoning(question) 62 | Note over DSR: System prompt: "You are a helpful assistant that thinks step by step." 
63 | DSR-->>-Main: Initial reasoning 64 | 65 | Main->>+GPT1: self_correct(initial_reasoning) 66 | Note over GPT1: Reviews for errors, inconsistencies, logical fallacies 67 | GPT1-->>-Main: Corrected reasoning 68 | 69 | Main->>+GPT2: generate_final_response(question, corrected_reasoning) 70 | Note over GPT2: Synthesizes clear, concise answer 71 | GPT2-->>-Main: Final response 72 | 73 | Main->>Main: Store results in memory 74 | 75 | Note over Main: Process repeats for each question 76 | 77 | Main->>CSV: Write all results 78 | ``` 79 | 80 | ## Requirements 81 | - Python 3.6+ 82 | - OpenAI API key 83 | - DeepSeek API key 84 | - Required Python packages: 85 | - openai 86 | - python-dotenv 87 | 88 | ## Installation 89 | 90 | 1. Clone the repository: 91 | ```bash 92 | git clone https://github.com/yourusername/reasoning-enhancement-system.git 93 | cd reasoning-enhancement-system 94 | ``` 95 | 96 | 2. Install the required packages: 97 | ```bash 98 | pip install -r requirements.txt 99 | ``` 100 | 101 | 3. Create a `.env` file in the project root with your API keys: 102 | ``` 103 | OPENAI_API_KEY=your_openai_api_key 104 | DEEPSEEK_API_KEY=your_deepseek_api_key 105 | ``` 106 | 107 | ## Usage 108 | 109 | Run the main script: 110 | ```bash 111 | python main.py 112 | ``` 113 | 114 | The script will: 115 | - Process each question in the predefined list 116 | - Generate reasoning, corrections, and final responses 117 | - Print the results to the console 118 | - Save all results to a CSV file named `enhanced_qa_results.csv` 119 | 120 | ## Customization 121 | 122 | To customize the questions or expand the system's capabilities: 123 | 124 | 1. Modify the `questions` list in the main script 125 | 2. Adjust model parameters like temperature and max_tokens for different outputs 126 | 3. 
Extend the pipeline with additional processing steps 127 | 128 | ## Output 129 | 130 | The system outputs: 131 | - Console logs showing each question and its final response 132 | - A CSV file containing: 133 | - Question ID 134 | - Question text 135 | - Initial reasoning from DeepSeek 136 | - Corrected reasoning 137 | - Final response 138 | 139 | ## Performance Considerations 140 | 141 | - API rate limits may affect processing speed for large batches 142 | - Consider costs associated with model usage, especially for longer reasoning chains 143 | - For production use, add error handling and retry mechanisms 144 | 145 | ## Future Enhancements 146 | 147 | - Web interface for interactive querying 148 | - Support for additional models and providers 149 | - Benchmarking against human expert answers 150 | - Domain-specific reasoning templates 151 | -------------------------------------------------------------------------------- /self-correction/app.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | from dotenv import load_dotenv 3 | import csv 4 | import os 5 | 6 | load_dotenv() 7 | 8 | questions = [ 9 | 10 | # Complex mathematical reasoning 11 | "If a sphere has a radius of 4 cm, and we increase its volume by 30%, what is the new radius?", 12 | "In how many ways can 8 people be seated at a round table, considering that seating arrangements are considered the same if one can be obtained from the other by rotation?", 13 | "Solve the differential equation: dy/dx + 2xy = x, with the initial condition y(0) = 1", 14 | 15 | # Logical reasoning and paradoxes 16 | "Explain the Ship of Theseus paradox and its philosophical implications", 17 | "Analyze the validity of the following argument: All mammals are warm-blooded. All whales are mammals. Therefore, all warm-blooded animals are whales.", 18 | 19 | # Systems thinking questions 20 | "How might a carbon tax affect different sectors of the economy over short and long timeframes?", 21 | "What would be the ecological consequences if all honeybees went extinct?", 22 | 23 | # Ethical dilemmas 24 | "Analyze the trolley problem from utilitarian and deontological perspectives", 25 | "How should society balance individual privacy rights against public health monitoring during a pandemic?" 26 | ] 27 | 28 | def generate_reasoning(client, question): 29 | print("Starting") 30 | response = client.chat.completions.create( 31 | model="deepseek-reasoner", 32 | messages=[ 33 | {"role": "system", "content": "You are a helpful assistant that thinks step by step."}, 34 | {"role": "user", "content": question}, 35 | ], 36 | stream=False, 37 | max_tokens=5000 38 | ) 39 | print("Response",response) 40 | return response.choices[0].message.reasoning_content 41 | 42 | def self_correct(openai_client, reasoning): 43 | correction_prompt = f""" 44 | Here is a detailed reasoning process about a question: 45 | 46 | {reasoning} 47 | 48 | Please carefully review this reasoning for any errors, inconsistencies, or logical fallacies. If you find any issues: 49 | 1. Identify the specific error or problematic step 50 | 2. Explain why it's incorrect 51 | 3. Provide a corrected version of that step 52 | 4. Continue with the correct reasoning from that point 53 | 54 | If the reasoning is correct, confirm this and summarize the key insights. 
55 | """ 56 | 57 | response = openai_client.chat.completions.create( 58 | model="gpt-4o-mini", 59 | messages=[ 60 | {"role": "system", "content": "You are a careful reviewer focused on accuracy."}, 61 | {"role": "user", "content": correction_prompt}, 62 | ], 63 | temperature=0.2, 64 | max_tokens=2000 65 | ) 66 | 67 | return response.choices[0].message.content 68 | 69 | def generate_final_response(openai_client, question, corrected_reasoning): 70 | response = openai_client.chat.completions.create( 71 | model="gpt-4o-mini", 72 | messages=[ 73 | {"role": "system", "content": "You are a helpful assistant that provides accurate information."}, 74 | {"role": "user", "content": question}, 75 | {"role": "assistant", "content": f"Let me think about this carefully.\n\n{corrected_reasoning}"}, 76 | {"role": "user", "content": "Based on this analysis, provide a clear and concise answer."} 77 | ], 78 | temperature=0.3, 79 | max_tokens=1000 80 | ) 81 | 82 | return response.choices[0].message.content 83 | 84 | def main(): 85 | deepseek_client = OpenAI( 86 | api_key=os.getenv("DEEPSEEK_API_KEY"), 87 | base_url="https://api.deepseek.com" 88 | ) 89 | 90 | openai_client = OpenAI( 91 | api_key=os.getenv("OPENAI_API_KEY") 92 | ) 93 | 94 | results = [] 95 | for i, question in enumerate(questions, 1): 96 | # Get initial reasoning from DeepSeek 97 | initial_reasoning = generate_reasoning(deepseek_client, question) 98 | 99 | # Self-correction process with OpenAI 100 | corrected_reasoning = self_correct(openai_client, initial_reasoning) 101 | 102 | # Generate final response 103 | final_response = generate_final_response(openai_client, question, corrected_reasoning) 104 | 105 | print(f"Question {i}: {question}") 106 | print(f"Final Response: {final_response}\n") 107 | 108 | results.append([i, question, initial_reasoning, corrected_reasoning, final_response]) 109 | 110 | # Save results to CSV 111 | with open('enhanced_qa_results.csv', 'w', newline='', encoding='utf-8') as file: 112 | writer = csv.writer(file) 113 | writer.writerow(['ID', 'Question', 'Initial Reasoning', 'Corrected Reasoning', 'Final Response']) 114 | writer.writerows(results) 115 | 116 | if __name__ == "__main__": 117 | main() 118 | -------------------------------------------------------------------------------- /self-taught-reasoners/Readme.md: -------------------------------------------------------------------------------- 1 | # SELF-TAUGHT Algorithm 2 | 3 | SELF-TAUGHT is an innovative algorithm designed to enhance problem-solving capabilities of Large Language Models (LLMs) through self-generated, tailored demonstrations. 4 | 5 | ## Algorithm Overview 6 | 7 | 1. **Input**: A problem or question to be solved. 8 | 9 | 2. **Phase I: Information Identification** 10 | - Analyzes the input problem to identify key information and knowledge required for solving. 11 | - Produces an abstract representation of the necessary information. 12 | 13 | 3. **Phase II: Tailored Demonstration Creation** 14 | - Generates pseudo-problems related to the identified information. 15 | - Creates solutions for these pseudo-problems. 16 | - Implements a confidence-based filtering mechanism to ensure high-quality demonstrations. 17 | 18 | 4. **Phase III: Self-Directed Problem-Solving** 19 | - Uses the generated demonstrations to guide the solution process for the original problem. 20 | - Produces a final solution based on the insights from the tailored demonstrations. 
21 | 22 | ## Key Features 23 | 24 | - **Self-Generation**: Creates its own examples and solutions, reducing reliance on pre-existing demonstrations. 25 | - **Tailored Approach**: Generates demonstrations specific to each input problem, ensuring relevance. 26 | - **Quality Control**: Employs a confidence scoring system to filter and select high-quality demonstrations. 27 | - **Flexibility**: Can be implemented with various types of LLMs and APIs. 28 | - **Zero-Shot Capability**: Operates without the need for task-specific training or fine-tuning. 29 | 30 | ## How It Works 31 | 32 | 1. The algorithm receives a problem as input. 33 | 2. It identifies the core information and knowledge areas relevant to the problem. 34 | 3. Based on this information, it generates a set of related pseudo-problems. 35 | 4. For each pseudo-problem, it creates a solution and assigns a confidence score. 36 | 5. High-confidence solutions are collected as demonstrations. 37 | 6. Using these tailored demonstrations, the algorithm then approaches the original problem. 38 | 7. Finally, it generates a solution to the original problem, leveraging insights from the demonstrations. 39 | 40 | ## Benefits 41 | 42 | - Enhances problem-solving capabilities of LLMs across various domains. 43 | - Reduces the need for manually crafted demonstrations or extensive datasets. 44 | - Adapts to each unique problem, potentially improving accuracy and relevance of solutions. 45 | - Can be applied to a wide range of question-answering and problem-solving tasks. 46 | 47 | ## Potential Applications 48 | 49 | - Complex question-answering systems 50 | - Tutoring and educational tools 51 | - Research assistance and hypothesis generation 52 | - Creative writing and ideation support 53 | - Technical problem-solving in specialized domains 54 | 55 | ## Implementation Considerations 56 | 57 | - Requires integration with a capable LLM or API. 58 | - Performance may vary based on the underlying language model's capabilities. 59 | - Proper prompt engineering is crucial for effective information identification and demonstration generation. 60 | - Consider computational resources, as the algorithm involves multiple LLM calls per problem. 
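## Usage

A minimal usage sketch based on the entry point in `app.py` (it assumes `OPENAI_API_KEY` is set in the environment and that the snippet is run from this directory):

```python
from app import SelfTaught, DirectResponse

# Any problem statement can be used here; this one is purely illustrative.
problem = ("A train travels 120 km at a constant speed. If the speed had been "
           "10 km/h faster, the trip would have taken 30 minutes less. What was the speed?")

# SELF-TAUGHT: build tailored demonstrations, then solve the original problem with them
self_taught = SelfTaught(model="gpt-4o-mini", temperature=0.7)
answer = self_taught.run(problem, num_demonstrations=3, confidence_threshold=90)
print("SELF-TAUGHT answer:", answer)

# Baseline: a single direct call with no demonstrations, for comparison
print("Direct answer:", DirectResponse().call_openai(problem))
```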
61 | 62 | ## Architecture Diagram 63 | ```mermaid 64 | graph TD 65 | A[Start] --> B[Input Problem] 66 | B --> C[Phase I: Information Identification] 67 | C --> D[Phase II: Tailored Demonstration Creation] 68 | D --> E{Generate Pseudo Problem} 69 | E --> F{Generate Pseudo Solution} 70 | F --> G{Check Confidence} 71 | G -->|Confidence >= Threshold| H[Add to Demonstrations] 72 | G -->|Confidence < Threshold| I{Max Attempts Reached?} 73 | I -->|No| F 74 | I -->|Yes| J[Use Best Available Solution] 75 | H --> K{Enough Demonstrations?} 76 | J --> K 77 | K -->|No| E 78 | K -->|Yes| L[Phase III: Self-Directed Problem-Solving] 79 | L --> M[Generate Final Solution] 80 | M --> N[End] 81 | 82 | subgraph "Language Model API" 83 | O[LLM] 84 | end 85 | 86 | C -.-> O 87 | E -.-> O 88 | F -.-> O 89 | L -.-> O 90 | ``` 91 | -------------------------------------------------------------------------------- /self-taught-reasoners/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from openai import OpenAI 4 | from dotenv import load_dotenv 5 | from tenacity import retry, stop_after_attempt, wait_random_exponential 6 | 7 | load_dotenv() 8 | 9 | client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 10 | 11 | 12 | @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) 13 | def completion_with_backoff(**kwargs): 14 | return client.chat.completions.create(**kwargs) 15 | 16 | class DirectResponse: 17 | 18 | def __init__(self, model="gpt-4o-mini", temperature=0.7): 19 | self.model = model 20 | self.temperature = temperature 21 | 22 | def call_openai(self, prompt): 23 | response = completion_with_backoff( 24 | model=self.model, 25 | messages=[{"role": "user", "content": prompt}], 26 | temperature=self.temperature, 27 | ) 28 | return response.choices[0].message.content 29 | 30 | class SelfTaught: 31 | def __init__(self, model="gpt-4o-mini", temperature=0.7): 32 | self.model = model 33 | self.temperature = temperature 34 | 35 | def call_openai(self, prompt): 36 | response = completion_with_backoff( 37 | model=self.model, 38 | messages=[{"role": "user", "content": prompt}], 39 | temperature=self.temperature, 40 | ) 41 | return response.choices[0].message.content 42 | 43 | def identify_information(self, problem): 44 | prompt = f"""[QUESTION] 45 | {problem} 46 | 47 | List the necessary information that one must know for solving the above question. Your response should be in an abstractive manner.""" 48 | print("Problem",problem) 49 | return self.call_openai(prompt) 50 | 51 | def generate_pseudo_problem(self, problem, information): 52 | prompt = f"""[QUESTION] 53 | {problem} 54 | 55 | [INFORMATION] 56 | {information} 57 | 58 | Based on the above information, generate a new question that addresses similar information/knowledge with higher relevance and no ambiguity.""" 59 | return self.call_openai(prompt) 60 | 61 | def generate_pseudo_solution(self, pseudo_problem): 62 | prompt = f"""[QUESTION] 63 | {pseudo_problem} 64 | 65 | Solve this question step by step. 
At the end, provide your confidence level (0-100) for your answer.""" 66 | solution = self.call_openai(prompt) 67 | 68 | # Use regex to find the confidence score 69 | match = re.search(r"confidence level.*?(\d+)", solution, re.IGNORECASE) 70 | if match: 71 | confidence = int(match.group(1)) 72 | else: 73 | # If no confidence score is found, assume a low score 74 | confidence = 0 75 | 76 | return solution, confidence 77 | 78 | def solve_problem(self, problem, demonstrations): 79 | prompt = f"""[QUESTION] 80 | {problem} 81 | 82 | Here are some relevant examples: 83 | 84 | {demonstrations} 85 | 86 | Now, solve the original question step by step.""" 87 | return self.call_openai(prompt) 88 | 89 | def run( 90 | self, problem, num_demonstrations=3, confidence_threshold=90, max_attempts=5 91 | ): 92 | information = self.identify_information(problem) 93 | demonstrations = [] 94 | 95 | for index in range(num_demonstrations): 96 | pseudo_problem = self.generate_pseudo_problem(problem, information) 97 | 98 | print("Index of Pseudo Problem", index) 99 | print("Pseudo Problem", pseudo_problem) 100 | for _ in range(max_attempts): 101 | pseudo_solution, confidence = self.generate_pseudo_solution( 102 | pseudo_problem 103 | ) 104 | 105 | print("Solution", pseudo_solution) 106 | 107 | print("Confidence", confidence) 108 | if confidence >= confidence_threshold: 109 | demonstrations.append( 110 | f"Problem: {pseudo_problem}\nSolution: {pseudo_solution}\n" 111 | ) 112 | break 113 | 114 | if len(demonstrations) < index + 1:  # fall back to the last attempt if no solution met the confidence threshold 115 | demonstrations.append( 116 | f"Problem: {pseudo_problem}\nSolution: {pseudo_solution}\n" 117 | ) 118 | 119 | return self.solve_problem(problem, "\n".join(demonstrations)) 120 | 121 | # Example usage 122 | if __name__ == "__main__": 123 | problem = """You bought a limousine for $98,000 and are planning to rent it for weddings, ceremonies 124 | and parties at $245 per hour. If you estimate the car will be hired for 2 hours a day on 125 | average, with daily costs at about $50, what is the estimated yearly yield on your investment 126 | if you work all year round, i.e. every day of the year, including any festivities and 127 | weekends? A) 164% (B) 1.64% (C) 0.45% (D) 183% """ 128 | self_taught = SelfTaught() 129 | solution = self_taught.run(problem) 130 | 131 | direct_response = DirectResponse() 132 | print("Solution", direct_response.call_openai(problem)) 133 | # print(f"Problem: {problem}") 134 | print(f"Solution: {solution}") 135 | --------------------------------------------------------------------------------