├── .dockerignore ├── .dvcignore ├── .editorconfig ├── .env.sample ├── .github ├── .stale.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── config.yml │ ├── feature_request.md │ └── question.md ├── PULL_REQUEST_TEMPLATE.md ├── dependabot.yml ├── release-drafter.yml └── workflows │ ├── build.yml │ ├── greetings.yml │ └── release-drafter.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── SECURITY.md ├── assets ├── PareaLogoLight.png └── images │ └── coverage.svg ├── cookbook ├── ab_testing.py ├── anthropic │ ├── tracing_anthropic.py │ ├── tracing_anthropic_tool_use.py │ ├── tracing_bedrock.py │ └── tracing_with_images_anthropic.py ├── assets │ ├── data │ │ ├── 2022-letter.txt │ │ ├── __init__.py │ │ ├── anthropic_tool_use_examples.py │ │ ├── openai_input_examples.py │ │ └── state_of_the_union.txt │ └── img │ │ ├── dashboard.png │ │ ├── dashboard_detailed_view.png │ │ ├── deployed_prompts.png │ │ ├── feedback.png │ │ ├── logs.png │ │ ├── meta_data.png │ │ └── trace_log_view.png ├── async_enpoints_for_datasets.py ├── cohere │ ├── trace_cohere.py │ ├── trace_cohere_tools.py │ └── tracing_with_cohere.py ├── dspy │ ├── dspy_examples.py │ ├── dspy_threading.py │ └── tracing_and_evaluation_tutorial.ipynb ├── endpoints_for_datasets.py ├── evals_and_experiments │ ├── RAG_experiment_with_auto_evals.py │ ├── async_experiments.py │ ├── deployed_prompt_and_dataset.py │ ├── deployed_prompt_dataset_and_eval.py │ ├── experiment_test_substeps.py │ ├── list_experiments.py │ ├── modify_dataset_before_experiment.py │ ├── parea_evaluation_deepdive.ipynb │ ├── route_llm_experiment.py │ ├── run_experiment.py │ ├── run_experiment_agreement_among_evals.py │ ├── run_experiment_balanced_acc.py │ ├── run_experiment_evas_with_reason.py │ └── run_experiment_using_saved_test_collection.py ├── fetch_logs.py ├── finetuning │ └── download_as_jsonl.py ├── guidance │ └── tracing_guidance.py ├── instructor │ ├── dynamic_few_shot_injection_with_from_feedback.py │ ├── instructor_blog_example_simple.py │ ├── instructor_blog_example_validation_context.py │ ├── instructor_evals.py │ └── instructor_streaming.py ├── langchain │ ├── trace_class_call_method.py │ ├── trace_langchain_RAG_evals.py │ ├── trace_langchain_RAG_with_experiment.py │ ├── trace_langchain_anthropic_function_calling.py │ ├── trace_langchain_azure_RAG_with_experiment.py │ ├── trace_langchain_bedrock_rag.py │ ├── trace_langchain_inside_trace_decorator.py │ ├── trace_langchain_rag_agents.py │ ├── trace_langchain_rag_question_answering.py │ ├── trace_langchain_simple.py │ └── trace_langchain_with_deployed_prompt.py ├── marvin │ └── trace_marvin.py ├── openai │ ├── dynamic_few_shot_injection_with_evals.py │ ├── simple_experiment_with_openai.py │ ├── trace_class_call_method.py │ ├── tracing_and_evaluating_openai_endpoint.py │ ├── tracing_azure_open_ai.py │ ├── tracing_open_ai_streams.py │ ├── tracing_openai_assistant_endpoint.py │ ├── tracing_templated_llm_calls.py │ ├── tracing_tool_calling.py │ ├── tracing_with_images_open_ai.py │ ├── tracing_with_open_ai_endpoint_directly.py │ ├── tracing_with_openai_requests_api.py │ ├── tracing_with_openai_with_functions.py │ └── tracing_with_openai_with_structured_output.py ├── parea_llm_proxy │ ├── deployments │ │ ├── fetching_and_using_parea_deployments.py │ │ └── tracing_with_deployed_prompt.py │ ├── dynamic_few_shot_injection.py │ ├── tracing_with_Parea_sdk.ipynb │ ├── tracing_with_agent.py │ ├── tracing_with_function_calling_and_chains.ipynb │ ├── 
tracing_with_parea_streaming.py │ └── tracing_without_deployed_prompt.py ├── tracing_with_threading.py └── use_dataset_for_finetuning.py ├── cookiecutter-config-file.yml ├── parea ├── __init__.py ├── api_client.py ├── cache │ ├── __init__.py │ ├── cache.py │ └── in_memory.py ├── client.py ├── constants.py ├── evals │ ├── __init__.py │ ├── chat │ │ ├── __init__.py │ │ └── goal_success_ratio.py │ ├── dataset_level │ │ ├── __init__.py │ │ └── balanced_acc.py │ ├── general │ │ ├── __init__.py │ │ ├── answer_matches_target_llm_grader.py │ │ ├── answer_matches_target_recall.py │ │ ├── answer_relevancy.py │ │ ├── levenshtein.py │ │ ├── llm_grader.py │ │ ├── lm_vs_lm.py │ │ ├── self_check.py │ │ └── semantic_similarity.py │ ├── rag │ │ ├── __init__.py │ │ ├── answer_context_faithfulness_binary.py │ │ ├── answer_context_faithfulness_precision.py │ │ ├── answer_context_faithfulness_statement_level.py │ │ ├── context_has_answer.py │ │ ├── context_query_relevancy.py │ │ ├── context_ranking_listwise.py │ │ ├── context_ranking_pointwise.py │ │ └── percent_target_supported_by_context.py │ ├── summary │ │ ├── __init__.py │ │ ├── factual_inconsistency_binary.py │ │ ├── factual_inconsistency_scale.py │ │ └── likert_scale.py │ └── utils.py ├── experiment │ ├── __init__.py │ ├── cli.py │ ├── datasets.py │ ├── dvc.py │ └── experiment.py ├── helpers.py ├── parea_logger.py ├── schemas │ ├── __init__.py │ ├── log.py │ └── models.py ├── types.py ├── utils │ ├── __init__.py │ ├── trace_integrations │ │ ├── dspy.py │ │ ├── instructor.py │ │ ├── langchain.py │ │ ├── langchain_utils.py │ │ └── wrapt_utils.py │ ├── trace_utils.py │ └── universal_encoder.py └── wrapper │ ├── __init__.py │ ├── anthropic │ ├── __init__.py │ ├── anthropic.py │ └── stream_wrapper.py │ ├── cohere │ ├── helpers.py │ └── wrap_cohere.py │ ├── openai │ ├── __init__.py │ └── openai.py │ ├── openai_beta_wrapper.py │ ├── openai_raw_api_tracer.py │ ├── utils.py │ └── wrapper.py ├── poetry.lock ├── pyproject.toml ├── setup.cfg └── tests ├── test_import.py └── test_test_case_collection.py /.dockerignore: -------------------------------------------------------------------------------- 1 | # Git 2 | .git 3 | .gitignore 4 | .github 5 | 6 | # Docker 7 | .dockerignore 8 | 9 | # IDE 10 | .idea 11 | .vscode 12 | 13 | # Byte-compiled / optimized / DLL files 14 | __pycache__/ 15 | **/__pycache__/ 16 | *.pyc 17 | *.pyo 18 | *.pyd 19 | .Python 20 | *.py[cod] 21 | *$py.class 22 | .pytest_cache/ 23 | ..mypy_cache/ 24 | 25 | # poetry 26 | .venv 27 | 28 | # C extensions 29 | *.so 30 | 31 | # Virtual environment 32 | .venv 33 | venv 34 | 35 | .DS_Store 36 | .AppleDouble 37 | .LSOverride 38 | ._* 39 | /LocalREADME.md 40 | -------------------------------------------------------------------------------- /.dvcignore: -------------------------------------------------------------------------------- 1 | # Add patterns of files dvc should ignore, which could improve 2 | # the performance. 
Learn more at 3 | # https://dvc.org/doc/user-guide/dvcignore 4 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # Check http://editorconfig.org for more information 2 | # This is the main config file for this project: 3 | root = true 4 | 5 | [*] 6 | charset = utf-8 7 | end_of_line = lf 8 | insert_final_newline = true 9 | indent_style = space 10 | indent_size = 2 11 | trim_trailing_whitespace = true 12 | 13 | [*.{py, pyi}] 14 | indent_style = space 15 | indent_size = 4 16 | 17 | [Makefile] 18 | indent_style = tab 19 | 20 | [*.md] 21 | trim_trailing_whitespace = false 22 | 23 | [*.{diff,patch}] 24 | trim_trailing_whitespace = false 25 | -------------------------------------------------------------------------------- /.env.sample: -------------------------------------------------------------------------------- 1 | API_KEY= 2 | -------------------------------------------------------------------------------- /.github/.stale.yml: -------------------------------------------------------------------------------- 1 | # Number of days of inactivity before an issue becomes stale 2 | daysUntilStale: 60 3 | # Number of days of inactivity before a stale issue is closed 4 | daysUntilClose: 7 5 | # Issues with these labels will never be considered stale 6 | exemptLabels: 7 | - pinned 8 | - security 9 | # Label to use when marking an issue as stale 10 | staleLabel: wontfix 11 | # Comment to post when marking an issue as stale. Set to `false` to disable 12 | markComment: > 13 | This issue has been automatically marked as stale because it has not had 14 | recent activity. It will be closed if no further activity occurs. Thank you 15 | for your contributions. 16 | # Comment to post when closing a stale issue. Set to `false` to disable 17 | closeComment: false 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🐛 Bug report 3 | about: If something isn't working 🔧 4 | title: '' 5 | labels: bug 6 | assignees: 7 | --- 8 | 9 | ## 🐛 Bug Report 10 | 11 | 12 | 13 | ## 🔬 How To Reproduce 14 | 15 | Steps to reproduce the behavior: 16 | 17 | 1. ... 18 | 19 | ### Code sample 20 | 21 | 22 | 23 | ### Environment 24 | 25 | * OS: [e.g. 
Linux / Windows / macOS] 26 | * Python version, get it with: 27 | 28 | ```bash 29 | python --version 30 | ``` 31 | 32 | ### Screenshots 33 | 34 | 35 | 36 | ## 📈 Expected behavior 37 | 38 | 39 | 40 | ## 📎 Additional context 41 | 42 | 43 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | # Configuration: https://help.github.com/en/github/building-a-strong-community/configuring-issue-templates-for-your-repository 2 | 3 | blank_issues_enabled: false 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🚀 Feature request 3 | about: Suggest an idea for this project 🏖 4 | title: '' 5 | labels: enhancement 6 | assignees: 7 | --- 8 | 9 | ## 🚀 Feature Request 10 | 11 | 12 | 13 | ## 🔈 Motivation 14 | 15 | 16 | 17 | ## 🛰 Alternatives 18 | 19 | 20 | 21 | ## 📎 Additional context 22 | 23 | 24 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: ❓ Question 3 | about: Ask a question about this project 🎓 4 | title: '' 5 | labels: question 6 | assignees: 7 | --- 8 | 9 | ## Checklist 10 | 11 | 12 | 13 | - [ ] I've searched the project's [`issues`](https://github.com/parea-ai/parea-sdk/issues?q=is%3Aissue). 14 | 15 | ## ❓ Question 16 | 17 | 18 | 19 | How can I [...]? 20 | 21 | Is it possible to [...]? 22 | 23 | ## 📎 Additional context 24 | 25 | 26 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Description 2 | 3 | 4 | 5 | ## Related Issue 6 | 7 | 8 | 9 | ## Type of Change 10 | 11 | 12 | 13 | - [ ] 📚 Examples / docs / tutorials / dependencies update 14 | - [ ] 🔧 Bug fix (non-breaking change which fixes an issue) 15 | - [ ] 🥂 Improvement (non-breaking change which improves an existing feature) 16 | - [ ] 🚀 New feature (non-breaking change which adds functionality) 17 | - [ ] 💥 Breaking change (fix or feature that would cause existing functionality to change) 18 | - [ ] 🔐 Security fix 19 | - [ ] 🆙 Version bump 20 | 21 | ## Checklist 22 | 23 | 24 | 25 | - [ ] I've read the [`CODE_OF_CONDUCT.md`](https://github.com/parea-ai/parea-sdk/blob/master/CODE_OF_CONDUCT.md) 26 | document. 27 | - [ ] I've read the [`CONTRIBUTING.md`](https://github.com/parea-ai/parea-sdk/blob/master/CONTRIBUTING.md) guide. 28 | - [ ] I've updated the code style using `make codestyle`. 29 | - [ ] I've written tests for all new methods and classes that I created. 30 | - [ ] I've written the docstring in Google format for all the methods and classes that I used. 
31 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # Configuration: https://dependabot.com/docs/config-file/ 2 | # Docs: https://docs.github.com/en/github/administering-a-repository/keeping-your-dependencies-updated-automatically 3 | 4 | version: 2 5 | 6 | updates: 7 | - package-ecosystem: "pip" 8 | directory: "/" 9 | schedule: 10 | interval: "daily" 11 | allow: 12 | - dependency-type: "all" 13 | commit-message: 14 | prefix: ":arrow_up:" 15 | open-pull-requests-limit: 50 16 | 17 | - package-ecosystem: "github-actions" 18 | directory: "/" 19 | schedule: 20 | interval: "daily" 21 | allow: 22 | - dependency-type: "all" 23 | commit-message: 24 | prefix: ":arrow_up:" 25 | open-pull-requests-limit: 50 26 | 27 | - package-ecosystem: "docker" 28 | directory: "/docker" 29 | schedule: 30 | interval: "weekly" 31 | allow: 32 | - dependency-type: "all" 33 | commit-message: 34 | prefix: ":arrow_up:" 35 | open-pull-requests-limit: 50 36 | -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | # Release drafter configuration https://github.com/release-drafter/release-drafter#configuration 2 | # Emojis were chosen to match the https://gitmoji.carloscuesta.me/ 3 | 4 | name-template: "v$NEXT_PATCH_VERSION" 5 | tag-template: "v$NEXT_PATCH_VERSION" 6 | 7 | categories: 8 | - title: ":rocket: Features" 9 | labels: [enhancement, feature] 10 | - title: ":wrench: Fixes & Refactoring" 11 | labels: [bug, refactoring, bugfix, fix] 12 | - title: ":package: Build System & CI/CD" 13 | labels: [build, ci, testing] 14 | - title: ":boom: Breaking Changes" 15 | labels: [breaking] 16 | - title: ":pencil: Documentation" 17 | labels: [documentation] 18 | - title: ":arrow_up: Dependencies updates" 19 | labels: [dependencies] 20 | 21 | template: | 22 | ## What’s Changed 23 | 24 | $CHANGES 25 | 26 | ## :busts_in_silhouette: List of contributors 27 | 28 | $CONTRIBUTORS 29 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: [ push, pull_request ] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: [ "3.11" ] 11 | 12 | steps: 13 | - uses: actions/checkout@v4.2.2 14 | - name: Set up Python ${{ matrix.python-version }} 15 | uses: actions/setup-python@v5 16 | with: 17 | python-version: ${{ matrix.python-version }} 18 | 19 | - name: Install Poetry 20 | uses: snok/install-poetry@v1 21 | with: 22 | virtualenvs-create: true 23 | virtualenvs-in-project: true 24 | virtualenvs-path: .venv 25 | installer-parallel: true 26 | 27 | - name: Load cached venv 28 | id: cached-poetry-dependencies 29 | uses: actions/cache@v4 30 | with: 31 | path: .venv 32 | key: venv-${{ matrix.python-version }}-${{ hashFiles('poetry.lock') }} 33 | 34 | - name: Install dependencies 35 | if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' 36 | run: poetry install --no-interaction --no-root 37 | 38 | - name: Run style checks 39 | run: | 40 | make check-codestyle 41 | 42 | - name: Run tests 43 | run: | 44 | make test 45 | -------------------------------------------------------------------------------- /.github/workflows/greetings.yml: 
-------------------------------------------------------------------------------- 1 | name: Greetings 2 | 3 | on: [pull_request, issues] 4 | 5 | jobs: 6 | greeting: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/first-interaction@v1.3.0 10 | with: 11 | repo-token: ${{ secrets.GITHUB_TOKEN }} 12 | pr-message: 'Hello @${{ github.actor }}, thank you for submitting a PR! We will respond as soon as possible.' 13 | issue-message: | 14 | Hello @${{ github.actor }}, thank you for your interest in our work! 15 | 16 | If this is a bug report, please provide screenshots and **minimum viable code to reproduce your issue**, otherwise we can not help you. 17 | -------------------------------------------------------------------------------- /.github/workflows/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name: Release Drafter 2 | 3 | on: 4 | push: 5 | # branches to consider in the event; optional, defaults to all 6 | branches: 7 | - master 8 | 9 | jobs: 10 | update_release_draft: 11 | runs-on: ubuntu-latest 12 | steps: 13 | # Drafts your next Release notes as Pull Requests are merged into "master" 14 | - uses: release-drafter/release-drafter@v6.0.0 15 | env: 16 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 17 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3.9 3 | 4 | default_stages: [commit, push] 5 | 6 | repos: 7 | - repo: https://github.com/pre-commit/pre-commit-hooks 8 | rev: v2.5.0 9 | hooks: 10 | - id: check-yaml 11 | - id: end-of-file-fixer 12 | exclude: LICENSE 13 | 14 | - repo: local 15 | hooks: 16 | - id: pyupgrade 17 | name: pyupgrade 18 | entry: poetry run pyupgrade --py38-plus 19 | types: [python] 20 | language: system 21 | 22 | - repo: local 23 | hooks: 24 | - id: isort 25 | name: isort 26 | entry: poetry run isort --settings-path pyproject.toml 27 | types: [python] 28 | language: system 29 | 30 | - repo: local 31 | hooks: 32 | - id: black 33 | name: black 34 | entry: poetry run black --config pyproject.toml 35 | types: [python] 36 | language: system 37 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at joel@parea.ai. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to contribute 2 | 3 | ## Dependencies 4 | 5 | We use `poetry` to manage the [dependencies](https://github.com/python-poetry/poetry). 6 | If you don't have `poetry`, you should install it with `make poetry-download`. 
7 | 8 | To install dependencies and prepare [`pre-commit`](https://pre-commit.com/) hooks, you need to run the `install` command: 9 | 10 | ```bash 11 | make install 12 | make pre-commit-install 13 | ``` 14 | 15 | To activate your `virtualenv` run `poetry shell`. 16 | 17 | ## Codestyle 18 | 19 | After installation, you can run code formatting: 20 | 21 | ```bash 22 | make codestyle 23 | ``` 24 | 25 | ### Checks 26 | 27 | Many checks are configured for this project. The `make check-codestyle` command checks black, isort, and darglint. 28 | The `make check-safety` command checks the security of your code. 29 | 30 | The `make lint` command applies all checks. 31 | 32 | ### Before submitting 33 | 34 | Before submitting your code, please do the following steps: 35 | 36 | 1. Add any changes you want 37 | 1. Add tests for the new changes 38 | 1. Edit documentation if you have changed something significant 39 | 1. Run `make codestyle` to format your changes. 40 | 1. Run `make lint` to ensure that types, security and docstrings are okay. 41 | 42 | ## Other help 43 | 44 | You can contribute by spreading the word about this library. 45 | It would also be a huge contribution to write 46 | a short article on how you are using this project. 47 | You can also share your best practices with us. 48 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | #* Variables 2 | SHELL := /usr/bin/env bash 3 | PYTHON := python3 4 | PYTHONPATH := `pwd` 5 | 6 | #* Docker variables 7 | IMAGE := parea 8 | VERSION := latest 9 | 10 | #* Poetry 11 | .PHONY: poetry-download 12 | poetry-download: 13 | curl -sSL https://install.python-poetry.org | $(PYTHON) - 14 | 15 | .PHONY: poetry-remove 16 | poetry-remove: 17 | curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/install-poetry.py | $(PYTHON) - --uninstall 18 | 19 | #* Installation 20 | .PHONY: install 21 | install: 22 | poetry lock -n && poetry export --without-hashes > requirements.txt 23 | poetry install -n 24 | poetry run mypy --install-types --non-interactive ./ 25 | 26 | .PHONY: pre-commit-install 27 | pre-commit-install: 28 | poetry run pre-commit install 29 | 30 | #* Formatters 31 | .PHONY: codestyle 32 | codestyle: 33 | poetry run pyupgrade --exit-zero-even-if-changed --py38-plus **/*.py 34 | poetry run isort --settings-path pyproject.toml ./ 35 | poetry run black --config pyproject.toml ./ 36 | 37 | .PHONY: formatting 38 | formatting: codestyle 39 | 40 | #* Linting 41 | .PHONY: test 42 | test: 43 | PYTHONPATH=$(PYTHONPATH) poetry run pytest -c pyproject.toml --cov-report=html --cov=parea tests/ 44 | 45 | .PHONY: check-codestyle 46 | check-codestyle: 47 | poetry run isort --diff --check-only --settings-path pyproject.toml ./ 48 | poetry run black --diff --check --config pyproject.toml ./ 49 | poetry run darglint --verbosity 2 parea tests 50 | 51 | .PHONY: mypy 52 | mypy: 53 | poetry run mypy --config-file pyproject.toml ./ 54 | 55 | #.PHONY: check-safety 56 | # check-safety: 57 | # poetry check 58 | # poetry run safety check --full-report 59 | # poetry run bandit -ll --recursive parea tests 60 | 61 | .PHONY: lint 62 | lint: test check-codestyle mypy 63 | 64 | .PHONY: update-dev-deps 65 | update-dev-deps: 66 | poetry add -D bandit@latest darglint@latest "isort[colors]@latest" mypy@latest pre-commit@latest pydocstyle@latest pylint@latest pytest@latest pyupgrade@latest safety@latest coverage@latest coverage-badge@latest 
pytest-html@latest pytest-cov@latest 67 | poetry add -D --allow-prereleases black@latest 68 | 69 | #* Docker 70 | # Example: make docker-build VERSION=latest 71 | # Example: make docker-build IMAGE=some_name VERSION=0.1.0 72 | .PHONY: docker-build 73 | docker-build: 74 | @echo Building docker $(IMAGE):$(VERSION) ... 75 | docker build \ 76 | -t $(IMAGE):$(VERSION) . \ 77 | -f ./docker/Dockerfile --no-cache 78 | 79 | # Example: make docker-remove VERSION=latest 80 | # Example: make docker-remove IMAGE=some_name VERSION=0.1.0 81 | .PHONY: docker-remove 82 | docker-remove: 83 | @echo Removing docker $(IMAGE):$(VERSION) ... 84 | docker rmi -f $(IMAGE):$(VERSION) 85 | 86 | #* Cleaning 87 | .PHONY: pycache-remove 88 | pycache-remove: 89 | find . | grep -E "(__pycache__|\.pyc|\.pyo$$)" | xargs rm -rf 90 | 91 | .PHONY: dsstore-remove 92 | dsstore-remove: 93 | find . | grep -E ".DS_Store" | xargs rm -rf 94 | 95 | .PHONY: mypycache-remove 96 | mypycache-remove: 97 | find . | grep -E ".mypy_cache" | xargs rm -rf 98 | 99 | .PHONY: ipynbcheckpoints-remove 100 | ipynbcheckpoints-remove: 101 | find . | grep -E ".ipynb_checkpoints" | xargs rm -rf 102 | 103 | .PHONY: pytestcache-remove 104 | pytestcache-remove: 105 | find . | grep -E ".pytest_cache" | xargs rm -rf 106 | 107 | .PHONY: build-remove 108 | build-remove: 109 | rm -rf build/ 110 | 111 | .PHONY: cleanup 112 | cleanup: pycache-remove dsstore-remove mypycache-remove ipynbcheckpoints-remove pytestcache-remove 113 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security 2 | 3 | ## 🔐 Reporting Security Issues 4 | 5 | > Do not open issues that might have security implications! 6 | > It is critical that security related issues are reported privately so we have time to address them before they become public knowledge. 7 | 8 | Vulnerabilities can be reported by emailing core members: 9 | 10 | - parea-ai [joel@parea.ai](mailto:joel@parea.ai) 11 | 12 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 13 | 14 | - Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 15 | - Full paths of source file(s) related to the manifestation of the issue 16 | - The location of the affected source code (tag/branch/commit or direct URL) 17 | - Any special configuration required to reproduce the issue 18 | - Environment (e.g. Linux / Windows / macOS) 19 | - Step-by-step instructions to reproduce the issue 20 | - Proof-of-concept or exploit code (if possible) 21 | - Impact of the issue, including how an attacker might exploit the issue 22 | 23 | This information will help us triage your report more quickly. 24 | 25 | ## Preferred Languages 26 | 27 | We prefer all communications to be in English. 
28 | -------------------------------------------------------------------------------- /assets/PareaLogoLight.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parea-ai/parea-sdk-py/968486f9bf7aa4741bb307739f9dec573b30bc95/assets/PareaLogoLight.png -------------------------------------------------------------------------------- /assets/images/coverage.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | coverage 17 | coverage 18 | 80% 19 | 80% 20 | 21 | 22 | -------------------------------------------------------------------------------- /cookbook/ab_testing.py: -------------------------------------------------------------------------------- 1 | # check out the associated tutorial at https://docs.parea.ai//tutorials/running-ab-tests/llm-generated-emails 2 | 3 | from typing import Tuple 4 | 5 | import os 6 | import random 7 | 8 | from openai import OpenAI 9 | 10 | from parea import Parea, get_current_trace_id, parea_logger, trace, trace_insert 11 | from parea.schemas import EvaluationResult, UpdateLog 12 | 13 | client = OpenAI() 14 | # instantiate Parea client 15 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 16 | # wrap OpenAI client to trace calls 17 | p.wrap_openai_client(client) 18 | 19 | 20 | ab_test_name = "long-vs-short-emails" 21 | 22 | 23 | @trace # decorator to trace functions with Parea 24 | def generate_email(user: str) -> Tuple[str, str, str]: 25 | # randomly choose to generate a long or short email 26 | if random.random() < 0.5: 27 | variant = "variant_0" 28 | prompt = f"Generate a long email for {user}" 29 | else: 30 | variant = "variant_1" 31 | prompt = f"Generate a short email for {user}" 32 | # tag the requests with the A/B test name & chosen variant 33 | trace_insert( 34 | { 35 | "metadata": { 36 | "ab_test_name": ab_test_name, 37 | f"ab_test_{ab_test_name}": variant, 38 | } 39 | } 40 | ) 41 | 42 | email = ( 43 | client.chat.completions.create( 44 | model="gpt-4o", 45 | messages=[ 46 | { 47 | "role": "user", 48 | "content": prompt, 49 | } 50 | ], 51 | ) 52 | .choices[0] 53 | .message.content 54 | ) 55 | # in addition to the email, return the trace_id and the chosen variant 56 | return email, get_current_trace_id(), variant 57 | 58 | 59 | def capture_feedback(feedback: float, trace_id: str, ab_test_variant: str, user_corrected_email: str = None) -> None: 60 | field_name_to_value_map = { 61 | "scores": [EvaluationResult(name=f"ab_test_{ab_test_variant}", score=feedback, reason="any additional user feedback on why it's good/bad")], 62 | } 63 | if user_corrected_email: 64 | field_name_to_value_map["target"] = user_corrected_email 65 | 66 | parea_logger.update_log( 67 | UpdateLog( 68 | trace_id=trace_id, 69 | field_name_to_value_map=field_name_to_value_map, 70 | ) 71 | ) 72 | 73 | 74 | def main(): 75 | # generate email and get trace ID 76 | email, trace_id, ab_test_variant = generate_email("Max Mustermann") 77 | 78 | # create biased feedback for shorter emails 79 | if ab_test_variant == "variant_1": 80 | user_feedback = 0.0 if random.random() < 0.7 else 1.0 81 | else: 82 | user_feedback = 0.0 if random.random() < 0.3 else 1.0 83 | 84 | capture_feedback(user_feedback, trace_id, ab_test_variant, "Hi Max") 85 | 86 | 87 | if __name__ == "__main__": 88 | main() 89 | -------------------------------------------------------------------------------- /cookbook/anthropic/tracing_anthropic.py: 
-------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | 4 | import anthropic 5 | from anthropic.types import ContentBlockDeltaEvent, MessageDeltaEvent, MessageStartEvent 6 | from dotenv import load_dotenv 7 | 8 | from parea import Parea 9 | 10 | load_dotenv() 11 | 12 | 13 | client = anthropic.Anthropic() 14 | aclient = anthropic.AsyncAnthropic() 15 | 16 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 17 | p.wrap_anthropic_client(client) 18 | p.wrap_anthropic_client(aclient) 19 | 20 | 21 | client_kwargs = {"model": "claude-3-opus-20240229", "max_tokens": 1024, "messages": [{"role": "user", "content": "Hello, Claude"}]} 22 | 23 | 24 | def anthropic_sync(): 25 | message = client.messages.create(**client_kwargs) 26 | print(message.content[0].text) 27 | 28 | 29 | def anthropic_stream(): 30 | message = client.messages.create(**client_kwargs, stream=True) 31 | for event in message: 32 | if isinstance(event, MessageStartEvent): 33 | print(f"{event.type}: {event.message.usage.input_tokens}") 34 | elif isinstance(event, ContentBlockDeltaEvent): 35 | print(f"{event.type}: {event.delta.text}") 36 | elif isinstance(event, MessageDeltaEvent): 37 | print(f"{event.type}: {event.usage.output_tokens}") 38 | else: 39 | print(f"{event.type}: {event}") 40 | 41 | 42 | def anthropic_stream_context_manager(): 43 | with client.messages.stream(**client_kwargs) as stream: 44 | for text in stream.text_stream: 45 | print(text, end="", flush=True) 46 | print() 47 | message = stream.get_final_message() 48 | print(message.model_dump_json(indent=2)) 49 | 50 | 51 | async def async_anthropic(): 52 | message = await aclient.messages.create(**client_kwargs) 53 | print(message.content[0].text) 54 | 55 | 56 | async def async_anthropic_stream(): 57 | message = await aclient.messages.create(**client_kwargs, stream=True) 58 | async for event in message: 59 | if isinstance(event, MessageStartEvent): 60 | print(f"{event.type}: {event.message.usage.input_tokens}") 61 | elif isinstance(event, ContentBlockDeltaEvent): 62 | print(f"{event.type}: {event.delta.text}") 63 | elif isinstance(event, MessageDeltaEvent): 64 | print(f"{event.type}: {event.usage.output_tokens}") 65 | else: 66 | print(f"{event.type}: {event}") 67 | 68 | 69 | async def async_anthropic_stream_context_manager(): 70 | async with aclient.messages.stream(**client_kwargs) as stream: 71 | async for text in stream.text_stream: 72 | print(text, end="", flush=True) 73 | print() 74 | message = await stream.get_final_message() 75 | print(message.model_dump_json(indent=2)) 76 | 77 | 78 | if __name__ == "__main__": 79 | anthropic_sync() 80 | anthropic_stream() 81 | anthropic_stream_context_manager() 82 | asyncio.run(async_anthropic()) 83 | asyncio.run(async_anthropic_stream()) 84 | asyncio.run(async_anthropic_stream_context_manager()) 85 | -------------------------------------------------------------------------------- /cookbook/anthropic/tracing_anthropic_tool_use.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import anthropic 4 | from dotenv import load_dotenv 5 | 6 | from cookbook.assets.data.anthropic_tool_use_examples import missing_information, multiple_tool_use, single_tool_use 7 | from parea import Parea 8 | 9 | load_dotenv() 10 | 11 | client = anthropic.Anthropic() 12 | aclient = anthropic.AsyncAnthropic() 13 | 14 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 15 | p.wrap_anthropic_client(client) 16 | p.wrap_anthropic_client(aclient) 17 | 18 | 19 | 
def anthropic_sync(create_kwargs): 20 | message = client.messages.create(**create_kwargs) 21 | print(message.content) 22 | 23 | 24 | def anthropic_sync_stream(create_kwargs): 25 | message = client.messages.create(stream=True, **create_kwargs) 26 | for m in message: 27 | print(m) 28 | 29 | 30 | async def async_anthropic(create_kwargs): 31 | message = await aclient.messages.create(**create_kwargs) 32 | print(message.content) 33 | 34 | 35 | if __name__ == "__main__": 36 | anthropic_sync(single_tool_use) 37 | anthropic_sync_stream(single_tool_use) 38 | anthropic_sync(multiple_tool_use) 39 | anthropic_sync(missing_information) 40 | # asyncio.run(async_anthropic(single_tool_use)) 41 | # asyncio.run(async_anthropic(multiple_tool_use)) 42 | # asyncio.run(async_anthropic(missing_information)) 43 | -------------------------------------------------------------------------------- /cookbook/anthropic/tracing_bedrock.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from anthropic import AnthropicBedrock 4 | from dotenv import load_dotenv 5 | 6 | from parea import Parea 7 | 8 | load_dotenv() 9 | 10 | client = AnthropicBedrock( 11 | # Authenticate by either providing the keys below or use the default AWS credential providers, such as 12 | # using ~/.aws/credentials or the "AWS_SECRET_ACCESS_KEY" and "AWS_ACCESS_KEY_ID" environment variables. 13 | aws_access_key="", 14 | aws_secret_key="", 15 | # Temporary credentials can be used with aws_session_token. 16 | # Read more at https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp.html. 17 | aws_session_token="", 18 | # aws_region changes the aws region to which the request is made. By default, we read AWS_REGION, 19 | # and if that's not present, we default to us-east-1. Note that we do not read ~/.aws/config for the region. 
20 | aws_region="us-west-2", 21 | ) 22 | 23 | 24 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 25 | p.wrap_anthropic_client(client) 26 | 27 | message = client.messages.create(model="anthropic.claude-3-5-sonnet-20240620-v1:0", max_tokens=256, messages=[{"role": "user", "content": "Hello, world"}]) 28 | print(message.content) 29 | -------------------------------------------------------------------------------- /cookbook/anthropic/tracing_with_images_anthropic.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import base64 4 | import json 5 | import os 6 | 7 | import requests 8 | from anthropic import Anthropic 9 | from dotenv import load_dotenv 10 | from openai import OpenAI 11 | 12 | from parea import Parea, trace, trace_insert 13 | from parea.schemas import TraceLogImage 14 | 15 | load_dotenv() 16 | 17 | 18 | oai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) 19 | a_client = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) 20 | 21 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 22 | p.wrap_openai_client(oai_client) 23 | p.wrap_anthropic_client(a_client) 24 | 25 | 26 | @trace 27 | def image_maker(query: str) -> str: 28 | response = oai_client.images.generate(prompt=query, model="dall-e-3") 29 | image_url = response.data[0].url 30 | caption = {"original_prompt": query, "revised_prompt": response.data[0].revised_prompt} 31 | trace_insert({"images": [TraceLogImage(url=image_url, caption=json.dumps(caption))]}) 32 | return image_url 33 | 34 | 35 | @trace 36 | def ask_vision(image_url: str) -> Optional[str]: 37 | image_data = requests.get(image_url).content 38 | base64_image = base64.b64encode(image_data).decode("utf-8") 39 | 40 | response = a_client.messages.create( 41 | model="claude-3-haiku-20240307", 42 | messages=[ 43 | { 44 | "role": "user", 45 | "content": [ 46 | { 47 | "type": "image", 48 | "source": { 49 | "type": "base64", 50 | "media_type": "image/png", 51 | "data": base64_image, 52 | }, 53 | }, 54 | {"type": "text", "text": "What’s in this image?"}, 55 | ], 56 | } 57 | ], 58 | max_tokens=300, 59 | ) 60 | return response.content[0].text 61 | 62 | 63 | @trace 64 | def main(query: str) -> str: 65 | image_url = image_maker(query) 66 | return ask_vision(image_url) 67 | 68 | 69 | if __name__ == "__main__": 70 | result = main("A dog sitting comfortably on a chair") 71 | print(result) 72 | -------------------------------------------------------------------------------- /cookbook/assets/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parea-ai/parea-sdk-py/968486f9bf7aa4741bb307739f9dec573b30bc95/cookbook/assets/data/__init__.py -------------------------------------------------------------------------------- /cookbook/assets/data/anthropic_tool_use_examples.py: -------------------------------------------------------------------------------- 1 | single_tool_use = { 2 | "model": "claude-3-opus-20240229", 3 | "max_tokens": 1024, 4 | "messages": [{"role": "user", "content": "What's the weather like in San Francisco?"}], 5 | "tools": [ 6 | { 7 | "name": "get_weather", 8 | "description": "Get the current weather in a given location", 9 | "input_schema": { 10 | "type": "object", 11 | "properties": { 12 | "location": { 13 | "type": "string", 14 | "description": "The city and state, e.g. 
San Francisco, CA", 15 | } 16 | }, 17 | "required": ["location"], 18 | }, 19 | } 20 | ], 21 | } 22 | 23 | 24 | multiple_tool_use = { 25 | "model": "claude-3-opus-20240229", 26 | "max_tokens": 1024, 27 | "messages": [{"role": "user", "content": "What is the weather like right now in New York? Also what time is it there?"}], 28 | "tools": [ 29 | { 30 | "name": "get_weather", 31 | "description": "Get the current weather in a given location", 32 | "input_schema": { 33 | "type": "object", 34 | "properties": { 35 | "location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, 36 | "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The unit of temperature, either 'celsius' or 'fahrenheit'"}, 37 | }, 38 | "required": ["location"], 39 | }, 40 | }, 41 | { 42 | "name": "get_time", 43 | "description": "Get the current time in a given time zone", 44 | "input_schema": { 45 | "type": "object", 46 | "properties": {"timezone": {"type": "string", "description": "The IANA time zone name, e.g. America/Los_Angeles"}}, 47 | "required": ["timezone"], 48 | }, 49 | }, 50 | ], 51 | } 52 | 53 | 54 | missing_information = { 55 | "model": "claude-3-opus-20240229", 56 | "max_tokens": 1024, 57 | "tools": [ 58 | { 59 | "name": "get_weather", 60 | "description": "Get the current weather in a given location", 61 | "input_schema": { 62 | "type": "object", 63 | "properties": { 64 | "location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, 65 | "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": 'The unit of temperature, either "celsius" or "fahrenheit"'}, 66 | }, 67 | "required": ["location"], 68 | }, 69 | } 70 | ], 71 | "messages": [ 72 | {"role": "user", "content": "What is the weather like in San Francisco?"}, 73 | { 74 | "role": "assistant", 75 | "content": [ 76 | {"type": "text", "text": "I need to use get_weather, and the user wants SF, which is likely San Francisco, CA."}, 77 | {"type": "tool_use", "id": "toolu_01A09q90qw90lq917835lq9", "name": "get_weather", "input": {"location": "San Francisco, CA", "unit": "celsius"}}, 78 | ], 79 | }, 80 | {"role": "user", "content": [{"type": "tool_result", "tool_use_id": "toolu_01A09q90qw90lq917835lq9", "content": "65 degrees"}]}, 81 | ], 82 | } 83 | -------------------------------------------------------------------------------- /cookbook/assets/data/openai_input_examples.py: -------------------------------------------------------------------------------- 1 | tool_calling_example = { 2 | "model": "gpt-3.5-turbo-0125", 3 | "messages": [{"role": "user", "content": "What's the weather like in San Francisco, Tokyo, and Paris?"}], 4 | "tools": [ 5 | { 6 | "type": "function", 7 | "function": { 8 | "name": "get_current_weather", 9 | "description": "Get the current weather in a given location", 10 | "parameters": { 11 | "type": "object", 12 | "properties": { 13 | "location": { 14 | "type": "string", 15 | "description": "The city and state, e.g. San Francisco, CA", 16 | }, 17 | "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, 18 | }, 19 | "required": ["location"], 20 | }, 21 | }, 22 | } 23 | ], 24 | "tool_choice": "auto", 25 | } 26 | 27 | functions_example = { 28 | "model": "gpt-3.5-turbo-0125", 29 | "messages": [ 30 | { 31 | "role": "system", 32 | "content": f"You are a sophisticated AI assistant, " 33 | f"a specialist in user intent detection and interpretation. 
" 34 | f"Your task is to perceive and respond to the user's needs, even when they're expressed " 35 | f"in an indirect or direct manner. You excel in recognizing subtle cues: for example, " 36 | f"if a user states they are 'hungry', you should assume they are seeking nearby dining " 37 | f"options such as a restaurant or a cafe. If they indicate feeling 'tired', 'weary', " 38 | f"or mention a long journey, interpret this as a request for accommodation options like " 39 | f"hotels or guest houses. However, remember to navigate the fine line of interpretation " 40 | f"and assumption: if a user's intent is unclear or can be interpreted in multiple ways, " 41 | f"do not hesitate to politely ask for additional clarification. Make sure to tailor your " 42 | f"responses to the user based on their preferences and past experiences which can " 43 | f"be found here: Name: John Doe", 44 | }, 45 | {"role": "user", "content": "I'm hungry"}, 46 | ], 47 | "functions": [ 48 | { 49 | "name": "call_google_places_api", 50 | "description": f""" 51 | This function calls the Google Places API to find the top places of a specified type near 52 | a specific location. It can be used when a user expresses a need (e.g., feeling hungry or tired) or wants to 53 | find a certain type of place (e.g., restaurant or hotel). 54 | """, 55 | "parameters": {"type": "object", "properties": {"place_type": {"type": "string", "description": "The type of place to search for."}}}, 56 | "result": {"type": "array", "items": {"type": "string"}}, 57 | } 58 | ], 59 | } 60 | 61 | simple_example = { 62 | "model": "gpt-3.5-turbo-0125", 63 | "messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello!"}], 64 | } 65 | 66 | simple_example_json = { 67 | "model": "gpt-3.5-turbo-0125", 68 | "messages": [{"role": "system", "content": "You are a helpful assistant talking JSON."}, {"role": "user", "content": "Hello!"}], 69 | "response_format": {"type": "json_object"}, 70 | } 71 | -------------------------------------------------------------------------------- /cookbook/assets/img/dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parea-ai/parea-sdk-py/968486f9bf7aa4741bb307739f9dec573b30bc95/cookbook/assets/img/dashboard.png -------------------------------------------------------------------------------- /cookbook/assets/img/dashboard_detailed_view.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parea-ai/parea-sdk-py/968486f9bf7aa4741bb307739f9dec573b30bc95/cookbook/assets/img/dashboard_detailed_view.png -------------------------------------------------------------------------------- /cookbook/assets/img/deployed_prompts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parea-ai/parea-sdk-py/968486f9bf7aa4741bb307739f9dec573b30bc95/cookbook/assets/img/deployed_prompts.png -------------------------------------------------------------------------------- /cookbook/assets/img/feedback.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parea-ai/parea-sdk-py/968486f9bf7aa4741bb307739f9dec573b30bc95/cookbook/assets/img/feedback.png -------------------------------------------------------------------------------- /cookbook/assets/img/logs.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/parea-ai/parea-sdk-py/968486f9bf7aa4741bb307739f9dec573b30bc95/cookbook/assets/img/logs.png -------------------------------------------------------------------------------- /cookbook/assets/img/meta_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parea-ai/parea-sdk-py/968486f9bf7aa4741bb307739f9dec573b30bc95/cookbook/assets/img/meta_data.png -------------------------------------------------------------------------------- /cookbook/assets/img/trace_log_view.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parea-ai/parea-sdk-py/968486f9bf7aa4741bb307739f9dec573b30bc95/cookbook/assets/img/trace_log_view.png -------------------------------------------------------------------------------- /cookbook/async_enpoints_for_datasets.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | 4 | from dotenv import load_dotenv 5 | 6 | from parea import Parea 7 | from parea.schemas import TestCase, TestCaseCollection, UpdateTestCase 8 | 9 | load_dotenv() 10 | 11 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 12 | 13 | 14 | data = [{"problem": "1+2", "target": 3, "tags": ["easy"]}, {"problem": "Solve the differential equation dy/dx = 3y.", "target": "y = c * e^(3x)", "tags": ["hard"]}] 15 | new_data = [{"problem": "Evaluate the integral ∫x^2 dx from 0 to 3.", "target": 9, "tags": ["hard"]}] 16 | 17 | 18 | async def update_test_case_example(): 19 | dataset: TestCaseCollection = await p.aget_collection("math_problems_v3") 20 | test_cases: dict[int, TestCase] = dataset.test_cases 21 | for test_case_id, test_case in test_cases.items(): 22 | if "easy" in test_case.tags: 23 | # updated inputs must match the same k/v pair as original test case 24 | await p.aupdate_test_case( 25 | dataset_id=dataset.id, 26 | test_case_id=test_case_id, 27 | update_request=UpdateTestCase(inputs={"problem": "Evaluate the integral ∫x^6 dx from 0 to 9."}, target="((1/7)x^7)+C", tags=["hard"]), 28 | ) 29 | break 30 | 31 | 32 | async def main(): 33 | await p.acreate_test_collection(data, name="math_problems_v3") 34 | await p.aadd_test_cases(new_data, dataset_id=182) 35 | await update_test_case_example() 36 | 37 | 38 | if __name__ == "__main__": 39 | asyncio.run(main()) 40 | -------------------------------------------------------------------------------- /cookbook/cohere/trace_cohere.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import cohere 4 | from dotenv import load_dotenv 5 | 6 | from parea import Parea 7 | 8 | load_dotenv() 9 | 10 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 11 | co = cohere.Client(api_key=os.getenv("COHERE_API_KEY")) 12 | p.wrap_cohere_client(co) 13 | 14 | response = co.chat( 15 | model="command-r-plus", 16 | preamble="You are a helpful assistant talking in JSON.", 17 | message="Generate a JSON describing a person, with the fields 'name' and 'age'", 18 | response_format={"type": "json_object"}, 19 | ) 20 | print(response) 21 | print("\n\n") 22 | 23 | response = co.chat(message="Who discovered gravity?") 24 | print(response) 25 | print("\n\n") 26 | # 27 | docs = [ 28 | "Carson City is the capital city of the American state of Nevada.", 29 | "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. 
Its capital is Saipan.", 30 | "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.", 31 | "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.", 32 | "Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.", 33 | ] 34 | response = co.rerank( 35 | model="rerank-english-v3.0", 36 | query="What is the capital of the United States?", 37 | documents=docs, 38 | top_n=3, 39 | ) 40 | print(response) 41 | print("\n\n") 42 | 43 | 44 | response = co.chat( 45 | model="command-r-plus", 46 | message="Where do the tallest penguins live?", 47 | documents=[ 48 | {"title": "Tall penguins", "snippet": "Emperor penguins are the tallest."}, 49 | {"title": "Penguin habitats", "snippet": "Emperor penguins only live in Antarctica."}, 50 | {"title": "What are animals?", "snippet": "Animals are different from plants."}, 51 | ], 52 | ) 53 | print(response) 54 | print("\n\n") 55 | 56 | response = co.chat(model="command-r-plus", message="Who is more popular: Nsync or Backstreet Boys?", search_queries_only=True) 57 | print(response) 58 | print("\n\n") 59 | 60 | response = co.chat(model="command-r-plus", message="Who is more popular: Nsync or Backstreet Boys?", connectors=[{"id": "web-search"}]) 61 | print(response) 62 | print("\n\n") 63 | 64 | for event in co.chat_stream(message="Who discovered gravity?"): 65 | print(event) 66 | -------------------------------------------------------------------------------- /cookbook/cohere/tracing_with_cohere.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import os 4 | from datetime import datetime 5 | 6 | import cohere 7 | from dotenv import load_dotenv 8 | 9 | from parea import Parea, trace, trace_insert 10 | 11 | load_dotenv() 12 | 13 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 14 | co = cohere.Client(api_key=os.getenv("COHERE_API_KEY")) 15 | p.wrap_cohere_client(co) 16 | 17 | 18 | def call_llm(message: str, chat_history: Optional[List[dict]] = None, system_message: str = "", model: str = "command-r-plus") -> str: 19 | return co.chat( 20 | model=model, 21 | preamble=system_message, 22 | chat_history=chat_history or [], 23 | message=message, 24 | ).text 25 | 26 | 27 | @trace 28 | def argumentor(query: str, additional_description: str = "") -> str: 29 | return call_llm( 30 | system_message=f"""You are a debater making an argument on a topic. {additional_description}. 31 | The current time is {datetime.now().strftime("%Y-%m-%d")}""", 32 | message=f"The discussion topic is {query}", 33 | ) 34 | 35 | 36 | @trace 37 | def critic(argument: str) -> str: 38 | return call_llm( 39 | system_message="""You are a critic. 40 | What unresolved questions or criticism do you have after reading the following argument? 41 | Provide a concise summary of your feedback.""", 42 | message=argument, 43 | ) 44 | 45 | 46 | @trace 47 | def refiner(query: str, additional_description: str, argument: str, criticism: str) -> str: 48 | return call_llm( 49 | system_message=f"""You are a debater making an argument on a topic. {additional_description}. 
50 | The current time is {datetime.now().strftime("%Y-%m-%d")}""", 51 | chat_history=[{"role": "USER", "message": f"""The discussion topic is {query}"""}, {"role": "CHATBOT", "message": argument}, {"role": "USER", "message": criticism}], 52 | message="Please generate a new argument that incorporates the feedback from the user.", 53 | ) 54 | 55 | 56 | @trace 57 | def argument_chain(query: str, additional_description: str = "") -> str: 58 | trace_insert({"session_id": "cus_1234", "end_user_identifier": "user_1234"}) 59 | argument = argumentor(query, additional_description) 60 | criticism = critic(argument) 61 | refined_argument = refiner(query, additional_description, argument, criticism) 62 | return refined_argument 63 | 64 | 65 | @trace(session_id="cus_1234", end_user_identifier="user_1234") 66 | def json_call() -> str: 67 | completion = co.chat( 68 | model="command-r-plus", 69 | preamble="You are a helpful assistant talking in JSON.", 70 | message="What are you?", 71 | response_format={"type": "json_object"}, 72 | ) 73 | return completion.text 74 | 75 | 76 | if __name__ == "__main__": 77 | result = argument_chain( 78 | "Whether sparkling wine is good for you.", 79 | additional_description="Provide a concise, few sentence argument on why sparkling wine is good for you.", 80 | ) 81 | print(result) 82 | print(json_call()) 83 | -------------------------------------------------------------------------------- /cookbook/dspy/dspy_threading.py: -------------------------------------------------------------------------------- 1 | import contextvars 2 | import os 3 | from concurrent.futures import ThreadPoolExecutor 4 | 5 | import dspy 6 | from dotenv import load_dotenv 7 | 8 | from parea import Parea 9 | 10 | load_dotenv() 11 | 12 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 13 | p.trace_dspy() 14 | 15 | gpt3_turbo = dspy.OpenAI(model="gpt-3.5-turbo-1106", max_tokens=300) 16 | dspy.configure(lm=gpt3_turbo) 17 | 18 | 19 | class QASignature(dspy.Signature): 20 | question = dspy.InputField() 21 | answer = dspy.OutputField() 22 | 23 | 24 | class EnsembleQA(dspy.Module): 25 | def __init__(self): 26 | super().__init__() 27 | self.step1 = dspy.ChainOfThought(QASignature) 28 | self.step2 = dspy.ChainOfThought(QASignature) 29 | 30 | def forward(self, question): 31 | with ThreadPoolExecutor(max_workers=2) as executor: 32 | context1 = contextvars.copy_context() 33 | future1 = executor.submit(context1.run, self.step1, question=question) 34 | context2 = contextvars.copy_context() 35 | future2 = executor.submit(context2.run, self.step2, question=question + "?") 36 | 37 | answer1 = future1.result() 38 | answer2 = future2.result() 39 | 40 | return dspy.Prediction(answer=f"{answer1}\n\n{answer2}") 41 | 42 | 43 | qa = EnsembleQA() 44 | response = qa("Who are you?") 45 | print(response.answer) 46 | -------------------------------------------------------------------------------- /cookbook/endpoints_for_datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | 5 | from parea import Parea 6 | from parea.schemas import TestCase, TestCaseCollection, UpdateTestCase 7 | 8 | load_dotenv() 9 | 10 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 11 | 12 | 13 | data = [{"problem": "1+2", "target": 3, "tags": ["easy"]}, {"problem": "Solve the differential equation dy/dx = 3y.", "target": "y = c * e^(3x)", "tags": ["hard"]}] 14 | 15 | # this will create a new dataset on Parea named "math_problems_v4". 
16 | # The dataset will have one column named "problem", and two columns using the reserved names "target" and "tags". 17 | # When using this dataset, the expected prompt template should have a placeholder for the variable "problem". 18 | p.create_test_collection(data, name="math_problems_v4") 19 | 20 | new_data = [{"problem": "Evaluate the integral ∫x^2 dx from 0 to 3.", "target": 9, "tags": ["hard"]}] 21 | # this will add the new test cases to the existing "math_problems_v4" dataset. 22 | # New test cases must have the same columns as the existing dataset. 23 | p.add_test_cases(new_data, name="math_problems_v4") 24 | # Or you can use the dataset ID instead of the name 25 | # p.add_test_cases(new_data, dataset_id=121) 26 | 27 | 28 | def update_test_case_example(): 29 | dataset: TestCaseCollection = p.get_collection("math_problems_v4") 30 | test_cases: dict[int, TestCase] = dataset.test_cases 31 | for test_case_id, test_case in test_cases.items(): 32 | if "easy" in test_case.tags: 33 | # updated inputs must match the same k/v pair as original test case 34 | p.update_test_case( 35 | dataset_id=dataset.id, 36 | test_case_id=test_case_id, 37 | update_request=UpdateTestCase(inputs={"problem": "Evaluate the integral ∫x^6 dx from 0 to 9."}, target="((1/7)x^7)+C", tags=["hard"]), 38 | ) 39 | break 40 | 41 | 42 | if __name__ == "__main__": 43 | update_test_case_example() 44 | -------------------------------------------------------------------------------- /cookbook/evals_and_experiments/async_experiments.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import uuid 4 | 5 | from dotenv import load_dotenv 6 | 7 | from parea import Parea, trace 8 | from parea.schemas import Completion, LLMInputs, Log, Message, ModelParams, Role 9 | 10 | load_dotenv() 11 | 12 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 13 | 14 | 15 | DATA = [{"topic": "Python"}, {"topic": "Javascript"}, {"topic": "Water"}, {"topic": "Fire"}] 16 | models = ["gpt-4-turbo", "claude-3-haiku-20240307"] 17 | 18 | 19 | def eval_func(log: Log) -> float: 20 | from random import random 21 | 22 | return random() 23 | 24 | 25 | def model_call_factory(model: str): 26 | @trace(eval_funcs=[eval_func]) 27 | def func(topic: str) -> str: 28 | return p.completion( 29 | data=Completion( 30 | llm_configuration=LLMInputs( 31 | model=model, 32 | model_params=ModelParams(temp=1), 33 | messages=[Message(role=Role.user, content=f"Write a short haiku about {topic}")], 34 | ) 35 | ) 36 | ).content 37 | 38 | return func 39 | 40 | 41 | async def main(): 42 | await asyncio.gather( 43 | *[p.experiment(name="Write-Haikus", data=DATA, func=model_call_factory(model), n_trials=4).arun(run_name=f"{model}-{str(uuid.uuid4())[:4]}") for model in models] 44 | ) 45 | 46 | 47 | if __name__ == "__main__": 48 | asyncio.run(main()) 49 | -------------------------------------------------------------------------------- /cookbook/evals_and_experiments/deployed_prompt_and_dataset.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | from dotenv import load_dotenv 5 | 6 | from parea import Parea, trace 7 | from parea.evals import call_openai 8 | from parea.schemas import Completion 9 | from parea.schemas.log import EvaluationResult, Log 10 | 11 | load_dotenv() 12 | 13 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 14 | 15 | 16 | def eval_fun(log: Log) -> EvaluationResult: 17 | # access the output and target from the log 18 | # output, target =
log.output, log.target 19 | response: str = call_openai( 20 | model="gpt-4o", 21 | messages=[{"role": "system", "content": "Use JSON. provide a score and reason."}], # <- CHANGE THIS 22 | response_format={"type": "json_object"}, 23 | temperature=0.0, 24 | ) 25 | response_dict = json.loads(response) 26 | return EvaluationResult(name="YOUR_EVAL_NAME", score=response_dict["score"], reason=response_dict["reason"]) 27 | 28 | 29 | @trace(eval_funcs=[eval_fun]) 30 | def deployed_prompt(prompt_template_input: str) -> str: 31 | return p.completion(Completion(deployment_id="YOUR_DEPLOYED_PROMPT_ID", llm_inputs={"prompt_template_input_name": prompt_template_input})).content 32 | 33 | 34 | if __name__ == "__main__": 35 | p.experiment( 36 | name="some_experiment_name", 37 | data=172, # dataset Id from Parea, can also use dataset name if unique 38 | func=deployed_prompt, 39 | ).run() 40 | -------------------------------------------------------------------------------- /cookbook/evals_and_experiments/deployed_prompt_dataset_and_eval.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | 5 | from parea import Parea, trace 6 | from parea.schemas import Completion 7 | 8 | load_dotenv() 9 | 10 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 11 | 12 | 13 | @trace(eval_funcs_names=["YOUR_EVAL_NAME"]) 14 | def deployed_prompt(prompt_template_input: str) -> str: 15 | return p.completion(Completion(deployment_id="YOUR_DEPLOYED_PROMPT_ID", llm_inputs={"prompt_template_input_name": prompt_template_input})).content 16 | 17 | 18 | if __name__ == "__main__": 19 | p.experiment( 20 | name="some_experiment_name", 21 | data=172, # dataset Id from Parea, can also use dataset name if unique 22 | func=deployed_prompt, 23 | ).run() 24 | -------------------------------------------------------------------------------- /cookbook/evals_and_experiments/experiment_test_substeps.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | import json 4 | import os 5 | 6 | from dotenv import load_dotenv 7 | 8 | from parea import Parea, trace 9 | from parea.evals.general.levenshtein import levenshtein_distance 10 | from parea.schemas import Log 11 | 12 | load_dotenv() 13 | 14 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 15 | 16 | 17 | # evaluation function for the substep 18 | def eval_choose_greeting(log: Log) -> Union[float, None]: 19 | if not (target := log.target): 20 | return None 21 | 22 | target_substep = json.loads(target)["substep"] # log.target is a string 23 | output = log.output 24 | return levenshtein_distance(target_substep, output) 25 | 26 | 27 | # sub-step 28 | @trace(eval_funcs=[eval_choose_greeting]) 29 | def choose_greeting(name: str) -> str: 30 | return "Hello" 31 | 32 | 33 | # end-to-end evaluation function 34 | def eval_greet(log: Log) -> Union[float, None]: 35 | if not (target := log.target): 36 | return None 37 | 38 | target_overall = json.loads(target)["overall"] 39 | output = log.output 40 | return levenshtein_distance(target_overall, output) 41 | 42 | 43 | @trace(eval_funcs=[eval_greet]) 44 | def greet(name: str) -> str: 45 | greeting = choose_greeting(name) 46 | return f"{greeting} {name}" 47 | 48 | 49 | data = [ 50 | { 51 | "name": "Foo", 52 | "target": { 53 | "overall": "Hi Foo", 54 | "substep": "Hi", 55 | }, 56 | }, 57 | { 58 | "name": "Bar", 59 | "target": { 60 | "overall": "Hello Bar", 61 | "substep": "Hello", 62 | }, 63 | }, 64 | ] 65 | 66 | 67 | if 
__name__ == "__main__": 68 | p.experiment( 69 | name="greeting", 70 | data=data, 71 | func=greet, 72 | ).run(prefix="substep") 73 | -------------------------------------------------------------------------------- /cookbook/evals_and_experiments/list_experiments.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | 5 | from parea import Parea 6 | from parea.schemas import ListExperimentUUIDsFilters 7 | 8 | load_dotenv() 9 | 10 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 11 | 12 | experiments = p.list_experiments(ListExperimentUUIDsFilters(experiment_name_filter="greeting")) 13 | print(f"Num. experiments: {len(experiments)}") 14 | trace_logs = p.get_experiment_trace_logs(experiments[0].uuid) 15 | print(f"Num. trace logs: {len(trace_logs)}") 16 | print(f"Trace log: {trace_logs[0]}") 17 | -------------------------------------------------------------------------------- /cookbook/evals_and_experiments/modify_dataset_before_experiment.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | from openai import OpenAI 5 | 6 | from parea import Parea, trace 7 | from parea.evals.rag import context_query_relevancy_factory 8 | from parea.schemas import TestCase 9 | 10 | load_dotenv() 11 | 12 | client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) 13 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 14 | p.wrap_openai_client(client) 15 | 16 | context_query_relevancy = context_query_relevancy_factory(model="gpt-4o", context_fields=["context"]) 17 | 18 | 19 | @trace(eval_funcs=[context_query_relevancy]) 20 | def run_experiment(question: str, context: str) -> str: 21 | return ( 22 | client.chat.completions.create( 23 | model="gpt-4o", 24 | temperature=0, 25 | messages=[{"role": "user", "content": f"Answer question using context. Context: {context}. Question: {question}"}], 26 | ) 27 | .choices[0] 28 | .message.content 29 | ) 30 | 31 | 32 | # You can fetch a dataset directly and then modify it to meet our needs before passing it to p.experiment. 33 | def rename_information_to_context(num_samples: int = 3): 34 | dataset = p.get_collection("Example_Dataset_Name") 35 | if dataset: 36 | testcases: list[TestCase] = list(dataset.test_cases.values()) 37 | # Assume dataset looks like this: 38 | # [ 39 | # inputs={"information": "Some long document", "question": "What is X?"}, target="X is Y" ... 
40 | # ] 41 | return [{"context": case.inputs["information"], "question": case.inputs["question"], "target": case.target} for case in testcases[:num_samples]] 42 | return [] 43 | 44 | 45 | def main(): 46 | data = rename_information_to_context() 47 | experiment = p.experiment("My_Experiment_Name", func=run_experiment, data=data) 48 | experiment.run() 49 | 50 | 51 | if __name__ == "__main__": 52 | main() 53 | -------------------------------------------------------------------------------- /cookbook/evals_and_experiments/route_llm_experiment.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | from dotenv import load_dotenv 5 | from routellm.controller import Controller 6 | 7 | from parea import Parea, trace, trace_insert 8 | from parea.schemas import Completion, EvaluationResult, LLMInputs, Log, Message, ModelParams, Role 9 | 10 | load_dotenv() 11 | 12 | os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY") 13 | os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY") 14 | 15 | ROUTER = "mf" 16 | COST_THRESHOLD = 0.11593 17 | # This tells RouteLLM to use the MF router with a cost threshold of 0.11593 18 | RMODEL = f"router-{ROUTER}-{COST_THRESHOLD}" 19 | STRONG_MODEL = "gpt-4o" 20 | WEAK_MODEL = "groq/llama3-70b-8192" 21 | client = Controller( 22 | routers=[ROUTER], 23 | strong_model=STRONG_MODEL, 24 | weak_model=WEAK_MODEL, 25 | ) 26 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 27 | p.wrap_openai_client(client) 28 | 29 | questions = [ 30 | {"question": "Write a function that takes a string as input and returns the string reversed."}, 31 | {"question": "Write a haiku about a sunset."}, 32 | {"question": "Write a cold email to a VP of Eng selling them on OpenAI's API."}, 33 | {"question": "What's the largest city in Germany?"}, 34 | ] 35 | 36 | 37 | def llm_judge(log: Log) -> EvaluationResult: 38 | try: 39 | response = p.completion( 40 | data=Completion( 41 | llm_configuration=LLMInputs( 42 | model="gpt-4o-mini", 43 | messages=[ 44 | Message( 45 | role=Role.user, 46 | content=f"""[Instruction]\nPlease act as an impartial judge and evaluate the quality and 47 | correctness of the response provided. Be as objective as possible. Respond in JSON with two fields: \n 48 | \t 1. score: int = a number from a scale of 0 to 5; 5 being great and 0 being bad.\n 49 | \t 2. 
reason: str = explain your reasoning for the selected score.\n\n 50 | This is the question asked, QUESTION:\n{log.inputs['question']}\n 51 | This is the response you are judging, RESPONSE:\n{log.output}\n\n""", 52 | ) 53 | ], 54 | model_params=ModelParams(response_format={"type": "json_object"}), 55 | ), 56 | ) 57 | ) 58 | r = json.loads(response.content) 59 | return EvaluationResult(name="LLMJudge", score=int(r["score"]) / 5, reason=r["reason"]) 60 | except Exception as e: 61 | return EvaluationResult(name="error-LLMJudge", score=0, reason=f"Error in grading: {e}") 62 | 63 | 64 | @trace(eval_funcs=[llm_judge]) 65 | def answer_llm(question: str) -> str: 66 | r = client.chat.completions.create( 67 | model=RMODEL, 68 | messages=[{"role": "user", "content": f"Answer this question: {question}\n"}], 69 | ) 70 | trace_insert({"metadata": {"selected_model": r.model}}) 71 | return r.choices[0].message.content 72 | 73 | 74 | if __name__ == "__main__": 75 | p.experiment( 76 | name="RouteLLM", 77 | data=questions, 78 | func=answer_llm, 79 | metadata={ 80 | "router": ROUTER, 81 | "cost_threshold": str(COST_THRESHOLD), 82 | "strong_model": STRONG_MODEL, 83 | "weak_model": WEAK_MODEL, 84 | }, 85 | ).run() 86 | -------------------------------------------------------------------------------- /cookbook/evals_and_experiments/run_experiment.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | 5 | from parea import Parea, trace 6 | from parea.evals.general import levenshtein 7 | 8 | load_dotenv() 9 | 10 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 11 | 12 | 13 | # annotate the function with the trace decorator and pass the evaluation function(s) 14 | @trace(eval_funcs=[levenshtein]) 15 | def greeting(name: str) -> str: 16 | return f"Hello {name}" 17 | 18 | 19 | data = [ 20 | { 21 | "name": "Foo", 22 | "target": "Hi Foo", 23 | }, 24 | { 25 | "name": "Bar", 26 | "target": "Hello Bar", 27 | }, 28 | ] # test data to run the experiment on (list of dicts) 29 | 30 | 31 | # # Define the experiment 32 | # # You can use the CLI command "parea experiment parea/cookbook/run_experiment.py" to execute this experiment 33 | # # or call `.run()` 34 | # # p.experiment( 35 | # # data=data, # Data to run the experiment on (list of dicts) 36 | # # func=greeting, # Function to run (callable) 37 | # # n_trials=1, # Number of times to run the experiment on the same data 38 | # # ) 39 | 40 | # You can optionally run the experiment manually by calling `.run()` 41 | if __name__ == "__main__": 42 | p.experiment( 43 | name="greeting", 44 | data=data, 45 | func=greeting, 46 | n_trials=3, 47 | ).run() 48 | -------------------------------------------------------------------------------- /cookbook/evals_and_experiments/run_experiment_agreement_among_evals.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import os 4 | import random 5 | 6 | from dotenv import load_dotenv 7 | 8 | from parea import Parea, trace 9 | from parea.schemas import EvaluatedLog, EvaluationResult, Log 10 | 11 | load_dotenv() 12 | 13 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 14 | 15 | 16 | def random_eval_factory(trial: int): 17 | def random_eval(log: Log) -> EvaluationResult: 18 | return EvaluationResult(score=1 if random.random() < 0.5 else 0, name=f"random_eval_{trial}") 19 | 20 | return random_eval 21 | 22 | 23 | # apply random evaluation function twice 24 | @trace(eval_funcs=[random_eval_factory(1),
random_eval_factory(2)]) 25 | async def starts_with_f(name: str) -> str: 26 | if name == "Foo": 27 | return "1" 28 | return "0" 29 | 30 | 31 | # dataset-level evaluation function which checks if both random evaluations agree 32 | def percent_evals_agree(logs: List[EvaluatedLog]) -> float: 33 | correct = 0 34 | total = 0 35 | for log in logs: 36 | if log.scores[0].score == log.scores[1].score: 37 | correct += 1 38 | total += 1 39 | return correct / total 40 | 41 | 42 | data = [ 43 | { 44 | "name": "Foo", 45 | "target": "1", 46 | }, 47 | { 48 | "name": "Bar", 49 | "target": "0", 50 | }, 51 | { 52 | "name": "Far", 53 | "target": "1", 54 | }, 55 | ] # test data to run the experiment on (list of dicts) 56 | 57 | 58 | # You can optionally run the experiment manually by calling `.run()` 59 | if __name__ == "__main__": 60 | p.experiment(name="Greeting", data=data, func=starts_with_f, dataset_level_evals=[percent_evals_agree]).run() 61 | -------------------------------------------------------------------------------- /cookbook/evals_and_experiments/run_experiment_balanced_acc.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import asyncio 4 | import os 5 | from collections import defaultdict 6 | 7 | from dotenv import load_dotenv 8 | 9 | from parea import Parea, trace 10 | from parea.schemas import EvaluatedLog, Log 11 | 12 | load_dotenv() 13 | 14 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 15 | 16 | 17 | def is_correct(log: Log) -> bool: 18 | return log.target == log.output 19 | 20 | 21 | def balanced_acc_is_correct(logs: List[EvaluatedLog]) -> float: 22 | score_name = is_correct.__name__ 23 | 24 | correct = defaultdict(int) 25 | total = defaultdict(int) 26 | for log in logs: 27 | if (eval_result := log.get_score(score_name)) is not None: 28 | correct[log.target] += int(eval_result.score) 29 | total[log.target] += 1 30 | recalls = [correct[key] / total[key] for key in correct] 31 | 32 | return sum(recalls) / len(recalls) 33 | 34 | 35 | # or use the pre-built `balanced_acc_factory` to create the function 36 | # from parea.evals.dataset_level import balanced_acc_factory 37 | # 38 | # 39 | # balanced_acc_is_correct = balanced_acc_factory(is_correct.__name__) 40 | 41 | 42 | @trace(eval_funcs=[is_correct]) 43 | async def starts_with_f(name: str) -> str: 44 | await asyncio.sleep(1) 45 | if name == "Foo": 46 | return "1" 47 | return "0" 48 | 49 | 50 | data = [ 51 | { 52 | "name": "Foo", 53 | "target": "1", 54 | }, 55 | { 56 | "name": "Bar", 57 | "target": "0", 58 | }, 59 | { 60 | "name": "Far", 61 | "target": "1", 62 | }, 63 | ] # test data to run the experiment on (list of dicts) 64 | 65 | 66 | # You can optionally run the experiment manually by calling `.run()` 67 | if __name__ == "__main__": 68 | p.experiment(name="Greeting", data=data, func=starts_with_f, dataset_level_evals=[balanced_acc_is_correct], n_workers=2).run() 69 | -------------------------------------------------------------------------------- /cookbook/evals_and_experiments/run_experiment_evas_with_reason.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | 5 | from parea import Parea, trace 6 | from parea.schemas.log import EvaluationResult, Log 7 | 8 | load_dotenv() 9 | 10 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 11 | 12 | 13 | def eval_func_with_reason(log: Log) -> EvaluationResult: 14 | if log.output == log.target: 15 | return EvaluationResult(name="matches_target", 
score=1.0, reason="Output matches target") 16 | elif "Hello" in log.target and "Hello" not in log.output: 17 | return EvaluationResult(name="matches_target", score=0, reason="Output misses 'Hello'") 18 | elif "Hi" in log.target and "Hi" not in log.output: 19 | return EvaluationResult(name="matches_target", score=0, reason="Output misses 'Hi'") 20 | else: 21 | return EvaluationResult(name="matches_target", score=0, reason="Output does not match target") 22 | 23 | 24 | # annotate the function with the trace decorator and pass the evaluation function(s) 25 | @trace(eval_funcs=[eval_func_with_reason]) 26 | def greeting(name: str) -> str: 27 | return f"Hello {name}" 28 | 29 | 30 | data = [ 31 | { 32 | "name": "Foo", 33 | "target": "Hi Foo", 34 | }, 35 | { 36 | "name": "Bar", 37 | "target": "Hello Bar", 38 | }, 39 | ] # test data to run the experiment on (list of dicts) 40 | 41 | 42 | # You can optionally run the experiment manually by calling `.run()` 43 | if __name__ == "__main__": 44 | p.experiment( 45 | name="greeting", 46 | data=data, 47 | func=greeting, 48 | n_trials=1, 49 | ).run() 50 | -------------------------------------------------------------------------------- /cookbook/evals_and_experiments/run_experiment_using_saved_test_collection.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | 5 | from parea import Parea, trace 6 | from parea.schemas import Completion, LLMInputs, Log, Message, ModelParams, Role 7 | 8 | load_dotenv() 9 | 10 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 11 | 12 | 13 | def eval_func(log: Log) -> float: 14 | from random import random 15 | from time import sleep 16 | 17 | sleep(random() * 10) 18 | return random() 19 | 20 | 21 | # annotate the function with the trace decorator and pass the evaluation function(s) 22 | @trace(eval_funcs=[eval_func]) 23 | def func(lang: str, framework: str) -> str: 24 | return p.completion( 25 | data=Completion( 26 | llm_configuration=LLMInputs( 27 | model="gpt-3.5-turbo", 28 | model_params=ModelParams(temp=1), 29 | messages=[ 30 | Message(role=Role.user, content=f"Write a hello world program in {lang} using {framework}"), 31 | ], 32 | ) 33 | ) 34 | ).content 35 | 36 | 37 | if __name__ == "__main__": 38 | p.experiment( 39 | name="Hello World Example", # this is the name of the experiment 40 | data="Hello World Example", # this is the name of your Dataset in Parea (Dataset page) 41 | func=func, 42 | ).run() 43 | 44 | # Or use a dataset using its ID instead of the name 45 | # p.experiment( 46 | # data=121, # this is the id of your Dataset in Parea (Dataset page) 47 | # func=func, 48 | # ).run(name="hello-world-example") 49 | -------------------------------------------------------------------------------- /cookbook/fetch_logs.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | 5 | from parea import Parea 6 | from parea.schemas import FilterOperator, QueryParams 7 | 8 | load_dotenv() 9 | 10 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 11 | 12 | paginated_resp = p.get_trace_logs(QueryParams(project_name="default", filter_field="trace_name", filter_operator=FilterOperator.LIKE, filter_value="llm")) 13 | print(f"Num. 
LLM logs fetched: {len(paginated_resp.results)} | total LLM logs: {paginated_resp.total}") 14 | -------------------------------------------------------------------------------- /cookbook/finetuning/download_as_jsonl.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | 3 | import os 4 | 5 | from dotenv import load_dotenv 6 | 7 | from parea import Parea 8 | from parea.schemas import FilterOperator, QueryParams 9 | 10 | load_dotenv() 11 | 12 | 13 | project_name = "default" 14 | p = Parea(api_key=os.getenv("PAREA_API_KEY"), project_name=project_name) 15 | 16 | 17 | def fetch_trace_logs_as_jsonl() -> List[Dict]: 18 | page_size = 100 19 | query_params = QueryParams( 20 | project_name=project_name, 21 | filter_field="trace_name", 22 | filter_value="personalize_email_german", 23 | filter_operator=FilterOperator.EQUALS, 24 | page_size=page_size, 25 | status="success", 26 | ) 27 | initial_fetch = p.get_trace_logs(query_params) 28 | fetched_trace_logs = initial_fetch.results 29 | for page in range(1, initial_fetch.total_pages): 30 | query_params.page = page 31 | fetched_trace_logs.extend(p.get_trace_logs(query_params).results) 32 | return [trace_log.convert_to_jsonl_row_for_finetuning() for trace_log in fetched_trace_logs] 33 | 34 | 35 | jsonl_rows = fetch_trace_logs_as_jsonl() 36 | -------------------------------------------------------------------------------- /cookbook/guidance/tracing_guidance.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | from guidance import assistant, gen, models, user 5 | 6 | from parea import Parea, trace 7 | 8 | load_dotenv() 9 | 10 | p = Parea(api_key=os.getenv("PAREA_API_KEY"), project_name="testing") 11 | p.auto_trace_openai_clients() 12 | 13 | 14 | gpt = models.OpenAI("gpt-3.5-turbo") 15 | 16 | 17 | @trace 18 | def guidance_program(): 19 | 20 | with user(): 21 | lm = gpt + "What is the capital of Italy?" 22 | 23 | with assistant(): 24 | out = gen("capital") 25 | lm += out 26 | 27 | with user(): 28 | lm += "What is one short surprising fact about it?" 29 | 30 | with assistant(): 31 | lm += gen("fact") 32 | 33 | print(lm) 34 | 35 | 36 | guidance_program() 37 | -------------------------------------------------------------------------------- /cookbook/instructor/instructor_blog_example_simple.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | 4 | import instructor 5 | import requests 6 | from dotenv import load_dotenv 7 | from openai import OpenAI 8 | from pydantic import BaseModel, Field, field_validator 9 | 10 | from parea import Parea 11 | 12 | load_dotenv() 13 | 14 | client = OpenAI() 15 | 16 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 17 | p.wrap_openai_client(client, "instructor") 18 | 19 | client = instructor.from_openai(client) 20 | 21 | 22 | class Email(BaseModel): 23 | subject: str 24 | body: str = Field( 25 | ..., 26 | description="Email body, Should contain links to instructor documentation. ", 27 | ) 28 | 29 | @field_validator("body") 30 | def check_urls(cls, v): 31 | urls = re.findall(r"https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+", v) 32 | errors = [] 33 | for url in urls: 34 | if not url.startswith("https://python.useinstructor.com"): 35 | errors.append(f"URL {url} is not from useinstructor.com, Only include URLs that include use instructor.com. 
") 36 | response = requests.get(url) 37 | if response.status_code != 200: 38 | errors.append(f"URL {url} returned status code {response.status_code}. Only include valid URLs that exist.") 39 | elif "404" in response.text: 40 | errors.append(f"URL {url} contained '404' in the body. Only include valid URLs that exist.") 41 | if errors: 42 | raise ValueError("\n".join(errors)) 43 | return v 44 | 45 | 46 | def main(): 47 | email = client.messages.create( 48 | model="gpt-3.5-turbo", 49 | max_tokens=1024, 50 | max_retries=3, 51 | messages=[ 52 | { 53 | "role": "user", 54 | "content": "I'm responding to a student's question. Here is the link to the documentation: {{doc_link1}} and {{doc_link2}}", 55 | } 56 | ], 57 | template_inputs={ 58 | "doc_link1": "https://python.useinstructor.com/docs/tutorial/tutorial-1", 59 | "doc_link2": "https://jxnl.github.io/docs/tutorial/tutorial-2", 60 | }, 61 | response_model=Email, 62 | ) 63 | print(email) 64 | 65 | 66 | if __name__ == "__main__": 67 | main() 68 | -------------------------------------------------------------------------------- /cookbook/instructor/instructor_blog_example_validation_context.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | 3 | import os 4 | import re 5 | 6 | import instructor 7 | import requests 8 | from dotenv import load_dotenv 9 | from openai import OpenAI 10 | from pydantic import AfterValidator, BaseModel, ValidationInfo 11 | 12 | from parea import Parea 13 | 14 | load_dotenv() 15 | 16 | client = OpenAI() 17 | 18 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 19 | p.wrap_openai_client(client, "instructor") 20 | 21 | client = instructor.from_openai(client) 22 | 23 | 24 | def check_urls(v, info: ValidationInfo): 25 | urls = re.findall(r"https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+", v) 26 | domain = info.context.get("domain") if info and info.context else None 27 | errors = [] 28 | for url in urls: 29 | if domain and not url.startswith(domain): 30 | errors.append(f"URL {url} is not from useinstructor.com, Only include URLs that include use instructor.com. ") 31 | response = requests.get(url) 32 | if response.status_code != 200: 33 | errors.append(f"URL {url} returned status code {response.status_code}. Only include valid URLs that exist.") 34 | elif "404" in response.text: 35 | errors.append(f"URL {url} contained '404' in the body. Only include valid URLs that exist.") 36 | if errors: 37 | raise ValueError("\n".join(errors)) 38 | return v 39 | 40 | 41 | Body = Annotated[str, AfterValidator(check_urls)] 42 | 43 | 44 | class Email(BaseModel): 45 | subject: str 46 | body: Body 47 | 48 | 49 | def main(): 50 | email = client.messages.create( 51 | model="gpt-3.5-turbo", 52 | max_tokens=1024, 53 | max_retries=3, 54 | messages=[ 55 | { 56 | "role": "user", 57 | "content": "I'm responding to a student's question. 
Here is the link to the documentation: {{doc_link1}} and {{doc_link2}}", 58 | } 59 | ], 60 | template_inputs={ 61 | "doc_link1": "https://python.useinstructor.com/docs/tutorial/tutorial-1", 62 | "doc_link2": "https://jxnl.github.io/docs/tutorial/tutorial-2", 63 | }, 64 | response_model=Email, 65 | validation_context={"domain": "https://python.useinstructor.com"}, 66 | ) 67 | print(email) 68 | 69 | 70 | if __name__ == "__main__": 71 | main() 72 | -------------------------------------------------------------------------------- /cookbook/instructor/instructor_evals.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import instructor 4 | from dotenv import load_dotenv 5 | from openai import OpenAI 6 | from pydantic import BaseModel, field_validator 7 | 8 | from parea import Parea 9 | 10 | load_dotenv() 11 | 12 | client = OpenAI() 13 | 14 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 15 | p.wrap_openai_client(client, "instructor") 16 | 17 | client = instructor.from_openai(client) 18 | 19 | 20 | class User(BaseModel): 21 | name: str 22 | age: int 23 | 24 | @field_validator("name") 25 | def name_is_uppercase(cls, v: str): 26 | assert v.isupper(), "Name must be uppercase" 27 | return v 28 | 29 | 30 | resp = client.messages.create( 31 | model="gpt-3.5-turbo", 32 | max_tokens=1024, 33 | max_retries=3, 34 | messages=[ 35 | { 36 | "role": "user", 37 | "content": "Extract {{name}} is {{age}} years old.", 38 | } 39 | ], 40 | template_inputs={ 41 | "name": "Bobby", 42 | "age": 18, 43 | }, 44 | response_model=User, 45 | ) 46 | 47 | assert isinstance(resp, User) 48 | assert resp.name == "BOBBY" # due to validation 49 | assert resp.age == 18 50 | print(resp) 51 | -------------------------------------------------------------------------------- /cookbook/instructor/instructor_streaming.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import anthropic 4 | import instructor 5 | from dotenv import load_dotenv 6 | from openai import AsyncOpenAI 7 | from pydantic import BaseModel 8 | 9 | from parea import Parea, trace 10 | 11 | load_dotenv() 12 | 13 | oai_aclient = AsyncOpenAI() 14 | ant_client = anthropic.AsyncClient() 15 | 16 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 17 | 18 | p.wrap_openai_client(oai_aclient, "instructor") 19 | p.wrap_anthropic_client(ant_client) 20 | 21 | oai_aclient = instructor.from_openai(oai_aclient) 22 | ant_client = instructor.from_anthropic(ant_client) 23 | 24 | 25 | class UserDetail(BaseModel): 26 | name: str 27 | age: str 28 | 29 | 30 | @trace 31 | async def ainner_main(): 32 | user = oai_aclient.completions.create_partial( 33 | model="gpt-4o-mini", 34 | max_tokens=1024, 35 | max_retries=3, 36 | messages=[ 37 | { 38 | "role": "user", 39 | "content": "Please create a user", 40 | } 41 | ], 42 | response_model=UserDetail, 43 | ) 44 | return user 45 | 46 | 47 | async def amain(): 48 | resp = await ainner_main() 49 | async for u in resp: 50 | print(u) 51 | 52 | 53 | @trace 54 | def inner_main(): 55 | user = ant_client.completions.create_partial( 56 | model="claude-3-5-sonnet-20240620", 57 | max_tokens=1024, 58 | max_retries=3, 59 | messages=[ 60 | { 61 | "role": "user", 62 | "content": "Please create a user", 63 | } 64 | ], 65 | response_model=UserDetail, 66 | ) 67 | return user 68 | 69 | 70 | def main(): 71 | resp = inner_main() 72 | for u in resp: 73 | print(u) 74 | 75 | 76 | if __name__ == "__main__": 77 | import asyncio 78 | 79 | asyncio.run(amain()) 80 | 81 | 
main() 82 | -------------------------------------------------------------------------------- /cookbook/langchain/trace_class_call_method.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | from langchain_core.output_parsers import StrOutputParser 5 | from langchain_core.prompts import ChatPromptTemplate 6 | from langchain_openai import ChatOpenAI 7 | 8 | from parea import Parea, trace 9 | from parea.utils.trace_integrations.langchain import PareaAILangchainTracer 10 | 11 | load_dotenv() 12 | 13 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 14 | 15 | 16 | class LangChainModule: 17 | handler = PareaAILangchainTracer() 18 | 19 | def __init__(self): 20 | self.llm = ChatOpenAI(openai_api_key=os.getenv("OPENAI_API_KEY")) 21 | 22 | def get_chain(self): 23 | prompt = ChatPromptTemplate.from_messages([("user", "{input}")]) 24 | chain = prompt | self.llm | StrOutputParser() 25 | return chain 26 | 27 | @trace(name="langchain_caller_call") 28 | def __call__(self, query: str) -> str: 29 | chain = self.get_chain() 30 | return chain.invoke({"input": query}, config={"callbacks": [self.handler]}) 31 | 32 | 33 | class LLMCaller: 34 | def __init__(self, query: str): 35 | self.client = LangChainModule() 36 | self.query = query 37 | 38 | @trace(name="llm_caller_call") 39 | def __call__(self) -> str: 40 | return self.client(query=self.query) 41 | 42 | 43 | @trace 44 | def main(query: str) -> str: 45 | caller = LLMCaller(query=query) 46 | return caller() 47 | 48 | 49 | if __name__ == "__main__": 50 | result = main("Write a Hello World program in Python using FastAPI.") 51 | print(result) 52 | -------------------------------------------------------------------------------- /cookbook/langchain/trace_langchain_anthropic_function_calling.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | from langchain.chains import create_extraction_chain 5 | from langchain.schema import HumanMessage 6 | from langchain_experimental.llms.anthropic_functions import AnthropicFunctions 7 | 8 | from parea import Parea 9 | from parea.utils.trace_integrations.langchain import PareaAILangchainTracer 10 | 11 | load_dotenv() 12 | 13 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 14 | 15 | model = AnthropicFunctions(model="claude-2") 16 | 17 | functions = [ 18 | { 19 | "name": "get_current_weather", 20 | "description": "Get the current weather in a given location", 21 | "parameters": { 22 | "type": "object", 23 | "properties": { 24 | "location": { 25 | "type": "string", 26 | "description": "The city and state, e.g. San Francisco, CA", 27 | }, 28 | "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, 29 | }, 30 | "required": ["location"], 31 | }, 32 | } 33 | ] 34 | 35 | 36 | schema = { 37 | "properties": { 38 | "name": {"type": "string"}, 39 | "height": {"type": "integer"}, 40 | "hair_color": {"type": "string"}, 41 | }, 42 | "required": ["name", "height"], 43 | } 44 | inp = """Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. 
Claudia is a brunette and Alex 45 | is blonde.""" 46 | 47 | chain = create_extraction_chain(schema, model) 48 | 49 | 50 | def main(): 51 | response = model.predict_messages([HumanMessage(content="whats the weater in boston?")], functions=functions, callbacks=[PareaAILangchainTracer()]) 52 | print(response) 53 | result = chain.run(inp, callbacks=[PareaAILangchainTracer()]) 54 | print(result) 55 | 56 | 57 | if __name__ == "__main__": 58 | main() 59 | -------------------------------------------------------------------------------- /cookbook/langchain/trace_langchain_bedrock_rag.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import boto3 4 | from dotenv import load_dotenv 5 | from langchain.document_loaders import TextLoader 6 | from langchain.llms.bedrock import Bedrock 7 | from langchain.output_parsers import XMLOutputParser 8 | from langchain.prompts import PromptTemplate 9 | from langchain.schema.output_parser import StrOutputParser 10 | from langchain.text_splitter import RecursiveCharacterTextSplitter 11 | 12 | from parea import Parea 13 | from parea.utils.trace_integrations.langchain import PareaAILangchainTracer 14 | 15 | load_dotenv() 16 | 17 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 18 | handler = PareaAILangchainTracer() 19 | 20 | 21 | def get_docs(): 22 | loader = TextLoader("../assets/data/2022-letter.txt") 23 | letter = loader.load() 24 | text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n"], chunk_size=4000, chunk_overlap=100) 25 | return text_splitter.split_documents(letter) 26 | 27 | 28 | xml_parser = XMLOutputParser(tags=["insight"]) 29 | str_parser = StrOutputParser() 30 | 31 | insight_prompt = PromptTemplate( 32 | template=""" 33 | 34 | Human: 35 | {instructions} : \"{document}\" 36 | Format help: {format_instructions}. 37 | Assistant:""", 38 | input_variables=["instructions", "document"], 39 | partial_variables={"format_instructions": xml_parser.get_format_instructions()}, 40 | ) 41 | 42 | summary_prompt = PromptTemplate( 43 | template=""" 44 | 45 | Human: 46 | {instructions} : \"{document}\" 47 | Assistant:""", 48 | input_variables=["instructions", "document"], 49 | ) 50 | 51 | docs = get_docs() 52 | bedrock_client = boto3.client("bedrock-runtime", region_name="us-east-1") 53 | bedrock_llm = Bedrock( 54 | client=bedrock_client, 55 | model_id="amazon.titan-text-express-v1", 56 | model_kwargs={"maxTokenCount": 4096, "stopSequences": [], "temperature": 0, "topP": 1}, 57 | ) 58 | 59 | insight_chain = insight_prompt | bedrock_llm | StrOutputParser() 60 | summary_chain = summary_prompt | bedrock_llm | StrOutputParser() 61 | 62 | 63 | def get_insights(docs): 64 | insights = [] 65 | for i in range(len(docs)): 66 | insight = insight_chain.invoke( 67 | {"instructions": "Provide Key insights from the following text", "document": {docs[i].page_content}}, config={"callbacks": [PareaAILangchainTracer()]} 68 | ) 69 | insights.append(insight) 70 | return insights 71 | 72 | 73 | def main(): 74 | print("Starting") 75 | insights = get_insights(docs) 76 | print(insights) 77 | summary = summary_chain.invoke( 78 | { 79 | "instructions": "You will be provided with multiple sets of insights. Compile and summarize these " 80 | "insights and provide key takeaways in one concise paragraph. Do not use the original xml " 81 | "tags. 
Just provide a paragraph with your compiled insights.", 82 | "document": {"\n".join(insights)}, 83 | }, 84 | config={"callbacks": [PareaAILangchainTracer()]}, 85 | ) 86 | print(summary) 87 | print("Done") 88 | 89 | 90 | if __name__ == "__main__": 91 | main() 92 | -------------------------------------------------------------------------------- /cookbook/langchain/trace_langchain_inside_trace_decorator.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | from langchain_core.output_parsers import StrOutputParser 5 | from langchain_core.prompts import ChatPromptTemplate 6 | from langchain_openai import ChatOpenAI 7 | from openai import OpenAI 8 | 9 | from parea import Parea, trace 10 | from parea.utils.trace_integrations.langchain import PareaAILangchainTracer 11 | 12 | load_dotenv() 13 | 14 | oai_client = OpenAI() 15 | 16 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 17 | handler = PareaAILangchainTracer() 18 | p.wrap_openai_client(oai_client) 19 | 20 | llm = ChatOpenAI(openai_api_key=os.getenv("OPENAI_API_KEY")) 21 | prompt = ChatPromptTemplate.from_messages([("user", "{input}")]) 22 | chain = prompt | llm | StrOutputParser() 23 | 24 | 25 | @trace 26 | def main(): 27 | programming_language = ( 28 | oai_client.chat.completions.create(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Suggest one programming languages"}]).choices[0].message.content 29 | ) 30 | 31 | return chain.invoke( 32 | {"input": f"Write a Hello World program in {programming_language}."}, 33 | config={"callbacks": [handler]}, 34 | ) 35 | 36 | 37 | if __name__ == "__main__": 38 | print(main()) 39 | -------------------------------------------------------------------------------- /cookbook/langchain/trace_langchain_rag_agents.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | from langchain.agents.agent_toolkits import create_conversational_retrieval_agent, create_retriever_tool 5 | from langchain.chat_models import ChatOpenAI 6 | from langchain.document_loaders import TextLoader 7 | from langchain.embeddings import OpenAIEmbeddings 8 | from langchain.text_splitter import CharacterTextSplitter 9 | from langchain.vectorstores import FAISS 10 | 11 | from parea import Parea 12 | from parea.utils.trace_integrations.langchain import PareaAILangchainTracer 13 | 14 | load_dotenv() 15 | 16 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 17 | 18 | loader = TextLoader("../assets/data/state_of_the_union.txt") 19 | 20 | 21 | documents = loader.load() 22 | text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) 23 | texts = text_splitter.split_documents(documents) 24 | embeddings = OpenAIEmbeddings() 25 | db = FAISS.from_documents(texts, embeddings) 26 | retriever = db.as_retriever() 27 | tool = create_retriever_tool( 28 | retriever, 29 | "search_state_of_union", 30 | "Searches and returns documents regarding the state-of-the-union.", 31 | ) 32 | tools = [tool] 33 | 34 | 35 | llm = ChatOpenAI(temperature=0) 36 | 37 | agent_executor = create_conversational_retrieval_agent(llm, tools) 38 | 39 | 40 | def main(): 41 | result = agent_executor({"input": "what did the president say about kentaji brown jackson in the most recent state of the union?"}, callbacks=[PareaAILangchainTracer()]) 42 | print(result) 43 | 44 | 45 | if __name__ == "__main__": 46 | main() 47 | 
-------------------------------------------------------------------------------- /cookbook/langchain/trace_langchain_rag_question_answering.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import bs4 4 | from dotenv import load_dotenv 5 | from langchain import hub 6 | from langchain.document_loaders import WebBaseLoader 7 | from langchain.schema import StrOutputParser 8 | from langchain.text_splitter import RecursiveCharacterTextSplitter 9 | from langchain_community.vectorstores.faiss import FAISS 10 | from langchain_core.runnables import RunnablePassthrough 11 | from langchain_openai import ChatOpenAI, OpenAIEmbeddings 12 | 13 | from parea import Parea 14 | from parea.utils.trace_integrations.langchain import PareaAILangchainTracer 15 | 16 | load_dotenv() 17 | 18 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 19 | 20 | loader = WebBaseLoader( 21 | web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",), 22 | bs_kwargs=dict(parse_only=bs4.SoupStrainer(class_=("post-content", "post-title", "post-header"))), 23 | ) 24 | docs = loader.load() 25 | 26 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) 27 | splits = text_splitter.split_documents(docs) 28 | 29 | vectorstore = FAISS.from_documents(documents=splits, embedding=OpenAIEmbeddings()) 30 | retriever = vectorstore.as_retriever() 31 | 32 | prompt = hub.pull("rlm/rag-prompt") 33 | llm = ChatOpenAI(temperature=0) 34 | 35 | 36 | def format_docs(docs): 37 | return "\n\n".join(doc.page_content for doc in docs) 38 | 39 | 40 | rag_chain = {"context": retriever | format_docs, "question": RunnablePassthrough()} | prompt | llm | StrOutputParser() 41 | 42 | 43 | def main(): 44 | response = rag_chain.invoke("What is Task Decomposition?", config={"callbacks": [PareaAILangchainTracer()]}) 45 | print(response) 46 | 47 | 48 | if __name__ == "__main__": 49 | main() 50 | -------------------------------------------------------------------------------- /cookbook/langchain/trace_langchain_simple.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | 4 | from dotenv import load_dotenv 5 | from langchain_core.output_parsers import StrOutputParser 6 | from langchain_core.prompts import ChatPromptTemplate 7 | from langchain_openai import ChatOpenAI 8 | 9 | from parea import Parea 10 | from parea.utils.trace_integrations.langchain import PareaAILangchainTracer 11 | 12 | load_dotenv() 13 | 14 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 15 | handler = PareaAILangchainTracer() 16 | 17 | llm = ChatOpenAI(openai_api_key=os.getenv("OPENAI_API_KEY")) 18 | prompt = ChatPromptTemplate.from_messages([("user", "{input}")]) 19 | chain = prompt | llm | StrOutputParser() 20 | 21 | 22 | def main(): 23 | return chain.invoke( 24 | {"input": "Write a Hello World program in Python using FastAPI."}, 25 | config={"callbacks": [PareaAILangchainTracer(session_id="123", tags=["fastapi"], metadata={"key": "value"}, end_user_identifier="user123", deployment_id="456")]}, 26 | ) 27 | 28 | 29 | async def amain(): 30 | return await chain.ainvoke( 31 | {"input": "Write a Hello World program in Python using FastAPI."}, 32 | config={"callbacks": [handler]}, 33 | ) 34 | 35 | 36 | if __name__ == "__main__": 37 | print(main()) 38 | print(asyncio.run(amain())) 39 | -------------------------------------------------------------------------------- /cookbook/langchain/trace_langchain_with_deployed_prompt.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | from langchain_core.output_parsers import StrOutputParser 5 | from langchain_core.prompts import ChatPromptTemplate, PromptTemplate 6 | from langchain_openai import ChatOpenAI 7 | 8 | from parea import Parea 9 | from parea.schemas import UseDeployedPrompt, UseDeployedPromptResponse 10 | from parea.utils.trace_integrations.langchain import PareaAILangchainTracer 11 | 12 | load_dotenv() 13 | 14 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 15 | 16 | 17 | CONTEXT = """Company: Nike. 2023 18 | FORM 10-K 35 19 | OPERATING SEGMENTS 20 | As discussed in Note 15 2014 Operating Segments and Related Information in the accompanying Notes to the Consolidated Financial Statements, our operating segments are evidence of the structure of the Company's internal organization. The NIKE Brand segments are defined by geographic regions for operations participating in NIKE Brand sales activity. 21 | The breakdown of Revenues is as follows: 22 | \n\n(Dollars in millions) 23 | \n\nFISCAL 2023 FISCAL 2022 24 | \n\n% CHANGE\n\n% CHANGE EXCLUDING CURRENCY (1) CHANGES FISCAL 2021\n\n% CHANGE\n\n 25 | North America Europe, Middle East & Africa Greater China\n\n$\n\n21,608 $ 13,418 7,248\n\n18,353 12,479 7,547\n\n18 % 8 % -4 %\n\n18 % $ 21 % 4 %\n\n17,179 11,456 8,290\n\n7 % 9 % -9 %\n\nAsia Pacific & Latin America Global Brand Divisions\n\n(3)\n\n(2)\n\n6,431 58\n\n5,955 102\n\n8 % -43 %\n\n17 % -43 %\n\n5,343 25\n\n11 % 308 %\n\nTOTAL NIKE BRAND Converse\n\n$\n\n48,763 $ 2,427\n\n44,436 2,346\n\n10 % 3 %\n\n16 % $ 8 %\n\n42,293 2,205\n\n5 % 6 %\n\n(4)\n\nCorporate TOTAL NIKE, INC. REVENUES\n\n$\n\n27\n\n51,217 $\n\n(72) 46,710\n\n— 10 %\n\n— 16 % $\n\n40 44,538\n\n— 5 %""" 26 | 27 | 28 | def get_answer_prompt() -> ChatPromptTemplate: 29 | # fetched.prompt.raw_messages = [ 30 | # { 31 | # "content": "Use the following pieces of context from Nike's financial 10k filings dataset to answer the question. " 32 | # "Do not make up an answer if no context is provided to help answer it." 
33 | # "\n\nContext:\n---------\n{context}\n\n---------\nQuestion: {question}\n---------\n\nAnswer:", 34 | # "role": "user", 35 | # } 36 | # ] 37 | fetched: UseDeployedPromptResponse = p.get_prompt(UseDeployedPrompt(deployment_id="p-JTDYylldIrMbMisT70DJZ")) 38 | # use the raw messages since it has the templated variables which will be filled in when we invoke the prompt 39 | answer_prompt = ChatPromptTemplate.from_messages([(message["role"], message["content"]) for message in fetched.prompt.raw_messages]) 40 | return answer_prompt 41 | 42 | 43 | def get_summary_prompt() -> PromptTemplate: 44 | # fetched.prompt.raw_messages = [{'content': 'Compile and summarize the following content: {content}', 'role': 'user'}] 45 | fetched: UseDeployedPromptResponse = p.get_prompt(UseDeployedPrompt(deployment_id="p-OGWAo6yvVKr1hUBY6bmHw")) 46 | # use the raw messages since it has the templated variables which will be filled in when we invoke the prompt 47 | summary_prompt = PromptTemplate( 48 | template=fetched.prompt.raw_messages[0]["content"], 49 | input_variables=["content"], 50 | ) 51 | return summary_prompt 52 | 53 | 54 | def main(question): 55 | llm = ChatOpenAI(openai_api_key=os.getenv("OPENAI_API_KEY")) 56 | answer_prompt = get_answer_prompt() 57 | summary_prompt = get_summary_prompt() 58 | answer_chain = answer_prompt | llm | StrOutputParser() 59 | summary_chain = summary_prompt | llm | StrOutputParser() 60 | answer = answer_chain.invoke( 61 | { 62 | "context": CONTEXT, 63 | "question": question, 64 | }, 65 | config={"callbacks": [PareaAILangchainTracer(deployment_id="p-JTDYylldIrMbMisT70DJZ")]}, 66 | ) 67 | summary = summary_chain.invoke( 68 | {"content": answer}, 69 | config={"callbacks": [PareaAILangchainTracer(deployment_id="p-OGWAo6yvVKr1hUBY6bmHw")]}, 70 | ) 71 | return summary 72 | 73 | 74 | if __name__ == "__main__": 75 | response = main(question="Which operating segment contributed least to total Nike brand revenue in fiscal 2023?") 76 | print(response) 77 | -------------------------------------------------------------------------------- /cookbook/marvin/trace_marvin.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import marvin 4 | from dotenv import load_dotenv 5 | from pydantic import BaseModel, Field 6 | 7 | from parea import Parea 8 | 9 | load_dotenv() 10 | 11 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 12 | p.auto_trace_openai_clients("marvin") 13 | 14 | 15 | class Location(BaseModel): 16 | city: str 17 | state: str = Field(description="2-letter abbreviation") 18 | 19 | 20 | result = marvin.cast("the big apple", Location) 21 | print(result) 22 | -------------------------------------------------------------------------------- /cookbook/openai/dynamic_few_shot_injection_with_evals.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import os 4 | import random 5 | 6 | from dotenv import load_dotenv 7 | from openai import OpenAI 8 | from pydantic import BaseModel 9 | 10 | from parea import Parea, get_current_trace_id, trace, trace_insert 11 | from parea.schemas import Log, TestCase 12 | 13 | load_dotenv() 14 | 15 | client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) 16 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 17 | p.wrap_openai_client(client) 18 | 19 | NUM_INTERACTIONS = 5 20 | 21 | 22 | class Person(BaseModel): 23 | name: str 24 | email: str 25 | 26 | 27 | class Email(BaseModel): 28 | contact: Person 29 | email_sent: str 30 | 31 | 32 
| mock_DB: dict[str, Email] = {} 33 | 34 | 35 | def call_llm(messages: List[dict], model: str = "gpt-4o", temperature: float = 0.0) -> str: 36 | return client.chat.completions.create(model=model, temperature=temperature, messages=messages).choices[0].message.content 37 | 38 | 39 | def eval_func(log: Log) -> float: 40 | return random.uniform(0, 1) 41 | 42 | 43 | # Imitate collecting few shot examples from prod based on user feedback 44 | @trace(eval_funcs=[eval_func]) 45 | def email_writer(main_objective: str, contact: Person, few_shot_examples: Optional[List[str]] = None) -> str: 46 | trace_insert({"end_user_identifier": contact.name, "metadata": {"has_few_shot_examples": bool(few_shot_examples)}}) 47 | 48 | few_shot_examples_prompt = ("\nHere are some examples of good emails\n" + "\n".join(few_shot_examples)) if few_shot_examples else "" 49 | messages = [ 50 | { 51 | "role": "system", 52 | "content": f"You are an AI who performs an email writing task based on the following objective: {main_objective}", 53 | }, 54 | { 55 | "role": "user", 56 | "content": f""" 57 | Your email is from: {contact.model_dump()} 58 | {few_shot_examples_prompt if few_shot_examples else ""} 59 | Email: 60 | """, 61 | }, 62 | ] 63 | response = call_llm(messages) 64 | trace_id = get_current_trace_id() 65 | # insert into mock_DB 66 | mock_DB[trace_id] = Email(contact=contact, email_sent=response) 67 | return response 68 | 69 | 70 | def mimic_prod(few_shot_limit: int = 3): 71 | contact = Person(name="John Doe", email="jdoe@email.com") 72 | dataset = p.get_collection("Good_Email_Examples") 73 | selected_few_shot_examples = None 74 | if dataset: 75 | testcases: list[TestCase] = list(dataset.test_cases.values()) 76 | few_shot_examples = [case.inputs["email"] for case in testcases if case.inputs["user"] == contact.name] 77 | # This is simply taking most recent n examples. 
You can imagine adding additional logic to the dataset 78 | # that allows you to rank the examples based on some criteria 79 | selected_few_shot_examples = few_shot_examples[-few_shot_limit:] if few_shot_examples else None 80 | for interaction in range(NUM_INTERACTIONS): 81 | email = email_writer("Convincing email to gym to cancel membership early.", contact, selected_few_shot_examples) 82 | print(email) 83 | 84 | 85 | def add_good_email_example_to_dataset(user_name, email): 86 | # Note: if the test case collection doesn't exist, we will create a new collection with the provided name and data 87 | p.add_test_cases([{"user": user_name, "email": email}], name="Good_Email_Examples") 88 | 89 | 90 | def mimic_prod_checking_eval_scores(): 91 | # imagine the trace_id of the email is stored in state in the UI, so when the user provides feedback, we can use it 92 | trace_ids = mock_DB.keys() 93 | for trace_id in trace_ids: 94 | scores = p.get_trace_log_scores(trace_id) 95 | for score in scores: 96 | if score.name == "eval_func" and score.score >= 0.5: 97 | add_good_email_example_to_dataset(mock_DB[trace_id].contact.name, mock_DB[trace_id].email_sent) 98 | break 99 | 100 | 101 | if __name__ == "__main__": 102 | mimic_prod() 103 | mimic_prod_checking_eval_scores() 104 | # future llm calls will now have few-shot examples from the feedback collection 105 | mimic_prod() 106 | print("Done") 107 | -------------------------------------------------------------------------------- /cookbook/openai/simple_experiment_with_openai.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | import os 4 | 5 | from dotenv import load_dotenv 6 | from openai import OpenAI 7 | 8 | from parea import Parea, trace 9 | from parea.schemas import Log 10 | 11 | load_dotenv() 12 | 13 | client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) 14 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 15 | p.wrap_openai_client(client) 16 | 17 | 18 | def eval_func(log: Log) -> float: 19 | from random import random 20 | from time import sleep 21 | 22 | sleep(random() * 10) 23 | return random() 24 | 25 | 26 | @trace(eval_funcs=[eval_func]) 27 | def func(topic: str) -> dict[str, Union[str, None]]: 28 | return { 29 | "data": ( 30 | client.chat.completions.create( 31 | model="gpt-4-turbo", 32 | messages=[ 33 | { 34 | "role": "user", 35 | "content": f"Write a short haiku about {topic}", 36 | } 37 | ], 38 | ) 39 | .choices[0] 40 | .message.content 41 | ) 42 | } 43 | 44 | 45 | if __name__ == "__main__": 46 | p.experiment( 47 | name="hello-world-example-ch", 48 | data=[{"topic": "Fish"}, {"topic": "Python"}], 49 | func=func, 50 | ).run() 51 | -------------------------------------------------------------------------------- /cookbook/openai/trace_class_call_method.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import os 4 | 5 | from dotenv import load_dotenv 6 | from openai import OpenAI 7 | 8 | from parea import Parea, trace 9 | 10 | load_dotenv() 11 | 12 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 13 | 14 | 15 | class LLMCaller: 16 | def __init__(self, messages: List[dict[str, str]]): 17 | self.messages = messages 18 | self.client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) 19 | p.wrap_openai_client(self.client) 20 | 21 | @trace 22 | def __call__(self, model: str = "gpt-4o", temperature: float = 0.0) -> str: 23 | return self.client.chat.completions.create(model=model, temperature=temperature, 
messages=self.messages).choices[0].message.content 24 | 25 | 26 | @trace 27 | def main(topic: str) -> str: 28 | caller = LLMCaller( 29 | messages=[ 30 | {"role": "system", "content": "You are a debater making an argument on a topic."}, 31 | {"role": "user", "content": f"The discussion topic is {topic}"}, 32 | ] 33 | ) 34 | return caller() 35 | 36 | 37 | if __name__ == "__main__": 38 | result = main("The impact of climate change on the economy") 39 | print(result) 40 | -------------------------------------------------------------------------------- /cookbook/openai/tracing_azure_open_ai.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | 4 | from dotenv import load_dotenv 5 | from openai.lib.azure import AsyncAzureOpenAI, AzureOpenAI 6 | 7 | from cookbook.assets.data.openai_input_examples import functions_example, simple_example 8 | from parea import Parea, trace 9 | 10 | load_dotenv() 11 | 12 | client = AzureOpenAI( 13 | api_version="2023-12-01-preview", 14 | api_key=os.getenv("AZURE_OAI_API_KEY"), 15 | azure_endpoint=os.getenv("AZURE_OAI_ENDPOINT"), 16 | ) 17 | aclient = AsyncAzureOpenAI( 18 | api_version="2023-12-01-preview", 19 | api_key=os.getenv("AZURE_OAI_API_KEY"), 20 | azure_endpoint=os.getenv("AZURE_OAI_ENDPOINT"), 21 | ) 22 | 23 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 24 | p.wrap_openai_client(client) 25 | p.wrap_openai_client(aclient) 26 | 27 | 28 | @trace 29 | def call_azure(data: dict): 30 | response = client.chat.completions.create(**data) 31 | print(response) 32 | 33 | 34 | @trace 35 | def call_azure_stream(data: dict): 36 | data["stream"] = True 37 | stream = client.chat.completions.create(**data) 38 | for chunk in stream: 39 | if chunk.choices: 40 | print(chunk.choices[0].delta or "") 41 | 42 | 43 | @trace 44 | async def acall_azure(data: dict): 45 | response = await aclient.chat.completions.create(**data) 46 | print(response) 47 | 48 | 49 | @trace 50 | async def acall_azure_stream(data: dict): 51 | data["stream"] = True 52 | stream = await aclient.chat.completions.create(**data) 53 | async for chunk in stream: 54 | if chunk.choices: 55 | print(chunk.choices[0].delta or "") 56 | 57 | 58 | if __name__ == "__main__": 59 | azure_model = "AZURE_MODEL_NAME" # replace with your model name 60 | functions_example["model"] = azure_model 61 | simple_example["model"] = azure_model 62 | call_azure(functions_example) 63 | # call_azure_stream(simple_example) 64 | # call_azure_stream(functions_example) 65 | asyncio.run(acall_azure(simple_example)) 66 | # asyncio.run(acall_azure(functions_example)) 67 | # asyncio.run(acall_azure_stream(simple_example)) 68 | asyncio.run(acall_azure_stream(functions_example)) 69 | -------------------------------------------------------------------------------- /cookbook/openai/tracing_open_ai_streams.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | 4 | from dotenv import load_dotenv 5 | from openai import AsyncOpenAI, OpenAI 6 | 7 | from cookbook.assets.data.openai_input_examples import functions_example, simple_example_json 8 | from parea import Parea, trace 9 | 10 | load_dotenv() 11 | 12 | client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 13 | aclient = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) 14 | 15 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 16 | p.wrap_openai_client(client) 17 | p.wrap_openai_client(aclient) 18 | 19 | 20 | @trace 21 | def _call_openai_stream(data: dict): 22 | 
data["stream"] = True 23 | stream = client.chat.completions.create(**data) 24 | for chunk in stream: 25 | yield chunk 26 | 27 | 28 | def call_openai_stream(data: dict): 29 | stream = _call_openai_stream(data) 30 | for chunk in stream: 31 | print(chunk.choices[0].delta or "") 32 | 33 | 34 | @trace 35 | async def acall_openai_stream(data: dict): 36 | data["stream"] = True 37 | stream = await aclient.chat.completions.create(**data) 38 | async for chunk in stream: 39 | print(chunk.choices[0].delta or "") 40 | 41 | 42 | if __name__ == "__main__": 43 | # call_openai_stream(simple_example) 44 | call_openai_stream(simple_example_json) 45 | # call_openai_stream(functions_example) 46 | # asyncio.run(acall_openai_stream(simple_example)) 47 | asyncio.run(acall_openai_stream(functions_example)) 48 | -------------------------------------------------------------------------------- /cookbook/openai/tracing_openai_assistant_endpoint.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import openai 5 | from dotenv import load_dotenv 6 | from openai.pagination import SyncCursorPage 7 | from openai.types.beta import Thread 8 | from openai.types.beta.threads import Message, Run 9 | 10 | from parea import Parea, trace 11 | 12 | load_dotenv() 13 | 14 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 15 | client = openai.OpenAI() 16 | p.wrap_openai_client(client) 17 | 18 | QUESTIONS = ["I need to solve the equation `3x + 11 = 14`. Can you help me?", "Could you explain linear algebra to me?", "I don't like math. What can I do?"] 19 | 20 | 21 | def pretty_print(messages): 22 | print("# Messages") 23 | for m in messages: 24 | print(f"{m.role}: {m.content[0].text.value}") 25 | print() 26 | 27 | 28 | @trace 29 | def create_assistant(instructions: str): 30 | return client.beta.assistants.create( 31 | name="Math Tutor", 32 | instructions=instructions, 33 | tools=[{"type": "code_interpreter"}], 34 | model="gpt-4-turbo-preview", 35 | ) 36 | 37 | 38 | @trace 39 | def submit_message(assistant_id: str, thread_id: str, user_message: str) -> Run: 40 | client.beta.threads.messages.create(thread_id=thread_id, role="user", content=user_message) 41 | return client.beta.threads.runs.create( 42 | thread_id=thread_id, 43 | assistant_id=assistant_id, 44 | ) 45 | 46 | 47 | @trace 48 | def get_response(thread_id: str) -> SyncCursorPage[Message]: 49 | return client.beta.threads.messages.list(thread_id=thread_id, order="asc") 50 | 51 | 52 | @trace 53 | def create_thread_and_run(assistant_id: str, user_input: str) -> (Thread, Run): 54 | thread = client.beta.threads.create() 55 | run = submit_message(assistant_id, thread.id, user_input) 56 | return thread, run 57 | 58 | 59 | @trace 60 | def wait_on_run(run: Run, thread: Thread) -> Run: 61 | while run.status == "queued" or run.status == "in_progress": 62 | run = client.beta.threads.runs.retrieve( 63 | thread_id=thread.id, 64 | run_id=run.id, 65 | ) 66 | time.sleep(0.5) 67 | return run 68 | 69 | 70 | @trace 71 | def run_until_complete(assistant_id: str, run_instructions: str) -> SyncCursorPage[Message]: 72 | thread, run = create_thread_and_run(assistant_id, run_instructions) 73 | wait_on_run(run, thread) 74 | response = get_response(thread.id) 75 | pretty_print(response) 76 | return response 77 | 78 | 79 | @trace 80 | def main(assistant_instructions: str) -> SyncCursorPage[Message]: 81 | assistant = create_assistant(assistant_instructions) 82 | response = None 83 | for question in QUESTIONS: 84 | response = 
run_until_complete(assistant.id, question) 85 | return response 86 | 87 | 88 | if __name__ == "__main__": 89 | main("You are a personal math tutor. Write and run code to answer math questions.") 90 | -------------------------------------------------------------------------------- /cookbook/openai/tracing_templated_llm_calls.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | from openai import OpenAI 5 | 6 | from parea import Parea 7 | 8 | load_dotenv() 9 | 10 | client = OpenAI() 11 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 12 | p.wrap_openai_client(client) 13 | 14 | response = client.chat.completions.create( 15 | model="gpt-4", 16 | messages=[ 17 | {"role": "user", "content": "Make up {{number}} people."}, 18 | ], 19 | template_inputs={"number": "three"}, # with Parea wrapper, we can specify template_inputs which will appear as inputs and are used to fill-in the templated messages 20 | metadata={"template_id": "make-up-people-v1"}, # via Parea wrapper, can associate request with any metadata 21 | ) 22 | print(response.choices[0].message.content) 23 | -------------------------------------------------------------------------------- /cookbook/openai/tracing_tool_calling.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | from openai import OpenAI 5 | 6 | from parea import Parea 7 | 8 | load_dotenv() 9 | 10 | client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 11 | 12 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 13 | p.wrap_openai_client(client) 14 | 15 | 16 | def main(): 17 | tools = [ 18 | { 19 | "type": "function", 20 | "function": { 21 | "name": "get_current_weather", 22 | "description": "Get the current weather in a given location", 23 | "parameters": { 24 | "type": "object", 25 | "properties": { 26 | "location": { 27 | "type": "string", 28 | "description": "The city and state, e.g. 
San Francisco, CA", 29 | }, 30 | "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, 31 | }, 32 | "required": ["location"], 33 | }, 34 | }, 35 | } 36 | ] 37 | messages = [{"role": "user", "content": "What's the weather like in Boston today?"}] 38 | completion = client.chat.completions.create( 39 | model="gpt-4o", 40 | messages=messages, 41 | tools=tools, 42 | tool_choice="auto", 43 | ) 44 | messages.append({k: v for k, v in completion.choices[0].message.model_dump().items() if v is not None}) 45 | # messages.append(completion.choices[0].message) 46 | messages.append({"role": "tool", "content": "5 Celcius", "tool_call_id": completion.choices[0].message.tool_calls[0].id}) 47 | messages.append( 48 | { 49 | "role": "user", 50 | "content": "What's the weather like in Boston today?", 51 | } 52 | ) 53 | 54 | final_completion = client.chat.completions.create( 55 | model="gpt-4o", 56 | messages=messages, 57 | tools=tools, 58 | tool_choice="auto", 59 | ) 60 | 61 | print(final_completion) 62 | 63 | 64 | if __name__ == "__main__": 65 | main() 66 | -------------------------------------------------------------------------------- /cookbook/openai/tracing_with_images_open_ai.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import json 4 | import os 5 | 6 | from dotenv import load_dotenv 7 | from openai import OpenAI 8 | 9 | from parea import Parea, trace, trace_insert 10 | from parea.schemas import TraceLogImage 11 | 12 | load_dotenv() 13 | 14 | 15 | client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) 16 | 17 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 18 | p.wrap_openai_client(client) 19 | 20 | 21 | @trace 22 | def image_maker(query: str) -> str: 23 | response = client.images.generate(prompt=query, model="dall-e-3") 24 | image_url = response.data[0].url 25 | caption = {"original_prompt": query, "revised_prompt": response.data[0].revised_prompt} 26 | trace_insert({"images": [TraceLogImage(url=image_url, caption=json.dumps(caption))]}) 27 | return image_url 28 | 29 | 30 | @trace 31 | def ask_vision(image_url: str) -> Optional[str]: 32 | response = client.chat.completions.create( 33 | model="gpt-4o", 34 | messages=[ 35 | { 36 | "role": "user", 37 | "content": [ 38 | {"type": "text", "text": "What’s in this image?"}, 39 | {"type": "image_url", "image_url": {"url": image_url}}, 40 | ], 41 | } 42 | ], 43 | max_tokens=300, 44 | ) 45 | return response.choices[0].message.content 46 | 47 | 48 | @trace 49 | def main(query: str) -> str: 50 | image_url = image_maker(query) 51 | return ask_vision(image_url) 52 | 53 | 54 | if __name__ == "__main__": 55 | result = main("A cat sitting comfortably on a chair") 56 | print(result) 57 | -------------------------------------------------------------------------------- /cookbook/openai/tracing_with_open_ai_endpoint_directly.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | 3 | import os 4 | from datetime import datetime 5 | 6 | from dotenv import load_dotenv 7 | from openai import OpenAI 8 | 9 | from parea import Parea, get_current_trace_id, trace, trace_insert 10 | from parea.schemas import FeedbackRequest 11 | 12 | load_dotenv() 13 | 14 | client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) 15 | 16 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 17 | p.wrap_openai_client(client) 18 | 19 | 20 | def call_llm(data: List[dict], model: str = "gpt-4o", temperature: float = 0.0) -> str: 21 | return 
client.chat.completions.create(model=model, temperature=temperature, messages=data).choices[0].message.content 22 | 23 | 24 | @trace 25 | def argumentor(query: str, additional_description: str = "") -> str: 26 | return call_llm( 27 | [ 28 | { 29 | "role": "system", 30 | "content": f"""You are a debater making an argument on a topic. {additional_description}. 31 | The current time is {datetime.now().strftime("%Y-%m-%d")}""", 32 | }, 33 | {"role": "user", "content": f"The discussion topic is {query}"}, 34 | ] 35 | ) 36 | 37 | 38 | @trace 39 | def critic(argument: str) -> str: 40 | return call_llm( 41 | [ 42 | { 43 | "role": "system", 44 | "content": f"""You are a critic. 45 | What unresolved questions or criticism do you have after reading the following argument? 46 | Provide a concise summary of your feedback.""", 47 | }, 48 | {"role": "user", "content": argument}, 49 | ] 50 | ) 51 | 52 | 53 | @trace 54 | def refiner(query: str, additional_description: str, argument: str, criticism: str) -> str: 55 | return call_llm( 56 | [ 57 | { 58 | "role": "system", 59 | "content": f"""You are a debater making an argument on a topic. {additional_description}. 60 | The current time is {datetime.now().strftime("%Y-%m-%d")}""", 61 | }, 62 | {"role": "user", "content": f"""The discussion topic is {query}"""}, 63 | {"role": "assistant", "content": argument}, 64 | {"role": "user", "content": criticism}, 65 | { 66 | "role": "system", 67 | "content": "Please generate a new argument that incorporates the feedback from the user.", 68 | }, 69 | ], 70 | ) 71 | 72 | 73 | @trace 74 | def argument_chain(query: str, additional_description: str = "") -> Tuple[str, str]: 75 | trace_id = get_current_trace_id() 76 | trace_insert({"session_id": "cus_1234", "end_user_identifier": "user_1234"}, trace_id) 77 | argument = argumentor(query, additional_description) 78 | criticism = critic(argument) 79 | refined_argument = refiner(query, additional_description, argument, criticism) 80 | return refined_argument, trace_id 81 | 82 | 83 | @trace(session_id="cus_1234", end_user_identifier="user_1234") 84 | def json_call() -> str: 85 | completion = client.chat.completions.create( 86 | model="gpt-4o", 87 | messages=[{"role": "system", "content": "You are a helpful assistant talking in JSON."}, {"role": "user", "content": "What are you?"}], 88 | response_format={"type": "json_object"}, 89 | ) 90 | return completion.choices[0].message.content 91 | 92 | 93 | if __name__ == "__main__": 94 | result, trace_id = argument_chain( 95 | "Whether sparkling wine is good for you.", 96 | additional_description="Provide a concise, few sentence argument on why sparkling wine is good for you.", 97 | ) 98 | print(result) 99 | p.record_feedback( 100 | FeedbackRequest( 101 | trace_id=trace_id, 102 | score=0.7, # 0.0 (bad) to 1.0 (good) 103 | ) 104 | ) 105 | 106 | print(json_call()) 107 | -------------------------------------------------------------------------------- /cookbook/openai/tracing_with_openai_requests_api.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import httpx 4 | from dotenv import load_dotenv 5 | 6 | from cookbook.assets.data.openai_input_examples import functions_example, simple_example, tool_calling_example 7 | from parea import Parea, aprocess_stream_and_yield, convert_openai_raw_to_log, process_stream_and_yield, trace 8 | from parea.wrapper import get_formatted_openai_response 9 | 10 | load_dotenv() 11 | 12 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 13 | 14 | URL = 
"https://api.openai.com/v1/chat/completions" 15 | HEADERS = { 16 | "Content-Type": "application/json", 17 | "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}", 18 | } 19 | TIMEOUT = None 20 | 21 | # Sync HTTPX 22 | 23 | 24 | ## Normal 25 | @trace 26 | def call_openai_api(data: dict) -> str: 27 | with httpx.Client(timeout=TIMEOUT) as client: 28 | response = client.post(URL, json=data, headers=HEADERS) 29 | r = response.json() 30 | convert_openai_raw_to_log(r, data) # Add this line to enable tracing. Non-blocking 31 | return get_formatted_openai_response(r) # Return how you normally would 32 | 33 | 34 | ## Streaming 35 | @trace 36 | def call_openai_api_stream(data: dict): 37 | data["stream"] = True 38 | with httpx.stream("POST", URL, json=data, headers=HEADERS, timeout=TIMEOUT) as response: 39 | # Add process_stream_and_yield to enable tracing. Non-blocking 40 | for chunk in process_stream_and_yield(response, data): 41 | print(chunk) 42 | 43 | 44 | # Async HTTPX 45 | 46 | 47 | ## Normal 48 | @trace 49 | async def acall_openai_api(data: dict) -> str: 50 | async with httpx.AsyncClient(timeout=TIMEOUT) as client: 51 | response = await client.post(URL, json=data, headers=HEADERS) 52 | r = response.json() 53 | convert_openai_raw_to_log(r, data) # Add this line to enable tracing. Non-blocking 54 | return get_formatted_openai_response(r) # Return how you normally would 55 | 56 | 57 | ## Streaming 58 | @trace 59 | async def acall_openai_api_stream(data: dict): 60 | data["stream"] = True 61 | async with httpx.AsyncClient(timeout=TIMEOUT).stream("POST", URL, json=data, headers=HEADERS) as response: 62 | # Add process_stream_and_yield to enable tracing. Non-blocking 63 | async for chunk in aprocess_stream_and_yield(response, data): 64 | print(chunk) 65 | 66 | 67 | # TEST NESTED TRACING 68 | @trace 69 | def chain(): 70 | call_openai_api(simple_example) 71 | call_openai_api(functions_example) 72 | call_openai_api(tool_calling_example) 73 | call_openai_api_stream(tool_calling_example) 74 | 75 | 76 | @trace 77 | async def achain(): 78 | await acall_openai_api(simple_example) 79 | await acall_openai_api(functions_example) 80 | await acall_openai_api(tool_calling_example) 81 | await acall_openai_api_stream(tool_calling_example) 82 | 83 | 84 | if __name__ == "__main__": 85 | chain() 86 | # asyncio.run(achain()) 87 | -------------------------------------------------------------------------------- /cookbook/openai/tracing_with_openai_with_structured_output.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | from openai import OpenAI 5 | from pydantic import BaseModel 6 | 7 | from parea import Parea 8 | 9 | load_dotenv() 10 | 11 | client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) 12 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 13 | p.wrap_openai_client(client) 14 | 15 | 16 | class CalendarEvent(BaseModel): 17 | name: str 18 | date: str 19 | participants: list[str] 20 | 21 | 22 | def with_pydantic(): 23 | completion = client.beta.chat.completions.parse( 24 | model="gpt-4o-2024-08-06", 25 | messages=[ 26 | {"role": "system", "content": "Extract the event information."}, 27 | {"role": "user", "content": "Alice and Bob are going to a science fair on Friday."}, 28 | ], 29 | response_format=CalendarEvent, 30 | ) 31 | event = completion.choices[0].message.parsed 32 | print(event) 33 | 34 | 35 | def with_json_schema(): 36 | response = client.chat.completions.create( 37 | model="gpt-4o-2024-08-06", 38 | messages=[ 
39 | {"role": "system", "content": "You are a helpful math tutor. Guide the user through the solution step by step."}, 40 | {"role": "user", "content": "how can I solve 8x + 7 = -23"}, 41 | ], 42 | response_format={ 43 | "type": "json_schema", 44 | "json_schema": { 45 | "name": "math_response", 46 | "schema": { 47 | "type": "object", 48 | "properties": { 49 | "steps": { 50 | "type": "array", 51 | "items": { 52 | "type": "object", 53 | "properties": {"explanation": {"type": "string"}, "output": {"type": "string"}}, 54 | "required": ["explanation", "output"], 55 | "additionalProperties": False, 56 | }, 57 | }, 58 | "final_answer": {"type": "string"}, 59 | }, 60 | "required": ["steps", "final_answer"], 61 | "additionalProperties": False, 62 | }, 63 | "strict": True, 64 | }, 65 | }, 66 | ) 67 | print(response.choices[0].message.content) 68 | 69 | 70 | def with_tools(): 71 | tools = [ 72 | { 73 | "type": "function", 74 | "function": { 75 | "name": "get_delivery_date", 76 | "description": "Get the delivery date for a customer's order. Call this whenever you need to know the delivery date, for example when a customer asks 'Where is my package'", 77 | "parameters": { 78 | "type": "object", 79 | "properties": { 80 | "order_id": { 81 | "type": "string", 82 | "description": "The customer's order ID.", 83 | }, 84 | }, 85 | "required": ["order_id"], 86 | "additionalProperties": False, 87 | }, 88 | }, 89 | "strict": True, 90 | } 91 | ] 92 | 93 | messages = [ 94 | {"role": "system", "content": "You are a helpful customer support assistant. Use the supplied tools to assist the user."}, 95 | {"role": "user", "content": "Hi, can you tell me the delivery date for my order with id 5?"}, 96 | ] 97 | 98 | response = client.chat.completions.create( 99 | model="gpt-4o-2024-08-06", 100 | messages=messages, 101 | tools=tools, 102 | ) 103 | print(response.choices[0].message.tool_calls) 104 | 105 | 106 | if __name__ == "__main__": 107 | with_pydantic() 108 | with_json_schema() 109 | with_tools() 110 | -------------------------------------------------------------------------------- /cookbook/parea_llm_proxy/deployments/fetching_and_using_parea_deployments.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | 5 | from parea import Parea 6 | from parea.schemas.models import Completion, CompletionResponse, UseDeployedPrompt, UseDeployedPromptResponse 7 | 8 | load_dotenv() 9 | 10 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 11 | 12 | 13 | def main() -> CompletionResponse: 14 | return p.completion(Completion(deployment_id="p-4cbYJ0LIy0gaWb6Z819k7", llm_inputs={"x": "python", "y": "fastapi"})) 15 | 16 | 17 | def get_critic_prompt(val: str) -> UseDeployedPromptResponse: 18 | return p.get_prompt(UseDeployedPrompt(deployment_id="p-87NFVeQg30Hk2Hatw1h72", llm_inputs={"x": val})) 19 | 20 | 21 | if __name__ == "__main__": 22 | print(get_critic_prompt("Python")) 23 | # a = UseDeployedPromptResponse( 24 | # deployment_id="p-87NFVeQg30Hk2Hatw1h72", 25 | # name="deploy-test", 26 | # functions=[], 27 | # function_call=None, 28 | # prompt=Prompt( 29 | # raw_messages=[{"role": "user", "content": "Write a hello world program in {{x}}"}], 30 | # messages=[{"content": "Write a hello world program in Python", "role": "user"}], 31 | # inputs={"x": "Python"}, 32 | # ), 33 | # model="gpt-3.5-turbo-0125", 34 | # provider="openai", 35 | # model_params={"temp": 0.0, "top_p": 1.0, "max_length": None, "presence_penalty": 0.0, "frequency_penalty": 0.0}, 36 | # 
) 37 | -------------------------------------------------------------------------------- /cookbook/parea_llm_proxy/dynamic_few_shot_injection.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import os 4 | import random 5 | from datetime import datetime 6 | 7 | from dotenv import load_dotenv 8 | from pydantic import BaseModel 9 | 10 | from parea import Parea, trace, trace_insert 11 | from parea.schemas import Completion, CompletionResponse, FeedbackRequest, LLMInputs, Message, Role, TestCase 12 | 13 | load_dotenv() 14 | 15 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 16 | 17 | NUM_INTERACTIONS = 5 18 | 19 | 20 | class Person(BaseModel): 21 | name: str 22 | email: str 23 | 24 | 25 | class Email(BaseModel): 26 | contact: Person 27 | email_sent: str 28 | 29 | 30 | mock_DB: dict[str, Email] = {} 31 | 32 | 33 | def call_llm(messages: List[Message]) -> CompletionResponse: 34 | return p.completion(Completion(llm_configuration=LLMInputs(model="gpt-4o", messages=messages))) 35 | 36 | 37 | # Imitate collecting few shot examples from prod based on user feedback 38 | @trace 39 | def email_writer(main_objective: str, contact: Person, few_shot_examples: Optional[List[str]] = None) -> str: 40 | trace_insert({"end_user_identifier": contact.name, "metadata": {"has_few_shot_examples": bool(few_shot_examples)}}) 41 | 42 | few_shot_examples_prompt = ("\nHere are some examples of good emails\n" + "\n".join(few_shot_examples)) if few_shot_examples else "" 43 | messages = [ 44 | Message( 45 | role=Role.system, 46 | content=f"You are an AI who performs an email writing task based on the following objective: {main_objective}", 47 | ), 48 | # added date to prompt to avoid cache 49 | Message( 50 | role=Role.user, 51 | content=f""" 52 | Your email is from: {contact.model_dump()} 53 | Today's date is: {datetime.now().isoformat()} 54 | {few_shot_examples_prompt if few_shot_examples else ""} 55 | Email: 56 | """, 57 | ), 58 | ] 59 | response: CompletionResponse = call_llm(messages) 60 | trace_id = response.inference_id 61 | # insert into mock_DB 62 | mock_DB[trace_id] = Email(contact=contact, email_sent=response.content) 63 | return response.content 64 | 65 | 66 | def mimic_prod(few_shot_limit: int = 3): 67 | contact = Person(name="John Doe", email="jdoe@email.com") 68 | dataset = p.get_collection("Good_Email_Examples") 69 | selected_few_shot_examples = None 70 | if dataset: 71 | testcases: list[TestCase] = list(dataset.test_cases.values()) 72 | few_shot_examples = [case.inputs["email"] for case in testcases if case.inputs["user"] == contact.name] 73 | # This is simply taking most recent n examples. 
You can imagine adding additional logic to the dataset 74 | # that allows you to rank the examples based on some criteria 75 | selected_few_shot_examples = few_shot_examples[-few_shot_limit:] if few_shot_examples else None 76 | for interaction in range(NUM_INTERACTIONS): 77 | email = email_writer("Convincing email to gym to cancel membership early.", contact, selected_few_shot_examples) 78 | print(email) 79 | 80 | 81 | def add_good_email_example_to_dataset(user_name, email): 82 | # Note: if the test case collection doesn't exist, we will create a new collection with the provided name and data 83 | p.add_test_cases([{"user": user_name, "email": email}], name="Good_Email_Examples") 84 | 85 | 86 | def mimic_prod_feedback_collection(): 87 | # imagine the trace_id of the email is stored in state in the UI, so when the user provides feedback, we can use it 88 | trace_ids = mock_DB.keys() 89 | for trace_id in trace_ids: 90 | score = random.uniform(0, 1) 91 | p.record_feedback(FeedbackRequest(trace_id=trace_id, score=score)) 92 | # if the feedback is good, add it to the dataset to use later as a few-shot example 93 | if score >= 0.5: 94 | add_good_email_example_to_dataset(mock_DB[trace_id].contact.name, mock_DB[trace_id].email_sent) 95 | 96 | 97 | if __name__ == "__main__": 98 | mimic_prod() 99 | mimic_prod_feedback_collection() 100 | # future llm calls will now have few-shot examples from the feedback collection 101 | mimic_prod() 102 | print("Done") 103 | -------------------------------------------------------------------------------- /cookbook/parea_llm_proxy/tracing_with_agent.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Tuple 2 | 3 | import os 4 | import random 5 | from datetime import datetime 6 | 7 | import pytz 8 | from dotenv import load_dotenv 9 | 10 | from parea import Parea, get_current_trace_id, trace 11 | from parea.schemas import Completion, CompletionResponse, FeedbackRequest, LLMInputs, Message, ModelParams, Role 12 | 13 | load_dotenv() 14 | 15 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 16 | 17 | # Parea SDK makes it easy to use different LLMs with the same apis structure and standardized request/response schemas. 
18 | LLM_OPTIONS = [("gpt-3.5-turbo-0125", "openai"), ("gpt-4o", "openai"), ("claude-3-haiku-20240307", "anthropic"), ("claude-3-opus-20240229", "anthropic")] 19 | LIMIT = 1 20 | 21 | 22 | def dump_task(task): 23 | d = "" 24 | for tasklet in task: 25 | d += f"\n{tasklet.get('task_name','')}" 26 | d = d.strip() 27 | return d 28 | 29 | 30 | def call_llm( 31 | data: List[Message], 32 | model: str = "gpt-3.5-turbo", 33 | provider: str = "openai", 34 | temperature: float = 0.0, 35 | ) -> CompletionResponse: 36 | return p.completion( 37 | data=Completion( 38 | llm_configuration=LLMInputs( 39 | model=model, 40 | provider=provider, 41 | model_params=ModelParams(temp=temperature), 42 | messages=data, 43 | ) 44 | ) 45 | ) 46 | 47 | 48 | @trace 49 | def expound_task(main_objective: str, current_task: str) -> List[Dict[str, str]]: 50 | prompt = [ 51 | Message( 52 | role=Role.system, 53 | content=f"You are an AI who performs one task based on the following objective: {main_objective}\n" f"Your task: {current_task}\nResponse:", 54 | ), 55 | ] 56 | response = call_llm(prompt).content 57 | new_tasks = response.split("\n") if "\n" in response else [response] 58 | return [{"task_name": task_name} for task_name in new_tasks] 59 | 60 | 61 | @trace 62 | def generate_tasks(main_objective: str, expounded_initial_task: List[Dict[str, str]]) -> List[str]: 63 | select_llm_option = random.choice(LLM_OPTIONS) 64 | task_expansion = dump_task(expounded_initial_task) 65 | prompt = [ 66 | Message( 67 | role=Role.user, 68 | content=( 69 | f"You are an AI who creates tasks based on the following MAIN OBJECTIVE: {main_objective}\n" 70 | f"Create tasks pertaining directly to your previous research here:\n" 71 | f"{task_expansion}\nResponse:" 72 | ), 73 | ), 74 | ] 75 | response = call_llm(data=prompt, model=select_llm_option[0], provider=select_llm_option[1]).content 76 | new_tasks = response.split("\n") if "\n" in response else [response] 77 | task_list = [{"task_name": task_name} for task_name in new_tasks] 78 | new_tasks_list: List[str] = [] 79 | for task_item in task_list: 80 | task_description = task_item.get("task_name") 81 | if task_description: 82 | task_parts = task_description.strip().split(".", 1) 83 | if len(task_parts) == 2: 84 | new_task = task_parts[1].strip() 85 | new_tasks_list.append(new_task) 86 | 87 | return new_tasks_list 88 | 89 | 90 | @trace(name=f"run_agent-{datetime.now(pytz.utc)}") # You can provide a custom name other than the function name 91 | def run_agent(main_objective: str, initial_task: str = "") -> Tuple[List[Dict[str, str]], str]: 92 | trace_id = get_current_trace_id() 93 | generated_tasks = [] 94 | expounded_initial_task = expound_task(main_objective, initial_task) 95 | new_tasks = generate_tasks(main_objective, expounded_initial_task) 96 | task_counter = 0 97 | for task in new_tasks or []: 98 | task_counter += 1 99 | q = expound_task(main_objective, task) 100 | exp = dump_task(q) 101 | generated_tasks.append({f"task_{task_counter}": exp}) 102 | if task_counter >= LIMIT: 103 | break 104 | return generated_tasks, trace_id 105 | 106 | 107 | if __name__ == "__main__": 108 | result, trace_id = run_agent("Become a machine learning expert.", "Learn about tensors.") 109 | print(result) 110 | p.record_feedback(FeedbackRequest(trace_id=trace_id, score=0.642)) 111 | -------------------------------------------------------------------------------- /cookbook/parea_llm_proxy/tracing_with_parea_streaming.py: -------------------------------------------------------------------------------- 1 | import os 2 
| 3 | from dotenv import load_dotenv 4 | 5 | from parea import Parea, trace 6 | from parea.schemas import Completion, LLMInputs, Message, ModelParams, Role 7 | 8 | load_dotenv() 9 | 10 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 11 | 12 | completion = Completion( 13 | llm_configuration=LLMInputs( 14 | model="gpt-3.5-turbo-1106", 15 | model_params=ModelParams(temp=0.1), 16 | messages=[Message(role=Role.user, content="Write a short haiku about the moon.")], 17 | ) 18 | ) 19 | 20 | 21 | @trace 22 | def call_llm_stream(): 23 | stream = p.stream(completion) 24 | for chunk in stream: 25 | print(chunk) 26 | 27 | 28 | @trace 29 | async def acall_llm_stream(): 30 | stream = p.astream(completion) 31 | async for chunk in stream: 32 | print(chunk) 33 | 34 | 35 | if __name__ == "__main__": 36 | call_llm_stream() 37 | # asyncio.run(acall_llm_stream()) 38 | -------------------------------------------------------------------------------- /cookbook/tracing_with_threading.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import contextvars 4 | import os 5 | from concurrent.futures import ThreadPoolExecutor 6 | 7 | from dotenv import load_dotenv 8 | 9 | from parea import Parea, trace 10 | 11 | load_dotenv() 12 | 13 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 14 | 15 | 16 | @trace 17 | def llm_call(question): 18 | return f"I can't answer that question: {question}" 19 | 20 | 21 | @trace 22 | def multiple_llm_calls(question, n_calls: int = 2) -> List[str]: 23 | answers = [] 24 | with ThreadPoolExecutor(max_workers=2) as executor: 25 | for _ in range(n_calls): 26 | context = contextvars.copy_context() 27 | future = executor.submit(context.run, llm_call, question) 28 | answers.append(future.result()) 29 | return answers 30 | 31 | 32 | response = multiple_llm_calls("Who are you?") 33 | print(response) 34 | -------------------------------------------------------------------------------- /cookbook/use_dataset_for_finetuning.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | 5 | from parea import Parea 6 | 7 | load_dotenv() 8 | 9 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 10 | 11 | dataset = p.get_collection("DATASET_ID") # Replace DATASET_ID with the actual dataset ID 12 | 13 | dataset.write_to_finetune_jsonl("finetune.jsonl") 14 | -------------------------------------------------------------------------------- /cookiecutter-config-file.yml: -------------------------------------------------------------------------------- 1 | # This file contains values from Cookiecutter 2 | 3 | default_context: 4 | project_name: "parea-ai" 5 | project_description: "Parea python sdk" 6 | organization: "parea-ai" 7 | license: "Apache Software License 2.0" 8 | minimal_python_version: 3.9 9 | github_name: "parea-ai" 10 | email: "joel@parea.ai" 11 | version: "0.1.0" 12 | line_length: "180" 13 | create_example_template: "none" 14 | -------------------------------------------------------------------------------- /parea/__init__.py: -------------------------------------------------------------------------------- 1 | # type: ignore[attr-defined] 2 | # flake8: noqa 3 | 4 | """ 5 | Parea API SDK 6 | 7 | The Parea SDK allows you to interact with Parea from your product or service. 8 | To install the official [Python SDK](https://pypi.org/project/parea/), 9 | run the following command: ```bash pip install parea ```. 
10 | """ 11 | import sys 12 | 13 | from parea.api_client import get_version 14 | from parea.cache import InMemoryCache 15 | from parea.client import Parea 16 | from parea.experiment.cli import experiment as _experiment_cli 17 | from parea.experiment.dvc import parea_dvc_initialized 18 | from parea.experiment.experiment import Experiment 19 | from parea.helpers import gen_trace_id, write_trace_logs_to_csv 20 | from parea.parea_logger import parea_logger 21 | from parea.utils.trace_utils import clear_trace_context, get_current_trace_id, get_root_trace_id, trace, trace_insert 22 | from parea.wrapper.openai_raw_api_tracer import aprocess_stream_and_yield, process_stream_and_yield 23 | from parea.wrapper.utils import convert_openai_raw_to_log 24 | 25 | version: str = get_version() 26 | 27 | 28 | def main(): 29 | args = sys.argv[1:] 30 | if args[0] == "experiment": 31 | _experiment_cli(args[1:]) 32 | elif args[0] == "dvc-init": 33 | parea_dvc_initialized(only_check=False) 34 | else: 35 | print(f"Unknown command: '{args[0]}'") 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | -------------------------------------------------------------------------------- /parea/cache/__init__.py: -------------------------------------------------------------------------------- 1 | from .in_memory import InMemoryCache 2 | -------------------------------------------------------------------------------- /parea/cache/cache.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | from abc import ABC 4 | 5 | from parea.schemas.models import CacheRequest, TraceLog 6 | 7 | 8 | class Cache(ABC): 9 | def get(self, key: CacheRequest) -> Optional[TraceLog]: 10 | """ 11 | Get a normal response from the cache. 12 | 13 | Args: 14 | key (CacheRequest): The cache key. 15 | 16 | Returns: 17 | Optional[TraceLog]: The cached response, or None if the key was not found. 18 | 19 | # noqa: DAR202 20 | # noqa: DAR401 21 | """ 22 | raise NotImplementedError 23 | 24 | async def aget(self, key: CacheRequest) -> Optional[TraceLog]: 25 | """ 26 | Get a normal response from the cache. 27 | 28 | Args: 29 | key (CacheRequest): The cache key. 30 | 31 | Returns: 32 | Optional[TraceLog]: The cached response, or None if the key was not found. 33 | 34 | # noqa: DAR202 35 | # noqa: DAR401 36 | """ 37 | raise NotImplementedError 38 | 39 | def set(self, key: CacheRequest, value: TraceLog): 40 | """ 41 | Set a normal response in the cache. 42 | 43 | Args: 44 | key (CacheRequest): The cache key. 45 | value (TraceLog): The response to cache. 46 | 47 | # noqa: DAR401 48 | """ 49 | raise NotImplementedError 50 | 51 | async def aset(self, key: CacheRequest, value: TraceLog): 52 | """ 53 | Set a normal response in the cache. 54 | 55 | Args: 56 | key (CacheRequest): The cache key. 57 | value (TraceLog): The response to cache. 58 | 59 | # noqa: DAR401 60 | """ 61 | raise NotImplementedError 62 | 63 | def invalidate(self, key: CacheRequest): 64 | """ 65 | Invalidate a key in the cache. 66 | 67 | Args: 68 | key (CacheRequest): The cache key. 69 | 70 | # noqa: DAR401 71 | """ 72 | raise NotImplementedError 73 | 74 | async def ainvalidate(self, key: CacheRequest): 75 | """ 76 | Invalidate a key in the cache. 77 | 78 | Args: 79 | key (CacheRequest): The cache key. 80 | 81 | # noqa: DAR401 82 | """ 83 | raise NotImplementedError 84 | 85 | def log(self, value: TraceLog): 86 | """ 87 | Log a response in the cache. 88 | 89 | Args: 90 | value (TraceLog): The response to log. 
91 | 92 | # noqa: DAR401 93 | """ 94 | raise NotImplementedError 95 | 96 | def read_logs(self) -> List[TraceLog]: 97 | """ 98 | Read all logs from the cache. 99 | 100 | Returns: 101 | List[TraceLog]: All logs in the cache. 102 | 103 | # noqa: DAR202 104 | # noqa: DAR401 105 | """ 106 | raise NotImplementedError 107 | -------------------------------------------------------------------------------- /parea/cache/in_memory.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | from attr import asdict 4 | 5 | from parea.cache.cache import Cache 6 | from parea.schemas.models import CacheRequest, TraceLog 7 | from parea.utils.universal_encoder import json_dumps 8 | 9 | 10 | class InMemoryCache(Cache): 11 | def __init__(self): 12 | self.cache = {} 13 | self.logs = [] 14 | 15 | def get(self, key: CacheRequest) -> Optional[TraceLog]: 16 | return self.cache.get(json_dumps(asdict(key))) 17 | 18 | async def aget(self, key: CacheRequest) -> Optional[TraceLog]: 19 | return self.get(key) 20 | 21 | def set(self, key: CacheRequest, value: TraceLog): 22 | self.cache[json_dumps(asdict(key))] = value 23 | 24 | async def aset(self, key: CacheRequest, value: TraceLog): 25 | self.set(key, value) 26 | 27 | def invalidate(self, key: CacheRequest): 28 | key = json_dumps(asdict(key)) 29 | if key in self.cache: 30 | del self.cache[key] 31 | 32 | async def ainvalidate(self, key: CacheRequest): 33 | self.invalidate(key) 34 | 35 | def log(self, value: TraceLog): 36 | self.logs.append(value) 37 | 38 | def read_logs(self) -> List[TraceLog]: 39 | return self.logs.copy() 40 | -------------------------------------------------------------------------------- /parea/evals/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import EvalFuncTuple, call_openai, get_tokens, run_evals_in_thread_and_log, run_evals_synchronous 2 | -------------------------------------------------------------------------------- /parea/evals/chat/__init__.py: -------------------------------------------------------------------------------- 1 | from .goal_success_ratio import goal_success_ratio_factory 2 | -------------------------------------------------------------------------------- /parea/evals/chat/goal_success_ratio.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Optional 2 | 3 | import json 4 | 5 | from parea.evals.utils import call_openai 6 | from parea.schemas.log import Log 7 | 8 | 9 | def goal_success_ratio_factory( 10 | use_output: Optional[bool] = False, message_field: Optional[str] = None, model: Optional[str] = "gpt-4", is_azure: Optional[bool] = False 11 | ) -> Callable[[Log], float]: 12 | """ 13 | This factory creates an evaluation function that measures the success ratio of a goal-oriented conversation. 14 | Typically, a user interacts with a chatbot or AI assistant to achieve specific goals. 15 | This motivates to measure the quality of a chatbot by counting how many messages a user has to send before they reach their goal. 16 | One can further break this down by successful and unsuccessful goals to analyze user & LLM behavior. 17 | 18 | Concretely: 19 | 1. Delineate the conversation into segments by splitting them by the goals the user wants to achieve. 20 | 2. Assess if every goal has been reached. 21 | 3. Calculate the average number of messages sent per segment. 22 | 23 | Args: 24 | is_azure: Whether to use Azure as the model. 
Defaults to False. 25 | model: The model which should be used for grading. 26 | use_output (Optional[bool], optional): Whether to use the output of the log to access the messages. Defaults to False. 27 | message_field (Optional[str], optional): The name of the field in the log that contains the messages. 28 | Defaults to None. If None, the messages are taken from the configuration attribute. 29 | 30 | # noqa: DAR201 31 | # noqa: DAR401 32 | """ 33 | if use_output and message_field: 34 | raise ValueError("Only one of use_output and message_field can be set.") 35 | 36 | def goal_success_ratio(log: Log) -> float: 37 | """Returns the average amount of turns the user had to converse with the AI to reach their goals.""" 38 | if use_output: 39 | output_list_dicts = json.loads(log.output) 40 | messages = [m for m in output_list_dicts] 41 | elif message_field: 42 | messages = [m for m in log.inputs[message_field]] 43 | else: 44 | messages = [m.to_dict() for m in log.configuration.messages] 45 | if log.output: 46 | messages.append({"role": "assistant", "content": log.output}) 47 | 48 | # need to determine where does a new goal start 49 | conversation_segments = [] 50 | start_index = 0 51 | end_index = 3 52 | while end_index < len(messages): 53 | user_follows_same_goal = call_openai( 54 | [ 55 | { 56 | "role": "system", 57 | "content": "Look at the conversation and to determine if the user is still following the same goal " 58 | "or if they are following a new goal. If they are following the same goal, respond " 59 | "SAME_GOAL. Otherwise, respond NEW_GOAL. In any case do not answer the user request!", 60 | } 61 | ] 62 | + messages[start_index:end_index], 63 | model=model, 64 | is_azure=is_azure, 65 | ) 66 | 67 | if user_follows_same_goal == "SAME_GOAL": 68 | end_index += 2 69 | else: 70 | conversation_segments.append(messages[start_index : end_index - 1]) 71 | start_index = end_index - 1 72 | end_index += 2 73 | 74 | if start_index < len(messages): 75 | conversation_segments.append(messages[start_index:]) 76 | 77 | # for now assume that the user reached their goal in every segment 78 | # return the average amount of turns the user had to converse with the AI to reach their goals 79 | return sum([2 / len(segment) for segment in conversation_segments]) / len(conversation_segments) 80 | 81 | return goal_success_ratio 82 | -------------------------------------------------------------------------------- /parea/evals/dataset_level/__init__.py: -------------------------------------------------------------------------------- 1 | from .balanced_acc import balanced_acc_factory 2 | -------------------------------------------------------------------------------- /parea/evals/dataset_level/balanced_acc.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union 2 | 3 | from collections import defaultdict 4 | 5 | from parea.schemas import EvaluatedLog, EvaluationResult 6 | 7 | 8 | def balanced_acc_factory(score_name: str): 9 | def balanced_acc(logs: List[EvaluatedLog]) -> Union[EvaluationResult, None]: 10 | correct = defaultdict(int) 11 | total = defaultdict(int) 12 | for log in logs: 13 | if (eval_result := log.get_score(score_name)) is not None: 14 | correct[log.target] += int(eval_result.score) 15 | total[log.target] += 1 16 | recalls = [correct[key] / total[key] for key in correct] 17 | 18 | if len(recalls) == 0: 19 | return None 20 | 21 | return EvaluationResult(name=f"balanced_acc_{score_name}", score=sum(recalls) / len(recalls)) 22 | 23 | return 
balanced_acc 24 | -------------------------------------------------------------------------------- /parea/evals/general/__init__.py: -------------------------------------------------------------------------------- 1 | from .answer_matches_target_llm_grader import answer_matches_target_llm_grader_factory 2 | from .answer_matches_target_recall import answer_matches_target_recall 3 | from .answer_relevancy import answer_relevancy_factory 4 | from .levenshtein import levenshtein 5 | from .llm_grader import llm_grader_factory, llm_grader_gpt3t, llm_grader_gpt4 6 | from .lm_vs_lm import lm_vs_lm_factuality_factory, lm_vs_lm_factuality_gpt3t, lm_vs_lm_factuality_gpt4 7 | from .self_check import self_check 8 | from .semantic_similarity import semantic_similarity_factory, semantic_similarity_oai_3_large, semantic_similarity_oai_3_small, semantic_similarity_oai_ada_002 9 | -------------------------------------------------------------------------------- /parea/evals/general/answer_matches_target_llm_grader.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Optional, Union 2 | 3 | from parea.evals.utils import call_openai 4 | from parea.schemas.log import Log 5 | 6 | 7 | def answer_matches_target_llm_grader_factory( 8 | question_field: Optional[str] = "question", 9 | model: Optional[str] = "gpt-4", 10 | is_azure: Optional[bool] = False, 11 | ) -> Callable[[Log], Union[float, None]]: 12 | """Quantifies how much the generated answer matches the ground truth / target.""" 13 | 14 | def answer_matches_target_llm_grader(log: Log) -> Union[float, None]: 15 | question = log.inputs[question_field] 16 | output = log.output 17 | if (target := log.target) is None: 18 | return None 19 | response = call_openai( 20 | model=model, 21 | messages=[ 22 | {"role": "system", "content": "You are CompareGPT, a machine to verify the groundedness of predictions. Answer with " "only yes/no."}, 23 | { 24 | "role": "user", 25 | "content": f"""You are given a question, the corresponding ground-truth answer and a prediction from a model. Compare the "Ground-truth answer" and the "Prediction" to determine whether the prediction correctly answers the question. All information in the ground-truth answer must be present in the prediction, including numbers and dates. You must answer "no" if there are any specific details in the ground-truth answer that are not mentioned in the prediction. There should be no contradicting statements in the prediction. The prediction may contain extra information. If the prediction states something as a possibility, treat it as a definitive answer. 26 | 27 | Question: {question} 28 | Ground-truth answer: {target} 29 | Prediction: {output} 30 | 31 | CompareGPT response:""", 32 | }, 33 | ], 34 | temperature=0.0, 35 | is_azure=is_azure, 36 | ) 37 | return float("yes" in response.lower()) 38 | 39 | return answer_matches_target_llm_grader 40 | -------------------------------------------------------------------------------- /parea/evals/general/answer_matches_target_recall.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from collections import Counter 4 | 5 | from parea.evals.utils import get_tokens 6 | from parea.schemas.log import Log 7 | 8 | 9 | def answer_matches_target_recall(log: Log) -> Union[float, None]: 10 | """Prop. 
of tokens in target/reference answer which are also in model generation.""" 11 | if (target := log.target) is None: 12 | return None 13 | output = log.output 14 | model = log.configuration.model 15 | 16 | target_tokens = get_tokens(model, target) 17 | output_tokens = get_tokens(model, output) 18 | 19 | if len(target_tokens) == 0: 20 | return 1.0 21 | common_tokens = Counter(target_tokens) & Counter(output_tokens) 22 | num_common = sum(common_tokens.values()) 23 | return num_common / len(target_tokens) 24 | -------------------------------------------------------------------------------- /parea/evals/general/answer_relevancy.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Optional 2 | 3 | from parea.evals.utils import call_openai, embed 4 | from parea.schemas.log import Log 5 | 6 | 7 | def answer_relevancy_factory( 8 | question_field: str = "question", 9 | n_generations: int = 3, 10 | model: Optional[str] = "gpt-3.5-turbo-16k", 11 | embedding_model: str = "text-embedding-ada-002", 12 | is_azure: Optional[bool] = False, 13 | ) -> Callable[[Log], float]: 14 | """ 15 | This factory creates an evaluation function that measures how relevant the generated response is to the given question. 16 | It is based on the paper [RAGAS: Automated Evaluation of Retrieval Augmented Generation](https://arxiv.org/abs/2309.15217) 17 | which suggests using an LLM to generate multiple questions that fit the generated answer and measure the cosine 18 | similarity of the generated questions with the original one. 19 | 20 | Args: 21 | is_azure: Whether to use the Azure API. Defaults to False. 22 | embedding_model: The model which should be used for embedding the text. 23 | model: The model which should be used for grading. Defaults to "gpt-3.5-turbo-16k". 24 | question_field: The key name/field used for the question/query of the user. Defaults to "question". 25 | n_generations: The number of questions which should be generated. Defaults to 3. 26 | 27 | Returns: 28 | Callable[[Log], float]: A function that takes a log as input and returns a score between 0 and 1 indicating 29 | if the generated response is relevant to the query. 30 | 31 | Raises: 32 | ImportError: If numpy is not installed. 33 | """ 34 | try: 35 | import numpy as np 36 | except ImportError: 37 | raise ImportError("Please install numpy to use this metric.") 38 | 39 | def answer_relevancy(log: Log) -> float: 40 | """Quantifies how much the generated answer relates to the query.""" 41 | question = log.inputs[question_field] 42 | output = log.output 43 | 44 | generated_questions = call_openai( 45 | model=model, 46 | messages=[ 47 | { 48 | "role": "user", 49 | "content": f"""\ 50 | Generate question for the given answer. 51 | Answer:\nThe PSLV-C56 mission is scheduled to be launched on Sunday, 30 July 2023 at 06:30 IST / 01:00 UTC. It will be launched from the Satish Dhawan Space Centre, Sriharikota, Andhra Pradesh, India 52 | Question: When is the scheduled launch date and time for the PSLV-C56 mission, and where will it be launched from? 
53 | 54 | Answer: {output} 55 | Question:""", 56 | } 57 | ], 58 | temperature=0.0, 59 | n=n_generations, 60 | is_azure=is_azure, 61 | ) 62 | embedded_generated_questions = [embed(model=embedding_model, input=q, is_azure=is_azure) for q in generated_questions] 63 | embedded_question = embed(model=embedding_model, input=question, is_azure=is_azure) 64 | 65 | question_vec = np.asarray(embedded_question).reshape(1, -1) 66 | gen_question_vec = np.asarray(embedded_generated_questions) 67 | norm = np.linalg.norm(gen_question_vec, axis=1) * np.linalg.norm(question_vec, axis=1) 68 | return (np.dot(gen_question_vec, question_vec.T).reshape(-1) / norm).mean() 69 | 70 | return answer_relevancy 71 | -------------------------------------------------------------------------------- /parea/evals/general/levenshtein.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from Levenshtein import distance 4 | 5 | from parea.schemas import Log 6 | 7 | 8 | def levenshtein(log: Log) -> Union[float, None]: 9 | output = log.output 10 | if (target := log.target) is None: 11 | return None 12 | 13 | return levenshtein_distance(str(output), str(target)) 14 | 15 | 16 | def levenshtein_distance(output: str, target: str) -> float: 17 | max_len = max(len(x) for x in [output, target]) 18 | 19 | score = 1 20 | if max_len > 0: 21 | score = 1 - (distance(output, target) / max_len) 22 | 23 | return score 24 | -------------------------------------------------------------------------------- /parea/evals/general/llm_grader.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Optional 2 | 3 | import ast 4 | import re 5 | 6 | from parea.evals.utils import call_openai 7 | from parea.schemas.log import Log 8 | 9 | one_score_pattern = re.compile(r"\[\[(\d+\.?\d*)\]\]") 10 | one_score_pattern_backup = re.compile(r"\[(\d+\.?\d*)\]") 11 | 12 | 13 | def llm_grader_factory(model: str = "gpt-4", question_field: str = "question", is_azure: Optional[bool] = False) -> Callable[[Log], float]: 14 | """ 15 | This factory creates an evaluation function that uses an LLM to grade the response of an LLM to a given question. 16 | It is based on the paper [Judging LLM-as-a-Judge with MT-Bench and Chatbot Arena](https://arxiv.org/abs/2306.05685) 17 | which introduces general-purpose zero-shot prompt to rate responses from an LLM to a given question on a scale from 1-10. 18 | They find that GPT-4's ratings agree as much with a human rater as a human annotator agrees with another one (>80%). 19 | Further, they observe that the agreement with a human annotator increases as the response rating gets clearer. 20 | Additionally, they investigated how much the evaluating LLM overestimated its responses and found that GPT-4 and 21 | Claude-1 were the only models that didn't overestimate themselves. 22 | 23 | Args: 24 | is_azure: Whether to use the Azure API. Defaults to False. 25 | model: The model which should be used for grading. Currently, only supports OpenAI chat models. 26 | question_field: The key name/field used for the question/query of the user. Defaults to "question". 27 | 28 | Returns: 29 | Callable[[Log], float]: A function that takes a log as input and returns a score between 0 and 1 which is the 30 | rating of the response on a scale from 1-10 divided by 10. 
31 | """ 32 | 33 | def llm_grader(log: Log) -> float: 34 | question = log.inputs[question_field] 35 | output = log.output 36 | rating_response = call_openai( 37 | model=model, 38 | messages=[ 39 | {"role": "system", "content": "You are a helpful assistant."}, 40 | { 41 | "role": "user", 42 | "content": f"[Instruction]\nPlease act as an impartial judge and evaluate the quality of the response " 43 | f"provided by an AI assistant to the user question displayed below. Your evaluation should " 44 | f"consider factors such as the helpfulness, relevance, accuracy, depth, creativity, and " 45 | f"level of detail of the response. Begin your evaluation by providing a short explanation. " 46 | f"Be as objective as possible. After providing your explanation, you must rate the response " 47 | f'on a scale of 1 to 10 by strictly following this format: "[[rating]]", for example: ' 48 | f'"Rating: [[5]]".\n\n[Question]\n{question}\n\n[The Start of Assistant\'s Answer]' 49 | f"\n{output}\n[The End of Assistant's Answer]", 50 | }, 51 | ], 52 | temperature=0.0, 53 | is_azure=is_azure, 54 | ) 55 | match = re.search(one_score_pattern, rating_response) 56 | if not match: 57 | match = re.search(one_score_pattern_backup, rating_response) 58 | 59 | if match: 60 | rating = ast.literal_eval(match.groups()[0]) 61 | else: 62 | rating = 0 63 | 64 | return rating / 10.0 65 | 66 | return llm_grader 67 | 68 | 69 | llm_grader_gpt4 = llm_grader_factory("gpt-4") 70 | llm_grader_gpt3t = llm_grader_factory("gpt-3.5-turbo-16k") 71 | -------------------------------------------------------------------------------- /parea/evals/general/self_check.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from parea.evals.utils import call_openai, sent_tokenize 4 | from parea.schemas.log import Log 5 | 6 | 7 | def self_check(log: Log) -> Union[float, None]: 8 | """ 9 | Given that many API-based LLMs don't reliably give access to the log probabilities of the generated tokens, assessing 10 | the certainty of LLM predictions via perplexity isn't possible. 11 | The [SelfCheckGPT: Zero-Resource Black-Box Hallucination Detection for Generative Large Language Models](https://arxiv.org/abs/2303.08896) paper 12 | suggests measuring the average factuality of every sentence in a generated response. They generate additional responses 13 | from the LLM at a high temperature and check how much every sentence in the original answer is supported by the other generations. 14 | The intuition behind this is that if the LLM knows a fact, it's more likely to sample it. The authors find that this 15 | works well in detecting non-factual and factual sentences and ranking passages in terms of factuality. 16 | The authors noted that correlation with human judgment doesn't increase after 4-6 additional 17 | generations when using `gpt-3.5-turbo` to evaluate biography generations. 18 | 19 | Args: 20 | log (Log): The log object to of the trace evaluate. 21 | 22 | Returns: 23 | float: A score between 0 and 1 indicating the factuality of the response. 
24 | """ 25 | if log.configuration is None or log.configuration.messages is None: 26 | return None 27 | 28 | messages = [m.to_dict() for m in log.configuration.messages] 29 | 30 | n_sampled_outputs = 5 31 | sampled_outputs = [] 32 | for _ in range(n_sampled_outputs): 33 | response = call_openai( 34 | messages=messages, 35 | model=log.configuration.model, 36 | temperature=1.0, 37 | max_tokens=log.configuration.model_params.max_length, 38 | top_p=log.configuration.model_params.top_p, 39 | frequency_penalty=log.configuration.model_params.frequency_penalty, 40 | presence_penalty=log.configuration.model_params.presence_penalty, 41 | response_format=log.configuration.model_params.response_format, 42 | ) 43 | sampled_outputs.append(response) 44 | 45 | sentences = sent_tokenize(log.output) 46 | 47 | if len(sentences) == 0: 48 | return 0.0 49 | 50 | sentences_scores = [] 51 | for sentence in sentences: 52 | scores = [] 53 | for sampled_output in sampled_outputs: 54 | response = call_openai( 55 | messages=[ 56 | { 57 | "role": "user", 58 | "content": f"""Context: {sampled_output} 59 | Sentence: {sentence} 60 | Is the sentence supported by the context above? 61 | Answer Yes or No:""", 62 | } 63 | ], 64 | model="gpt-3.5-turbo", 65 | temperature=0.0, 66 | ) 67 | scores.append(float("yes" in response.lower())) 68 | sentences_scores.append(sum(scores) / len(scores)) 69 | 70 | return sum(sentences_scores) / len(sentences_scores) 71 | -------------------------------------------------------------------------------- /parea/evals/general/semantic_similarity.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Optional, Union 2 | 3 | import numpy as np 4 | 5 | from parea.evals.utils import embed 6 | from parea.schemas import Log 7 | 8 | 9 | def semantic_similarity_factory( 10 | embd_model: str = "text-embedding-3-small", 11 | is_azure: Optional[bool] = False, 12 | ) -> Callable[[Log], Union[float, None]]: 13 | def semantic_similarity(log: Log) -> Union[float, None]: 14 | """Calculates semantic similarity between output and target""" 15 | output = log.output 16 | if (target := log.target) is None: 17 | return None 18 | 19 | output_vector = embed(model=embd_model, input=output, is_azure=is_azure) 20 | target_vector = embed(model=embd_model, input=target, is_azure=is_azure) 21 | output_vector = np.array(output_vector) 22 | target_vector = np.array(target_vector) 23 | 24 | return (np.dot(output_vector, target_vector) / (np.linalg.norm(output_vector) * np.linalg.norm(target_vector)) + 1) / 2 25 | 26 | return semantic_similarity 27 | 28 | 29 | semantic_similarity_oai_3_small = semantic_similarity_factory() 30 | semantic_similarity_oai_3_large = semantic_similarity_factory(embd_model="text-embedding-3-large") 31 | semantic_similarity_oai_ada_002 = semantic_similarity_factory(embd_model="text-embedding-ada-002") 32 | -------------------------------------------------------------------------------- /parea/evals/rag/__init__.py: -------------------------------------------------------------------------------- 1 | from .answer_context_faithfulness_binary import answer_context_faithfulness_binary_factory 2 | from .answer_context_faithfulness_precision import answer_context_faithfulness_precision_factory 3 | from .answer_context_faithfulness_statement_level import answer_context_faithfulness_statement_level_factory 4 | from .context_has_answer import context_has_answer_factory 5 | from .context_query_relevancy import context_query_relevancy_factory 6 | from 
.context_ranking_listwise import context_ranking_listwise_factory 7 | from .context_ranking_pointwise import context_ranking_pointwise_factory 8 | from .percent_target_supported_by_context import percent_target_supported_by_context_factory 9 | -------------------------------------------------------------------------------- /parea/evals/rag/answer_context_faithfulness_binary.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Optional 2 | 3 | from parea.evals.utils import call_openai 4 | from parea.schemas.log import Log 5 | 6 | 7 | def answer_context_faithfulness_binary_factory( 8 | question_field: Optional[str] = "question", context_field: Optional[str] = "context", model: Optional[str] = "gpt-3.5-turbo-16k", is_azure: Optional[bool] = False 9 | ) -> Callable[[Log], float]: 10 | """ 11 | This factory creates an evaluation function that classifies if the generated answer was faithful to the given context. 12 | It is based on the paper [Evaluating Correctness and Faithfulness of Instruction-Following Models for Question Answering](https://arxiv.org/abs/2307.16877) 13 | which suggests using an LLM to flag any information in the generated answer that cannot be deduced from the given context. 14 | They find that GPT-4 is the best model for this analysis as measured by correlation with human judgment. 15 | 16 | Args: 17 | is_azure: Whether to use the Azure API. Defaults to False. 18 | question_field: The key name/field used for the question/query of the user. Defaults to "question". 19 | context_field: The key name/field used for the retrieved context. Defaults to "context". 20 | model: The model which should be used for grading. Currently, only supports OpenAI chat models. Defaults to "gpt-3.5-turbo-16k". 21 | 22 | Returns: 23 | Callable[[Log], float]: A function that takes a log as input and returns a score between 0 and 1 indicating 24 | if the generated answer was faithful to the given context. 25 | """ 26 | 27 | def answer_context_faithfulness_binary(log: Log) -> float: 28 | question = log.inputs[question_field] 29 | evidence = log.inputs[context_field] 30 | output = log.output 31 | response = call_openai( 32 | model=model, 33 | messages=[ 34 | {"role": "system", "content": "You are CompareGPT, a machine to verify the groundedness of predictions. Answer with " "only yes/no."}, 35 | { 36 | "role": "user", 37 | "content": f"You are given a question, the corresponding evidence and a prediction from a model. Compare " 38 | f'the "Prediction" and the "Evidence" to determine whether all the information of the ' 39 | f"prediction is present in the evidence or can be inferred from the evidence. 
You must answer " 40 | f'"no" if there are any specific details in the prediction that are not mentioned in the ' 41 | f"evidence or cannot be inferred from the evidence.\n\n" 42 | f"Question: {question}\n\nPrediction: {output}\n\nEvidence: {evidence}\n\nCompareGPT response:", 43 | }, 44 | ], 45 | temperature=0.0, 46 | is_azure=is_azure, 47 | ) 48 | return float("yes" in response.lower()) 49 | 50 | return answer_context_faithfulness_binary 51 | -------------------------------------------------------------------------------- /parea/evals/rag/answer_context_faithfulness_precision.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Optional 2 | 3 | from collections import Counter 4 | 5 | from parea.evals.utils import get_tokens 6 | from parea.schemas.log import Log 7 | 8 | 9 | def answer_context_faithfulness_precision_factory(context_field: Optional[str] = "context") -> Callable[[Log], float]: 10 | """ 11 | This factory creates an evaluation function that calculates how many tokens in the generated answer are also present in the retrieved context. 12 | It is based on the paper [Evaluating Correctness and Faithfulness of Instruction-Following Models for Question Answering](https://arxiv.org/abs/2307.16877) 13 | which finds that this method only slightly lags behind GPT-4 and outperforms GPT-3.5-turbo (see Table 4 from the above paper). 14 | 15 | Args: 16 | context_field: The key name/field used for the retrieved context. Defaults to "context". 17 | 18 | Returns: 19 | Callable[[Log], float]: A function that takes a log as input and returns a score between 0 and 1 indicating 20 | the proportion of tokens in the generated answer that are also present in the retrieved context. 21 | """ 22 | 23 | def answer_context_faithfulness_precision(log: Log) -> float: 24 | """Prop. of tokens in model generation which are also present in the retrieved context.""" 25 | context = log.inputs[context_field] 26 | model = log.configuration.model 27 | 28 | context_tokens = get_tokens(model, context) 29 | output_tokens = get_tokens(model, log.output) 30 | 31 | if len(context_tokens) == 0: 32 | return 1.0 33 | elif len(output_tokens) == 0: 34 | return 0.0 35 | 36 | common_tokens = Counter(context_tokens) & Counter(output_tokens) 37 | num_common = sum(common_tokens.values()) 38 | return num_common / len(output_tokens) 39 | 40 | return answer_context_faithfulness_precision 41 | -------------------------------------------------------------------------------- /parea/evals/rag/context_has_answer.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Optional 2 | 3 | import json 4 | 5 | from parea.evals import call_openai 6 | from parea.schemas import Log 7 | 8 | 9 | def context_has_answer_factory(question_field: Optional[str] = "question", model: Optional[str] = "gpt-3.5-turbo-0125", is_azure: Optional[bool] = False) -> Callable[[Log], bool]: 10 | """ 11 | This factory creates an evaluation metric which assesses whether the given context has the answer to the given question. 12 | It is useful to measure the performance of a model in a question-answering task by measuring Hit Rate without the need to know the correct answer. 13 | 14 | Args: 15 | question_field: The key name/field used for the question/query of the user. Defaults to "question". 16 | model: The model which should be used for grading. Currently, only supports OpenAI chat models. Defaults to "gpt-3.5-turbo-0125".
17 | is_azure: Whether to use the Azure API. Defaults to False. 18 | 19 | Returns: 20 | Callable[[Log], bool]: A function that takes a log as input and returns a boolean indicating if the context has the answer to the given question. 21 | """ 22 | 23 | def context_has_answer(log: Log) -> bool: 24 | question = log.inputs[question_field] 25 | answer = str(log.output) 26 | 27 | formatted_messages = [ 28 | { 29 | "role": "user", 30 | "content": f"""You are given a question and a list of answers. The answers were retrieved from a database which contains the question answer pairs. You need to decide if any of the given answers is the answer to the given question. 31 | 32 | Question: 33 | {question} 34 | 35 | Answers: 36 | {answer} 37 | 38 | Answer in the following JSON format: 39 | {{"thoughts": "<your thoughts>", "final_verdict": "<true or false>"}}""", 40 | } 41 | ] 42 | 43 | response = call_openai(model=model, temperature=0.0, messages=formatted_messages, response_format={"type": "json_object"}, is_azure=is_azure) 44 | final_verdict = json.loads(response).get("final_verdict", "").lower() 45 | return final_verdict == "true" 46 | 47 | return context_has_answer 48 | -------------------------------------------------------------------------------- /parea/evals/rag/context_query_relevancy.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, List, Optional 2 | 3 | from parea.evals.utils import call_openai, get_context, sent_tokenize 4 | from parea.schemas.log import Log 5 | 6 | 7 | def context_query_relevancy_factory( 8 | question_field: str = "question", context_fields: Optional[List[str]] = None, model: Optional[str] = "gpt-3.5-turbo-16k", is_azure: Optional[bool] = False 9 | ) -> Callable[[Log], float]: 10 | """ 11 | This factory creates an evaluation function that measures how relevant the retrieved context is to the given question. 12 | It is based on the paper [RAGAS: Automated Evaluation of Retrieval Augmented Generation](https://arxiv.org/abs/2309.15217) 13 | which suggests using an LLM to extract any sentence from the retrieved context relevant to the query. Then, calculate 14 | the ratio of relevant sentences to the total number of sentences in the retrieved context. 15 | 16 | Args: 17 | is_azure: Whether to use the Azure API. Defaults to False. 18 | model: The model which should be used for grading. Defaults to "gpt-3.5-turbo-16k". 19 | question_field: The key name/field used for the question/query of the user. Defaults to "question". 20 | context_fields: An optional list of key names/fields used for the retrieved contexts in the input to the function. If empty list or None, it will use the output field of the log as context. Defaults to None. 21 | 22 | Returns: 23 | Callable[[Log], float]: A function that takes a log as input and returns a score between 0 and 1 indicating 24 | if the retrieved context is relevant to the query. 25 | """ 26 | 27 | def context_query_relevancy(log: Log) -> float: 28 | """Quantifies how much the retrieved context relates to the query.""" 29 | question = log.inputs[question_field] 30 | context = get_context(log, context_fields) 31 | 32 | extracted_sentences = call_openai( 33 | model=model, 34 | messages=[ 35 | { 36 | "role": "user", 37 | "content": f"""\ 38 | Please extract relevant sentences from the provided context that are absolutely required to answer the following question. If no relevant sentences are found, or if you believe the question cannot be answered from the given context, return the phrase "Insufficient Information". 
While extracting candidate sentences you're not allowed to make any changes to sentences from given context. 39 | 40 | question:{question} 41 | context:\n{context} 42 | candidate sentences:\n""", 43 | } 44 | ], 45 | temperature=0.0, 46 | is_azure=is_azure, 47 | ).strip() 48 | if "insufficient information" in extracted_sentences.lower() and abs(len(extracted_sentences) - len("insufficient information")) < 10: 49 | return 0.0 50 | else: 51 | n_extracted_sentences = len(sent_tokenize(extracted_sentences)) 52 | n_context_sentences = len(sent_tokenize(context)) 53 | return n_extracted_sentences / n_context_sentences 54 | 55 | return context_query_relevancy 56 | -------------------------------------------------------------------------------- /parea/evals/summary/__init__.py: -------------------------------------------------------------------------------- 1 | from .factual_inconsistency_binary import factual_inconsistency_binary_factory 2 | from .factual_inconsistency_scale import factual_inconsistency_scale_factory 3 | from .likert_scale import likert_scale_factory 4 | -------------------------------------------------------------------------------- /parea/evals/summary/factual_inconsistency_binary.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Optional 2 | 3 | from parea.evals.utils import call_openai 4 | from parea.schemas.log import Log 5 | 6 | 7 | def factual_inconsistency_binary_factory(article_field: Optional[str] = "article", model: Optional[str] = "gpt-4", is_azure: Optional[bool] = False) -> Callable[[Log], float]: 8 | """ 9 | This factory creates an evaluation function that classifies if a summary is factually inconsistent with the original text. 10 | It is based on the paper [ChatGPT as a Factual Inconsistency Evaluator for Text Summarization](https://arxiv.org/abs/2303.15621) 11 | which suggests using an LLM to assess the factuality of a summary by measuring how consistent the summary is with 12 | the original text, posed as a binary classification. They find that `gpt-3.5-turbo-0301` outperforms 13 | baseline methods such as SummaC and QuestEval when identifying factually inconsistent summaries. 14 | 15 | Args: 16 | article_field: The key name/field used for the content which should be summarized. Defaults to "article". 17 | model: The model which should be used for grading. Currently, only supports OpenAI chat models. Defaults to "gpt-4". 18 | is_azure: Whether to use the Azure API. Defaults to False. 19 | 20 | Returns: 21 | Callable[[Log], float]: A function that takes a log as input and returns a score between 0 and 1 indicating 22 | if the generated summary is factually consistent with the original text. 23 | """ 24 | 25 | def factual_inconsistency_binary(log: Log) -> float: 26 | article = log.inputs[article_field] 27 | output = log.output 28 | prompt = f"""Decide if the following summary is consistent with the corresponding article. Note that consistency means all information in the summary is supported by the article. 
29 | Article: {article} 30 | Summary: {output} 31 | Explain your reasoning step by step then answer (yes or no) the question:""" 32 | response = call_openai( 33 | model=model, 34 | messages=[ 35 | {"role": "user", "content": prompt}, 36 | ], 37 | temperature=0.0, 38 | is_azure=is_azure, 39 | ) 40 | return float("yes" in response.lower()) 41 | 42 | return factual_inconsistency_binary 43 | -------------------------------------------------------------------------------- /parea/evals/summary/factual_inconsistency_scale.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Optional 2 | 3 | import re 4 | 5 | from parea.evals.utils import call_openai 6 | from parea.schemas.log import Log 7 | 8 | 9 | def factual_inconsistency_scale_factory(article_field: Optional[str] = "article", model: Optional[str] = "gpt-4", is_azure: Optional[bool] = False) -> Callable[[Log], float]: 10 | """ 11 | This factory creates an evaluation function that grades the factual consistency of a summary with the article on a scale from 1 to 10. 12 | It is based on the paper [ChatGPT as a Factual Inconsistency Evaluator for Text Summarization](https://arxiv.org/abs/2303.15621) 13 | which finds that using `gpt-3.5-turbo-0301` leads to a higher correlation with human expert judgment when grading 14 | the factuality of summaries on a scale from 1 to 10 than baseline methods such as SummaC and QuestEval. 15 | 16 | Args: 17 | article_field: The key name/field used for the content which should be summarized. Defaults to "article". 18 | model: The model which should be used for grading. Currently, only supports OpenAI chat models. Defaults to "gpt-4". 19 | is_azure: Whether to use the Azure API. Defaults to False. 20 | 21 | Returns: 22 | Callable[[Log], float]: A function that takes a log as input and returns a score between 0 and 1 indicating 23 | if the generated summary is factually consistent with the original text. 24 | """ 25 | 26 | def factual_inconsistency_scale(log: Log) -> float: 27 | article = log.inputs[article_field] 28 | output = log.output 29 | prompt = f"""Score the following summary given the corresponding article with respect to consistency from 1 to 10. Note that consistency measures how much information included in the summary is present in the source article. 10 points indicate the summary contains only statements that are entailed by the source document. 
30 | Article: {article} 31 | Summary: {output} 32 | Marks: """ 33 | response = call_openai( 34 | model=model, 35 | messages=[ 36 | {"role": "user", "content": prompt}, 37 | ], 38 | temperature=0.0, 39 | is_azure=is_azure, 40 | ) 41 | 42 | pattern = re.compile(r"\d+") 43 | match = pattern.search(response) 44 | if match: 45 | score = match.group() 46 | else: 47 | score = 0 48 | 49 | return float(score) / 10.0 50 | 51 | return factual_inconsistency_scale 52 | -------------------------------------------------------------------------------- /parea/evals/summary/likert_scale.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Optional 2 | 3 | import re 4 | 5 | from parea.evals.utils import call_openai 6 | from parea.schemas.log import Log 7 | 8 | 9 | def likert_scale_factory(article_field: Optional[str] = "article", model: Optional[str] = "gpt-4", is_azure: Optional[bool] = False) -> Callable[[Log], float]: 10 | """ 11 | This factory creates an evaluation function that grades the quality of a summary on a Likert scale from 1-5 along 12 | the dimensions of relevance, consistency, fluency, and coherence. It is based on the paper 13 | [Human-like Summarization Evaluation with ChatGPT](https://arxiv.org/abs/2304.02554) which finds that using `gpt-3.5-0301` 14 | leads to a higher correlation with human expert judgment when grading summaries on a Likert scale from 1-5 than baseline 15 | methods. Noteworthy is that [BARTScore](https://arxiv.org/abs/2106.11520) was very competitive with `gpt-3.5-0301`. 16 | 17 | Args: 18 | is_azure: Whether to use the Azure API. Defaults to False. 19 | article_field: The key name/field used for the content which should be summarized. Defaults to "article". 20 | model: The model which should be used for grading. Currently, only supports OpenAI chat models. Defaults to "gpt-4". 21 | 22 | Returns: 23 | Callable[[Log], float]: A function that takes a log as input and returns a score between 0 and 1 indicating 24 | the quality of the summary on a Likert scale from 1-5 along the dimensions of relevance, consistency, fluency, and coherence. 25 | """ 26 | 27 | def likert_scale(log: Log) -> float: 28 | article = log.inputs[article_field] 29 | output = log.output 30 | prompt = f"""Evaluate the quality of summaries written for a news article. Rate each summary on four dimensions: relevance, consistency, fluency, and coherence. You should rate on a scale from 1 (worst) to 5 (best). 31 | 32 | Definitions are as follows: 33 | Relevance: The rating measures how well the summary captures the key points of the article. Consider whether all and only the important aspects are contained in the summary. 34 | Consistency: The rating measures whether the facts in the summary are consistent with the facts in the original article. Consider whether the summary does reproduce all facts accurately and does not make up untrue information. 35 | Fluency: This rating measures the quality of individual sentences, whether they are well-written and grammatically correct. Consider the quality of individual sentences. 36 | Coherence: The rating measures the quality of all sentences collectively, to fit together and sound natural. Consider the quality of the summary as a whole. 
37 | 38 | The article and the summary are given below: 39 | Article: {article} 40 | Summary: {output}""" 41 | response = call_openai( 42 | model=model, 43 | messages=[ 44 | {"role": "user", "content": prompt}, 45 | ], 46 | temperature=0.0, 47 | is_azure=is_azure, 48 | ) 49 | 50 | # extract the scores 51 | pattern = re.compile(r"\d+") 52 | matches = pattern.findall(response) 53 | if matches: 54 | scores = matches 55 | else: 56 | scores = [0, 0, 0, 0] 57 | 58 | # normalize the scores 59 | scores = [float(score) / 5.0 for score in scores] 60 | 61 | # average the scores 62 | return sum(scores) / len(scores) 63 | 64 | return likert_scale 65 | -------------------------------------------------------------------------------- /parea/experiment/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parea-ai/parea-sdk-py/968486f9bf7aa4741bb307739f9dec573b30bc95/parea/experiment/__init__.py -------------------------------------------------------------------------------- /parea/experiment/cli.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import argparse 4 | import csv 5 | import os 6 | import sys 7 | import traceback 8 | from importlib import util 9 | 10 | from .experiment import _experiments 11 | 12 | 13 | def load_from_path(module_path): 14 | # Ensure the directory of user-provided script is in the system path 15 | dir_name = os.path.dirname(module_path) 16 | if dir_name not in sys.path: 17 | sys.path.insert(0, dir_name) 18 | 19 | module_name = os.path.basename(module_path) 20 | spec = util.spec_from_file_location(module_name, module_path) 21 | module = util.module_from_spec(spec) 22 | spec.loader.exec_module(module) 23 | 24 | if spec.name not in sys.modules: 25 | sys.modules[spec.name] = module 26 | 27 | 28 | def read_input_file(file_path) -> List[dict]: 29 | with open(file_path) as file: 30 | reader = csv.DictReader(file) 31 | inputs = list(reader) 32 | return inputs 33 | 34 | 35 | def experiment(args): 36 | parser = argparse.ArgumentParser() 37 | parser.add_argument("file", help="Path to the experiment", type=str) 38 | parser.add_argument("--run_name", help="Name of the experiment run", type=str, default=None) 39 | 40 | parsed_args = parser.parse_args(args) 41 | 42 | try: 43 | load_from_path(parsed_args.file) 44 | except Exception as e: 45 | print(f"Error loading function: {e}\n", file=sys.stderr) 46 | traceback.print_exc() 47 | sys.exit(1) 48 | 49 | for _experiment in _experiments: 50 | _experiment.run(parsed_args.run_name) 51 | -------------------------------------------------------------------------------- /parea/experiment/datasets.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional 2 | 3 | from parea.helpers import gen_random_name 4 | from parea.schemas.models import CreateTestCase, CreateTestCaseCollection 5 | from parea.utils.universal_encoder import json_dumps 6 | 7 | 8 | def create_test_collection(data: List[Dict[str, Any]], name: Optional[str] = None) -> CreateTestCaseCollection: 9 | """Create a test case collection from a dictionary of test cases. 10 | Args: 11 | data: list of key-value pairs where keys represent input names. 12 | Each item in the list represent a test case row. 13 | Target and Tags are reserved keys. There can only be one target and tags key per dict item. 14 | If target is present it will represent the target/expected response for the inputs. 
15 | If tags are present they must be a list of json_serializable values. 16 | name: A unique name for the test collection. If not provided a random name will be generated. 17 | 18 | Returns: CreateTestCaseCollection 19 | """ 20 | if not name: 21 | name = gen_random_name() 22 | 23 | column_names = list({k for row in data for k in row.keys() if k not in ["target", "tags"]}) 24 | test_cases = create_test_cases(data) 25 | 26 | return CreateTestCaseCollection(name=name, column_names=column_names, test_cases=test_cases) 27 | 28 | 29 | def create_test_cases(data: List[Dict[str, Any]]) -> List[CreateTestCase]: 30 | """Create a list of test cases from a dictionary. 31 | Args: 32 | data: list of key-value pairs where keys represent input names. 33 | Each item in the list represent a test case row. 34 | Target and Tags are reserved keys. There can only be one target and tags key per dict item. 35 | If target is present it will represent the target/expected response for the inputs. 36 | If tags are present they must be a list of json_serializable values. 37 | 38 | Returns: List[CreateTestCase] 39 | """ 40 | test_cases: List[CreateTestCase] = [] 41 | for row in data: 42 | inputs: Dict[str, str] = {} 43 | target: Optional[str] = None 44 | tags: list = [] 45 | for k, v in row.items(): 46 | if k == "target": 47 | if target is not None: 48 | print("There can only be one target key per test case. Only the first target will be used.") 49 | target = json_dumps(v) 50 | elif k == "tags": 51 | if not isinstance(v, list): 52 | raise ValueError("Tags must be a list of json serializable values.") 53 | if tags: 54 | print("There can only be one tags key per test case. Only the first set of tags will be used.") 55 | tags = [tag if isinstance(tag, str) else json_dumps(tag) for tag in v] 56 | else: 57 | inputs[k] = v if isinstance(v, str) else json_dumps(v) 58 | test_cases.append(CreateTestCase(inputs=inputs, target=target, tags=tags)) 59 | 60 | return test_cases 61 | -------------------------------------------------------------------------------- /parea/experiment/dvc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | 4 | from parea.constants import PAREA_DVC_DIR, PAREA_DVC_METRICS_FILE, PAREA_DVC_YAML_FILE 5 | from parea.utils.universal_encoder import json_dumps 6 | 7 | 8 | def is_git_repo(): 9 | try: 10 | subprocess.check_output(["git", "branch"], stderr=subprocess.STDOUT) 11 | return True 12 | except: 13 | return False 14 | 15 | 16 | def save_results_to_dvc_if_init(experiment_name: str, metrics: dict): 17 | if not parea_dvc_initialized(only_check=True): 18 | return 19 | write_metrics_to_dvc(metrics) 20 | try: 21 | subprocess.run(["dvc", "exp", "save", "-n", experiment_name], check=True) 22 | except subprocess.CalledProcessError as e: 23 | print(f"Failed to save results to DVC: {e}") 24 | 25 | 26 | def write_metrics_to_dvc(metrics: dict): 27 | git_root = subprocess.check_output(["git", "rev-parse", "--show-toplevel"], text=True, stderr=subprocess.STDOUT).strip() 28 | with open(os.path.join(git_root, PAREA_DVC_METRICS_FILE), "w") as f: 29 | f.write(json_dumps(metrics, indent=2)) 30 | 31 | 32 | def _check_has_been_committed(git_root: str, file: str) -> bool: 33 | output = subprocess.check_output(["git", "log", "--", file], cwd=git_root, text=True, stderr=subprocess.STDOUT) 34 | return output and len(output) > 0 35 | 36 | 37 | def parea_dvc_initialized(only_check: bool) -> bool: 38 | print_fn = print if not only_check else lambda *args, 
**kwargs: None 39 | 40 | if not is_git_repo(): 41 | print_fn("Git repository is not found. Please run `git init` to initialize a git repository.") 42 | return False 43 | 44 | git_root = subprocess.check_output(["git", "rev-parse", "--show-toplevel"], text=True, stderr=subprocess.STDOUT).strip() 45 | 46 | # make sure DVC is initialized 47 | if not os.path.exists(os.path.join(git_root, ".dvc")): 48 | print_fn("DVC is not initialized. Please run `dvc init` to initialize DVC.") 49 | return False 50 | 51 | # make sure dvc.yaml and metrics.json exist in .parea directory 52 | if not os.path.exists(os.path.join(git_root, PAREA_DVC_YAML_FILE)): 53 | if only_check: 54 | return False 55 | else: 56 | print_fn(f"{PAREA_DVC_YAML_FILE} is not found. Creating the file.") 57 | if not os.path.exists(os.path.join(git_root, PAREA_DVC_DIR)): 58 | os.mkdir(os.path.join(git_root, PAREA_DVC_DIR)) 59 | with open(os.path.join(git_root, PAREA_DVC_YAML_FILE), "w") as f: 60 | f.write("metrics:\n - metrics.json\n") 61 | subprocess.run(["git", "add", PAREA_DVC_YAML_FILE], cwd=git_root, check=True) 62 | if not os.path.exists(os.path.join(git_root, PAREA_DVC_METRICS_FILE)): 63 | if only_check: 64 | return False 65 | else: 66 | print_fn(f"{PAREA_DVC_METRICS_FILE} is not found. Creating the file.") 67 | if not os.path.exists(os.path.join(git_root, PAREA_DVC_DIR)): 68 | os.mkdir(os.path.join(git_root, PAREA_DVC_DIR)) 69 | write_metrics_to_dvc({}) 70 | subprocess.run(["git", "add", PAREA_DVC_METRICS_FILE], cwd=git_root, check=True) 71 | 72 | # make sure dvc.yaml and metrics.json are committed 73 | dvc_yaml_file_missing = not _check_has_been_committed(git_root, PAREA_DVC_YAML_FILE) 74 | dvc_metrics_file_missing = not _check_has_been_committed(git_root, PAREA_DVC_METRICS_FILE) 75 | if dvc_metrics_file_missing: 76 | print_fn(f"{PAREA_DVC_METRICS_FILE} is not committed. Please commit the file to your git history.") 77 | if dvc_yaml_file_missing: 78 | print_fn(f"{PAREA_DVC_YAML_FILE} is not committed. 
Please to commit the file to your git history.") 79 | if dvc_metrics_file_missing or dvc_yaml_file_missing: 80 | return False 81 | 82 | print_fn("Parea's DVC integration is initialized.") 83 | return True 84 | -------------------------------------------------------------------------------- /parea/parea_logger.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional 2 | 3 | import json 4 | import logging 5 | import os 6 | 7 | from attrs import asdict, define, field 8 | from cattrs import structure 9 | 10 | from parea.api_client import HTTPClient 11 | from parea.constants import PAREA_OS_ENV_EXPERIMENT_UUID 12 | from parea.helpers import serialize_metadata_values 13 | from parea.schemas.log import TraceIntegrations 14 | from parea.schemas.models import CreateGetProjectResponseSchema, TraceLog, UpdateLog 15 | from parea.utils.trace_integrations.langchain_utils import _dumps_json 16 | from parea.utils.universal_encoder import json_dumps 17 | 18 | logger = logging.getLogger() 19 | 20 | LOG_ENDPOINT = "/trace_log" 21 | VENDOR_LOG_ENDPOINT = "/trace_log/{vendor}" 22 | 23 | 24 | @define 25 | class PareaLogger: 26 | _client: HTTPClient = field(init=False, default=None) 27 | _project_uuid: str = field(init=False, default=None) 28 | _project_name: str = field(init=False, default=None) 29 | 30 | def set_client(self, client: HTTPClient) -> None: 31 | self._client = client 32 | 33 | def set_project_uuid(self, project_uuid: str, project_name: str) -> None: 34 | self._project_uuid = project_uuid 35 | self._project_name = project_name 36 | 37 | def _get_project_uuid(self) -> str: 38 | try: 39 | if not self._project_uuid: 40 | self._project_uuid = self._create_or_get_project(self._project_name or "default").uuid 41 | return self._project_uuid 42 | except Exception as e: 43 | logger.error(f"PareaLogger: Error getting project uuid for project {self._project_name}: {e}") 44 | raise 45 | 46 | def _create_or_get_project(self, name: str) -> CreateGetProjectResponseSchema: 47 | r = self._client.request( 48 | "POST", 49 | "/project", 50 | data={"name": name}, 51 | ) 52 | return structure(r.json(), CreateGetProjectResponseSchema) 53 | 54 | def update_log(self, data: UpdateLog) -> None: 55 | data = serialize_metadata_values(data) 56 | self._client.request( 57 | "PUT", 58 | LOG_ENDPOINT, 59 | data=asdict(data), 60 | ) 61 | 62 | def record_log(self, data: TraceLog) -> None: 63 | data = serialize_metadata_values(data) 64 | data.project_uuid = self._get_project_uuid() 65 | self._client.request( 66 | "POST", 67 | LOG_ENDPOINT, 68 | data=asdict(data), 69 | ) 70 | 71 | async def arecord_log(self, data: TraceLog) -> None: 72 | data = serialize_metadata_values(data) 73 | data.project_uuid = self._get_project_uuid() 74 | await self._client.request_async( 75 | "POST", 76 | LOG_ENDPOINT, 77 | data=asdict(data), 78 | ) 79 | 80 | def default_log(self, data: TraceLog) -> None: 81 | if self._client: 82 | if data.target: 83 | data.target = json_dumps(data.target) 84 | self.record_log(data) 85 | 86 | def record_vendor_log(self, data: Dict[str, Any], vendor: TraceIntegrations) -> None: 87 | data["project_uuid"] = self._get_project_uuid() 88 | if experiment_uuid := os.getenv(PAREA_OS_ENV_EXPERIMENT_UUID, None): 89 | data["experiment_uuid"] = experiment_uuid 90 | self._client.add_integration("langchain") 91 | self._client.request( 92 | "POST", 93 | VENDOR_LOG_ENDPOINT.format(vendor=vendor.value), 94 | data=json.loads(_dumps_json(data)), # uuid is not serializable 95 | 
) 96 | 97 | async def arecord_vendor_log(self, data: Dict[str, Any], vendor: TraceIntegrations) -> None: 98 | data["project_uuid"] = self._get_project_uuid() 99 | if experiment_uuid := os.getenv(PAREA_OS_ENV_EXPERIMENT_UUID, None): 100 | data["experiment_uuid"] = experiment_uuid 101 | self._client.add_integration("langchain") 102 | await self._client.request_async( 103 | "POST", 104 | VENDOR_LOG_ENDPOINT.format(vendor=vendor.value), 105 | data=json.loads(_dumps_json(data)), # uuid is not serializable 106 | ) 107 | 108 | 109 | parea_logger = PareaLogger() 110 | -------------------------------------------------------------------------------- /parea/schemas/__init__.py: -------------------------------------------------------------------------------- 1 | from .log import * 2 | from .models import * 3 | -------------------------------------------------------------------------------- /parea/types.py: -------------------------------------------------------------------------------- 1 | from openai import AsyncStream, Stream 2 | 3 | 4 | class OpenAIAsyncStreamWrapper: 5 | def __init__(self, async_stream: AsyncStream, accumulator, info_from_response, update_accumulator_streaming, final_processing_and_logging): 6 | self._async_stream = async_stream 7 | self._final_processing_and_logging = final_processing_and_logging 8 | self._update_accumulator_streaming = update_accumulator_streaming 9 | self._accumulator = accumulator 10 | self._info_from_response = info_from_response 11 | 12 | def __getattr__(self, attr): 13 | # delegate attribute access to the original async_stream 14 | return getattr(self._async_stream, attr) 15 | 16 | async def __aiter__(self): 17 | async for chunk in self._async_stream: 18 | self._update_accumulator_streaming(self._accumulator, self._info_from_response, chunk) 19 | yield chunk 20 | 21 | self._final_processing_and_logging(self._accumulator, self._info_from_response) 22 | 23 | 24 | class OpenAIStreamWrapper: 25 | def __init__(self, stream: Stream, accumulator, info_from_response, update_accumulator_streaming, final_processing_and_logging): 26 | self._stream = stream 27 | self._final_processing_and_logging = final_processing_and_logging 28 | self._update_accumulator_streaming = update_accumulator_streaming 29 | self._accumulator = accumulator 30 | self._info_from_response = info_from_response 31 | 32 | def __getattr__(self, attr): 33 | # delegate attribute access to the original async_stream 34 | return getattr(self._stream, attr) 35 | 36 | def __iter__(self): 37 | for chunk in self._stream: 38 | self._update_accumulator_streaming(self._accumulator, self._info_from_response, chunk) 39 | yield chunk 40 | 41 | self._final_processing_and_logging(self._accumulator, self._info_from_response) 42 | -------------------------------------------------------------------------------- /parea/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parea-ai/parea-sdk-py/968486f9bf7aa4741bb307739f9dec573b30bc95/parea/utils/__init__.py -------------------------------------------------------------------------------- /parea/utils/trace_integrations/langchain.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional, Union 2 | 3 | import logging 4 | from uuid import UUID 5 | 6 | from langchain_core.tracers import BaseTracer 7 | from langchain_core.tracers.schemas import ChainRun, LLMRun, Run, ToolRun 8 | 9 | from parea.helpers import 
is_logging_disabled 10 | from parea.parea_logger import parea_logger 11 | from parea.schemas import UpdateTraceScenario 12 | from parea.schemas.log import TraceIntegrations 13 | from parea.utils.trace_utils import fill_trace_data, get_current_trace_id, get_root_trace_id 14 | 15 | logger = logging.getLogger() 16 | 17 | 18 | class PareaAILangchainTracer(BaseTracer): 19 | parent_trace_id: UUID 20 | _parea_root_trace_id: str = None 21 | _parea_parent_trace_id: str = None 22 | _session_id: Optional[str] = None 23 | _tags: List[str] = [] 24 | _metadata: Dict[str, Any] = {} 25 | _end_user_identifier: Optional[str] = None 26 | _deployment_id: Optional[str] = None 27 | _log_sample_rate: Optional[float] = 1.0 28 | 29 | def __init__( 30 | self, 31 | session_id: Optional[str] = None, 32 | tags: Optional[List[str]] = None, 33 | metadata: Optional[Dict[str, Any]] = None, 34 | end_user_identifier: Optional[str] = None, 35 | deployment_id: Optional[str] = None, 36 | log_sample_rate: Optional[float] = 1.0, 37 | **kwargs: Any, 38 | ): 39 | super().__init__(**kwargs) 40 | self._session_id = session_id 41 | self._end_user_identifier = end_user_identifier 42 | self._deployment_id = deployment_id 43 | self._log_sample_rate = log_sample_rate 44 | if tags: 45 | self._tags = tags 46 | if metadata: 47 | self._metadata = metadata 48 | 49 | def _persist_run(self, run: Union[Run, LLMRun, ChainRun, ToolRun]) -> None: 50 | if is_logging_disabled(): 51 | return 52 | try: 53 | self.parent_trace_id = run.id 54 | # using .dict() since langchain Run class currently set to Pydantic v1 55 | data = run.dict() 56 | data["_parea_root_trace_id"] = self._parea_root_trace_id or None 57 | data["_session_id"] = self._session_id 58 | data["_tags"] = self._tags 59 | data["_metadata"] = self._metadata 60 | data["_end_user_identifier"] = self._end_user_identifier 61 | data["_deployment_id"] = self._deployment_id 62 | data["_log_sample_rate"] = self._log_sample_rate 63 | # check if run has an attribute execution order 64 | if (hasattr(run, "execution_order") and run.execution_order == 1) or run.parent_run_id is None: 65 | data["_parea_parent_trace_id"] = self._parea_parent_trace_id or None 66 | parea_logger.record_vendor_log(data, TraceIntegrations.LANGCHAIN) 67 | except Exception as e: 68 | logger.exception(f"Error occurred while logging langchain run: {e}", stack_info=True) 69 | 70 | def get_parent_trace_id(self) -> UUID: 71 | return self.parent_trace_id 72 | 73 | def _on_run_create(self, run: Run) -> None: 74 | if (hasattr(run, "execution_order") and run.execution_order == 1) or run.parent_run_id is None: 75 | # need to check if any traces already exist 76 | self._parea_root_trace_id = get_root_trace_id() 77 | if parent_trace_id := get_current_trace_id(): 78 | self._parea_parent_trace_id = parent_trace_id 79 | fill_trace_data(str(run.id), {"parent_trace_id": parent_trace_id}, UpdateTraceScenario.LANGCHAIN_CHILD) 80 | 81 | def _on_llm_end(self, run: Run) -> None: 82 | self._persist_run(run) 83 | 84 | def _on_chain_end(self, run: Run) -> None: 85 | self._persist_run(run) 86 | -------------------------------------------------------------------------------- /parea/utils/trace_integrations/wrapt_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | from copy import copy, deepcopy 4 | 5 | from wrapt import BoundFunctionWrapper, FunctionWrapper 6 | 7 | 8 | class CopyableBoundFunctionWrapper(BoundFunctionWrapper): # type: ignore 9 | """ 10 | A bound function wrapper 
that can be copied and deep-copied. When used to 11 | wrap a class method, this allows the entire class to be copied and 12 | deep-copied. 13 | 14 | For reference, see 15 | https://github.com/GrahamDumpleton/wrapt/issues/86#issuecomment-426161271 16 | and 17 | https://wrapt.readthedocs.io/en/master/wrappers.html#custom-function-wrappers 18 | """ 19 | 20 | def __copy__(self) -> "CopyableBoundFunctionWrapper": 21 | return CopyableBoundFunctionWrapper(copy(self.__wrapped__), self._self_instance, self._self_wrapper) 22 | 23 | def __deepcopy__(self, memo: Dict[Any, Any]) -> "CopyableBoundFunctionWrapper": 24 | return CopyableBoundFunctionWrapper(deepcopy(self.__wrapped__, memo), self._self_instance, self._self_wrapper) 25 | 26 | 27 | class CopyableFunctionWrapper(FunctionWrapper): # type: ignore 28 | """ 29 | A function wrapper that can be copied and deep-copied. When used to wrap a 30 | class method, this allows the entire class to be copied and deep-copied. 31 | 32 | For reference, see 33 | https://github.com/GrahamDumpleton/wrapt/issues/86#issuecomment-426161271 34 | and 35 | https://wrapt.readthedocs.io/en/master/wrappers.html#custom-function-wrappers 36 | """ 37 | 38 | __bound_function_wrapper__ = CopyableBoundFunctionWrapper 39 | 40 | def __copy__(self) -> "CopyableFunctionWrapper": 41 | return CopyableFunctionWrapper(copy(self.__wrapped__), self._self_wrapper) 42 | 43 | def __deepcopy__(self, memo: Dict[Any, Any]) -> "CopyableFunctionWrapper": 44 | return CopyableFunctionWrapper(deepcopy(self.__wrapped__, memo), self._self_wrapper) 45 | -------------------------------------------------------------------------------- /parea/wrapper/__init__.py: -------------------------------------------------------------------------------- 1 | from parea.wrapper.openai.openai import OpenAIWrapper 2 | 3 | from .openai_raw_api_tracer import get_formatted_openai_response 4 | from .wrapper import Wrapper 5 | -------------------------------------------------------------------------------- /parea/wrapper/anthropic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parea-ai/parea-sdk-py/968486f9bf7aa4741bb307739f9dec573b30bc95/parea/wrapper/anthropic/__init__.py -------------------------------------------------------------------------------- /parea/wrapper/anthropic/stream_wrapper.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from types import TracebackType 4 | from typing import Callable 5 | 6 | from anthropic import AsyncMessageStreamManager, AsyncStream, MessageStreamManager, Stream 7 | from anthropic.types import Message 8 | 9 | 10 | class AnthropicStreamWrapper: 11 | def __init__(self, stream: Stream, accumulator, info_from_response, update_accumulator_streaming, final_processing_and_logging): 12 | self._stream = stream 13 | self._final_processing_and_logging = final_processing_and_logging 14 | self._update_accumulator_streaming = update_accumulator_streaming 15 | self._accumulator = accumulator 16 | self._info_from_response = info_from_response 17 | 18 | def __getattr__(self, attr): 19 | # delegate attribute access to the original stream 20 | return getattr(self._stream, attr) if hasattr(self._stream, attr) else None 21 | 22 | def __iter__(self): 23 | for chunk in self._stream: 24 | self._update_accumulator_streaming(self._accumulator, self._info_from_response, chunk) 25 | yield chunk 26 | 27 | 
self._final_processing_and_logging(self._accumulator, self._info_from_response) 28 | 29 | 30 | class AnthropicAsyncStreamWrapper: 31 | def __init__(self, stream: AsyncStream, accumulator, info_from_response, update_accumulator_streaming, final_processing_and_logging): 32 | self._stream = stream 33 | self._final_processing_and_logging = final_processing_and_logging 34 | self._update_accumulator_streaming = update_accumulator_streaming 35 | self._accumulator = accumulator 36 | self._info_from_response = info_from_response 37 | 38 | def __getattr__(self, attr): 39 | # delegate attribute access to the original async_stream 40 | return getattr(self._stream, attr) if hasattr(self._stream, attr) else None 41 | 42 | async def __aiter__(self): 43 | async for chunk in self._stream: 44 | self._update_accumulator_streaming(self._accumulator, self._info_from_response, chunk) 45 | yield chunk 46 | 47 | self._final_processing_and_logging(self._accumulator, self._info_from_response) 48 | 49 | 50 | class MessageStreamManagerWrapper(MessageStreamManager): 51 | def __init__(self, msm_instance: MessageStreamManager, resolve_and_log: Callable): 52 | self._msm_instance = msm_instance 53 | self._resolve_and_log = resolve_and_log 54 | 55 | def __getattr__(self, attr): 56 | if attr != "_private_stream": 57 | return getattr(self._msm_instance, attr) 58 | else: 59 | return self._private_stream 60 | 61 | def __enter__(self): 62 | self._private_stream = self._msm_instance.__enter__() 63 | return self._private_stream 64 | 65 | def __exit__( 66 | self, 67 | exc_type: type[BaseException] | None, 68 | exc: BaseException | None, 69 | exc_tb: TracebackType | None, 70 | ) -> None: 71 | m: Message = self._private_stream.get_final_message() 72 | self._resolve_and_log(m) 73 | return super().__exit__(exc_type, exc, exc_tb) 74 | 75 | 76 | class MessageAsyncStreamManagerWrapper(AsyncMessageStreamManager): 77 | def __init__(self, msm_instance: AsyncMessageStreamManager, resolve_and_log: Callable): 78 | self._msm_instance = msm_instance 79 | self._resolve_and_log = resolve_and_log 80 | 81 | def __getattr__(self, attr): 82 | if attr != "_private_stream": 83 | return getattr(self._msm_instance, attr) 84 | else: 85 | return self._private_stream 86 | 87 | async def __aenter__(self): 88 | self._private_stream = await self._msm_instance.__aenter__() 89 | return self._private_stream 90 | 91 | async def __aexit__( 92 | self, 93 | exc_type: type[BaseException] | None, 94 | exc: BaseException | None, 95 | exc_tb: TracebackType | None, 96 | ) -> None: 97 | m: Message = await self._private_stream.get_final_message() 98 | self._resolve_and_log(m) 99 | return await super().__aexit__(exc_type, exc, exc_tb) 100 | -------------------------------------------------------------------------------- /parea/wrapper/openai/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parea-ai/parea-sdk-py/968486f9bf7aa4741bb307739f9dec573b30bc95/parea/wrapper/openai/__init__.py -------------------------------------------------------------------------------- /parea/wrapper/openai_raw_api_tracer.py: -------------------------------------------------------------------------------- 1 | from typing import Any, AsyncGenerator, Generator 2 | 3 | import json 4 | from collections import defaultdict 5 | 6 | from parea.constants import CHUNK_DONE_SENTINEL 7 | from parea.utils.trace_utils import get_current_trace_id 8 | from parea.utils.universal_encoder import json_dumps 9 | from parea.wrapper.utils 
import convert_openai_raw_stream_to_log 10 | 11 | 12 | def process_stream_and_yield(response, data: dict) -> Generator: 13 | trace_id = get_current_trace_id() 14 | accumulated_content = [] 15 | accumulated_tools = defaultdict(lambda: {"function": {"arguments": [], "name": ""}}) 16 | 17 | for chunk in response.iter_lines(): 18 | format_and_accumulate_streaming_chunk(trace_id, accumulated_content, accumulated_tools, data, chunk) 19 | yield chunk 20 | 21 | 22 | async def aprocess_stream_and_yield(response, data: dict) -> AsyncGenerator: 23 | trace_id = get_current_trace_id() 24 | accumulated_content = [] 25 | accumulated_tools = defaultdict(lambda: {"function": {"arguments": [], "name": ""}}) 26 | 27 | async for chunk in response.aiter_lines(): 28 | format_and_accumulate_streaming_chunk(trace_id, accumulated_content, accumulated_tools, data, chunk) 29 | yield chunk 30 | 31 | 32 | def format_and_accumulate_streaming_chunk(trace_id: str, accumulated_content: list, accumulated_tools: dict, data: dict, chunk: Any) -> None: 33 | from openai.types.chat import ChatCompletionChunk 34 | 35 | try: 36 | chunk = chunk.decode("utf-8") 37 | except AttributeError: 38 | pass 39 | if chunk == CHUNK_DONE_SENTINEL: 40 | # when done send accumulated content to be logged in background thread 41 | convert_openai_raw_stream_to_log(accumulated_content, accumulated_tools, data, trace_id) 42 | else: 43 | chunk_data = raw_chunk_to_chat_completion_chunk(chunk) 44 | if isinstance(chunk_data, ChatCompletionChunk): 45 | for choice in chunk_data.choices or []: 46 | delta = choice.delta 47 | 48 | if delta.content: 49 | accumulated_content.append(delta.content) 50 | 51 | if delta.function_call: 52 | accumulated_tools[0]["function"]["name"] = delta.function_call.name or accumulated_tools[0]["function"]["name"] 53 | if delta.function_call.arguments: 54 | accumulated_tools[0]["function"]["arguments"].append(delta.function_call.arguments) 55 | 56 | for tool_call in delta.tool_calls or []: 57 | tool_id = tool_call.index 58 | accumulated_tools[tool_id]["function"]["name"] = tool_call.function.name or accumulated_tools[tool_id]["function"]["name"] 59 | if tool_call.function.arguments: 60 | accumulated_tools[tool_id]["function"]["arguments"].append(tool_call.function.arguments) 61 | 62 | 63 | def raw_chunk_to_chat_completion_chunk(chunk: str): 64 | from openai.types.chat import ChatCompletionChunk 65 | 66 | try: 67 | return ChatCompletionChunk(**json.loads(chunk[6:].strip())) 68 | except json.JSONDecodeError: 69 | return chunk 70 | 71 | 72 | def get_formatted_openai_response(r): 73 | # helper function to format the response from OpenAI 74 | if r["choices"][0]["message"].get("content"): 75 | return r["choices"][0]["message"]["content"].strip() 76 | elif r["choices"][0]["message"].get("function_call"): 77 | function_call = r["choices"][0]["message"]["function_call"] 78 | formatted_function_call = { 79 | "name": function_call["name"], 80 | "arguments": json.loads(function_call["arguments"]), 81 | } 82 | return json_dumps(formatted_function_call, indent=4) 83 | elif r["choices"][0]["message"].get("tool_calls"): 84 | formatted_tool_calls = [] 85 | tool_calls = r["choices"][0]["message"]["tool_calls"] 86 | for tool_call in tool_calls: 87 | formatted_tool_call = { 88 | "name": tool_call["function"]["name"], 89 | "arguments": json.loads(tool_call["function"]["arguments"]), 90 | } 91 | formatted_tool_calls.append(formatted_tool_call) 92 | return json_dumps(formatted_tool_calls, indent=4) 93 | return json_dumps(r, indent=4) 94 | 
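# Usage sketch (added for illustration; not part of the original module): given a raw
# /chat/completions response that has already been parsed into a dict, the helper returns
# the stripped message content, or a JSON dump of any function/tool calls. The payload
# below is a made-up minimal example, not taken from the source.
#
#   r = {"choices": [{"message": {"content": "  Hello!  "}}]}
#   get_formatted_openai_response(r)  # -> "Hello!"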
-------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [darglint] 2 | # https://github.com/terrencepreilly/darglint 3 | strictness = long 4 | docstring_style = google 5 | -------------------------------------------------------------------------------- /tests/test_import.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import pkgutil 3 | 4 | import pytest 5 | 6 | 7 | def test_imports(): 8 | try: 9 | package = importlib.import_module("parea") 10 | for _, module_name, _ in pkgutil.iter_modules(package.__path__): 11 | importlib.import_module(f"parea.{module_name}") 12 | except ImportError: 13 | pytest.fail("Import failed", pytrace=False) 14 | --------------------------------------------------------------------------------