├── .dockerignore ├── .dvcignore ├── .editorconfig ├── .env.sample ├── .github ├── .stale.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── config.yml │ ├── feature_request.md │ └── question.md ├── PULL_REQUEST_TEMPLATE.md ├── dependabot.yml ├── release-drafter.yml └── workflows │ ├── build.yml │ ├── greetings.yml │ └── release-drafter.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── SECURITY.md ├── assets ├── PareaLogoLight.png └── images │ └── coverage.svg ├── cookbook ├── ab_testing.py ├── anthropic │ ├── tracing_anthropic.py │ ├── tracing_anthropic_tool_use.py │ ├── tracing_bedrock.py │ └── tracing_with_images_anthropic.py ├── assets │ ├── data │ │ ├── 2022-letter.txt │ │ ├── __init__.py │ │ ├── anthropic_tool_use_examples.py │ │ ├── openai_input_examples.py │ │ └── state_of_the_union.txt │ └── img │ │ ├── dashboard.png │ │ ├── dashboard_detailed_view.png │ │ ├── deployed_prompts.png │ │ ├── feedback.png │ │ ├── logs.png │ │ ├── meta_data.png │ │ └── trace_log_view.png ├── async_enpoints_for_datasets.py ├── cohere │ ├── trace_cohere.py │ ├── trace_cohere_tools.py │ └── tracing_with_cohere.py ├── dspy │ ├── dspy_examples.py │ ├── dspy_threading.py │ └── tracing_and_evaluation_tutorial.ipynb ├── endpoints_for_datasets.py ├── evals_and_experiments │ ├── RAG_experiment_with_auto_evals.py │ ├── async_experiments.py │ ├── deployed_prompt_and_dataset.py │ ├── deployed_prompt_dataset_and_eval.py │ ├── experiment_test_substeps.py │ ├── list_experiments.py │ ├── modify_dataset_before_experiment.py │ ├── parea_evaluation_deepdive.ipynb │ ├── route_llm_experiment.py │ ├── run_experiment.py │ ├── run_experiment_agreement_among_evals.py │ ├── run_experiment_balanced_acc.py │ ├── run_experiment_evas_with_reason.py │ └── run_experiment_using_saved_test_collection.py ├── fetch_logs.py ├── finetuning │ └── download_as_jsonl.py ├── guidance │ └── tracing_guidance.py ├── instructor │ ├── dynamic_few_shot_injection_with_from_feedback.py │ ├── instructor_blog_example_simple.py │ ├── instructor_blog_example_validation_context.py │ ├── instructor_evals.py │ └── instructor_streaming.py ├── langchain │ ├── trace_class_call_method.py │ ├── trace_langchain_RAG_evals.py │ ├── trace_langchain_RAG_with_experiment.py │ ├── trace_langchain_anthropic_function_calling.py │ ├── trace_langchain_azure_RAG_with_experiment.py │ ├── trace_langchain_bedrock_rag.py │ ├── trace_langchain_inside_trace_decorator.py │ ├── trace_langchain_rag_agents.py │ ├── trace_langchain_rag_question_answering.py │ ├── trace_langchain_simple.py │ └── trace_langchain_with_deployed_prompt.py ├── marvin │ └── trace_marvin.py ├── openai │ ├── dynamic_few_shot_injection_with_evals.py │ ├── simple_experiment_with_openai.py │ ├── trace_class_call_method.py │ ├── tracing_and_evaluating_openai_endpoint.py │ ├── tracing_azure_open_ai.py │ ├── tracing_open_ai_streams.py │ ├── tracing_openai_assistant_endpoint.py │ ├── tracing_templated_llm_calls.py │ ├── tracing_tool_calling.py │ ├── tracing_with_images_open_ai.py │ ├── tracing_with_open_ai_endpoint_directly.py │ ├── tracing_with_openai_requests_api.py │ ├── tracing_with_openai_with_functions.py │ └── tracing_with_openai_with_structured_output.py ├── parea_llm_proxy │ ├── deployments │ │ ├── fetching_and_using_parea_deployments.py │ │ └── tracing_with_deployed_prompt.py │ ├── dynamic_few_shot_injection.py │ ├── tracing_with_Parea_sdk.ipynb │ ├── tracing_with_agent.py │ ├── tracing_with_function_calling_and_chains.ipynb │ ├── 
tracing_with_parea_streaming.py │ └── tracing_without_deployed_prompt.py ├── tracing_with_threading.py └── use_dataset_for_finetuning.py ├── cookiecutter-config-file.yml ├── parea ├── __init__.py ├── api_client.py ├── cache │ ├── __init__.py │ ├── cache.py │ └── in_memory.py ├── client.py ├── constants.py ├── evals │ ├── __init__.py │ ├── chat │ │ ├── __init__.py │ │ └── goal_success_ratio.py │ ├── dataset_level │ │ ├── __init__.py │ │ └── balanced_acc.py │ ├── general │ │ ├── __init__.py │ │ ├── answer_matches_target_llm_grader.py │ │ ├── answer_matches_target_recall.py │ │ ├── answer_relevancy.py │ │ ├── levenshtein.py │ │ ├── llm_grader.py │ │ ├── lm_vs_lm.py │ │ ├── self_check.py │ │ └── semantic_similarity.py │ ├── rag │ │ ├── __init__.py │ │ ├── answer_context_faithfulness_binary.py │ │ ├── answer_context_faithfulness_precision.py │ │ ├── answer_context_faithfulness_statement_level.py │ │ ├── context_has_answer.py │ │ ├── context_query_relevancy.py │ │ ├── context_ranking_listwise.py │ │ ├── context_ranking_pointwise.py │ │ └── percent_target_supported_by_context.py │ ├── summary │ │ ├── __init__.py │ │ ├── factual_inconsistency_binary.py │ │ ├── factual_inconsistency_scale.py │ │ └── likert_scale.py │ └── utils.py ├── experiment │ ├── __init__.py │ ├── cli.py │ ├── datasets.py │ ├── dvc.py │ └── experiment.py ├── helpers.py ├── parea_logger.py ├── schemas │ ├── __init__.py │ ├── log.py │ └── models.py ├── types.py ├── utils │ ├── __init__.py │ ├── trace_integrations │ │ ├── dspy.py │ │ ├── instructor.py │ │ ├── langchain.py │ │ ├── langchain_utils.py │ │ └── wrapt_utils.py │ ├── trace_utils.py │ └── universal_encoder.py └── wrapper │ ├── __init__.py │ ├── anthropic │ ├── __init__.py │ ├── anthropic.py │ └── stream_wrapper.py │ ├── cohere │ ├── helpers.py │ └── wrap_cohere.py │ ├── openai │ ├── __init__.py │ └── openai.py │ ├── openai_beta_wrapper.py │ ├── openai_raw_api_tracer.py │ ├── utils.py │ └── wrapper.py ├── poetry.lock ├── pyproject.toml ├── setup.cfg └── tests ├── test_import.py └── test_test_case_collection.py /.dockerignore: -------------------------------------------------------------------------------- 1 | # Git 2 | .git 3 | .gitignore 4 | .github 5 | 6 | # Docker 7 | .dockerignore 8 | 9 | # IDE 10 | .idea 11 | .vscode 12 | 13 | # Byte-compiled / optimized / DLL files 14 | __pycache__/ 15 | **/__pycache__/ 16 | *.pyc 17 | *.pyo 18 | *.pyd 19 | .Python 20 | *.py[cod] 21 | *$py.class 22 | .pytest_cache/ 23 | ..mypy_cache/ 24 | 25 | # poetry 26 | .venv 27 | 28 | # C extensions 29 | *.so 30 | 31 | # Virtual environment 32 | .venv 33 | venv 34 | 35 | .DS_Store 36 | .AppleDouble 37 | .LSOverride 38 | ._* 39 | /LocalREADME.md 40 | -------------------------------------------------------------------------------- /.dvcignore: -------------------------------------------------------------------------------- 1 | # Add patterns of files dvc should ignore, which could improve 2 | # the performance. 
Learn more at 3 | # https://dvc.org/doc/user-guide/dvcignore 4 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # Check http://editorconfig.org for more information 2 | # This is the main config file for this project: 3 | root = true 4 | 5 | [*] 6 | charset = utf-8 7 | end_of_line = lf 8 | insert_final_newline = true 9 | indent_style = space 10 | indent_size = 2 11 | trim_trailing_whitespace = true 12 | 13 | [*.{py, pyi}] 14 | indent_style = space 15 | indent_size = 4 16 | 17 | [Makefile] 18 | indent_style = tab 19 | 20 | [*.md] 21 | trim_trailing_whitespace = false 22 | 23 | [*.{diff,patch}] 24 | trim_trailing_whitespace = false 25 | -------------------------------------------------------------------------------- /.env.sample: -------------------------------------------------------------------------------- 1 | API_KEY= 2 | -------------------------------------------------------------------------------- /.github/.stale.yml: -------------------------------------------------------------------------------- 1 | # Number of days of inactivity before an issue becomes stale 2 | daysUntilStale: 60 3 | # Number of days of inactivity before a stale issue is closed 4 | daysUntilClose: 7 5 | # Issues with these labels will never be considered stale 6 | exemptLabels: 7 | - pinned 8 | - security 9 | # Label to use when marking an issue as stale 10 | staleLabel: wontfix 11 | # Comment to post when marking an issue as stale. Set to `false` to disable 12 | markComment: > 13 | This issue has been automatically marked as stale because it has not had 14 | recent activity. It will be closed if no further activity occurs. Thank you 15 | for your contributions. 16 | # Comment to post when closing a stale issue. Set to `false` to disable 17 | closeComment: false 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🐛 Bug report 3 | about: If something isn't working 🔧 4 | title: '' 5 | labels: bug 6 | assignees: 7 | --- 8 | 9 | ## 🐛 Bug Report 10 | 11 | 12 | 13 | ## 🔬 How To Reproduce 14 | 15 | Steps to reproduce the behavior: 16 | 17 | 1. ... 18 | 19 | ### Code sample 20 | 21 | 22 | 23 | ### Environment 24 | 25 | * OS: [e.g. 
Linux / Windows / macOS] 26 | * Python version, get it with: 27 | 28 | ```bash 29 | python --version 30 | ``` 31 | 32 | ### Screenshots 33 | 34 | 35 | 36 | ## 📈 Expected behavior 37 | 38 | 39 | 40 | ## 📎 Additional context 41 | 42 | 43 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | # Configuration: https://help.github.com/en/github/building-a-strong-community/configuring-issue-templates-for-your-repository 2 | 3 | blank_issues_enabled: false 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🚀 Feature request 3 | about: Suggest an idea for this project 🏖 4 | title: '' 5 | labels: enhancement 6 | assignees: 7 | --- 8 | 9 | ## 🚀 Feature Request 10 | 11 | 12 | 13 | ## 🔈 Motivation 14 | 15 | 16 | 17 | ## 🛰 Alternatives 18 | 19 | 20 | 21 | ## 📎 Additional context 22 | 23 | 24 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: ❓ Question 3 | about: Ask a question about this project 🎓 4 | title: '' 5 | labels: question 6 | assignees: 7 | --- 8 | 9 | ## Checklist 10 | 11 | 12 | 13 | - [ ] I've searched the project's [`issues`](https://github.com/parea-ai/parea-sdk/issues?q=is%3Aissue). 14 | 15 | ## ❓ Question 16 | 17 | 18 | 19 | How can I [...]? 20 | 21 | Is it possible to [...]? 22 | 23 | ## 📎 Additional context 24 | 25 | 26 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Description 2 | 3 | 4 | 5 | ## Related Issue 6 | 7 | 8 | 9 | ## Type of Change 10 | 11 | 12 | 13 | - [ ] 📚 Examples / docs / tutorials / dependencies update 14 | - [ ] 🔧 Bug fix (non-breaking change which fixes an issue) 15 | - [ ] 🥂 Improvement (non-breaking change which improves an existing feature) 16 | - [ ] 🚀 New feature (non-breaking change which adds functionality) 17 | - [ ] 💥 Breaking change (fix or feature that would cause existing functionality to change) 18 | - [ ] 🔐 Security fix 19 | - [ ] 🆙 Version bump 20 | 21 | ## Checklist 22 | 23 | 24 | 25 | - [ ] I've read the [`CODE_OF_CONDUCT.md`](https://github.com/parea-ai/parea-sdk/blob/master/CODE_OF_CONDUCT.md) 26 | document. 27 | - [ ] I've read the [`CONTRIBUTING.md`](https://github.com/parea-ai/parea-sdk/blob/master/CONTRIBUTING.md) guide. 28 | - [ ] I've updated the code style using `make codestyle`. 29 | - [ ] I've written tests for all new methods and classes that I created. 30 | - [ ] I've written the docstring in Google format for all the methods and classes that I used. 
31 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # Configuration: https://dependabot.com/docs/config-file/ 2 | # Docs: https://docs.github.com/en/github/administering-a-repository/keeping-your-dependencies-updated-automatically 3 | 4 | version: 2 5 | 6 | updates: 7 | - package-ecosystem: "pip" 8 | directory: "/" 9 | schedule: 10 | interval: "daily" 11 | allow: 12 | - dependency-type: "all" 13 | commit-message: 14 | prefix: ":arrow_up:" 15 | open-pull-requests-limit: 50 16 | 17 | - package-ecosystem: "github-actions" 18 | directory: "/" 19 | schedule: 20 | interval: "daily" 21 | allow: 22 | - dependency-type: "all" 23 | commit-message: 24 | prefix: ":arrow_up:" 25 | open-pull-requests-limit: 50 26 | 27 | - package-ecosystem: "docker" 28 | directory: "/docker" 29 | schedule: 30 | interval: "weekly" 31 | allow: 32 | - dependency-type: "all" 33 | commit-message: 34 | prefix: ":arrow_up:" 35 | open-pull-requests-limit: 50 36 | -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | # Release drafter configuration https://github.com/release-drafter/release-drafter#configuration 2 | # Emojis were chosen to match the https://gitmoji.carloscuesta.me/ 3 | 4 | name-template: "v$NEXT_PATCH_VERSION" 5 | tag-template: "v$NEXT_PATCH_VERSION" 6 | 7 | categories: 8 | - title: ":rocket: Features" 9 | labels: [enhancement, feature] 10 | - title: ":wrench: Fixes & Refactoring" 11 | labels: [bug, refactoring, bugfix, fix] 12 | - title: ":package: Build System & CI/CD" 13 | labels: [build, ci, testing] 14 | - title: ":boom: Breaking Changes" 15 | labels: [breaking] 16 | - title: ":pencil: Documentation" 17 | labels: [documentation] 18 | - title: ":arrow_up: Dependencies updates" 19 | labels: [dependencies] 20 | 21 | template: | 22 | ## What’s Changed 23 | 24 | $CHANGES 25 | 26 | ## :busts_in_silhouette: List of contributors 27 | 28 | $CONTRIBUTORS 29 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: [ push, pull_request ] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: [ "3.11" ] 11 | 12 | steps: 13 | - uses: actions/checkout@v4.2.2 14 | - name: Set up Python ${{ matrix.python-version }} 15 | uses: actions/setup-python@v5 16 | with: 17 | python-version: ${{ matrix.python-version }} 18 | 19 | - name: Install Poetry 20 | uses: snok/install-poetry@v1 21 | with: 22 | virtualenvs-create: true 23 | virtualenvs-in-project: true 24 | virtualenvs-path: .venv 25 | installer-parallel: true 26 | 27 | - name: Load cached venv 28 | id: cached-poetry-dependencies 29 | uses: actions/cache@v4 30 | with: 31 | path: .venv 32 | key: venv-${{ matrix.python-version }}-${{ hashFiles('poetry.lock') }} 33 | 34 | - name: Install dependencies 35 | if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' 36 | run: poetry install --no-interaction --no-root 37 | 38 | - name: Run style checks 39 | run: | 40 | make check-codestyle 41 | 42 | - name: Run tests 43 | run: | 44 | make test 45 | -------------------------------------------------------------------------------- /.github/workflows/greetings.yml: 
-------------------------------------------------------------------------------- 1 | name: Greetings 2 | 3 | on: [pull_request, issues] 4 | 5 | jobs: 6 | greeting: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/first-interaction@v1.3.0 10 | with: 11 | repo-token: ${{ secrets.GITHUB_TOKEN }} 12 | pr-message: 'Hello @${{ github.actor }}, thank you for submitting a PR! We will respond as soon as possible.' 13 | issue-message: | 14 | Hello @${{ github.actor }}, thank you for your interest in our work! 15 | 16 | If this is a bug report, please provide screenshots and **minimum viable code to reproduce your issue**, otherwise we can not help you. 17 | -------------------------------------------------------------------------------- /.github/workflows/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name: Release Drafter 2 | 3 | on: 4 | push: 5 | # branches to consider in the event; optional, defaults to all 6 | branches: 7 | - master 8 | 9 | jobs: 10 | update_release_draft: 11 | runs-on: ubuntu-latest 12 | steps: 13 | # Drafts your next Release notes as Pull Requests are merged into "master" 14 | - uses: release-drafter/release-drafter@v6.0.0 15 | env: 16 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 17 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3.9 3 | 4 | default_stages: [commit, push] 5 | 6 | repos: 7 | - repo: https://github.com/pre-commit/pre-commit-hooks 8 | rev: v2.5.0 9 | hooks: 10 | - id: check-yaml 11 | - id: end-of-file-fixer 12 | exclude: LICENSE 13 | 14 | - repo: local 15 | hooks: 16 | - id: pyupgrade 17 | name: pyupgrade 18 | entry: poetry run pyupgrade --py38-plus 19 | types: [python] 20 | language: system 21 | 22 | - repo: local 23 | hooks: 24 | - id: isort 25 | name: isort 26 | entry: poetry run isort --settings-path pyproject.toml 27 | types: [python] 28 | language: system 29 | 30 | - repo: local 31 | hooks: 32 | - id: black 33 | name: black 34 | entry: poetry run black --config pyproject.toml 35 | types: [python] 36 | language: system 37 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at joel@parea.ai. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to contribute 2 | 3 | ## Dependencies 4 | 5 | We use `poetry` to manage the [dependencies](https://github.com/python-poetry/poetry). 6 | If you don't have `poetry`, you should install it with `make poetry-download`. 
7 | 8 | To install dependencies and prepare [`pre-commit`](https://pre-commit.com/) hooks, you need to run the `install` command: 9 | 10 | ```bash 11 | make install 12 | make pre-commit-install 13 | ``` 14 | 15 | To activate your `virtualenv` run `poetry shell`. 16 | 17 | ## Codestyle 18 | 19 | After installation, you can run code formatting: 20 | 21 | ```bash 22 | make codestyle 23 | ``` 24 | 25 | ### Checks 26 | 27 | Many checks are configured for this project. The `make check-codestyle` command checks black, isort, and darglint. 28 | The `make check-safety` command checks the security of your code. 29 | 30 | The `make lint` command applies all checks. 31 | 32 | ### Before submitting 33 | 34 | Before submitting your code, please do the following steps: 35 | 36 | 1. Add any changes you want 37 | 1. Add tests for the new changes 38 | 1. Edit documentation if you have changed something significant 39 | 1. Run `make codestyle` to format your changes. 40 | 1. Run `make lint` to ensure that types, security and docstrings are okay. 41 | 42 | ## Other help 43 | 44 | You can contribute by spreading the word about this library. 45 | It would also be a huge contribution to write 46 | a short article on how you are using this project. 47 | You can also share your best practices with us. 48 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | #* Variables 2 | SHELL := /usr/bin/env bash 3 | PYTHON := python3 4 | PYTHONPATH := `pwd` 5 | 6 | #* Docker variables 7 | IMAGE := parea 8 | VERSION := latest 9 | 10 | #* Poetry 11 | .PHONY: poetry-download 12 | poetry-download: 13 | curl -sSL https://install.python-poetry.org | $(PYTHON) - 14 | 15 | .PHONY: poetry-remove 16 | poetry-remove: 17 | curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/install-poetry.py | $(PYTHON) - --uninstall 18 | 19 | #* Installation 20 | .PHONY: install 21 | install: 22 | poetry lock -n && poetry export --without-hashes > requirements.txt 23 | poetry install -n 24 | poetry run mypy --install-types --non-interactive ./ 25 | 26 | .PHONY: pre-commit-install 27 | pre-commit-install: 28 | poetry run pre-commit install 29 | 30 | #* Formatters 31 | .PHONY: codestyle 32 | codestyle: 33 | poetry run pyupgrade --exit-zero-even-if-changed --py38-plus **/*.py 34 | poetry run isort --settings-path pyproject.toml ./ 35 | poetry run black --config pyproject.toml ./ 36 | 37 | .PHONY: formatting 38 | formatting: codestyle 39 | 40 | #* Linting 41 | .PHONY: test 42 | test: 43 | PYTHONPATH=$(PYTHONPATH) poetry run pytest -c pyproject.toml --cov-report=html --cov=parea tests/ 44 | 45 | .PHONY: check-codestyle 46 | check-codestyle: 47 | poetry run isort --diff --check-only --settings-path pyproject.toml ./ 48 | poetry run black --diff --check --config pyproject.toml ./ 49 | poetry run darglint --verbosity 2 parea tests 50 | 51 | .PHONY: mypy 52 | mypy: 53 | poetry run mypy --config-file pyproject.toml ./ 54 | 55 | #.PHONY: check-safety 56 | # check-safety: 57 | # poetry check 58 | # poetry run safety check --full-report 59 | # poetry run bandit -ll --recursive parea tests 60 | 61 | .PHONY: lint 62 | lint: test check-codestyle mypy 63 | 64 | .PHONY: update-dev-deps 65 | update-dev-deps: 66 | poetry add -D bandit@latest darglint@latest "isort[colors]@latest" mypy@latest pre-commit@latest pydocstyle@latest pylint@latest pytest@latest pyupgrade@latest safety@latest coverage@latest coverage-badge@latest 
pytest-html@latest pytest-cov@latest 67 | poetry add -D --allow-prereleases black@latest 68 | 69 | #* Docker 70 | # Example: make docker-build VERSION=latest 71 | # Example: make docker-build IMAGE=some_name VERSION=0.1.0 72 | .PHONY: docker-build 73 | docker-build: 74 | @echo Building docker $(IMAGE):$(VERSION) ... 75 | docker build \ 76 | -t $(IMAGE):$(VERSION) . \ 77 | -f ./docker/Dockerfile --no-cache 78 | 79 | # Example: make docker-remove VERSION=latest 80 | # Example: make docker-remove IMAGE=some_name VERSION=0.1.0 81 | .PHONY: docker-remove 82 | docker-remove: 83 | @echo Removing docker $(IMAGE):$(VERSION) ... 84 | docker rmi -f $(IMAGE):$(VERSION) 85 | 86 | #* Cleaning 87 | .PHONY: pycache-remove 88 | pycache-remove: 89 | find . | grep -E "(__pycache__|\.pyc|\.pyo$$)" | xargs rm -rf 90 | 91 | .PHONY: dsstore-remove 92 | dsstore-remove: 93 | find . | grep -E ".DS_Store" | xargs rm -rf 94 | 95 | .PHONY: mypycache-remove 96 | mypycache-remove: 97 | find . | grep -E ".mypy_cache" | xargs rm -rf 98 | 99 | .PHONY: ipynbcheckpoints-remove 100 | ipynbcheckpoints-remove: 101 | find . | grep -E ".ipynb_checkpoints" | xargs rm -rf 102 | 103 | .PHONY: pytestcache-remove 104 | pytestcache-remove: 105 | find . | grep -E ".pytest_cache" | xargs rm -rf 106 | 107 | .PHONY: build-remove 108 | build-remove: 109 | rm -rf build/ 110 | 111 | .PHONY: cleanup 112 | cleanup: pycache-remove dsstore-remove mypycache-remove ipynbcheckpoints-remove pytestcache-remove 113 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security 2 | 3 | ## 🔐 Reporting Security Issues 4 | 5 | > Do not open issues that might have security implications! 6 | > It is critical that security related issues are reported privately so we have time to address them before they become public knowledge. 7 | 8 | Vulnerabilities can be reported by emailing core members: 9 | 10 | - parea-ai [joel@parea.ai](mailto:joel@parea.ai) 11 | 12 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 13 | 14 | - Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 15 | - Full paths of source file(s) related to the manifestation of the issue 16 | - The location of the affected source code (tag/branch/commit or direct URL) 17 | - Any special configuration required to reproduce the issue 18 | - Environment (e.g. Linux / Windows / macOS) 19 | - Step-by-step instructions to reproduce the issue 20 | - Proof-of-concept or exploit code (if possible) 21 | - Impact of the issue, including how an attacker might exploit the issue 22 | 23 | This information will help us triage your report more quickly. 24 | 25 | ## Preferred Languages 26 | 27 | We prefer all communications to be in English. 
28 | -------------------------------------------------------------------------------- /assets/PareaLogoLight.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parea-ai/parea-sdk-py/968486f9bf7aa4741bb307739f9dec573b30bc95/assets/PareaLogoLight.png -------------------------------------------------------------------------------- /assets/images/coverage.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | coverage 17 | coverage 18 | 80% 19 | 80% 20 | 21 | 22 | -------------------------------------------------------------------------------- /cookbook/ab_testing.py: -------------------------------------------------------------------------------- 1 | # check out the associated tutorial at https://docs.parea.ai//tutorials/running-ab-tests/llm-generated-emails 2 | 3 | from typing import Tuple 4 | 5 | import os 6 | import random 7 | 8 | from openai import OpenAI 9 | 10 | from parea import Parea, get_current_trace_id, parea_logger, trace, trace_insert 11 | from parea.schemas import EvaluationResult, UpdateLog 12 | 13 | client = OpenAI() 14 | # instantiate Parea client 15 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 16 | # wrap OpenAI client to trace calls 17 | p.wrap_openai_client(client) 18 | 19 | 20 | ab_test_name = "long-vs-short-emails" 21 | 22 | 23 | @trace # decorator to trace functions with Parea 24 | def generate_email(user: str) -> Tuple[str, str, str]: 25 | # randomly choose to generate a long or short email 26 | if random.random() < 0.5: 27 | variant = "variant_0" 28 | prompt = f"Generate a long email for {user}" 29 | else: 30 | variant = "variant_1" 31 | prompt = f"Generate a short email for {user}" 32 | # tag the requests with the A/B test name & chosen variant 33 | trace_insert( 34 | { 35 | "metadata": { 36 | "ab_test_name": ab_test_name, 37 | f"ab_test_{ab_test_name}": variant, 38 | } 39 | } 40 | ) 41 | 42 | email = ( 43 | client.chat.completions.create( 44 | model="gpt-4o", 45 | messages=[ 46 | { 47 | "role": "user", 48 | "content": prompt, 49 | } 50 | ], 51 | ) 52 | .choices[0] 53 | .message.content 54 | ) 55 | # in addition to the email, return the trace_id and the chosen variant 56 | return email, get_current_trace_id(), variant 57 | 58 | 59 | def capture_feedback(feedback: float, trace_id: str, ab_test_variant: str, user_corrected_email: str = None) -> None: 60 | field_name_to_value_map = { 61 | "scores": [EvaluationResult(name=f"ab_test_{ab_test_variant}", score=feedback, reason="any additional user feedback on why it's good/bad")], 62 | } 63 | if user_corrected_email: 64 | field_name_to_value_map["target"] = user_corrected_email 65 | 66 | parea_logger.update_log( 67 | UpdateLog( 68 | trace_id=trace_id, 69 | field_name_to_value_map=field_name_to_value_map, 70 | ) 71 | ) 72 | 73 | 74 | def main(): 75 | # generate email and get trace ID 76 | email, trace_id, ab_test_variant = generate_email("Max Mustermann") 77 | 78 | # create biased feedback for shorter emails 79 | if ab_test_variant == "variant_1": 80 | user_feedback = 0.0 if random.random() < 0.7 else 1.0 81 | else: 82 | user_feedback = 0.0 if random.random() < 0.3 else 1.0 83 | 84 | capture_feedback(user_feedback, trace_id, ab_test_variant, "Hi Max") 85 | 86 | 87 | if __name__ == "__main__": 88 | main() 89 | -------------------------------------------------------------------------------- /cookbook/anthropic/tracing_anthropic.py: 
-------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | 4 | import anthropic 5 | from anthropic.types import ContentBlockDeltaEvent, MessageDeltaEvent, MessageStartEvent 6 | from dotenv import load_dotenv 7 | 8 | from parea import Parea 9 | 10 | load_dotenv() 11 | 12 | 13 | client = anthropic.Anthropic() 14 | aclient = anthropic.AsyncAnthropic() 15 | 16 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 17 | p.wrap_anthropic_client(client) 18 | p.wrap_anthropic_client(aclient) 19 | 20 | 21 | client_kwargs = {"model": "claude-3-opus-20240229", "max_tokens": 1024, "messages": [{"role": "user", "content": "Hello, Claude"}]} 22 | 23 | 24 | def anthropic_sync(): 25 | message = client.messages.create(**client_kwargs) 26 | print(message.content[0].text) 27 | 28 | 29 | def anthropic_stream(): 30 | message = client.messages.create(**client_kwargs, stream=True) 31 | for event in message: 32 | if isinstance(event, MessageStartEvent): 33 | print(f"{event.type}: {event.message.usage.input_tokens}") 34 | elif isinstance(event, ContentBlockDeltaEvent): 35 | print(f"{event.type}: {event.delta.text}") 36 | elif isinstance(event, MessageDeltaEvent): 37 | print(f"{event.type}: {event.usage.output_tokens}") 38 | else: 39 | print(f"{event.type}: {event}") 40 | 41 | 42 | def anthropic_stream_context_manager(): 43 | with client.messages.stream(**client_kwargs) as stream: 44 | for text in stream.text_stream: 45 | print(text, end="", flush=True) 46 | print() 47 | message = stream.get_final_message() 48 | print(message.model_dump_json(indent=2)) 49 | 50 | 51 | async def async_anthropic(): 52 | message = await aclient.messages.create(**client_kwargs) 53 | print(message.content[0].text) 54 | 55 | 56 | async def async_anthropic_stream(): 57 | message = await aclient.messages.create(**client_kwargs, stream=True) 58 | async for event in message: 59 | if isinstance(event, MessageStartEvent): 60 | print(f"{event.type}: {event.message.usage.input_tokens}") 61 | elif isinstance(event, ContentBlockDeltaEvent): 62 | print(f"{event.type}: {event.delta.text}") 63 | elif isinstance(event, MessageDeltaEvent): 64 | print(f"{event.type}: {event.usage.output_tokens}") 65 | else: 66 | print(f"{event.type}: {event}") 67 | 68 | 69 | async def async_anthropic_stream_context_manager(): 70 | async with aclient.messages.stream(**client_kwargs) as stream: 71 | async for text in stream.text_stream: 72 | print(text, end="", flush=True) 73 | print() 74 | message = await stream.get_final_message() 75 | print(message.model_dump_json(indent=2)) 76 | 77 | 78 | if __name__ == "__main__": 79 | anthropic_sync() 80 | anthropic_stream() 81 | anthropic_stream_context_manager() 82 | asyncio.run(async_anthropic()) 83 | asyncio.run(async_anthropic_stream()) 84 | asyncio.run(async_anthropic_stream_context_manager()) 85 | -------------------------------------------------------------------------------- /cookbook/anthropic/tracing_anthropic_tool_use.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import anthropic 4 | from dotenv import load_dotenv 5 | 6 | from cookbook.assets.data.anthropic_tool_use_examples import missing_information, multiple_tool_use, single_tool_use 7 | from parea import Parea 8 | 9 | load_dotenv() 10 | 11 | client = anthropic.Anthropic() 12 | aclient = anthropic.AsyncAnthropic() 13 | 14 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 15 | p.wrap_anthropic_client(client) 16 | p.wrap_anthropic_client(aclient) 17 | 18 | 19 | 
def anthropic_sync(create_kwargs): 20 | message = client.messages.create(**create_kwargs) 21 | print(message.content) 22 | 23 | 24 | def anthropic_sync_stream(create_kwargs): 25 | message = client.messages.create(stream=True, **create_kwargs) 26 | for m in message: 27 | print(m) 28 | 29 | 30 | async def async_anthropic(create_kwargs): 31 | message = await aclient.messages.create(**create_kwargs) 32 | print(message.content) 33 | 34 | 35 | if __name__ == "__main__": 36 | anthropic_sync(single_tool_use) 37 | anthropic_sync_stream(single_tool_use) 38 | anthropic_sync(multiple_tool_use) 39 | anthropic_sync(missing_information) 40 | # asyncio.run(async_anthropic(single_tool_use)) 41 | # asyncio.run(async_anthropic(multiple_tool_use)) 42 | # asyncio.run(async_anthropic(missing_information)) 43 | -------------------------------------------------------------------------------- /cookbook/anthropic/tracing_bedrock.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from anthropic import AnthropicBedrock 4 | from dotenv import load_dotenv 5 | 6 | from parea import Parea 7 | 8 | load_dotenv() 9 | 10 | client = AnthropicBedrock( 11 | # Authenticate by either providing the keys below or use the default AWS credential providers, such as 12 | # using ~/.aws/credentials or the "AWS_SECRET_ACCESS_KEY" and "AWS_ACCESS_KEY_ID" environment variables. 13 | aws_access_key="", 14 | aws_secret_key="", 15 | # Temporary credentials can be used with aws_session_token. 16 | # Read more at https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp.html. 17 | aws_session_token="", 18 | # aws_region changes the aws region to which the request is made. By default, we read AWS_REGION, 19 | # and if that's not present, we default to us-east-1. Note that we do not read ~/.aws/config for the region. 
20 | aws_region="us-west-2", 21 | ) 22 | 23 | 24 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 25 | p.wrap_anthropic_client(client) 26 | 27 | message = client.messages.create(model="anthropic.claude-3-5-sonnet-20240620-v1:0", max_tokens=256, messages=[{"role": "user", "content": "Hello, world"}]) 28 | print(message.content) 29 | -------------------------------------------------------------------------------- /cookbook/anthropic/tracing_with_images_anthropic.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import base64 4 | import json 5 | import os 6 | 7 | import requests 8 | from anthropic import Anthropic 9 | from dotenv import load_dotenv 10 | from openai import OpenAI 11 | 12 | from parea import Parea, trace, trace_insert 13 | from parea.schemas import TraceLogImage 14 | 15 | load_dotenv() 16 | 17 | 18 | oai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) 19 | a_client = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) 20 | 21 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 22 | p.wrap_openai_client(oai_client) 23 | p.wrap_anthropic_client(a_client) 24 | 25 | 26 | @trace 27 | def image_maker(query: str) -> str: 28 | response = oai_client.images.generate(prompt=query, model="dall-e-3") 29 | image_url = response.data[0].url 30 | caption = {"original_prompt": query, "revised_prompt": response.data[0].revised_prompt} 31 | trace_insert({"images": [TraceLogImage(url=image_url, caption=json.dumps(caption))]}) 32 | return image_url 33 | 34 | 35 | @trace 36 | def ask_vision(image_url: str) -> Optional[str]: 37 | image_data = requests.get(image_url).content 38 | base64_image = base64.b64encode(image_data).decode("utf-8") 39 | 40 | response = a_client.messages.create( 41 | model="claude-3-haiku-20240307", 42 | messages=[ 43 | { 44 | "role": "user", 45 | "content": [ 46 | { 47 | "type": "image", 48 | "source": { 49 | "type": "base64", 50 | "media_type": "image/png", 51 | "data": base64_image, 52 | }, 53 | }, 54 | {"type": "text", "text": "What’s in this image?"}, 55 | ], 56 | } 57 | ], 58 | max_tokens=300, 59 | ) 60 | return response.content[0].text 61 | 62 | 63 | @trace 64 | def main(query: str) -> str: 65 | image_url = image_maker(query) 66 | return ask_vision(image_url) 67 | 68 | 69 | if __name__ == "__main__": 70 | result = main("A dog sitting comfortably on a chair") 71 | print(result) 72 | -------------------------------------------------------------------------------- /cookbook/assets/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parea-ai/parea-sdk-py/968486f9bf7aa4741bb307739f9dec573b30bc95/cookbook/assets/data/__init__.py -------------------------------------------------------------------------------- /cookbook/assets/data/anthropic_tool_use_examples.py: -------------------------------------------------------------------------------- 1 | single_tool_use = { 2 | "model": "claude-3-opus-20240229", 3 | "max_tokens": 1024, 4 | "messages": [{"role": "user", "content": "What's the weather like in San Francisco?"}], 5 | "tools": [ 6 | { 7 | "name": "get_weather", 8 | "description": "Get the current weather in a given location", 9 | "input_schema": { 10 | "type": "object", 11 | "properties": { 12 | "location": { 13 | "type": "string", 14 | "description": "The city and state, e.g. 
San Francisco, CA", 15 | } 16 | }, 17 | "required": ["location"], 18 | }, 19 | } 20 | ], 21 | } 22 | 23 | 24 | multiple_tool_use = { 25 | "model": "claude-3-opus-20240229", 26 | "max_tokens": 1024, 27 | "messages": [{"role": "user", "content": "What is the weather like right now in New York? Also what time is it there?"}], 28 | "tools": [ 29 | { 30 | "name": "get_weather", 31 | "description": "Get the current weather in a given location", 32 | "input_schema": { 33 | "type": "object", 34 | "properties": { 35 | "location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, 36 | "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The unit of temperature, either 'celsius' or 'fahrenheit'"}, 37 | }, 38 | "required": ["location"], 39 | }, 40 | }, 41 | { 42 | "name": "get_time", 43 | "description": "Get the current time in a given time zone", 44 | "input_schema": { 45 | "type": "object", 46 | "properties": {"timezone": {"type": "string", "description": "The IANA time zone name, e.g. America/Los_Angeles"}}, 47 | "required": ["timezone"], 48 | }, 49 | }, 50 | ], 51 | } 52 | 53 | 54 | missing_information = { 55 | "model": "claude-3-opus-20240229", 56 | "max_tokens": 1024, 57 | "tools": [ 58 | { 59 | "name": "get_weather", 60 | "description": "Get the current weather in a given location", 61 | "input_schema": { 62 | "type": "object", 63 | "properties": { 64 | "location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, 65 | "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": 'The unit of temperature, either "celsius" or "fahrenheit"'}, 66 | }, 67 | "required": ["location"], 68 | }, 69 | } 70 | ], 71 | "messages": [ 72 | {"role": "user", "content": "What is the weather like in San Francisco?"}, 73 | { 74 | "role": "assistant", 75 | "content": [ 76 | {"type": "text", "text": "I need to use get_weather, and the user wants SF, which is likely San Francisco, CA."}, 77 | {"type": "tool_use", "id": "toolu_01A09q90qw90lq917835lq9", "name": "get_weather", "input": {"location": "San Francisco, CA", "unit": "celsius"}}, 78 | ], 79 | }, 80 | {"role": "user", "content": [{"type": "tool_result", "tool_use_id": "toolu_01A09q90qw90lq917835lq9", "content": "65 degrees"}]}, 81 | ], 82 | } 83 | -------------------------------------------------------------------------------- /cookbook/assets/data/openai_input_examples.py: -------------------------------------------------------------------------------- 1 | tool_calling_example = { 2 | "model": "gpt-3.5-turbo-0125", 3 | "messages": [{"role": "user", "content": "What's the weather like in San Francisco, Tokyo, and Paris?"}], 4 | "tools": [ 5 | { 6 | "type": "function", 7 | "function": { 8 | "name": "get_current_weather", 9 | "description": "Get the current weather in a given location", 10 | "parameters": { 11 | "type": "object", 12 | "properties": { 13 | "location": { 14 | "type": "string", 15 | "description": "The city and state, e.g. San Francisco, CA", 16 | }, 17 | "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, 18 | }, 19 | "required": ["location"], 20 | }, 21 | }, 22 | } 23 | ], 24 | "tool_choice": "auto", 25 | } 26 | 27 | functions_example = { 28 | "model": "gpt-3.5-turbo-0125", 29 | "messages": [ 30 | { 31 | "role": "system", 32 | "content": f"You are a sophisticated AI assistant, " 33 | f"a specialist in user intent detection and interpretation. 
" 34 | f"Your task is to perceive and respond to the user's needs, even when they're expressed " 35 | f"in an indirect or direct manner. You excel in recognizing subtle cues: for example, " 36 | f"if a user states they are 'hungry', you should assume they are seeking nearby dining " 37 | f"options such as a restaurant or a cafe. If they indicate feeling 'tired', 'weary', " 38 | f"or mention a long journey, interpret this as a request for accommodation options like " 39 | f"hotels or guest houses. However, remember to navigate the fine line of interpretation " 40 | f"and assumption: if a user's intent is unclear or can be interpreted in multiple ways, " 41 | f"do not hesitate to politely ask for additional clarification. Make sure to tailor your " 42 | f"responses to the user based on their preferences and past experiences which can " 43 | f"be found here: Name: John Doe", 44 | }, 45 | {"role": "user", "content": "I'm hungry"}, 46 | ], 47 | "functions": [ 48 | { 49 | "name": "call_google_places_api", 50 | "description": f""" 51 | This function calls the Google Places API to find the top places of a specified type near 52 | a specific location. It can be used when a user expresses a need (e.g., feeling hungry or tired) or wants to 53 | find a certain type of place (e.g., restaurant or hotel). 54 | """, 55 | "parameters": {"type": "object", "properties": {"place_type": {"type": "string", "description": "The type of place to search for."}}}, 56 | "result": {"type": "array", "items": {"type": "string"}}, 57 | } 58 | ], 59 | } 60 | 61 | simple_example = { 62 | "model": "gpt-3.5-turbo-0125", 63 | "messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello!"}], 64 | } 65 | 66 | simple_example_json = { 67 | "model": "gpt-3.5-turbo-0125", 68 | "messages": [{"role": "system", "content": "You are a helpful assistant talking JSON."}, {"role": "user", "content": "Hello!"}], 69 | "response_format": {"type": "json_object"}, 70 | } 71 | -------------------------------------------------------------------------------- /cookbook/assets/img/dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parea-ai/parea-sdk-py/968486f9bf7aa4741bb307739f9dec573b30bc95/cookbook/assets/img/dashboard.png -------------------------------------------------------------------------------- /cookbook/assets/img/dashboard_detailed_view.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parea-ai/parea-sdk-py/968486f9bf7aa4741bb307739f9dec573b30bc95/cookbook/assets/img/dashboard_detailed_view.png -------------------------------------------------------------------------------- /cookbook/assets/img/deployed_prompts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parea-ai/parea-sdk-py/968486f9bf7aa4741bb307739f9dec573b30bc95/cookbook/assets/img/deployed_prompts.png -------------------------------------------------------------------------------- /cookbook/assets/img/feedback.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parea-ai/parea-sdk-py/968486f9bf7aa4741bb307739f9dec573b30bc95/cookbook/assets/img/feedback.png -------------------------------------------------------------------------------- /cookbook/assets/img/logs.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/parea-ai/parea-sdk-py/968486f9bf7aa4741bb307739f9dec573b30bc95/cookbook/assets/img/logs.png -------------------------------------------------------------------------------- /cookbook/assets/img/meta_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parea-ai/parea-sdk-py/968486f9bf7aa4741bb307739f9dec573b30bc95/cookbook/assets/img/meta_data.png -------------------------------------------------------------------------------- /cookbook/assets/img/trace_log_view.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parea-ai/parea-sdk-py/968486f9bf7aa4741bb307739f9dec573b30bc95/cookbook/assets/img/trace_log_view.png -------------------------------------------------------------------------------- /cookbook/async_enpoints_for_datasets.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | 4 | from dotenv import load_dotenv 5 | 6 | from parea import Parea 7 | from parea.schemas import TestCase, TestCaseCollection, UpdateTestCase 8 | 9 | load_dotenv() 10 | 11 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 12 | 13 | 14 | data = [{"problem": "1+2", "target": 3, "tags": ["easy"]}, {"problem": "Solve the differential equation dy/dx = 3y.", "target": "y = c * e^(3x)", "tags": ["hard"]}] 15 | new_data = [{"problem": "Evaluate the integral ∫x^2 dx from 0 to 3.", "target": 9, "tags": ["hard"]}] 16 | 17 | 18 | async def update_test_case_example(): 19 | dataset: TestCaseCollection = await p.aget_collection("math_problems_v3") 20 | test_cases: dict[int, TestCase] = dataset.test_cases 21 | for test_case_id, test_case in test_cases.items(): 22 | if "easy" in test_case.tags: 23 | # updated inputs must match the same k/v pair as original test case 24 | await p.aupdate_test_case( 25 | dataset_id=dataset.id, 26 | test_case_id=test_case_id, 27 | update_request=UpdateTestCase(inputs={"problem": "Evaluate the integral ∫x^6 dx from 0 to 9."}, target="((1/7)x^7)+C", tags=["hard"]), 28 | ) 29 | break 30 | 31 | 32 | async def main(): 33 | await p.acreate_test_collection(data, name="math_problems_v3") 34 | await p.aadd_test_cases(new_data, dataset_id=182) 35 | await update_test_case_example() 36 | 37 | 38 | if __name__ == "__main__": 39 | asyncio.run(main()) 40 | -------------------------------------------------------------------------------- /cookbook/cohere/trace_cohere.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import cohere 4 | from dotenv import load_dotenv 5 | 6 | from parea import Parea 7 | 8 | load_dotenv() 9 | 10 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 11 | co = cohere.Client(api_key=os.getenv("COHERE_API_KEY")) 12 | p.wrap_cohere_client(co) 13 | 14 | response = co.chat( 15 | model="command-r-plus", 16 | preamble="You are a helpful assistant talking in JSON.", 17 | message="Generate a JSON describing a person, with the fields 'name' and 'age'", 18 | response_format={"type": "json_object"}, 19 | ) 20 | print(response) 21 | print("\n\n") 22 | 23 | response = co.chat(message="Who discovered gravity?") 24 | print(response) 25 | print("\n\n") 26 | # 27 | docs = [ 28 | "Carson City is the capital city of the American state of Nevada.", 29 | "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. 
Its capital is Saipan.", 30 | "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.", 31 | "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.", 32 | "Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.", 33 | ] 34 | response = co.rerank( 35 | model="rerank-english-v3.0", 36 | query="What is the capital of the United States?", 37 | documents=docs, 38 | top_n=3, 39 | ) 40 | print(response) 41 | print("\n\n") 42 | 43 | 44 | response = co.chat( 45 | model="command-r-plus", 46 | message="Where do the tallest penguins live?", 47 | documents=[ 48 | {"title": "Tall penguins", "snippet": "Emperor penguins are the tallest."}, 49 | {"title": "Penguin habitats", "snippet": "Emperor penguins only live in Antarctica."}, 50 | {"title": "What are animals?", "snippet": "Animals are different from plants."}, 51 | ], 52 | ) 53 | print(response) 54 | print("\n\n") 55 | 56 | response = co.chat(model="command-r-plus", message="Who is more popular: Nsync or Backstreet Boys?", search_queries_only=True) 57 | print(response) 58 | print("\n\n") 59 | 60 | response = co.chat(model="command-r-plus", message="Who is more popular: Nsync or Backstreet Boys?", connectors=[{"id": "web-search"}]) 61 | print(response) 62 | print("\n\n") 63 | 64 | for event in co.chat_stream(message="Who discovered gravity?"): 65 | print(event) 66 | -------------------------------------------------------------------------------- /cookbook/cohere/tracing_with_cohere.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import os 4 | from datetime import datetime 5 | 6 | import cohere 7 | from dotenv import load_dotenv 8 | 9 | from parea import Parea, trace, trace_insert 10 | 11 | load_dotenv() 12 | 13 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 14 | co = cohere.Client(api_key=os.getenv("COHERE_API_KEY")) 15 | p.wrap_cohere_client(co) 16 | 17 | 18 | def call_llm(message: str, chat_history: Optional[List[dict]] = None, system_message: str = "", model: str = "command-r-plus") -> str: 19 | return co.chat( 20 | model=model, 21 | preamble=system_message, 22 | chat_history=chat_history or [], 23 | message=message, 24 | ).text 25 | 26 | 27 | @trace 28 | def argumentor(query: str, additional_description: str = "") -> str: 29 | return call_llm( 30 | system_message=f"""You are a debater making an argument on a topic. {additional_description}. 31 | The current time is {datetime.now().strftime("%Y-%m-%d")}""", 32 | message=f"The discussion topic is {query}", 33 | ) 34 | 35 | 36 | @trace 37 | def critic(argument: str) -> str: 38 | return call_llm( 39 | system_message="""You are a critic. 40 | What unresolved questions or criticism do you have after reading the following argument? 41 | Provide a concise summary of your feedback.""", 42 | message=argument, 43 | ) 44 | 45 | 46 | @trace 47 | def refiner(query: str, additional_description: str, argument: str, criticism: str) -> str: 48 | return call_llm( 49 | system_message=f"""You are a debater making an argument on a topic. {additional_description}. 
50 | The current time is {datetime.now().strftime("%Y-%m-%d")}""", 51 | chat_history=[{"role": "USER", "message": f"""The discussion topic is {query}"""}, {"role": "CHATBOT", "message": argument}, {"role": "USER", "message": criticism}], 52 | message="Please generate a new argument that incorporates the feedback from the user.", 53 | ) 54 | 55 | 56 | @trace 57 | def argument_chain(query: str, additional_description: str = "") -> str: 58 | trace_insert({"session_id": "cus_1234", "end_user_identifier": "user_1234"}) 59 | argument = argumentor(query, additional_description) 60 | criticism = critic(argument) 61 | refined_argument = refiner(query, additional_description, argument, criticism) 62 | return refined_argument 63 | 64 | 65 | @trace(session_id="cus_1234", end_user_identifier="user_1234") 66 | def json_call() -> str: 67 | completion = co.chat( 68 | model="command-r-plus", 69 | preamble="You are a helpful assistant talking in JSON.", 70 | message="What are you?", 71 | response_format={"type": "json_object"}, 72 | ) 73 | return completion.text 74 | 75 | 76 | if __name__ == "__main__": 77 | result = argument_chain( 78 | "Whether sparkling wine is good for you.", 79 | additional_description="Provide a concise, few sentence argument on why sparkling wine is good for you.", 80 | ) 81 | print(result) 82 | print(json_call()) 83 | -------------------------------------------------------------------------------- /cookbook/dspy/dspy_threading.py: -------------------------------------------------------------------------------- 1 | import contextvars 2 | import os 3 | from concurrent.futures import ThreadPoolExecutor 4 | 5 | import dspy 6 | from dotenv import load_dotenv 7 | 8 | from parea import Parea 9 | 10 | load_dotenv() 11 | 12 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 13 | p.trace_dspy() 14 | 15 | gpt3_turbo = dspy.OpenAI(model="gpt-3.5-turbo-1106", max_tokens=300) 16 | dspy.configure(lm=gpt3_turbo) 17 | 18 | 19 | class QASignature(dspy.Signature): 20 | question = dspy.InputField() 21 | answer = dspy.OutputField() 22 | 23 | 24 | class EnsembleQA(dspy.Module): 25 | def __init__(self): 26 | super().__init__() 27 | self.step1 = dspy.ChainOfThought(QASignature) 28 | self.step2 = dspy.ChainOfThought(QASignature) 29 | 30 | def forward(self, question): 31 | with ThreadPoolExecutor(max_workers=2) as executor: 32 | context1 = contextvars.copy_context() 33 | future1 = executor.submit(context1.run, self.step1, question=question) 34 | context2 = contextvars.copy_context() 35 | future2 = executor.submit(context2.run, self.step2, question=question + "?") 36 | 37 | answer1 = future1.result() 38 | answer2 = future2.result() 39 | 40 | return dspy.Prediction(answer=f"{answer1}\n\n{answer2}") 41 | 42 | 43 | qa = EnsembleQA() 44 | response = qa("Who are you?") 45 | print(response.answer) 46 | -------------------------------------------------------------------------------- /cookbook/endpoints_for_datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | 5 | from parea import Parea 6 | from parea.schemas import TestCase, TestCaseCollection, UpdateTestCase 7 | 8 | load_dotenv() 9 | 10 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 11 | 12 | 13 | data = [{"problem": "1+2", "target": 3, "tags": ["easy"]}, {"problem": "Solve the differential equation dy/dx = 3y.", "target": "y = c * e^(3x)", "tags": ["hard"]}] 14 | 15 | # this will create a new dataset on Parea named "math_problems_v4". 
16 | # The dataset will have one column named "problem", and two columns using the reserved names "target" and "tags". 17 | # When using this dataset, the expected prompt template should have a placeholder for the variable "problem". 18 | p.create_test_collection(data, name="math_problems_v4") 19 | 20 | new_data = [{"problem": "Evaluate the integral ∫x^2 dx from 0 to 3.", "target": 9, "tags": ["hard"]}] 21 | # this will add the new test cases to the existing "math_problems_v4" dataset. 22 | # New test cases must have the same columns as the existing dataset. 23 | p.add_test_cases(new_data, name="math_problems_v4") 24 | # Or you can use the dataset ID instead of the name 25 | # p.add_test_cases(new_data, dataset_id=121) 26 | 27 | 28 | def update_test_case_example(): 29 | dataset: TestCaseCollection = p.get_collection("math_problems_v4") 30 | test_cases: dict[int, TestCase] = dataset.test_cases 31 | for test_case_id, test_case in test_cases.items(): 32 | if "easy" in test_case.tags: 33 | # updated inputs must match the same k/v pair as original test case 34 | p.update_test_case( 35 | dataset_id=dataset.id, 36 | test_case_id=test_case_id, 37 | update_request=UpdateTestCase(inputs={"problem": "Evaluate the integral ∫x^6 dx from 0 to 9."}, target="((1/7)x^7)+C", tags=["hard"]), 38 | ) 39 | break 40 | 41 | 42 | if __name__ == "__main__": 43 | update_test_case_example() 44 | -------------------------------------------------------------------------------- /cookbook/evals_and_experiments/async_experiments.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import uuid 4 | 5 | from dotenv import load_dotenv 6 | 7 | from parea import Parea, trace 8 | from parea.schemas import Completion, LLMInputs, Log, Message, ModelParams, Role 9 | 10 | load_dotenv() 11 | 12 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 13 | 14 | 15 | DATA = [{"topic": "Python"}, {"topic": "Javascript"}, {"topic": "Water"}, {"topic": "Fire"}] 16 | models = ["gpt-4-turbo", "claude-3-haiku-20240307"] 17 | 18 | 19 | def eval_func(log: Log) -> float: 20 | from random import random 21 | 22 | return random() 23 | 24 | 25 | def model_call_factory(model: str): 26 | @trace(eval_funcs=[eval_func]) 27 | def func(topic: str) -> str: 28 | return p.completion( 29 | data=Completion( 30 | llm_configuration=LLMInputs( 31 | model=model, 32 | model_params=ModelParams(temp=1), 33 | messages=[Message(role=Role.user, content=f"Write a short haiku about {topic}")], 34 | ) 35 | ) 36 | ).content 37 | 38 | return func 39 | 40 | 41 | async def main(): 42 | await asyncio.gather( 43 | *[p.experiment(name="Write-Haikus", data=DATA, func=model_call_factory(model), n_trials=4).arun(run_name=f"{model}-{str(uuid.uuid4())[:4]}") for model in models] 44 | ) 45 | 46 | 47 | if __name__ == "__main__": 48 | asyncio.run(main()) 49 | -------------------------------------------------------------------------------- /cookbook/evals_and_experiments/deployed_prompt_and_dataset.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | from dotenv import load_dotenv 5 | 6 | from parea import Parea, trace 7 | from parea.evals import call_openai 8 | from parea.schemas import Completion 9 | from parea.schemas.log import EvaluationResult, Log 10 | 11 | load_dotenv() 12 | 13 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 14 | 15 | 16 | def eval_fun(log: Log) -> EvaluationResult: 17 | # access the output and target from the log 18 | # output, target =
log.output, log.target 19 | response: str = call_openai( 20 | model="gpt-4o", 21 | messages=[{"role": "system", "content": "Use JSON. provide a score and reason."}], # <- CHANGE THIS 22 | response_format={"type": "json_object"}, 23 | temperature=0.0, 24 | ) 25 | response_dict = json.loads(response) 26 | return EvaluationResult(name="YOUR_EVAL_NAME", score=response_dict["score"], reason=response_dict["reason"]) 27 | 28 | 29 | @trace(eval_funcs=[eval_fun]) 30 | def deployed_prompt(prompt_template_input: str) -> str: 31 | return p.completion(Completion(deployment_id="YOUR_DEPLOYED_PROMPT_ID", llm_inputs={"prompt_template_input_name": prompt_template_input})).content 32 | 33 | 34 | if __name__ == "__main__": 35 | p.experiment( 36 | name="some_experiment_name", 37 | data=172, # dataset Id from Parea, can also use dataset name if unique 38 | func=deployed_prompt, 39 | ).run() 40 | -------------------------------------------------------------------------------- /cookbook/evals_and_experiments/deployed_prompt_dataset_and_eval.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | 5 | from parea import Parea, trace 6 | from parea.schemas import Completion 7 | 8 | load_dotenv() 9 | 10 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 11 | 12 | 13 | @trace(eval_funcs_names=["YOUR_EVAL_NAME"]) 14 | def deployed_prompt(prompt_template_input: str) -> str: 15 | return p.completion(Completion(deployment_id="YOUR_DEPLOYED_PROMPT_ID", llm_inputs={"prompt_template_input_name": prompt_template_input})).content 16 | 17 | 18 | if __name__ == "__main__": 19 | p.experiment( 20 | name="some_experiment_name", 21 | data=172, # dataset Id from Parea, can also use dataset name if unique 22 | func=deployed_prompt, 23 | ).run() 24 | -------------------------------------------------------------------------------- /cookbook/evals_and_experiments/experiment_test_substeps.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | import json 4 | import os 5 | 6 | from dotenv import load_dotenv 7 | 8 | from parea import Parea, trace 9 | from parea.evals.general.levenshtein import levenshtein_distance 10 | from parea.schemas import Log 11 | 12 | load_dotenv() 13 | 14 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 15 | 16 | 17 | # evaluation function for the substep 18 | def eval_choose_greeting(log: Log) -> Union[float, None]: 19 | if not (target := log.target): 20 | return None 21 | 22 | target_substep = json.loads(target)["substep"] # log.target is a string 23 | output = log.output 24 | return levenshtein_distance(target_substep, output) 25 | 26 | 27 | # sub-step 28 | @trace(eval_funcs=[eval_choose_greeting]) 29 | def choose_greeting(name: str) -> str: 30 | return "Hello" 31 | 32 | 33 | # end-to-end evaluation function 34 | def eval_greet(log: Log) -> Union[float, None]: 35 | if not (target := log.target): 36 | return None 37 | 38 | target_overall = json.loads(target)["overall"] 39 | output = log.output 40 | return levenshtein_distance(target_overall, output) 41 | 42 | 43 | @trace(eval_funcs=[eval_greet]) 44 | def greet(name: str) -> str: 45 | greeting = choose_greeting(name) 46 | return f"{greeting} {name}" 47 | 48 | 49 | data = [ 50 | { 51 | "name": "Foo", 52 | "target": { 53 | "overall": "Hi Foo", 54 | "substep": "Hi", 55 | }, 56 | }, 57 | { 58 | "name": "Bar", 59 | "target": { 60 | "overall": "Hello Bar", 61 | "substep": "Hello", 62 | }, 63 | }, 64 | ] 65 | 66 | 67 | if 
__name__ == "__main__": 68 | p.experiment( 69 | name="greeting", 70 | data=data, 71 | func=greet, 72 | ).run(prefix="substep") 73 | -------------------------------------------------------------------------------- /cookbook/evals_and_experiments/list_experiments.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | 5 | from parea import Parea 6 | from parea.schemas import ListExperimentUUIDsFilters 7 | 8 | load_dotenv() 9 | 10 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 11 | 12 | experiments = p.list_experiments(ListExperimentUUIDsFilters(experiment_name_filter="greeting")) 13 | print(f"Num. experiments: {len(experiments)}") 14 | trace_logs = p.get_experiment_trace_logs(experiments[0].uuid) 15 | print(f"Num. trace logs: {len(trace_logs)}") 16 | print(f"Trace log: {trace_logs[0]}") 17 | -------------------------------------------------------------------------------- /cookbook/evals_and_experiments/modify_dataset_before_experiment.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | from openai import OpenAI 5 | 6 | from parea import Parea, trace 7 | from parea.evals.rag import context_query_relevancy_factory 8 | from parea.schemas import TestCase 9 | 10 | load_dotenv() 11 | 12 | client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) 13 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 14 | p.wrap_openai_client(client) 15 | 16 | context_query_relevancy = context_query_relevancy_factory(model="gpt-4o", context_fields=["context"]) 17 | 18 | 19 | @trace(eval_funcs=[context_query_relevancy]) 20 | def run_experiment(question: str, context: str) -> str: 21 | return ( 22 | client.chat.completions.create( 23 | model="gpt-4o", 24 | temperature=0, 25 | messages=[{"role": "user", "content": f"Answer question using context. Context: {context}. Question: {question}"}], 26 | ) 27 | .choices[0] 28 | .message.content 29 | ) 30 | 31 | 32 | # You can fetch a dataset directly and then modify it to meet our needs before passing it to p.experiment. 33 | def rename_information_to_context(num_samples: int = 3): 34 | dataset = p.get_collection("Example_Dataset_Name") 35 | if dataset: 36 | testcases: list[TestCase] = list(dataset.test_cases.values()) 37 | # Assume dataset looks like this: 38 | # [ 39 | # inputs={"information": "Some long document", "question": "What is X?"}, target="X is Y" ... 
40 | # ] 41 | return [{"context": case.inputs["information"], "question": case.inputs["question"], "target": case.target} for case in testcases[:num_samples]] 42 | return [] 43 | 44 | 45 | def main(): 46 | data = rename_information_to_context() 47 | experiment = p.experiment("My_Experiment_Name", func=run_experiment, data=data) 48 | experiment.run() 49 | 50 | 51 | if __name__ == "__main__": 52 | main() 53 | -------------------------------------------------------------------------------- /cookbook/evals_and_experiments/route_llm_experiment.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | from dotenv import load_dotenv 5 | from routellm.controller import Controller 6 | 7 | from parea import Parea, trace, trace_insert 8 | from parea.schemas import Completion, EvaluationResult, LLMInputs, Log, Message, ModelParams, Role 9 | 10 | load_dotenv() 11 | 12 | os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY") 13 | os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY") 14 | 15 | ROUTER = "mf" 16 | COST_THRESHOLD = 0.11593 17 | # This tells RouteLLM to use the MF router with a cost threshold of 0.11593 18 | RMODEL = f"router-{ROUTER}-{COST_THRESHOLD}" 19 | STRONG_MODEL = "gpt-4o" 20 | WEAK_MODEL = "groq/llama3-70b-8192" 21 | client = Controller( 22 | routers=[ROUTER], 23 | strong_model=STRONG_MODEL, 24 | weak_model=WEAK_MODEL, 25 | ) 26 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 27 | p.wrap_openai_client(client) 28 | 29 | questions = [ 30 | {"question": "Write a function that takes a string as input and returns the string reversed."}, 31 | {"question": "Write a haiku about a sunset."}, 32 | {"question": "Write a cold email to a VP of Eng selling them on OpenAI's API."}, 33 | {"question": "What's the largest city in Germany?"}, 34 | ] 35 | 36 | 37 | def llm_judge(log: Log) -> EvaluationResult: 38 | try: 39 | response = p.completion( 40 | data=Completion( 41 | llm_configuration=LLMInputs( 42 | model="gpt-4o-mini", 43 | messages=[ 44 | Message( 45 | role=Role.user, 46 | content=f"""[Instruction]\nPlease act as an impartial judge and evaluate the quality and 47 | correctness of the response provided. Be as objective as possible. Respond in JSON with two fields: \n 48 | \t 1. score: int = a number from a scale of 0 to 5; 5 being great and 0 being bad.\n 49 | \t 2. 
reason: str = explain your reasoning for the selected score.\n\n 50 | This is the question asked, QUESTION:\n{log.inputs['question']}\n 51 | This is the response you are judging, RESPONSE:\n{log.output}\n\n""", 52 | ) 53 | ], 54 | model_params=ModelParams(response_format={"type": "json_object"}), 55 | ), 56 | ) 57 | ) 58 | r = json.loads(response.content) 59 | return EvaluationResult(name="LLMJudge", score=int(r["score"]) / 5, reason=r["reason"]) 60 | except Exception as e: 61 | return EvaluationResult(name="error-LLMJudge", score=0, reason=f"Error in grading: {e}") 62 | 63 | 64 | @trace(eval_funcs=[llm_judge]) 65 | def answer_llm(question: str) -> str: 66 | r = client.chat.completions.create( 67 | model=RMODEL, 68 | messages=[{"role": "user", "content": f"Answer this question: {question}\n"}], 69 | ) 70 | trace_insert({"metadata": {"selected_model": r.model}}) 71 | return r.choices[0].message.content 72 | 73 | 74 | if __name__ == "__main__": 75 | p.experiment( 76 | name="RouteLLM", 77 | data=questions, 78 | func=answer_llm, 79 | metadata={ 80 | "router": ROUTER, 81 | "cost_threshold": str(COST_THRESHOLD), 82 | "strong_model": STRONG_MODEL, 83 | "weak_model": WEAK_MODEL, 84 | }, 85 | ).run() 86 | -------------------------------------------------------------------------------- /cookbook/evals_and_experiments/run_experiment.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | 5 | from parea import Parea, trace 6 | from parea.evals.general import levenshtein 7 | 8 | load_dotenv() 9 | 10 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 11 | 12 | 13 | # annotate the function with the trace decorator and pass the evaluation function(s) 14 | @trace(eval_funcs=[levenshtein]) 15 | def greeting(name: str) -> str: 16 | return f"Hello {name}" 17 | 18 | 19 | data = [ 20 | { 21 | "name": "Foo", 22 | "target": "Hi Foo", 23 | }, 24 | { 25 | "name": "Bar", 26 | "target": "Hello Bar", 27 | }, 28 | ] # test data to run the experiment on (list of dicts) 29 | 30 | 31 | # # Define the experiment 32 | # # You can use the CLI command "parea experiment parea/cookbook/run_experiment.py" to execute this experiment 33 | # # or call `.run()` 34 | # # p.experiment( 35 | # # data=data, # Data to run the experiment on (list of dicts) 36 | # # func=greeting, # Function to run (callable) 37 | # # n_trials=1, # Number of times to run the experiment on the same data 38 | # # ) 39 | 40 | # You can optionally run the experiment manually by calling `.run()` 41 | if __name__ == "__main__": 42 | p.experiment( 43 | name="greeting", 44 | data=data, 45 | func=greeting, 46 | n_trials=3, 47 | ).run() 48 | -------------------------------------------------------------------------------- /cookbook/evals_and_experiments/run_experiment_agreement_among_evals.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import os 4 | import random 5 | 6 | from dotenv import load_dotenv 7 | 8 | from parea import Parea, trace 9 | from parea.schemas import EvaluatedLog, EvaluationResult, Log 10 | 11 | load_dotenv() 12 | 13 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 14 | 15 | 16 | def random_eval_factory(trial: int): 17 | def random_eval(log: Log) -> EvaluationResult: 18 | return EvaluationResult(score=1 if random.random() < 0.5 else 0, name=f"random_eval_{trial}") 19 | 20 | return random_eval 21 | 22 | 23 | # apply random evaluation function twice 24 | @trace(eval_funcs=[random_eval_factory(1),
random_eval_factory(2)]) 25 | async def starts_with_f(name: str) -> str: 26 | if name == "Foo": 27 | return "1" 28 | return "0" 29 | 30 | 31 | # dataset-level evaluation function which checks if both random evaluations agree 32 | def percent_evals_agree(logs: List[EvaluatedLog]) -> float: 33 | correct = 0 34 | total = 0 35 | for log in logs: 36 | if log.scores[0].score == log.scores[1].score: 37 | correct += 1 38 | total += 1 39 | return correct / total 40 | 41 | 42 | data = [ 43 | { 44 | "name": "Foo", 45 | "target": "1", 46 | }, 47 | { 48 | "name": "Bar", 49 | "target": "0", 50 | }, 51 | { 52 | "name": "Far", 53 | "target": "1", 54 | }, 55 | ] # test data to run the experiment on (list of dicts) 56 | 57 | 58 | # You can optionally run the experiment manually by calling `.run()` 59 | if __name__ == "__main__": 60 | p.experiment(name="Greeting", data=data, func=starts_with_f, dataset_level_evals=[percent_evals_agree]).run() 61 | -------------------------------------------------------------------------------- /cookbook/evals_and_experiments/run_experiment_balanced_acc.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import asyncio 4 | import os 5 | from collections import defaultdict 6 | 7 | from dotenv import load_dotenv 8 | 9 | from parea import Parea, trace 10 | from parea.schemas import EvaluatedLog, Log 11 | 12 | load_dotenv() 13 | 14 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 15 | 16 | 17 | def is_correct(log: Log) -> bool: 18 | return log.target == log.output 19 | 20 | 21 | def balanced_acc_is_correct(logs: List[EvaluatedLog]) -> float: 22 | score_name = is_correct.__name__ 23 | 24 | correct = defaultdict(int) 25 | total = defaultdict(int) 26 | for log in logs: 27 | if (eval_result := log.get_score(score_name)) is not None: 28 | correct[log.target] += int(eval_result.score) 29 | total[log.target] += 1 30 | recalls = [correct[key] / total[key] for key in correct] 31 | 32 | return sum(recalls) / len(recalls) 33 | 34 | 35 | # or use the pre-built `balanced_acc_factory` to create the function 36 | # from parea.evals.dataset_level import balanced_acc_factory 37 | # 38 | # 39 | # balanced_acc_is_correct = balanced_acc_factory(is_correct.__name__) 40 | 41 | 42 | @trace(eval_funcs=[is_correct]) 43 | async def starts_with_f(name: str) -> str: 44 | await asyncio.sleep(1) 45 | if name == "Foo": 46 | return "1" 47 | return "0" 48 | 49 | 50 | data = [ 51 | { 52 | "name": "Foo", 53 | "target": "1", 54 | }, 55 | { 56 | "name": "Bar", 57 | "target": "0", 58 | }, 59 | { 60 | "name": "Far", 61 | "target": "1", 62 | }, 63 | ] # test data to run the experiment on (list of dicts) 64 | 65 | 66 | # You can optionally run the experiment manually by calling `.run()` 67 | if __name__ == "__main__": 68 | p.experiment(name="Greeting", data=data, func=starts_with_f, dataset_level_evals=[balanced_acc_is_correct], n_workers=2).run() 69 | -------------------------------------------------------------------------------- /cookbook/evals_and_experiments/run_experiment_evas_with_reason.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | 5 | from parea import Parea, trace 6 | from parea.schemas.log import EvaluationResult, Log 7 | 8 | load_dotenv() 9 | 10 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 11 | 12 | 13 | def eval_func_with_reason(log: Log) -> EvaluationResult: 14 | if log.output == log.target: 15 | return EvaluationResult(name="matches_target", 
score=1.0, reason="Output matches target") 16 | elif "Hello" in log.target and "Hello" not in log.output: 17 | return EvaluationResult(name="matches_target", score=0, reason="Output misses 'Hello'") 18 | elif "Hi" in log.target and "Hi" not in log.output: 19 | return EvaluationResult(name="matches_target", score=0, reason="Output misses 'Hi'") 20 | else: 21 | return EvaluationResult(name="matches_target", score=0, reason="Output does not match target") 22 | 23 | 24 | # annotate the function with the trace decorator and pass the evaluation function(s) 25 | @trace(eval_funcs=[eval_func_with_reason]) 26 | def greeting(name: str) -> str: 27 | return f"Hello {name}" 28 | 29 | 30 | data = [ 31 | { 32 | "name": "Foo", 33 | "target": "Hi Foo", 34 | }, 35 | { 36 | "name": "Bar", 37 | "target": "Hello Bar", 38 | }, 39 | ] # test data to run the experiment on (list of dicts) 40 | 41 | 42 | # You can optionally run the experiment manually by calling `.run()` 43 | if __name__ == "__main__": 44 | p.experiment( 45 | name="greeting", 46 | data=data, 47 | func=greeting, 48 | n_trials=1, 49 | ).run() 50 | -------------------------------------------------------------------------------- /cookbook/evals_and_experiments/run_experiment_using_saved_test_collection.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | 5 | from parea import Parea, trace 6 | from parea.schemas import Completion, LLMInputs, Log, Message, ModelParams, Role 7 | 8 | load_dotenv() 9 | 10 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 11 | 12 | 13 | def eval_func(log: Log) -> float: 14 | from random import random 15 | from time import sleep 16 | 17 | sleep(random() * 10) 18 | return random() 19 | 20 | 21 | # annotate the function with the trace decorator and pass the evaluation function(s) 22 | @trace(eval_funcs=[eval_func]) 23 | def func(lang: str, framework: str) -> str: 24 | return p.completion( 25 | data=Completion( 26 | llm_configuration=LLMInputs( 27 | model="gpt-3.5-turbo", 28 | model_params=ModelParams(temp=1), 29 | messages=[ 30 | Message(role=Role.user, content=f"Write a hello world program in {lang} using {framework}"), 31 | ], 32 | ) 33 | ) 34 | ).content 35 | 36 | 37 | if __name__ == "__main__": 38 | p.experiment( 39 | name="Hello World Example", # this is the name of the experiment 40 | data="Hello World Example", # this is the name of your Dataset in Parea (Dataset page) 41 | func=func, 42 | ).run() 43 | 44 | # Or use a dataset using its ID instead of the name 45 | # p.experiment( 46 | # data=121, # this is the id of your Dataset in Parea (Dataset page) 47 | # func=func, 48 | # ).run(name="hello-world-example") 49 | -------------------------------------------------------------------------------- /cookbook/fetch_logs.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | 5 | from parea import Parea 6 | from parea.schemas import FilterOperator, QueryParams 7 | 8 | load_dotenv() 9 | 10 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 11 | 12 | paginated_resp = p.get_trace_logs(QueryParams(project_name="default", filter_field="trace_name", filter_operator=FilterOperator.LIKE, filter_value="llm")) 13 | print(f"Num. 
LLM logs fetched: {len(paginated_resp.results)} | total LLM logs: {paginated_resp.total}") 14 | -------------------------------------------------------------------------------- /cookbook/finetuning/download_as_jsonl.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | 3 | import os 4 | 5 | from dotenv import load_dotenv 6 | 7 | from parea import Parea 8 | from parea.schemas import FilterOperator, QueryParams 9 | 10 | load_dotenv() 11 | 12 | 13 | project_name = "default" 14 | p = Parea(api_key=os.getenv("PAREA_API_KEY"), project_name=project_name) 15 | 16 | 17 | def fetch_trace_logs_as_jsonl() -> List[Dict]: 18 | page_size = 100 19 | query_params = QueryParams( 20 | project_name=project_name, 21 | filter_field="trace_name", 22 | filter_value="personalize_email_german", 23 | filter_operator=FilterOperator.EQUALS, 24 | page_size=page_size, 25 | status="success", 26 | ) 27 | initial_fetch = p.get_trace_logs(query_params) 28 | fetched_trace_logs = initial_fetch.results 29 | for page in range(1, initial_fetch.total_pages): 30 | query_params.page = page 31 | fetched_trace_logs.extend(p.get_trace_logs(query_params).results) 32 | return [trace_log.convert_to_jsonl_row_for_finetuning() for trace_log in fetched_trace_logs] 33 | 34 | 35 | jsonl_rows = fetch_trace_logs_as_jsonl() 36 | -------------------------------------------------------------------------------- /cookbook/guidance/tracing_guidance.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | from guidance import assistant, gen, models, user 5 | 6 | from parea import Parea, trace 7 | 8 | load_dotenv() 9 | 10 | p = Parea(api_key=os.getenv("PAREA_API_KEY"), project_name="testing") 11 | p.auto_trace_openai_clients() 12 | 13 | 14 | gpt = models.OpenAI("gpt-3.5-turbo") 15 | 16 | 17 | @trace 18 | def guidance_program(): 19 | 20 | with user(): 21 | lm = gpt + "What is the capital of Italy?" 22 | 23 | with assistant(): 24 | out = gen("capital") 25 | lm += out 26 | 27 | with user(): 28 | lm += "What is one short surprising fact about it?" 29 | 30 | with assistant(): 31 | lm += gen("fact") 32 | 33 | print(lm) 34 | 35 | 36 | guidance_program() 37 | -------------------------------------------------------------------------------- /cookbook/instructor/instructor_blog_example_simple.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | 4 | import instructor 5 | import requests 6 | from dotenv import load_dotenv 7 | from openai import OpenAI 8 | from pydantic import BaseModel, Field, field_validator 9 | 10 | from parea import Parea 11 | 12 | load_dotenv() 13 | 14 | client = OpenAI() 15 | 16 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 17 | p.wrap_openai_client(client, "instructor") 18 | 19 | client = instructor.from_openai(client) 20 | 21 | 22 | class Email(BaseModel): 23 | subject: str 24 | body: str = Field( 25 | ..., 26 | description="Email body, Should contain links to instructor documentation. ", 27 | ) 28 | 29 | @field_validator("body") 30 | def check_urls(cls, v): 31 | urls = re.findall(r"https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+", v) 32 | errors = [] 33 | for url in urls: 34 | if not url.startswith("https://python.useinstructor.com"): 35 | errors.append(f"URL {url} is not from useinstructor.com, Only include URLs that include use instructor.com. 
") 36 | response = requests.get(url) 37 | if response.status_code != 200: 38 | errors.append(f"URL {url} returned status code {response.status_code}. Only include valid URLs that exist.") 39 | elif "404" in response.text: 40 | errors.append(f"URL {url} contained '404' in the body. Only include valid URLs that exist.") 41 | if errors: 42 | raise ValueError("\n".join(errors)) 43 | return v 44 | 45 | 46 | def main(): 47 | email = client.messages.create( 48 | model="gpt-3.5-turbo", 49 | max_tokens=1024, 50 | max_retries=3, 51 | messages=[ 52 | { 53 | "role": "user", 54 | "content": "I'm responding to a student's question. Here is the link to the documentation: {{doc_link1}} and {{doc_link2}}", 55 | } 56 | ], 57 | template_inputs={ 58 | "doc_link1": "https://python.useinstructor.com/docs/tutorial/tutorial-1", 59 | "doc_link2": "https://jxnl.github.io/docs/tutorial/tutorial-2", 60 | }, 61 | response_model=Email, 62 | ) 63 | print(email) 64 | 65 | 66 | if __name__ == "__main__": 67 | main() 68 | -------------------------------------------------------------------------------- /cookbook/instructor/instructor_blog_example_validation_context.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | 3 | import os 4 | import re 5 | 6 | import instructor 7 | import requests 8 | from dotenv import load_dotenv 9 | from openai import OpenAI 10 | from pydantic import AfterValidator, BaseModel, ValidationInfo 11 | 12 | from parea import Parea 13 | 14 | load_dotenv() 15 | 16 | client = OpenAI() 17 | 18 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 19 | p.wrap_openai_client(client, "instructor") 20 | 21 | client = instructor.from_openai(client) 22 | 23 | 24 | def check_urls(v, info: ValidationInfo): 25 | urls = re.findall(r"https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+", v) 26 | domain = info.context.get("domain") if info and info.context else None 27 | errors = [] 28 | for url in urls: 29 | if domain and not url.startswith(domain): 30 | errors.append(f"URL {url} is not from useinstructor.com, Only include URLs that include use instructor.com. ") 31 | response = requests.get(url) 32 | if response.status_code != 200: 33 | errors.append(f"URL {url} returned status code {response.status_code}. Only include valid URLs that exist.") 34 | elif "404" in response.text: 35 | errors.append(f"URL {url} contained '404' in the body. Only include valid URLs that exist.") 36 | if errors: 37 | raise ValueError("\n".join(errors)) 38 | return v 39 | 40 | 41 | Body = Annotated[str, AfterValidator(check_urls)] 42 | 43 | 44 | class Email(BaseModel): 45 | subject: str 46 | body: Body 47 | 48 | 49 | def main(): 50 | email = client.messages.create( 51 | model="gpt-3.5-turbo", 52 | max_tokens=1024, 53 | max_retries=3, 54 | messages=[ 55 | { 56 | "role": "user", 57 | "content": "I'm responding to a student's question. 
Here is the link to the documentation: {{doc_link1}} and {{doc_link2}}", 58 | } 59 | ], 60 | template_inputs={ 61 | "doc_link1": "https://python.useinstructor.com/docs/tutorial/tutorial-1", 62 | "doc_link2": "https://jxnl.github.io/docs/tutorial/tutorial-2", 63 | }, 64 | response_model=Email, 65 | validation_context={"domain": "https://python.useinstructor.com"}, 66 | ) 67 | print(email) 68 | 69 | 70 | if __name__ == "__main__": 71 | main() 72 | -------------------------------------------------------------------------------- /cookbook/instructor/instructor_evals.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import instructor 4 | from dotenv import load_dotenv 5 | from openai import OpenAI 6 | from pydantic import BaseModel, field_validator 7 | 8 | from parea import Parea 9 | 10 | load_dotenv() 11 | 12 | client = OpenAI() 13 | 14 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 15 | p.wrap_openai_client(client, "instructor") 16 | 17 | client = instructor.from_openai(client) 18 | 19 | 20 | class User(BaseModel): 21 | name: str 22 | age: int 23 | 24 | @field_validator("name") 25 | def name_is_uppercase(cls, v: str): 26 | assert v.isupper(), "Name must be uppercase" 27 | return v 28 | 29 | 30 | resp = client.messages.create( 31 | model="gpt-3.5-turbo", 32 | max_tokens=1024, 33 | max_retries=3, 34 | messages=[ 35 | { 36 | "role": "user", 37 | "content": "Extract {{name}} is {{age}} years old.", 38 | } 39 | ], 40 | template_inputs={ 41 | "name": "Bobby", 42 | "age": 18, 43 | }, 44 | response_model=User, 45 | ) 46 | 47 | assert isinstance(resp, User) 48 | assert resp.name == "BOBBY" # due to validation 49 | assert resp.age == 18 50 | print(resp) 51 | -------------------------------------------------------------------------------- /cookbook/instructor/instructor_streaming.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import anthropic 4 | import instructor 5 | from dotenv import load_dotenv 6 | from openai import AsyncOpenAI 7 | from pydantic import BaseModel 8 | 9 | from parea import Parea, trace 10 | 11 | load_dotenv() 12 | 13 | oai_aclient = AsyncOpenAI() 14 | ant_client = anthropic.AsyncClient() 15 | 16 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 17 | 18 | p.wrap_openai_client(oai_aclient, "instructor") 19 | p.wrap_anthropic_client(ant_client) 20 | 21 | oai_aclient = instructor.from_openai(oai_aclient) 22 | ant_client = instructor.from_anthropic(ant_client) 23 | 24 | 25 | class UserDetail(BaseModel): 26 | name: str 27 | age: str 28 | 29 | 30 | @trace 31 | async def ainner_main(): 32 | user = oai_aclient.completions.create_partial( 33 | model="gpt-4o-mini", 34 | max_tokens=1024, 35 | max_retries=3, 36 | messages=[ 37 | { 38 | "role": "user", 39 | "content": "Please create a user", 40 | } 41 | ], 42 | response_model=UserDetail, 43 | ) 44 | return user 45 | 46 | 47 | async def amain(): 48 | resp = await ainner_main() 49 | async for u in resp: 50 | print(u) 51 | 52 | 53 | @trace 54 | def inner_main(): 55 | user = ant_client.completions.create_partial( 56 | model="claude-3-5-sonnet-20240620", 57 | max_tokens=1024, 58 | max_retries=3, 59 | messages=[ 60 | { 61 | "role": "user", 62 | "content": "Please create a user", 63 | } 64 | ], 65 | response_model=UserDetail, 66 | ) 67 | return user 68 | 69 | 70 | def main(): 71 | resp = inner_main() 72 | for u in resp: 73 | print(u) 74 | 75 | 76 | if __name__ == "__main__": 77 | import asyncio 78 | 79 | asyncio.run(amain()) 80 | 81 | 
main() 82 | -------------------------------------------------------------------------------- /cookbook/langchain/trace_class_call_method.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | from langchain_core.output_parsers import StrOutputParser 5 | from langchain_core.prompts import ChatPromptTemplate 6 | from langchain_openai import ChatOpenAI 7 | 8 | from parea import Parea, trace 9 | from parea.utils.trace_integrations.langchain import PareaAILangchainTracer 10 | 11 | load_dotenv() 12 | 13 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 14 | 15 | 16 | class LangChainModule: 17 | handler = PareaAILangchainTracer() 18 | 19 | def __init__(self): 20 | self.llm = ChatOpenAI(openai_api_key=os.getenv("OPENAI_API_KEY")) 21 | 22 | def get_chain(self): 23 | prompt = ChatPromptTemplate.from_messages([("user", "{input}")]) 24 | chain = prompt | self.llm | StrOutputParser() 25 | return chain 26 | 27 | @trace(name="langchain_caller_call") 28 | def __call__(self, query: str) -> str: 29 | chain = self.get_chain() 30 | return chain.invoke({"input": query}, config={"callbacks": [self.handler]}) 31 | 32 | 33 | class LLMCaller: 34 | def __init__(self, query: str): 35 | self.client = LangChainModule() 36 | self.query = query 37 | 38 | @trace(name="llm_caller_call") 39 | def __call__(self) -> str: 40 | return self.client(query=self.query) 41 | 42 | 43 | @trace 44 | def main(query: str) -> str: 45 | caller = LLMCaller(query=query) 46 | return caller() 47 | 48 | 49 | if __name__ == "__main__": 50 | result = main("Write a Hello World program in Python using FastAPI.") 51 | print(result) 52 | -------------------------------------------------------------------------------- /cookbook/langchain/trace_langchain_anthropic_function_calling.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | from langchain.chains import create_extraction_chain 5 | from langchain.schema import HumanMessage 6 | from langchain_experimental.llms.anthropic_functions import AnthropicFunctions 7 | 8 | from parea import Parea 9 | from parea.utils.trace_integrations.langchain import PareaAILangchainTracer 10 | 11 | load_dotenv() 12 | 13 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 14 | 15 | model = AnthropicFunctions(model="claude-2") 16 | 17 | functions = [ 18 | { 19 | "name": "get_current_weather", 20 | "description": "Get the current weather in a given location", 21 | "parameters": { 22 | "type": "object", 23 | "properties": { 24 | "location": { 25 | "type": "string", 26 | "description": "The city and state, e.g. San Francisco, CA", 27 | }, 28 | "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, 29 | }, 30 | "required": ["location"], 31 | }, 32 | } 33 | ] 34 | 35 | 36 | schema = { 37 | "properties": { 38 | "name": {"type": "string"}, 39 | "height": {"type": "integer"}, 40 | "hair_color": {"type": "string"}, 41 | }, 42 | "required": ["name", "height"], 43 | } 44 | inp = """Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. 
Claudia is a brunette and Alex 45 | is blonde.""" 46 | 47 | chain = create_extraction_chain(schema, model) 48 | 49 | 50 | def main(): 51 | response = model.predict_messages([HumanMessage(content="whats the weater in boston?")], functions=functions, callbacks=[PareaAILangchainTracer()]) 52 | print(response) 53 | result = chain.run(inp, callbacks=[PareaAILangchainTracer()]) 54 | print(result) 55 | 56 | 57 | if __name__ == "__main__": 58 | main() 59 | -------------------------------------------------------------------------------- /cookbook/langchain/trace_langchain_bedrock_rag.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import boto3 4 | from dotenv import load_dotenv 5 | from langchain.document_loaders import TextLoader 6 | from langchain.llms.bedrock import Bedrock 7 | from langchain.output_parsers import XMLOutputParser 8 | from langchain.prompts import PromptTemplate 9 | from langchain.schema.output_parser import StrOutputParser 10 | from langchain.text_splitter import RecursiveCharacterTextSplitter 11 | 12 | from parea import Parea 13 | from parea.utils.trace_integrations.langchain import PareaAILangchainTracer 14 | 15 | load_dotenv() 16 | 17 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 18 | handler = PareaAILangchainTracer() 19 | 20 | 21 | def get_docs(): 22 | loader = TextLoader("../assets/data/2022-letter.txt") 23 | letter = loader.load() 24 | text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n"], chunk_size=4000, chunk_overlap=100) 25 | return text_splitter.split_documents(letter) 26 | 27 | 28 | xml_parser = XMLOutputParser(tags=["insight"]) 29 | str_parser = StrOutputParser() 30 | 31 | insight_prompt = PromptTemplate( 32 | template=""" 33 | 34 | Human: 35 | {instructions} : \"{document}\" 36 | Format help: {format_instructions}. 37 | Assistant:""", 38 | input_variables=["instructions", "document"], 39 | partial_variables={"format_instructions": xml_parser.get_format_instructions()}, 40 | ) 41 | 42 | summary_prompt = PromptTemplate( 43 | template=""" 44 | 45 | Human: 46 | {instructions} : \"{document}\" 47 | Assistant:""", 48 | input_variables=["instructions", "document"], 49 | ) 50 | 51 | docs = get_docs() 52 | bedrock_client = boto3.client("bedrock-runtime", region_name="us-east-1") 53 | bedrock_llm = Bedrock( 54 | client=bedrock_client, 55 | model_id="amazon.titan-text-express-v1", 56 | model_kwargs={"maxTokenCount": 4096, "stopSequences": [], "temperature": 0, "topP": 1}, 57 | ) 58 | 59 | insight_chain = insight_prompt | bedrock_llm | StrOutputParser() 60 | summary_chain = summary_prompt | bedrock_llm | StrOutputParser() 61 | 62 | 63 | def get_insights(docs): 64 | insights = [] 65 | for i in range(len(docs)): 66 | insight = insight_chain.invoke( 67 | {"instructions": "Provide Key insights from the following text", "document": {docs[i].page_content}}, config={"callbacks": [PareaAILangchainTracer()]} 68 | ) 69 | insights.append(insight) 70 | return insights 71 | 72 | 73 | def main(): 74 | print("Starting") 75 | insights = get_insights(docs) 76 | print(insights) 77 | summary = summary_chain.invoke( 78 | { 79 | "instructions": "You will be provided with multiple sets of insights. Compile and summarize these " 80 | "insights and provide key takeaways in one concise paragraph. Do not use the original xml " 81 | "tags. 
Just provide a paragraph with your compiled insights.", 82 | "document": {"\n".join(insights)}, 83 | }, 84 | config={"callbacks": [PareaAILangchainTracer()]}, 85 | ) 86 | print(summary) 87 | print("Done") 88 | 89 | 90 | if __name__ == "__main__": 91 | main() 92 | -------------------------------------------------------------------------------- /cookbook/langchain/trace_langchain_inside_trace_decorator.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | from langchain_core.output_parsers import StrOutputParser 5 | from langchain_core.prompts import ChatPromptTemplate 6 | from langchain_openai import ChatOpenAI 7 | from openai import OpenAI 8 | 9 | from parea import Parea, trace 10 | from parea.utils.trace_integrations.langchain import PareaAILangchainTracer 11 | 12 | load_dotenv() 13 | 14 | oai_client = OpenAI() 15 | 16 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 17 | handler = PareaAILangchainTracer() 18 | p.wrap_openai_client(oai_client) 19 | 20 | llm = ChatOpenAI(openai_api_key=os.getenv("OPENAI_API_KEY")) 21 | prompt = ChatPromptTemplate.from_messages([("user", "{input}")]) 22 | chain = prompt | llm | StrOutputParser() 23 | 24 | 25 | @trace 26 | def main(): 27 | programming_language = ( 28 | oai_client.chat.completions.create(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Suggest one programming languages"}]).choices[0].message.content 29 | ) 30 | 31 | return chain.invoke( 32 | {"input": f"Write a Hello World program in {programming_language}."}, 33 | config={"callbacks": [handler]}, 34 | ) 35 | 36 | 37 | if __name__ == "__main__": 38 | print(main()) 39 | -------------------------------------------------------------------------------- /cookbook/langchain/trace_langchain_rag_agents.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | from langchain.agents.agent_toolkits import create_conversational_retrieval_agent, create_retriever_tool 5 | from langchain.chat_models import ChatOpenAI 6 | from langchain.document_loaders import TextLoader 7 | from langchain.embeddings import OpenAIEmbeddings 8 | from langchain.text_splitter import CharacterTextSplitter 9 | from langchain.vectorstores import FAISS 10 | 11 | from parea import Parea 12 | from parea.utils.trace_integrations.langchain import PareaAILangchainTracer 13 | 14 | load_dotenv() 15 | 16 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 17 | 18 | loader = TextLoader("../assets/data/state_of_the_union.txt") 19 | 20 | 21 | documents = loader.load() 22 | text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) 23 | texts = text_splitter.split_documents(documents) 24 | embeddings = OpenAIEmbeddings() 25 | db = FAISS.from_documents(texts, embeddings) 26 | retriever = db.as_retriever() 27 | tool = create_retriever_tool( 28 | retriever, 29 | "search_state_of_union", 30 | "Searches and returns documents regarding the state-of-the-union.", 31 | ) 32 | tools = [tool] 33 | 34 | 35 | llm = ChatOpenAI(temperature=0) 36 | 37 | agent_executor = create_conversational_retrieval_agent(llm, tools) 38 | 39 | 40 | def main(): 41 | result = agent_executor({"input": "what did the president say about kentaji brown jackson in the most recent state of the union?"}, callbacks=[PareaAILangchainTracer()]) 42 | print(result) 43 | 44 | 45 | if __name__ == "__main__": 46 | main() 47 | 
-------------------------------------------------------------------------------- /cookbook/langchain/trace_langchain_rag_question_answering.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import bs4 4 | from dotenv import load_dotenv 5 | from langchain import hub 6 | from langchain.document_loaders import WebBaseLoader 7 | from langchain.schema import StrOutputParser 8 | from langchain.text_splitter import RecursiveCharacterTextSplitter 9 | from langchain_community.vectorstores.faiss import FAISS 10 | from langchain_core.runnables import RunnablePassthrough 11 | from langchain_openai import ChatOpenAI, OpenAIEmbeddings 12 | 13 | from parea import Parea 14 | from parea.utils.trace_integrations.langchain import PareaAILangchainTracer 15 | 16 | load_dotenv() 17 | 18 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 19 | 20 | loader = WebBaseLoader( 21 | web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",), 22 | bs_kwargs=dict(parse_only=bs4.SoupStrainer(class_=("post-content", "post-title", "post-header"))), 23 | ) 24 | docs = loader.load() 25 | 26 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) 27 | splits = text_splitter.split_documents(docs) 28 | 29 | vectorstore = FAISS.from_documents(documents=splits, embedding=OpenAIEmbeddings()) 30 | retriever = vectorstore.as_retriever() 31 | 32 | prompt = hub.pull("rlm/rag-prompt") 33 | llm = ChatOpenAI(temperature=0) 34 | 35 | 36 | def format_docs(docs): 37 | return "\n\n".join(doc.page_content for doc in docs) 38 | 39 | 40 | rag_chain = {"context": retriever | format_docs, "question": RunnablePassthrough()} | prompt | llm | StrOutputParser() 41 | 42 | 43 | def main(): 44 | response = rag_chain.invoke("What is Task Decomposition?", config={"callbacks": [PareaAILangchainTracer()]}) 45 | print(response) 46 | 47 | 48 | if __name__ == "__main__": 49 | main() 50 | -------------------------------------------------------------------------------- /cookbook/langchain/trace_langchain_simple.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | 4 | from dotenv import load_dotenv 5 | from langchain_core.output_parsers import StrOutputParser 6 | from langchain_core.prompts import ChatPromptTemplate 7 | from langchain_openai import ChatOpenAI 8 | 9 | from parea import Parea 10 | from parea.utils.trace_integrations.langchain import PareaAILangchainTracer 11 | 12 | load_dotenv() 13 | 14 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 15 | handler = PareaAILangchainTracer() 16 | 17 | llm = ChatOpenAI(openai_api_key=os.getenv("OPENAI_API_KEY")) 18 | prompt = ChatPromptTemplate.from_messages([("user", "{input}")]) 19 | chain = prompt | llm | StrOutputParser() 20 | 21 | 22 | def main(): 23 | return chain.invoke( 24 | {"input": "Write a Hello World program in Python using FastAPI."}, 25 | config={"callbacks": [PareaAILangchainTracer(session_id="123", tags=["fastapi"], metadata={"key": "value"}, end_user_identifier="user123", deployment_id="456")]}, 26 | ) 27 | 28 | 29 | async def amain(): 30 | return await chain.ainvoke( 31 | {"input": "Write a Hello World program in Python using FastAPI."}, 32 | config={"callbacks": [handler]}, 33 | ) 34 | 35 | 36 | if __name__ == "__main__": 37 | print(main()) 38 | print(asyncio.run(amain())) 39 | -------------------------------------------------------------------------------- /cookbook/langchain/trace_langchain_with_deployed_prompt.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | from langchain_core.output_parsers import StrOutputParser 5 | from langchain_core.prompts import ChatPromptTemplate, PromptTemplate 6 | from langchain_openai import ChatOpenAI 7 | 8 | from parea import Parea 9 | from parea.schemas import UseDeployedPrompt, UseDeployedPromptResponse 10 | from parea.utils.trace_integrations.langchain import PareaAILangchainTracer 11 | 12 | load_dotenv() 13 | 14 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 15 | 16 | 17 | CONTEXT = """Company: Nike. 2023 18 | FORM 10-K 35 19 | OPERATING SEGMENTS 20 | As discussed in Note 15 2014 Operating Segments and Related Information in the accompanying Notes to the Consolidated Financial Statements, our operating segments are evidence of the structure of the Company's internal organization. The NIKE Brand segments are defined by geographic regions for operations participating in NIKE Brand sales activity. 21 | The breakdown of Revenues is as follows: 22 | \n\n(Dollars in millions) 23 | \n\nFISCAL 2023 FISCAL 2022 24 | \n\n% CHANGE\n\n% CHANGE EXCLUDING CURRENCY (1) CHANGES FISCAL 2021\n\n% CHANGE\n\n 25 | North America Europe, Middle East & Africa Greater China\n\n$\n\n21,608 $ 13,418 7,248\n\n18,353 12,479 7,547\n\n18 % 8 % -4 %\n\n18 % $ 21 % 4 %\n\n17,179 11,456 8,290\n\n7 % 9 % -9 %\n\nAsia Pacific & Latin America Global Brand Divisions\n\n(3)\n\n(2)\n\n6,431 58\n\n5,955 102\n\n8 % -43 %\n\n17 % -43 %\n\n5,343 25\n\n11 % 308 %\n\nTOTAL NIKE BRAND Converse\n\n$\n\n48,763 $ 2,427\n\n44,436 2,346\n\n10 % 3 %\n\n16 % $ 8 %\n\n42,293 2,205\n\n5 % 6 %\n\n(4)\n\nCorporate TOTAL NIKE, INC. REVENUES\n\n$\n\n27\n\n51,217 $\n\n(72) 46,710\n\n— 10 %\n\n— 16 % $\n\n40 44,538\n\n— 5 %""" 26 | 27 | 28 | def get_answer_prompt() -> ChatPromptTemplate: 29 | # fetched.prompt.raw_messages = [ 30 | # { 31 | # "content": "Use the following pieces of context from Nike's financial 10k filings dataset to answer the question. " 32 | # "Do not make up an answer if no context is provided to help answer it." 
33 | # "\n\nContext:\n---------\n{context}\n\n---------\nQuestion: {question}\n---------\n\nAnswer:", 34 | # "role": "user", 35 | # } 36 | # ] 37 | fetched: UseDeployedPromptResponse = p.get_prompt(UseDeployedPrompt(deployment_id="p-JTDYylldIrMbMisT70DJZ")) 38 | # use the raw messages since it has the templated variables which will be filled in when we invoke the prompt 39 | answer_prompt = ChatPromptTemplate.from_messages([(message["role"], message["content"]) for message in fetched.prompt.raw_messages]) 40 | return answer_prompt 41 | 42 | 43 | def get_summary_prompt() -> PromptTemplate: 44 | # fetched.prompt.raw_messages = [{'content': 'Compile and summarize the following content: {content}', 'role': 'user'}] 45 | fetched: UseDeployedPromptResponse = p.get_prompt(UseDeployedPrompt(deployment_id="p-OGWAo6yvVKr1hUBY6bmHw")) 46 | # use the raw messages since it has the templated variables which will be filled in when we invoke the prompt 47 | summary_prompt = PromptTemplate( 48 | template=fetched.prompt.raw_messages[0]["content"], 49 | input_variables=["content"], 50 | ) 51 | return summary_prompt 52 | 53 | 54 | def main(question): 55 | llm = ChatOpenAI(openai_api_key=os.getenv("OPENAI_API_KEY")) 56 | answer_prompt = get_answer_prompt() 57 | summary_prompt = get_summary_prompt() 58 | answer_chain = answer_prompt | llm | StrOutputParser() 59 | summary_chain = summary_prompt | llm | StrOutputParser() 60 | answer = answer_chain.invoke( 61 | { 62 | "context": CONTEXT, 63 | "question": question, 64 | }, 65 | config={"callbacks": [PareaAILangchainTracer(deployment_id="p-JTDYylldIrMbMisT70DJZ")]}, 66 | ) 67 | summary = summary_chain.invoke( 68 | {"content": answer}, 69 | config={"callbacks": [PareaAILangchainTracer(deployment_id="p-OGWAo6yvVKr1hUBY6bmHw")]}, 70 | ) 71 | return summary 72 | 73 | 74 | if __name__ == "__main__": 75 | response = main(question="Which operating segment contributed least to total Nike brand revenue in fiscal 2023?") 76 | print(response) 77 | -------------------------------------------------------------------------------- /cookbook/marvin/trace_marvin.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import marvin 4 | from dotenv import load_dotenv 5 | from pydantic import BaseModel, Field 6 | 7 | from parea import Parea 8 | 9 | load_dotenv() 10 | 11 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 12 | p.auto_trace_openai_clients("marvin") 13 | 14 | 15 | class Location(BaseModel): 16 | city: str 17 | state: str = Field(description="2-letter abbreviation") 18 | 19 | 20 | result = marvin.cast("the big apple", Location) 21 | print(result) 22 | -------------------------------------------------------------------------------- /cookbook/openai/dynamic_few_shot_injection_with_evals.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import os 4 | import random 5 | 6 | from dotenv import load_dotenv 7 | from openai import OpenAI 8 | from pydantic import BaseModel 9 | 10 | from parea import Parea, get_current_trace_id, trace, trace_insert 11 | from parea.schemas import Log, TestCase 12 | 13 | load_dotenv() 14 | 15 | client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) 16 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 17 | p.wrap_openai_client(client) 18 | 19 | NUM_INTERACTIONS = 5 20 | 21 | 22 | class Person(BaseModel): 23 | name: str 24 | email: str 25 | 26 | 27 | class Email(BaseModel): 28 | contact: Person 29 | email_sent: str 30 | 31 | 32 
| mock_DB: dict[str, Email] = {} 33 | 34 | 35 | def call_llm(messages: List[dict], model: str = "gpt-4o", temperature: float = 0.0) -> str: 36 | return client.chat.completions.create(model=model, temperature=temperature, messages=messages).choices[0].message.content 37 | 38 | 39 | def eval_func(log: Log) -> float: 40 | return random.uniform(0, 1) 41 | 42 | 43 | # Imitate collecting few shot examples from prod based on user feedback 44 | @trace(eval_funcs=[eval_func]) 45 | def email_writer(main_objective: str, contact: Person, few_shot_examples: Optional[List[str]] = None) -> str: 46 | trace_insert({"end_user_identifier": contact.name, "metadata": {"has_few_shot_examples": bool(few_shot_examples)}}) 47 | 48 | few_shot_examples_prompt = ("\nHere are some examples of good emails\n" + "\n".join(few_shot_examples)) if few_shot_examples else "" 49 | messages = [ 50 | { 51 | "role": "system", 52 | "content": f"You are an AI who performs an email writing task based on the following objective: {main_objective}", 53 | }, 54 | { 55 | "role": "user", 56 | "content": f""" 57 | Your email is from: {contact.model_dump()} 58 | {few_shot_examples_prompt if few_shot_examples else ""} 59 | Email: 60 | """, 61 | }, 62 | ] 63 | response = call_llm(messages) 64 | trace_id = get_current_trace_id() 65 | # insert into mock_DB 66 | mock_DB[trace_id] = Email(contact=contact, email_sent=response) 67 | return response 68 | 69 | 70 | def mimic_prod(few_shot_limit: int = 3): 71 | contact = Person(name="John Doe", email="jdoe@email.com") 72 | dataset = p.get_collection("Good_Email_Examples") 73 | selected_few_shot_examples = None 74 | if dataset: 75 | testcases: list[TestCase] = list(dataset.test_cases.values()) 76 | few_shot_examples = [case.inputs["email"] for case in testcases if case.inputs["user"] == contact.name] 77 | # This is simply taking most recent n examples. 
You can imagine adding additional logic to the dataset 78 | # that allows you to rank the examples based on some criteria 79 | selected_few_shot_examples = few_shot_examples[-few_shot_limit:] if few_shot_examples else None 80 | for interaction in range(NUM_INTERACTIONS): 81 | email = email_writer("Convincing email to gym to cancel membership early.", contact, selected_few_shot_examples) 82 | print(email) 83 | 84 | 85 | def add_good_email_example_to_dataset(user_name, email): 86 | # Note: if the test case collection doesn't exist, we will create a new collection with the provided name and data 87 | p.add_test_cases([{"user": user_name, "email": email}], name="Good_Email_Examples") 88 | 89 | 90 | def mimic_prod_checking_eval_scores(): 91 | # imagine the trace_id of the email is stored in state in the UI, so when the user provides feedback, we can use it 92 | trace_ids = mock_DB.keys() 93 | for trace_id in trace_ids: 94 | scores = p.get_trace_log_scores(trace_id) 95 | for score in scores: 96 | if score.name == "eval_func" and score.score >= 0.5: 97 | add_good_email_example_to_dataset(mock_DB[trace_id].contact.name, mock_DB[trace_id].email_sent) 98 | break 99 | 100 | 101 | if __name__ == "__main__": 102 | mimic_prod() 103 | mimic_prod_checking_eval_scores() 104 | # future llm calls will now have few-shot examples from the feedback collection 105 | mimic_prod() 106 | print("Done") 107 | -------------------------------------------------------------------------------- /cookbook/openai/simple_experiment_with_openai.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | import os 4 | 5 | from dotenv import load_dotenv 6 | from openai import OpenAI 7 | 8 | from parea import Parea, trace 9 | from parea.schemas import Log 10 | 11 | load_dotenv() 12 | 13 | client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) 14 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 15 | p.wrap_openai_client(client) 16 | 17 | 18 | def eval_func(log: Log) -> float: 19 | from random import random 20 | from time import sleep 21 | 22 | sleep(random() * 10) 23 | return random() 24 | 25 | 26 | @trace(eval_funcs=[eval_func]) 27 | def func(topic: str) -> dict[str, Union[str, None]]: 28 | return { 29 | "data": ( 30 | client.chat.completions.create( 31 | model="gpt-4-turbo", 32 | messages=[ 33 | { 34 | "role": "user", 35 | "content": f"Write a short haiku about {topic}", 36 | } 37 | ], 38 | ) 39 | .choices[0] 40 | .message.content 41 | ) 42 | } 43 | 44 | 45 | if __name__ == "__main__": 46 | p.experiment( 47 | name="hello-world-example-ch", 48 | data=[{"topic": "Fish"}, {"topic": "Python"}], 49 | func=func, 50 | ).run() 51 | -------------------------------------------------------------------------------- /cookbook/openai/trace_class_call_method.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import os 4 | 5 | from dotenv import load_dotenv 6 | from openai import OpenAI 7 | 8 | from parea import Parea, trace 9 | 10 | load_dotenv() 11 | 12 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 13 | 14 | 15 | class LLMCaller: 16 | def __init__(self, messages: List[dict[str, str]]): 17 | self.messages = messages 18 | self.client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) 19 | p.wrap_openai_client(self.client) 20 | 21 | @trace 22 | def __call__(self, model: str = "gpt-4o", temperature: float = 0.0) -> str: 23 | return self.client.chat.completions.create(model=model, temperature=temperature, 
messages=self.messages).choices[0].message.content 24 | 25 | 26 | @trace 27 | def main(topic: str) -> str: 28 | caller = LLMCaller( 29 | messages=[ 30 | {"role": "system", "content": "You are a debater making an argument on a topic."}, 31 | {"role": "user", "content": f"The discussion topic is {topic}"}, 32 | ] 33 | ) 34 | return caller() 35 | 36 | 37 | if __name__ == "__main__": 38 | result = main("The impact of climate change on the economy") 39 | print(result) 40 | -------------------------------------------------------------------------------- /cookbook/openai/tracing_azure_open_ai.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | 4 | from dotenv import load_dotenv 5 | from openai.lib.azure import AsyncAzureOpenAI, AzureOpenAI 6 | 7 | from cookbook.assets.data.openai_input_examples import functions_example, simple_example 8 | from parea import Parea, trace 9 | 10 | load_dotenv() 11 | 12 | client = AzureOpenAI( 13 | api_version="2023-12-01-preview", 14 | api_key=os.getenv("AZURE_OAI_API_KEY"), 15 | azure_endpoint=os.getenv("AZURE_OAI_ENDPOINT"), 16 | ) 17 | aclient = AsyncAzureOpenAI( 18 | api_version="2023-12-01-preview", 19 | api_key=os.getenv("AZURE_OAI_API_KEY"), 20 | azure_endpoint=os.getenv("AZURE_OAI_ENDPOINT"), 21 | ) 22 | 23 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 24 | p.wrap_openai_client(client) 25 | p.wrap_openai_client(aclient) 26 | 27 | 28 | @trace 29 | def call_azure(data: dict): 30 | response = client.chat.completions.create(**data) 31 | print(response) 32 | 33 | 34 | @trace 35 | def call_azure_stream(data: dict): 36 | data["stream"] = True 37 | stream = client.chat.completions.create(**data) 38 | for chunk in stream: 39 | if chunk.choices: 40 | print(chunk.choices[0].delta or "") 41 | 42 | 43 | @trace 44 | async def acall_azure(data: dict): 45 | response = await aclient.chat.completions.create(**data) 46 | print(response) 47 | 48 | 49 | @trace 50 | async def acall_azure_stream(data: dict): 51 | data["stream"] = True 52 | stream = await aclient.chat.completions.create(**data) 53 | async for chunk in stream: 54 | if chunk.choices: 55 | print(chunk.choices[0].delta or "") 56 | 57 | 58 | if __name__ == "__main__": 59 | azure_model = "AZURE_MODEL_NAME" # replace with your model name 60 | functions_example["model"] = azure_model 61 | simple_example["model"] = azure_model 62 | call_azure(functions_example) 63 | # call_azure_stream(simple_example) 64 | # call_azure_stream(functions_example) 65 | asyncio.run(acall_azure(simple_example)) 66 | # asyncio.run(acall_azure(functions_example)) 67 | # asyncio.run(acall_azure_stream(simple_example)) 68 | asyncio.run(acall_azure_stream(functions_example)) 69 | -------------------------------------------------------------------------------- /cookbook/openai/tracing_open_ai_streams.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | 4 | from dotenv import load_dotenv 5 | from openai import AsyncOpenAI, OpenAI 6 | 7 | from cookbook.assets.data.openai_input_examples import functions_example, simple_example_json 8 | from parea import Parea, trace 9 | 10 | load_dotenv() 11 | 12 | client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 13 | aclient = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) 14 | 15 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 16 | p.wrap_openai_client(client) 17 | p.wrap_openai_client(aclient) 18 | 19 | 20 | @trace 21 | def _call_openai_stream(data: dict): 22 | 
data["stream"] = True 23 | stream = client.chat.completions.create(**data) 24 | for chunk in stream: 25 | yield chunk 26 | 27 | 28 | def call_openai_stream(data: dict): 29 | stream = _call_openai_stream(data) 30 | for chunk in stream: 31 | print(chunk.choices[0].delta or "") 32 | 33 | 34 | @trace 35 | async def acall_openai_stream(data: dict): 36 | data["stream"] = True 37 | stream = await aclient.chat.completions.create(**data) 38 | async for chunk in stream: 39 | print(chunk.choices[0].delta or "") 40 | 41 | 42 | if __name__ == "__main__": 43 | # call_openai_stream(simple_example) 44 | call_openai_stream(simple_example_json) 45 | # call_openai_stream(functions_example) 46 | # asyncio.run(acall_openai_stream(simple_example)) 47 | asyncio.run(acall_openai_stream(functions_example)) 48 | -------------------------------------------------------------------------------- /cookbook/openai/tracing_openai_assistant_endpoint.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import openai 5 | from dotenv import load_dotenv 6 | from openai.pagination import SyncCursorPage 7 | from openai.types.beta import Thread 8 | from openai.types.beta.threads import Message, Run 9 | 10 | from parea import Parea, trace 11 | 12 | load_dotenv() 13 | 14 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 15 | client = openai.OpenAI() 16 | p.wrap_openai_client(client) 17 | 18 | QUESTIONS = ["I need to solve the equation `3x + 11 = 14`. Can you help me?", "Could you explain linear algebra to me?", "I don't like math. What can I do?"] 19 | 20 | 21 | def pretty_print(messages): 22 | print("# Messages") 23 | for m in messages: 24 | print(f"{m.role}: {m.content[0].text.value}") 25 | print() 26 | 27 | 28 | @trace 29 | def create_assistant(instructions: str): 30 | return client.beta.assistants.create( 31 | name="Math Tutor", 32 | instructions=instructions, 33 | tools=[{"type": "code_interpreter"}], 34 | model="gpt-4-turbo-preview", 35 | ) 36 | 37 | 38 | @trace 39 | def submit_message(assistant_id: str, thread_id: str, user_message: str) -> Run: 40 | client.beta.threads.messages.create(thread_id=thread_id, role="user", content=user_message) 41 | return client.beta.threads.runs.create( 42 | thread_id=thread_id, 43 | assistant_id=assistant_id, 44 | ) 45 | 46 | 47 | @trace 48 | def get_response(thread_id: str) -> SyncCursorPage[Message]: 49 | return client.beta.threads.messages.list(thread_id=thread_id, order="asc") 50 | 51 | 52 | @trace 53 | def create_thread_and_run(assistant_id: str, user_input: str) -> (Thread, Run): 54 | thread = client.beta.threads.create() 55 | run = submit_message(assistant_id, thread.id, user_input) 56 | return thread, run 57 | 58 | 59 | @trace 60 | def wait_on_run(run: Run, thread: Thread) -> Run: 61 | while run.status == "queued" or run.status == "in_progress": 62 | run = client.beta.threads.runs.retrieve( 63 | thread_id=thread.id, 64 | run_id=run.id, 65 | ) 66 | time.sleep(0.5) 67 | return run 68 | 69 | 70 | @trace 71 | def run_until_complete(assistant_id: str, run_instructions: str) -> SyncCursorPage[Message]: 72 | thread, run = create_thread_and_run(assistant_id, run_instructions) 73 | wait_on_run(run, thread) 74 | response = get_response(thread.id) 75 | pretty_print(response) 76 | return response 77 | 78 | 79 | @trace 80 | def main(assistant_instructions: str) -> SyncCursorPage[Message]: 81 | assistant = create_assistant(assistant_instructions) 82 | response = None 83 | for question in QUESTIONS: 84 | response = 
run_until_complete(assistant.id, question) 85 | return response 86 | 87 | 88 | if __name__ == "__main__": 89 | main("You are a personal math tutor. Write and run code to answer math questions.") 90 | -------------------------------------------------------------------------------- /cookbook/openai/tracing_templated_llm_calls.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | from openai import OpenAI 5 | 6 | from parea import Parea 7 | 8 | load_dotenv() 9 | 10 | client = OpenAI() 11 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 12 | p.wrap_openai_client(client) 13 | 14 | response = client.chat.completions.create( 15 | model="gpt-4", 16 | messages=[ 17 | {"role": "user", "content": "Make up {{number}} people."}, 18 | ], 19 | template_inputs={"number": "three"}, # with Parea wrapper, we can specify template_inputs which will appear as inputs and are used to fill-in the templated messages 20 | metadata={"template_id": "make-up-people-v1"}, # via Parea wrapper, can associate request with any metadata 21 | ) 22 | print(response.choices[0].message.content) 23 | -------------------------------------------------------------------------------- /cookbook/openai/tracing_tool_calling.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | from openai import OpenAI 5 | 6 | from parea import Parea 7 | 8 | load_dotenv() 9 | 10 | client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 11 | 12 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 13 | p.wrap_openai_client(client) 14 | 15 | 16 | def main(): 17 | tools = [ 18 | { 19 | "type": "function", 20 | "function": { 21 | "name": "get_current_weather", 22 | "description": "Get the current weather in a given location", 23 | "parameters": { 24 | "type": "object", 25 | "properties": { 26 | "location": { 27 | "type": "string", 28 | "description": "The city and state, e.g. 
San Francisco, CA", 29 | }, 30 | "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, 31 | }, 32 | "required": ["location"], 33 | }, 34 | }, 35 | } 36 | ] 37 | messages = [{"role": "user", "content": "What's the weather like in Boston today?"}] 38 | completion = client.chat.completions.create( 39 | model="gpt-4o", 40 | messages=messages, 41 | tools=tools, 42 | tool_choice="auto", 43 | ) 44 | messages.append({k: v for k, v in completion.choices[0].message.model_dump().items() if v is not None}) 45 | # messages.append(completion.choices[0].message) 46 | messages.append({"role": "tool", "content": "5 Celcius", "tool_call_id": completion.choices[0].message.tool_calls[0].id}) 47 | messages.append( 48 | { 49 | "role": "user", 50 | "content": "What's the weather like in Boston today?", 51 | } 52 | ) 53 | 54 | final_completion = client.chat.completions.create( 55 | model="gpt-4o", 56 | messages=messages, 57 | tools=tools, 58 | tool_choice="auto", 59 | ) 60 | 61 | print(final_completion) 62 | 63 | 64 | if __name__ == "__main__": 65 | main() 66 | -------------------------------------------------------------------------------- /cookbook/openai/tracing_with_images_open_ai.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import json 4 | import os 5 | 6 | from dotenv import load_dotenv 7 | from openai import OpenAI 8 | 9 | from parea import Parea, trace, trace_insert 10 | from parea.schemas import TraceLogImage 11 | 12 | load_dotenv() 13 | 14 | 15 | client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) 16 | 17 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 18 | p.wrap_openai_client(client) 19 | 20 | 21 | @trace 22 | def image_maker(query: str) -> str: 23 | response = client.images.generate(prompt=query, model="dall-e-3") 24 | image_url = response.data[0].url 25 | caption = {"original_prompt": query, "revised_prompt": response.data[0].revised_prompt} 26 | trace_insert({"images": [TraceLogImage(url=image_url, caption=json.dumps(caption))]}) 27 | return image_url 28 | 29 | 30 | @trace 31 | def ask_vision(image_url: str) -> Optional[str]: 32 | response = client.chat.completions.create( 33 | model="gpt-4o", 34 | messages=[ 35 | { 36 | "role": "user", 37 | "content": [ 38 | {"type": "text", "text": "What’s in this image?"}, 39 | {"type": "image_url", "image_url": {"url": image_url}}, 40 | ], 41 | } 42 | ], 43 | max_tokens=300, 44 | ) 45 | return response.choices[0].message.content 46 | 47 | 48 | @trace 49 | def main(query: str) -> str: 50 | image_url = image_maker(query) 51 | return ask_vision(image_url) 52 | 53 | 54 | if __name__ == "__main__": 55 | result = main("A cat sitting comfortably on a chair") 56 | print(result) 57 | -------------------------------------------------------------------------------- /cookbook/openai/tracing_with_open_ai_endpoint_directly.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | 3 | import os 4 | from datetime import datetime 5 | 6 | from dotenv import load_dotenv 7 | from openai import OpenAI 8 | 9 | from parea import Parea, get_current_trace_id, trace, trace_insert 10 | from parea.schemas import FeedbackRequest 11 | 12 | load_dotenv() 13 | 14 | client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) 15 | 16 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 17 | p.wrap_openai_client(client) 18 | 19 | 20 | def call_llm(data: List[dict], model: str = "gpt-4o", temperature: float = 0.0) -> str: 21 | return 
client.chat.completions.create(model=model, temperature=temperature, messages=data).choices[0].message.content 22 | 23 | 24 | @trace 25 | def argumentor(query: str, additional_description: str = "") -> str: 26 | return call_llm( 27 | [ 28 | { 29 | "role": "system", 30 | "content": f"""You are a debater making an argument on a topic. {additional_description}. 31 | The current time is {datetime.now().strftime("%Y-%m-%d")}""", 32 | }, 33 | {"role": "user", "content": f"The discussion topic is {query}"}, 34 | ] 35 | ) 36 | 37 | 38 | @trace 39 | def critic(argument: str) -> str: 40 | return call_llm( 41 | [ 42 | { 43 | "role": "system", 44 | "content": f"""You are a critic. 45 | What unresolved questions or criticism do you have after reading the following argument? 46 | Provide a concise summary of your feedback.""", 47 | }, 48 | {"role": "user", "content": argument}, 49 | ] 50 | ) 51 | 52 | 53 | @trace 54 | def refiner(query: str, additional_description: str, argument: str, criticism: str) -> str: 55 | return call_llm( 56 | [ 57 | { 58 | "role": "system", 59 | "content": f"""You are a debater making an argument on a topic. {additional_description}. 60 | The current time is {datetime.now().strftime("%Y-%m-%d")}""", 61 | }, 62 | {"role": "user", "content": f"""The discussion topic is {query}"""}, 63 | {"role": "assistant", "content": argument}, 64 | {"role": "user", "content": criticism}, 65 | { 66 | "role": "system", 67 | "content": "Please generate a new argument that incorporates the feedback from the user.", 68 | }, 69 | ], 70 | ) 71 | 72 | 73 | @trace 74 | def argument_chain(query: str, additional_description: str = "") -> Tuple[str, str]: 75 | trace_id = get_current_trace_id() 76 | trace_insert({"session_id": "cus_1234", "end_user_identifier": "user_1234"}, trace_id) 77 | argument = argumentor(query, additional_description) 78 | criticism = critic(argument) 79 | refined_argument = refiner(query, additional_description, argument, criticism) 80 | return refined_argument, trace_id 81 | 82 | 83 | @trace(session_id="cus_1234", end_user_identifier="user_1234") 84 | def json_call() -> str: 85 | completion = client.chat.completions.create( 86 | model="gpt-4o", 87 | messages=[{"role": "system", "content": "You are a helpful assistant talking in JSON."}, {"role": "user", "content": "What are you?"}], 88 | response_format={"type": "json_object"}, 89 | ) 90 | return completion.choices[0].message.content 91 | 92 | 93 | if __name__ == "__main__": 94 | result, trace_id = argument_chain( 95 | "Whether sparkling wine is good for you.", 96 | additional_description="Provide a concise, few sentence argument on why sparkling wine is good for you.", 97 | ) 98 | print(result) 99 | p.record_feedback( 100 | FeedbackRequest( 101 | trace_id=trace_id, 102 | score=0.7, # 0.0 (bad) to 1.0 (good) 103 | ) 104 | ) 105 | 106 | print(json_call()) 107 | -------------------------------------------------------------------------------- /cookbook/openai/tracing_with_openai_requests_api.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import httpx 4 | from dotenv import load_dotenv 5 | 6 | from cookbook.assets.data.openai_input_examples import functions_example, simple_example, tool_calling_example 7 | from parea import Parea, aprocess_stream_and_yield, convert_openai_raw_to_log, process_stream_and_yield, trace 8 | from parea.wrapper import get_formatted_openai_response 9 | 10 | load_dotenv() 11 | 12 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 13 | 14 | URL = 
"https://api.openai.com/v1/chat/completions" 15 | HEADERS = { 16 | "Content-Type": "application/json", 17 | "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}", 18 | } 19 | TIMEOUT = None 20 | 21 | # Sync HTTPX 22 | 23 | 24 | ## Normal 25 | @trace 26 | def call_openai_api(data: dict) -> str: 27 | with httpx.Client(timeout=TIMEOUT) as client: 28 | response = client.post(URL, json=data, headers=HEADERS) 29 | r = response.json() 30 | convert_openai_raw_to_log(r, data) # Add this line to enable tracing. Non-blocking 31 | return get_formatted_openai_response(r) # Return how you normally would 32 | 33 | 34 | ## Streaming 35 | @trace 36 | def call_openai_api_stream(data: dict): 37 | data["stream"] = True 38 | with httpx.stream("POST", URL, json=data, headers=HEADERS, timeout=TIMEOUT) as response: 39 | # Add process_stream_and_yield to enable tracing. Non-blocking 40 | for chunk in process_stream_and_yield(response, data): 41 | print(chunk) 42 | 43 | 44 | # Async HTTPX 45 | 46 | 47 | ## Normal 48 | @trace 49 | async def acall_openai_api(data: dict) -> str: 50 | async with httpx.AsyncClient(timeout=TIMEOUT) as client: 51 | response = await client.post(URL, json=data, headers=HEADERS) 52 | r = response.json() 53 | convert_openai_raw_to_log(r, data) # Add this line to enable tracing. Non-blocking 54 | return get_formatted_openai_response(r) # Return how you normally would 55 | 56 | 57 | ## Streaming 58 | @trace 59 | async def acall_openai_api_stream(data: dict): 60 | data["stream"] = True 61 | async with httpx.AsyncClient(timeout=TIMEOUT).stream("POST", URL, json=data, headers=HEADERS) as response: 62 | # Add process_stream_and_yield to enable tracing. Non-blocking 63 | async for chunk in aprocess_stream_and_yield(response, data): 64 | print(chunk) 65 | 66 | 67 | # TEST NESTED TRACING 68 | @trace 69 | def chain(): 70 | call_openai_api(simple_example) 71 | call_openai_api(functions_example) 72 | call_openai_api(tool_calling_example) 73 | call_openai_api_stream(tool_calling_example) 74 | 75 | 76 | @trace 77 | async def achain(): 78 | await acall_openai_api(simple_example) 79 | await acall_openai_api(functions_example) 80 | await acall_openai_api(tool_calling_example) 81 | await acall_openai_api_stream(tool_calling_example) 82 | 83 | 84 | if __name__ == "__main__": 85 | chain() 86 | # asyncio.run(achain()) 87 | -------------------------------------------------------------------------------- /cookbook/openai/tracing_with_openai_with_structured_output.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | from openai import OpenAI 5 | from pydantic import BaseModel 6 | 7 | from parea import Parea 8 | 9 | load_dotenv() 10 | 11 | client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) 12 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 13 | p.wrap_openai_client(client) 14 | 15 | 16 | class CalendarEvent(BaseModel): 17 | name: str 18 | date: str 19 | participants: list[str] 20 | 21 | 22 | def with_pydantic(): 23 | completion = client.beta.chat.completions.parse( 24 | model="gpt-4o-2024-08-06", 25 | messages=[ 26 | {"role": "system", "content": "Extract the event information."}, 27 | {"role": "user", "content": "Alice and Bob are going to a science fair on Friday."}, 28 | ], 29 | response_format=CalendarEvent, 30 | ) 31 | event = completion.choices[0].message.parsed 32 | print(event) 33 | 34 | 35 | def with_json_schema(): 36 | response = client.chat.completions.create( 37 | model="gpt-4o-2024-08-06", 38 | messages=[ 
39 | {"role": "system", "content": "You are a helpful math tutor. Guide the user through the solution step by step."}, 40 | {"role": "user", "content": "how can I solve 8x + 7 = -23"}, 41 | ], 42 | response_format={ 43 | "type": "json_schema", 44 | "json_schema": { 45 | "name": "math_response", 46 | "schema": { 47 | "type": "object", 48 | "properties": { 49 | "steps": { 50 | "type": "array", 51 | "items": { 52 | "type": "object", 53 | "properties": {"explanation": {"type": "string"}, "output": {"type": "string"}}, 54 | "required": ["explanation", "output"], 55 | "additionalProperties": False, 56 | }, 57 | }, 58 | "final_answer": {"type": "string"}, 59 | }, 60 | "required": ["steps", "final_answer"], 61 | "additionalProperties": False, 62 | }, 63 | "strict": True, 64 | }, 65 | }, 66 | ) 67 | print(response.choices[0].message.content) 68 | 69 | 70 | def with_tools(): 71 | tools = [ 72 | { 73 | "type": "function", 74 | "function": { 75 | "name": "get_delivery_date", 76 | "description": "Get the delivery date for a customer's order. Call this whenever you need to know the delivery date, for example when a customer asks 'Where is my package'", 77 | "parameters": { 78 | "type": "object", 79 | "properties": { 80 | "order_id": { 81 | "type": "string", 82 | "description": "The customer's order ID.", 83 | }, 84 | }, 85 | "required": ["order_id"], 86 | "additionalProperties": False, 87 | }, 88 | }, 89 | "strict": True, 90 | } 91 | ] 92 | 93 | messages = [ 94 | {"role": "system", "content": "You are a helpful customer support assistant. Use the supplied tools to assist the user."}, 95 | {"role": "user", "content": "Hi, can you tell me the delivery date for my order with id 5?"}, 96 | ] 97 | 98 | response = client.chat.completions.create( 99 | model="gpt-4o-2024-08-06", 100 | messages=messages, 101 | tools=tools, 102 | ) 103 | print(response.choices[0].message.tool_calls) 104 | 105 | 106 | if __name__ == "__main__": 107 | with_pydantic() 108 | with_json_schema() 109 | with_tools() 110 | -------------------------------------------------------------------------------- /cookbook/parea_llm_proxy/deployments/fetching_and_using_parea_deployments.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | 5 | from parea import Parea 6 | from parea.schemas.models import Completion, CompletionResponse, UseDeployedPrompt, UseDeployedPromptResponse 7 | 8 | load_dotenv() 9 | 10 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 11 | 12 | 13 | def main() -> CompletionResponse: 14 | return p.completion(Completion(deployment_id="p-4cbYJ0LIy0gaWb6Z819k7", llm_inputs={"x": "python", "y": "fastapi"})) 15 | 16 | 17 | def get_critic_prompt(val: str) -> UseDeployedPromptResponse: 18 | return p.get_prompt(UseDeployedPrompt(deployment_id="p-87NFVeQg30Hk2Hatw1h72", llm_inputs={"x": val})) 19 | 20 | 21 | if __name__ == "__main__": 22 | print(get_critic_prompt("Python")) 23 | # a = UseDeployedPromptResponse( 24 | # deployment_id="p-87NFVeQg30Hk2Hatw1h72", 25 | # name="deploy-test", 26 | # functions=[], 27 | # function_call=None, 28 | # prompt=Prompt( 29 | # raw_messages=[{"role": "user", "content": "Write a hello world program in {{x}}"}], 30 | # messages=[{"content": "Write a hello world program in Python", "role": "user"}], 31 | # inputs={"x": "Python"}, 32 | # ), 33 | # model="gpt-3.5-turbo-0125", 34 | # provider="openai", 35 | # model_params={"temp": 0.0, "top_p": 1.0, "max_length": None, "presence_penalty": 0.0, "frequency_penalty": 0.0}, 36 | # 
) 37 | -------------------------------------------------------------------------------- /cookbook/parea_llm_proxy/dynamic_few_shot_injection.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import os 4 | import random 5 | from datetime import datetime 6 | 7 | from dotenv import load_dotenv 8 | from pydantic import BaseModel 9 | 10 | from parea import Parea, trace, trace_insert 11 | from parea.schemas import Completion, CompletionResponse, FeedbackRequest, LLMInputs, Message, Role, TestCase 12 | 13 | load_dotenv() 14 | 15 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 16 | 17 | NUM_INTERACTIONS = 5 18 | 19 | 20 | class Person(BaseModel): 21 | name: str 22 | email: str 23 | 24 | 25 | class Email(BaseModel): 26 | contact: Person 27 | email_sent: str 28 | 29 | 30 | mock_DB: dict[str, Email] = {} 31 | 32 | 33 | def call_llm(messages: List[Message]) -> CompletionResponse: 34 | return p.completion(Completion(llm_configuration=LLMInputs(model="gpt-4o", messages=messages))) 35 | 36 | 37 | # Imitate collecting few shot examples from prod based on user feedback 38 | @trace 39 | def email_writer(main_objective: str, contact: Person, few_shot_examples: Optional[List[str]] = None) -> str: 40 | trace_insert({"end_user_identifier": contact.name, "metadata": {"has_few_shot_examples": bool(few_shot_examples)}}) 41 | 42 | few_shot_examples_prompt = ("\nHere are some examples of good emails\n" + "\n".join(few_shot_examples)) if few_shot_examples else "" 43 | messages = [ 44 | Message( 45 | role=Role.system, 46 | content=f"You are an AI who performs an email writing task based on the following objective: {main_objective}", 47 | ), 48 | # added date to prompt to avoid cache 49 | Message( 50 | role=Role.user, 51 | content=f""" 52 | Your email is from: {contact.model_dump()} 53 | Today's date is: {datetime.now().isoformat()} 54 | {few_shot_examples_prompt if few_shot_examples else ""} 55 | Email: 56 | """, 57 | ), 58 | ] 59 | response: CompletionResponse = call_llm(messages) 60 | trace_id = response.inference_id 61 | # insert into mock_DB 62 | mock_DB[trace_id] = Email(contact=contact, email_sent=response.content) 63 | return response.content 64 | 65 | 66 | def mimic_prod(few_shot_limit: int = 3): 67 | contact = Person(name="John Doe", email="jdoe@email.com") 68 | dataset = p.get_collection("Good_Email_Examples") 69 | selected_few_shot_examples = None 70 | if dataset: 71 | testcases: list[TestCase] = list(dataset.test_cases.values()) 72 | few_shot_examples = [case.inputs["email"] for case in testcases if case.inputs["user"] == contact.name] 73 | # This is simply taking most recent n examples. 
You can imagine adding additional logic to the dataset 74 | # that allows you to rank the examples based on some criteria 75 | selected_few_shot_examples = few_shot_examples[-few_shot_limit:] if few_shot_examples else None 76 | for interaction in range(NUM_INTERACTIONS): 77 | email = email_writer("Convincing email to gym to cancel membership early.", contact, selected_few_shot_examples) 78 | print(email) 79 | 80 | 81 | def add_good_email_example_to_dataset(user_name, email): 82 | # Note: if the test case collection doesn't exist, we will create a new collection with the provided name and data 83 | p.add_test_cases([{"user": user_name, "email": email}], name="Good_Email_Examples") 84 | 85 | 86 | def mimic_prod_feedback_collection(): 87 | # imagine the trace_id of the email is stored in state in the UI, so when the user provides feedback, we can use it 88 | trace_ids = mock_DB.keys() 89 | for trace_id in trace_ids: 90 | score = random.uniform(0, 1) 91 | p.record_feedback(FeedbackRequest(trace_id=trace_id, score=score)) 92 | # if the feedback is good, add it to the dataset to use later as a few-shot example 93 | if score >= 0.5: 94 | add_good_email_example_to_dataset(mock_DB[trace_id].contact.name, mock_DB[trace_id].email_sent) 95 | 96 | 97 | if __name__ == "__main__": 98 | mimic_prod() 99 | mimic_prod_feedback_collection() 100 | # future llm calls will now have few-shot examples from the feedback collection 101 | mimic_prod() 102 | print("Done") 103 | -------------------------------------------------------------------------------- /cookbook/parea_llm_proxy/tracing_with_agent.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Tuple 2 | 3 | import os 4 | import random 5 | from datetime import datetime 6 | 7 | import pytz 8 | from dotenv import load_dotenv 9 | 10 | from parea import Parea, get_current_trace_id, trace 11 | from parea.schemas import Completion, CompletionResponse, FeedbackRequest, LLMInputs, Message, ModelParams, Role 12 | 13 | load_dotenv() 14 | 15 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 16 | 17 | # Parea SDK makes it easy to use different LLMs with the same apis structure and standardized request/response schemas. 
18 | LLM_OPTIONS = [("gpt-3.5-turbo-0125", "openai"), ("gpt-4o", "openai"), ("claude-3-haiku-20240307", "anthropic"), ("claude-3-opus-20240229", "anthropic")] 19 | LIMIT = 1 20 | 21 | 22 | def dump_task(task): 23 | d = "" 24 | for tasklet in task: 25 | d += f"\n{tasklet.get('task_name','')}" 26 | d = d.strip() 27 | return d 28 | 29 | 30 | def call_llm( 31 | data: List[Message], 32 | model: str = "gpt-3.5-turbo", 33 | provider: str = "openai", 34 | temperature: float = 0.0, 35 | ) -> CompletionResponse: 36 | return p.completion( 37 | data=Completion( 38 | llm_configuration=LLMInputs( 39 | model=model, 40 | provider=provider, 41 | model_params=ModelParams(temp=temperature), 42 | messages=data, 43 | ) 44 | ) 45 | ) 46 | 47 | 48 | @trace 49 | def expound_task(main_objective: str, current_task: str) -> List[Dict[str, str]]: 50 | prompt = [ 51 | Message( 52 | role=Role.system, 53 | content=f"You are an AI who performs one task based on the following objective: {main_objective}\n" f"Your task: {current_task}\nResponse:", 54 | ), 55 | ] 56 | response = call_llm(prompt).content 57 | new_tasks = response.split("\n") if "\n" in response else [response] 58 | return [{"task_name": task_name} for task_name in new_tasks] 59 | 60 | 61 | @trace 62 | def generate_tasks(main_objective: str, expounded_initial_task: List[Dict[str, str]]) -> List[str]: 63 | select_llm_option = random.choice(LLM_OPTIONS) 64 | task_expansion = dump_task(expounded_initial_task) 65 | prompt = [ 66 | Message( 67 | role=Role.user, 68 | content=( 69 | f"You are an AI who creates tasks based on the following MAIN OBJECTIVE: {main_objective}\n" 70 | f"Create tasks pertaining directly to your previous research here:\n" 71 | f"{task_expansion}\nResponse:" 72 | ), 73 | ), 74 | ] 75 | response = call_llm(data=prompt, model=select_llm_option[0], provider=select_llm_option[1]).content 76 | new_tasks = response.split("\n") if "\n" in response else [response] 77 | task_list = [{"task_name": task_name} for task_name in new_tasks] 78 | new_tasks_list: List[str] = [] 79 | for task_item in task_list: 80 | task_description = task_item.get("task_name") 81 | if task_description: 82 | task_parts = task_description.strip().split(".", 1) 83 | if len(task_parts) == 2: 84 | new_task = task_parts[1].strip() 85 | new_tasks_list.append(new_task) 86 | 87 | return new_tasks_list 88 | 89 | 90 | @trace(name=f"run_agent-{datetime.now(pytz.utc)}") # You can provide a custom name other than the function name 91 | def run_agent(main_objective: str, initial_task: str = "") -> Tuple[List[Dict[str, str]], str]: 92 | trace_id = get_current_trace_id() 93 | generated_tasks = [] 94 | expounded_initial_task = expound_task(main_objective, initial_task) 95 | new_tasks = generate_tasks(main_objective, expounded_initial_task) 96 | task_counter = 0 97 | for task in new_tasks or []: 98 | task_counter += 1 99 | q = expound_task(main_objective, task) 100 | exp = dump_task(q) 101 | generated_tasks.append({f"task_{task_counter}": exp}) 102 | if task_counter >= LIMIT: 103 | break 104 | return generated_tasks, trace_id 105 | 106 | 107 | if __name__ == "__main__": 108 | result, trace_id = run_agent("Become a machine learning expert.", "Learn about tensors.") 109 | print(result) 110 | p.record_feedback(FeedbackRequest(trace_id=trace_id, score=0.642)) 111 | -------------------------------------------------------------------------------- /cookbook/parea_llm_proxy/tracing_with_parea_streaming.py: -------------------------------------------------------------------------------- 1 | import os 2 
| 3 | from dotenv import load_dotenv 4 | 5 | from parea import Parea, trace 6 | from parea.schemas import Completion, LLMInputs, Message, ModelParams, Role 7 | 8 | load_dotenv() 9 | 10 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 11 | 12 | completion = Completion( 13 | llm_configuration=LLMInputs( 14 | model="gpt-3.5-turbo-1106", 15 | model_params=ModelParams(temp=0.1), 16 | messages=[Message(role=Role.user, content="Write a short haiku about the moon.")], 17 | ) 18 | ) 19 | 20 | 21 | @trace 22 | def call_llm_stream(): 23 | stream = p.stream(completion) 24 | for chunk in stream: 25 | print(chunk) 26 | 27 | 28 | @trace 29 | async def acall_llm_stream(): 30 | stream = p.astream(completion) 31 | async for chunk in stream: 32 | print(chunk) 33 | 34 | 35 | if __name__ == "__main__": 36 | call_llm_stream() 37 | # asyncio.run(acall_llm_stream()) 38 | -------------------------------------------------------------------------------- /cookbook/tracing_with_threading.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import contextvars 4 | import os 5 | from concurrent.futures import ThreadPoolExecutor 6 | 7 | from dotenv import load_dotenv 8 | 9 | from parea import Parea, trace 10 | 11 | load_dotenv() 12 | 13 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 14 | 15 | 16 | @trace 17 | def llm_call(question): 18 | return f"I can't answer that question: {question}" 19 | 20 | 21 | @trace 22 | def multiple_llm_calls(question, n_calls: int = 2) -> List[str]: 23 | answers = [] 24 | with ThreadPoolExecutor(max_workers=2) as executor: 25 | for _ in range(n_calls): 26 | context = contextvars.copy_context() 27 | future = executor.submit(context.run, llm_call, question) 28 | answers.append(future.result()) 29 | return answers 30 | 31 | 32 | response = multiple_llm_calls("Who are you?") 33 | print(response) 34 | -------------------------------------------------------------------------------- /cookbook/use_dataset_for_finetuning.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | 5 | from parea import Parea 6 | 7 | load_dotenv() 8 | 9 | p = Parea(api_key=os.getenv("PAREA_API_KEY")) 10 | 11 | dataset = p.get_collection("DATASET_ID") # Replace DATASET_ID with the actual dataset ID 12 | 13 | dataset.write_to_finetune_jsonl("finetune.jsonl") 14 | -------------------------------------------------------------------------------- /cookiecutter-config-file.yml: -------------------------------------------------------------------------------- 1 | # This file contains values from Cookiecutter 2 | 3 | default_context: 4 | project_name: "parea-ai" 5 | project_description: "Parea python sdk" 6 | organization: "parea-ai" 7 | license: "Apache Software License 2.0" 8 | minimal_python_version: 3.9 9 | github_name: "parea-ai" 10 | email: "joel@parea.ai" 11 | version: "0.1.0" 12 | line_length: "180" 13 | create_example_template: "none" 14 | -------------------------------------------------------------------------------- /parea/__init__.py: -------------------------------------------------------------------------------- 1 | # type: ignore[attr-defined] 2 | # flake8: noqa 3 | 4 | """ 5 | Parea API SDK 6 | 7 | The Parea SDK allows you to interact with Parea from your product or service. 8 | To install the official [Python SDK](https://pypi.org/project/parea/), 9 | run the following command: ```bash pip install parea ```. 
10 | """ 11 | import sys 12 | 13 | from parea.api_client import get_version 14 | from parea.cache import InMemoryCache 15 | from parea.client import Parea 16 | from parea.experiment.cli import experiment as _experiment_cli 17 | from parea.experiment.dvc import parea_dvc_initialized 18 | from parea.experiment.experiment import Experiment 19 | from parea.helpers import gen_trace_id, write_trace_logs_to_csv 20 | from parea.parea_logger import parea_logger 21 | from parea.utils.trace_utils import clear_trace_context, get_current_trace_id, get_root_trace_id, trace, trace_insert 22 | from parea.wrapper.openai_raw_api_tracer import aprocess_stream_and_yield, process_stream_and_yield 23 | from parea.wrapper.utils import convert_openai_raw_to_log 24 | 25 | version: str = get_version() 26 | 27 | 28 | def main(): 29 | args = sys.argv[1:] 30 | if args[0] == "experiment": 31 | _experiment_cli(args[1:]) 32 | elif args[0] == "dvc-init": 33 | parea_dvc_initialized(only_check=False) 34 | else: 35 | print(f"Unknown command: '{args[0]}'") 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | -------------------------------------------------------------------------------- /parea/cache/__init__.py: -------------------------------------------------------------------------------- 1 | from .in_memory import InMemoryCache 2 | -------------------------------------------------------------------------------- /parea/cache/cache.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | from abc import ABC 4 | 5 | from parea.schemas.models import CacheRequest, TraceLog 6 | 7 | 8 | class Cache(ABC): 9 | def get(self, key: CacheRequest) -> Optional[TraceLog]: 10 | """ 11 | Get a normal response from the cache. 12 | 13 | Args: 14 | key (CacheRequest): The cache key. 15 | 16 | Returns: 17 | Optional[TraceLog]: The cached response, or None if the key was not found. 18 | 19 | # noqa: DAR202 20 | # noqa: DAR401 21 | """ 22 | raise NotImplementedError 23 | 24 | async def aget(self, key: CacheRequest) -> Optional[TraceLog]: 25 | """ 26 | Get a normal response from the cache. 27 | 28 | Args: 29 | key (CacheRequest): The cache key. 30 | 31 | Returns: 32 | Optional[TraceLog]: The cached response, or None if the key was not found. 33 | 34 | # noqa: DAR202 35 | # noqa: DAR401 36 | """ 37 | raise NotImplementedError 38 | 39 | def set(self, key: CacheRequest, value: TraceLog): 40 | """ 41 | Set a normal response in the cache. 42 | 43 | Args: 44 | key (CacheRequest): The cache key. 45 | value (TraceLog): The response to cache. 46 | 47 | # noqa: DAR401 48 | """ 49 | raise NotImplementedError 50 | 51 | async def aset(self, key: CacheRequest, value: TraceLog): 52 | """ 53 | Set a normal response in the cache. 54 | 55 | Args: 56 | key (CacheRequest): The cache key. 57 | value (TraceLog): The response to cache. 58 | 59 | # noqa: DAR401 60 | """ 61 | raise NotImplementedError 62 | 63 | def invalidate(self, key: CacheRequest): 64 | """ 65 | Invalidate a key in the cache. 66 | 67 | Args: 68 | key (CacheRequest): The cache key. 69 | 70 | # noqa: DAR401 71 | """ 72 | raise NotImplementedError 73 | 74 | async def ainvalidate(self, key: CacheRequest): 75 | """ 76 | Invalidate a key in the cache. 77 | 78 | Args: 79 | key (CacheRequest): The cache key. 80 | 81 | # noqa: DAR401 82 | """ 83 | raise NotImplementedError 84 | 85 | def log(self, value: TraceLog): 86 | """ 87 | Log a response in the cache. 88 | 89 | Args: 90 | value (TraceLog): The response to log. 
91 | 92 | # noqa: DAR401 93 | """ 94 | raise NotImplementedError 95 | 96 | def read_logs(self) -> List[TraceLog]: 97 | """ 98 | Read all logs from the cache. 99 | 100 | Returns: 101 | List[TraceLog]: All logs in the cache. 102 | 103 | # noqa: DAR202 104 | # noqa: DAR401 105 | """ 106 | raise NotImplementedError 107 | -------------------------------------------------------------------------------- /parea/cache/in_memory.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | from attr import asdict 4 | 5 | from parea.cache.cache import Cache 6 | from parea.schemas.models import CacheRequest, TraceLog 7 | from parea.utils.universal_encoder import json_dumps 8 | 9 | 10 | class InMemoryCache(Cache): 11 | def __init__(self): 12 | self.cache = {} 13 | self.logs = [] 14 | 15 | def get(self, key: CacheRequest) -> Optional[TraceLog]: 16 | return self.cache.get(json_dumps(asdict(key))) 17 | 18 | async def aget(self, key: CacheRequest) -> Optional[TraceLog]: 19 | return self.get(key) 20 | 21 | def set(self, key: CacheRequest, value: TraceLog): 22 | self.cache[json_dumps(asdict(key))] = value 23 | 24 | async def aset(self, key: CacheRequest, value: TraceLog): 25 | self.set(key, value) 26 | 27 | def invalidate(self, key: CacheRequest): 28 | key = json_dumps(asdict(key)) 29 | if key in self.cache: 30 | del self.cache[key] 31 | 32 | async def ainvalidate(self, key: CacheRequest): 33 | self.invalidate(key) 34 | 35 | def log(self, value: TraceLog): 36 | self.logs.append(value) 37 | 38 | def read_logs(self) -> List[TraceLog]: 39 | return self.logs.copy() 40 | -------------------------------------------------------------------------------- /parea/evals/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import EvalFuncTuple, call_openai, get_tokens, run_evals_in_thread_and_log, run_evals_synchronous 2 | -------------------------------------------------------------------------------- /parea/evals/chat/__init__.py: -------------------------------------------------------------------------------- 1 | from .goal_success_ratio import goal_success_ratio_factory 2 | -------------------------------------------------------------------------------- /parea/evals/chat/goal_success_ratio.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Optional 2 | 3 | import json 4 | 5 | from parea.evals.utils import call_openai 6 | from parea.schemas.log import Log 7 | 8 | 9 | def goal_success_ratio_factory( 10 | use_output: Optional[bool] = False, message_field: Optional[str] = None, model: Optional[str] = "gpt-4", is_azure: Optional[bool] = False 11 | ) -> Callable[[Log], float]: 12 | """ 13 | This factory creates an evaluation function that measures the success ratio of a goal-oriented conversation. 14 | Typically, a user interacts with a chatbot or AI assistant to achieve specific goals. 15 | This motivates to measure the quality of a chatbot by counting how many messages a user has to send before they reach their goal. 16 | One can further break this down by successful and unsuccessful goals to analyze user & LLM behavior. 17 | 18 | Concretely: 19 | 1. Delineate the conversation into segments by splitting them by the goals the user wants to achieve. 20 | 2. Assess if every goal has been reached. 21 | 3. Calculate the average number of messages sent per segment. 22 | 23 | Args: 24 | is_azure: Whether to use Azure as the model. 
Defaults to False. 25 | model: The model which should be used for grading. 26 | use_output (Optional[bool], optional): Whether to use the output of the log to access the messages. Defaults to False. 27 | message_field (Optional[str], optional): The name of the field in the log that contains the messages. 28 | Defaults to None. If None, the messages are taken from the configuration attribute. 29 | 30 | # noqa: DAR201 31 | # noqa: DAR401 32 | """ 33 | if use_output and message_field: 34 | raise ValueError("Only one of use_output and message_field can be set.") 35 | 36 | def goal_success_ratio(log: Log) -> float: 37 | """Returns the average amount of turns the user had to converse with the AI to reach their goals.""" 38 | if use_output: 39 | output_list_dicts = json.loads(log.output) 40 | messages = [m for m in output_list_dicts] 41 | elif message_field: 42 | messages = [m for m in log.inputs[message_field]] 43 | else: 44 | messages = [m.to_dict() for m in log.configuration.messages] 45 | if log.output: 46 | messages.append({"role": "assistant", "content": log.output}) 47 | 48 | # need to determine where does a new goal start 49 | conversation_segments = [] 50 | start_index = 0 51 | end_index = 3 52 | while end_index < len(messages): 53 | user_follows_same_goal = call_openai( 54 | [ 55 | { 56 | "role": "system", 57 | "content": "Look at the conversation and to determine if the user is still following the same goal " 58 | "or if they are following a new goal. If they are following the same goal, respond " 59 | "SAME_GOAL. Otherwise, respond NEW_GOAL. In any case do not answer the user request!", 60 | } 61 | ] 62 | + messages[start_index:end_index], 63 | model=model, 64 | is_azure=is_azure, 65 | ) 66 | 67 | if user_follows_same_goal == "SAME_GOAL": 68 | end_index += 2 69 | else: 70 | conversation_segments.append(messages[start_index : end_index - 1]) 71 | start_index = end_index - 1 72 | end_index += 2 73 | 74 | if start_index < len(messages): 75 | conversation_segments.append(messages[start_index:]) 76 | 77 | # for now assume that the user reached their goal in every segment 78 | # return the average amount of turns the user had to converse with the AI to reach their goals 79 | return sum([2 / len(segment) for segment in conversation_segments]) / len(conversation_segments) 80 | 81 | return goal_success_ratio 82 | -------------------------------------------------------------------------------- /parea/evals/dataset_level/__init__.py: -------------------------------------------------------------------------------- 1 | from .balanced_acc import balanced_acc_factory 2 | -------------------------------------------------------------------------------- /parea/evals/dataset_level/balanced_acc.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union 2 | 3 | from collections import defaultdict 4 | 5 | from parea.schemas import EvaluatedLog, EvaluationResult 6 | 7 | 8 | def balanced_acc_factory(score_name: str): 9 | def balanced_acc(logs: List[EvaluatedLog]) -> Union[EvaluationResult, None]: 10 | correct = defaultdict(int) 11 | total = defaultdict(int) 12 | for log in logs: 13 | if (eval_result := log.get_score(score_name)) is not None: 14 | correct[log.target] += int(eval_result.score) 15 | total[log.target] += 1 16 | recalls = [correct[key] / total[key] for key in correct] 17 | 18 | if len(recalls) == 0: 19 | return None 20 | 21 | return EvaluationResult(name=f"balanced_acc_{score_name}", score=sum(recalls) / len(recalls)) 22 | 23 | return 
balanced_acc 24 | -------------------------------------------------------------------------------- /parea/evals/general/__init__.py: -------------------------------------------------------------------------------- 1 | from .answer_matches_target_llm_grader import answer_matches_target_llm_grader_factory 2 | from .answer_matches_target_recall import answer_matches_target_recall 3 | from .answer_relevancy import answer_relevancy_factory 4 | from .levenshtein import levenshtein 5 | from .llm_grader import llm_grader_factory, llm_grader_gpt3t, llm_grader_gpt4 6 | from .lm_vs_lm import lm_vs_lm_factuality_factory, lm_vs_lm_factuality_gpt3t, lm_vs_lm_factuality_gpt4 7 | from .self_check import self_check 8 | from .semantic_similarity import semantic_similarity_factory, semantic_similarity_oai_3_large, semantic_similarity_oai_3_small, semantic_similarity_oai_ada_002 9 | -------------------------------------------------------------------------------- /parea/evals/general/answer_matches_target_llm_grader.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Optional, Union 2 | 3 | from parea.evals.utils import call_openai 4 | from parea.schemas.log import Log 5 | 6 | 7 | def answer_matches_target_llm_grader_factory( 8 | question_field: Optional[str] = "question", 9 | model: Optional[str] = "gpt-4", 10 | is_azure: Optional[bool] = False, 11 | ) -> Callable[[Log], Union[float, None]]: 12 | """Quantifies how much the generated answer matches the ground truth / target.""" 13 | 14 | def answer_matches_target_llm_grader(log: Log) -> Union[float, None]: 15 | question = log.inputs[question_field] 16 | output = log.output 17 | if (target := log.target) is None: 18 | return None 19 | response = call_openai( 20 | model=model, 21 | messages=[ 22 | {"role": "system", "content": "You are CompareGPT, a machine to verify the groundedness of predictions. Answer with " "only yes/no."}, 23 | { 24 | "role": "user", 25 | "content": f"""You are given a question, the corresponding ground-truth answer and a prediction from a model. Compare the "Ground-truth answer" and the "Prediction" to determine whether the prediction correctly answers the question. All information in the ground-truth answer must be present in the prediction, including numbers and dates. You must answer "no" if there are any specific details in the ground-truth answer that are not mentioned in the prediction. There should be no contradicting statements in the prediction. The prediction may contain extra information. If the prediction states something as a possibility, treat it as a definitive answer. 26 | 27 | Question: {question} 28 | Ground-truth answer: {target} 29 | Prediction: {output} 30 | 31 | CompareGPT response:""", 32 | }, 33 | ], 34 | temperature=0.0, 35 | is_azure=is_azure, 36 | ) 37 | return float("yes" in response.lower()) 38 | 39 | return answer_matches_target_llm_grader 40 | -------------------------------------------------------------------------------- /parea/evals/general/answer_matches_target_recall.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from collections import Counter 4 | 5 | from parea.evals.utils import get_tokens 6 | from parea.schemas.log import Log 7 | 8 | 9 | def answer_matches_target_recall(log: Log) -> Union[float, None]: 10 | """Prop. 
of tokens in target/reference answer which are also in model generation.""" 11 | if (target := log.target) is None: 12 | return None 13 | output = log.output 14 | model = log.configuration.model 15 | 16 | target_tokens = get_tokens(model, target) 17 | output_tokens = get_tokens(model, output) 18 | 19 | if len(target_tokens) == 0: 20 | return 1.0 21 | common_tokens = Counter(target_tokens) & Counter(output_tokens) 22 | num_common = sum(common_tokens.values()) 23 | return num_common / len(target_tokens) 24 | -------------------------------------------------------------------------------- /parea/evals/general/answer_relevancy.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Optional 2 | 3 | from parea.evals.utils import call_openai, embed 4 | from parea.schemas.log import Log 5 | 6 | 7 | def answer_relevancy_factory( 8 | question_field: str = "question", 9 | n_generations: int = 3, 10 | model: Optional[str] = "gpt-3.5-turbo-16k", 11 | embedding_model: str = "text-embedding-ada-002", 12 | is_azure: Optional[bool] = False, 13 | ) -> Callable[[Log], float]: 14 | """ 15 | This factory creates an evaluation function that measures how relevant the generated response is to the given question. 16 | It is based on the paper [RAGAS: Automated Evaluation of Retrieval Augmented Generation](https://arxiv.org/abs/2309.15217) 17 | which suggests using an LLM to generate multiple questions that fit the generated answer and measure the cosine 18 | similarity of the generated questions with the original one. 19 | 20 | Args: 21 | is_azure: Whether to use the Azure API. Defaults to False. 22 | embedding_model: The model which should be used for embedding the text. 23 | model: The model which should be used for grading. Defaults to "gpt-3.5-turbo-16k". 24 | question_field: The key name/field used for the question/query of the user. Defaults to "question". 25 | n_generations: The number of questions which should be generated. Defaults to 3. 26 | 27 | Returns: 28 | Callable[[Log], float]: A function that takes a log as input and returns a score between 0 and 1 indicating 29 | if the generated response is relevant to the query. 30 | 31 | Raises: 32 | ImportError: If numpy is not installed. 33 | """ 34 | try: 35 | import numpy as np 36 | except ImportError: 37 | raise ImportError("Please install numpy to use this metric.") 38 | 39 | def answer_relevancy(log: Log) -> float: 40 | """Quantifies how much the generated answer relates to the query.""" 41 | question = log.inputs[question_field] 42 | output = log.output 43 | 44 | generated_questions = call_openai( 45 | model=model, 46 | messages=[ 47 | { 48 | "role": "user", 49 | "content": f"""\ 50 | Generate question for the given answer. 51 | Answer:\nThe PSLV-C56 mission is scheduled to be launched on Sunday, 30 July 2023 at 06:30 IST / 01:00 UTC. It will be launched from the Satish Dhawan Space Centre, Sriharikota, Andhra Pradesh, India 52 | Question: When is the scheduled launch date and time for the PSLV-C56 mission, and where will it be launched from? 
53 | 54 | Answer: {output} 55 | Question:""", 56 | } 57 | ], 58 | temperature=0.0, 59 | n=n_generations, 60 | is_azure=is_azure, 61 | ) 62 | embedded_generated_questions = [embed(model=embedding_model, input=q, is_azure=is_azure) for q in generated_questions] 63 | embedded_question = embed(model=embedding_model, input=question, is_azure=is_azure) 64 | 65 | question_vec = np.asarray(embedded_question).reshape(1, -1) 66 | gen_question_vec = np.asarray(embedded_generated_questions) 67 | norm = np.linalg.norm(gen_question_vec, axis=1) * np.linalg.norm(question_vec, axis=1) 68 | return (np.dot(gen_question_vec, question_vec.T).reshape(-1) / norm).mean() 69 | 70 | return answer_relevancy 71 | -------------------------------------------------------------------------------- /parea/evals/general/levenshtein.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from Levenshtein import distance 4 | 5 | from parea.schemas import Log 6 | 7 | 8 | def levenshtein(log: Log) -> Union[float, None]: 9 | output = log.output 10 | if (target := log.target) is None: 11 | return None 12 | 13 | return levenshtein_distance(str(output), str(target)) 14 | 15 | 16 | def levenshtein_distance(output: str, target: str) -> float: 17 | max_len = max(len(x) for x in [output, target]) 18 | 19 | score = 1 20 | if max_len > 0: 21 | score = 1 - (distance(output, target) / max_len) 22 | 23 | return score 24 | -------------------------------------------------------------------------------- /parea/evals/general/llm_grader.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Optional 2 | 3 | import ast 4 | import re 5 | 6 | from parea.evals.utils import call_openai 7 | from parea.schemas.log import Log 8 | 9 | one_score_pattern = re.compile(r"\[\[(\d+\.?\d*)\]\]") 10 | one_score_pattern_backup = re.compile(r"\[(\d+\.?\d*)\]") 11 | 12 | 13 | def llm_grader_factory(model: str = "gpt-4", question_field: str = "question", is_azure: Optional[bool] = False) -> Callable[[Log], float]: 14 | """ 15 | This factory creates an evaluation function that uses an LLM to grade the response of an LLM to a given question. 16 | It is based on the paper [Judging LLM-as-a-Judge with MT-Bench and Chatbot Arena](https://arxiv.org/abs/2306.05685) 17 | which introduces general-purpose zero-shot prompt to rate responses from an LLM to a given question on a scale from 1-10. 18 | They find that GPT-4's ratings agree as much with a human rater as a human annotator agrees with another one (>80%). 19 | Further, they observe that the agreement with a human annotator increases as the response rating gets clearer. 20 | Additionally, they investigated how much the evaluating LLM overestimated its responses and found that GPT-4 and 21 | Claude-1 were the only models that didn't overestimate themselves. 22 | 23 | Args: 24 | is_azure: Whether to use the Azure API. Defaults to False. 25 | model: The model which should be used for grading. Currently, only supports OpenAI chat models. 26 | question_field: The key name/field used for the question/query of the user. Defaults to "question". 27 | 28 | Returns: 29 | Callable[[Log], float]: A function that takes a log as input and returns a score between 0 and 1 which is the 30 | rating of the response on a scale from 1-10 divided by 10. 
31 | """ 32 | 33 | def llm_grader(log: Log) -> float: 34 | question = log.inputs[question_field] 35 | output = log.output 36 | rating_response = call_openai( 37 | model=model, 38 | messages=[ 39 | {"role": "system", "content": "You are a helpful assistant."}, 40 | { 41 | "role": "user", 42 | "content": f"[Instruction]\nPlease act as an impartial judge and evaluate the quality of the response " 43 | f"provided by an AI assistant to the user question displayed below. Your evaluation should " 44 | f"consider factors such as the helpfulness, relevance, accuracy, depth, creativity, and " 45 | f"level of detail of the response. Begin your evaluation by providing a short explanation. " 46 | f"Be as objective as possible. After providing your explanation, you must rate the response " 47 | f'on a scale of 1 to 10 by strictly following this format: "[[rating]]", for example: ' 48 | f'"Rating: [[5]]".\n\n[Question]\n{question}\n\n[The Start of Assistant\'s Answer]' 49 | f"\n{output}\n[The End of Assistant's Answer]", 50 | }, 51 | ], 52 | temperature=0.0, 53 | is_azure=is_azure, 54 | ) 55 | match = re.search(one_score_pattern, rating_response) 56 | if not match: 57 | match = re.search(one_score_pattern_backup, rating_response) 58 | 59 | if match: 60 | rating = ast.literal_eval(match.groups()[0]) 61 | else: 62 | rating = 0 63 | 64 | return rating / 10.0 65 | 66 | return llm_grader 67 | 68 | 69 | llm_grader_gpt4 = llm_grader_factory("gpt-4") 70 | llm_grader_gpt3t = llm_grader_factory("gpt-3.5-turbo-16k") 71 | -------------------------------------------------------------------------------- /parea/evals/general/self_check.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from parea.evals.utils import call_openai, sent_tokenize 4 | from parea.schemas.log import Log 5 | 6 | 7 | def self_check(log: Log) -> Union[float, None]: 8 | """ 9 | Given that many API-based LLMs don't reliably give access to the log probabilities of the generated tokens, assessing 10 | the certainty of LLM predictions via perplexity isn't possible. 11 | The [SelfCheckGPT: Zero-Resource Black-Box Hallucination Detection for Generative Large Language Models](https://arxiv.org/abs/2303.08896) paper 12 | suggests measuring the average factuality of every sentence in a generated response. They generate additional responses 13 | from the LLM at a high temperature and check how much every sentence in the original answer is supported by the other generations. 14 | The intuition behind this is that if the LLM knows a fact, it's more likely to sample it. The authors find that this 15 | works well in detecting non-factual and factual sentences and ranking passages in terms of factuality. 16 | The authors noted that correlation with human judgment doesn't increase after 4-6 additional 17 | generations when using `gpt-3.5-turbo` to evaluate biography generations. 18 | 19 | Args: 20 | log (Log): The log object to of the trace evaluate. 21 | 22 | Returns: 23 | float: A score between 0 and 1 indicating the factuality of the response. 
24 | """ 25 | if log.configuration is None or log.configuration.messages is None: 26 | return None 27 | 28 | messages = [m.to_dict() for m in log.configuration.messages] 29 | 30 | n_sampled_outputs = 5 31 | sampled_outputs = [] 32 | for _ in range(n_sampled_outputs): 33 | response = call_openai( 34 | messages=messages, 35 | model=log.configuration.model, 36 | temperature=1.0, 37 | max_tokens=log.configuration.model_params.max_length, 38 | top_p=log.configuration.model_params.top_p, 39 | frequency_penalty=log.configuration.model_params.frequency_penalty, 40 | presence_penalty=log.configuration.model_params.presence_penalty, 41 | response_format=log.configuration.model_params.response_format, 42 | ) 43 | sampled_outputs.append(response) 44 | 45 | sentences = sent_tokenize(log.output) 46 | 47 | if len(sentences) == 0: 48 | return 0.0 49 | 50 | sentences_scores = [] 51 | for sentence in sentences: 52 | scores = [] 53 | for sampled_output in sampled_outputs: 54 | response = call_openai( 55 | messages=[ 56 | { 57 | "role": "user", 58 | "content": f"""Context: {sampled_output} 59 | Sentence: {sentence} 60 | Is the sentence supported by the context above? 61 | Answer Yes or No:""", 62 | } 63 | ], 64 | model="gpt-3.5-turbo", 65 | temperature=0.0, 66 | ) 67 | scores.append(float("yes" in response.lower())) 68 | sentences_scores.append(sum(scores) / len(scores)) 69 | 70 | return sum(sentences_scores) / len(sentences_scores) 71 | -------------------------------------------------------------------------------- /parea/evals/general/semantic_similarity.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Optional, Union 2 | 3 | import numpy as np 4 | 5 | from parea.evals.utils import embed 6 | from parea.schemas import Log 7 | 8 | 9 | def semantic_similarity_factory( 10 | embd_model: str = "text-embedding-3-small", 11 | is_azure: Optional[bool] = False, 12 | ) -> Callable[[Log], Union[float, None]]: 13 | def semantic_similarity(log: Log) -> Union[float, None]: 14 | """Calculates semantic similarity between output and target""" 15 | output = log.output 16 | if (target := log.target) is None: 17 | return None 18 | 19 | output_vector = embed(model=embd_model, input=output, is_azure=is_azure) 20 | target_vector = embed(model=embd_model, input=target, is_azure=is_azure) 21 | output_vector = np.array(output_vector) 22 | target_vector = np.array(target_vector) 23 | 24 | return (np.dot(output_vector, target_vector) / (np.linalg.norm(output_vector) * np.linalg.norm(target_vector)) + 1) / 2 25 | 26 | return semantic_similarity 27 | 28 | 29 | semantic_similarity_oai_3_small = semantic_similarity_factory() 30 | semantic_similarity_oai_3_large = semantic_similarity_factory(embd_model="text-embedding-3-large") 31 | semantic_similarity_oai_ada_002 = semantic_similarity_factory(embd_model="text-embedding-ada-002") 32 | -------------------------------------------------------------------------------- /parea/evals/rag/__init__.py: -------------------------------------------------------------------------------- 1 | from .answer_context_faithfulness_binary import answer_context_faithfulness_binary_factory 2 | from .answer_context_faithfulness_precision import answer_context_faithfulness_precision_factory 3 | from .answer_context_faithfulness_statement_level import answer_context_faithfulness_statement_level_factory 4 | from .context_has_answer import context_has_answer_factory 5 | from .context_query_relevancy import context_query_relevancy_factory 6 | from 
.context_ranking_listwise import context_ranking_listwise_factory 7 | from .context_ranking_pointwise import context_ranking_pointwise_factory 8 | from .percent_target_supported_by_context import percent_target_supported_by_context_factory 9 | -------------------------------------------------------------------------------- /parea/evals/rag/answer_context_faithfulness_binary.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Optional 2 | 3 | from parea.evals.utils import call_openai 4 | from parea.schemas.log import Log 5 | 6 | 7 | def answer_context_faithfulness_binary_factory( 8 | question_field: Optional[str] = "question", context_field: Optional[str] = "context", model: Optional[str] = "gpt-3.5-turbo-16k", is_azure: Optional[bool] = False 9 | ) -> Callable[[Log], float]: 10 | """ 11 | This factory creates an evaluation function that classifies if the generated answer was faithful to the given context. 12 | It is based on the paper [Evaluating Correctness and Faithfulness of Instruction-Following Models for Question Answering](https://arxiv.org/abs/2307.16877) 13 | which suggests using an LLM to flag any information in the generated answer that cannot be deduced from the given context. 14 | They find that GPT-4 is the best model for this analysis as measured by correlation with human judgment. 15 | 16 | Args: 17 | is_azure: Whether to use the Azure API. Defaults to False. 18 | question_field: The key name/field used for the question/query of the user. Defaults to "question". 19 | context_field: The key name/field used for the retrieved context. Defaults to "context". 20 | model: The model which should be used for grading. Currently, only supports OpenAI chat models. Defaults to "gpt-3.5-turbo-16k". 21 | 22 | Returns: 23 | Callable[[Log], float]: A function that takes a log as input and returns a score between 0 and 1 indicating 24 | if the generated answer was faithful to the given context. 25 | """ 26 | 27 | def answer_context_faithfulness_binary(log: Log) -> float: 28 | question = log.inputs[question_field] 29 | evidence = log.inputs[context_field] 30 | output = log.output 31 | response = call_openai( 32 | model=model, 33 | messages=[ 34 | {"role": "system", "content": "You are CompareGPT, a machine to verify the groundedness of predictions. Answer with " "only yes/no."}, 35 | { 36 | "role": "user", 37 | "content": f"You are given a question, the corresponding evidence and a prediction from a model. Compare " 38 | f'the "Prediction" and the "Evidence" to determine whether all the information of the ' 39 | f"prediction is present in the evidence or can be inferred from the evidence. 
You must answer " 40 | f'"no" if there are any specific details in the prediction that are not mentioned in the ' 41 | f"evidence or cannot be inferred from the evidence.\n\n" 42 | f"Question: {question}\n\nPrediction: {output}\n\nEvidence: {evidence}\n\nCompareGPT response:", 43 | }, 44 | ], 45 | temperature=0.0, 46 | is_azure=is_azure, 47 | ) 48 | return float("yes" in response.lower()) 49 | 50 | return answer_context_faithfulness_binary 51 | -------------------------------------------------------------------------------- /parea/evals/rag/answer_context_faithfulness_precision.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Optional 2 | 3 | from collections import Counter 4 | 5 | from parea.evals.utils import get_tokens 6 | from parea.schemas.log import Log 7 | 8 | 9 | def answer_context_faithfulness_precision_factory(context_field: Optional[str] = "context") -> Callable[[Log], float]: 10 | """ 11 | This factory creates an evaluation function that calculates how many tokens in the generated answer are also present in the retrieved context. 12 | It is based on the paper [Evaluating Correctness and Faithfulness of Instruction-Following Models for Question Answering](https://arxiv.org/abs/2307.16877) 13 | which finds that this method only slightly lags behind GPT-4 and outperforms GPT-3.5-turbo (see Table 4 from the above paper). 14 | 15 | Args: 16 | context_field: The key name/field used for the retrieved context. Defaults to "context". 17 | 18 | Returns: 19 | Callable[[Log], float]: A function that takes a log as input and returns a score between 0 and 1 indicating 20 | the proportion of tokens in the generated answer that are also present in the retrieved context. 21 | """ 22 | 23 | def answer_context_faithfulness_precision(log: Log) -> float: 24 | """Prop. of tokens in model generation which are also present in the retrieved context.""" 25 | context = log.inputs[context_field] 26 | model = log.configuration.model 27 | 28 | context_tokens = get_tokens(model, context) 29 | output_tokens = get_tokens(model, log.output) 30 | 31 | if len(context_tokens) == 0: 32 | return 1.0 33 | elif len(output_tokens) == 0: 34 | return 0.0 35 | 36 | common_tokens = Counter(context_tokens) & Counter(output_tokens) 37 | num_common = sum(common_tokens.values()) 38 | return num_common / len(output_tokens) 39 | 40 | return answer_context_faithfulness_precision 41 | -------------------------------------------------------------------------------- /parea/evals/rag/context_has_answer.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Optional 2 | 3 | import json 4 | 5 | from parea.evals import call_openai 6 | from parea.schemas import Log 7 | 8 | 9 | def context_has_answer_factory(question_field: Optional[str] = "question", model: Optional[str] = "gpt-3.5-turbo-0125", is_azure: Optional[bool] = False) -> Callable[[Log], bool]: 10 | """ 11 | This factory creates an evaluation metric which assesses whether the given context has the answer to the given question. 12 | It is useful to measure the performance of a model in a question-answering task by measuring Hit Rate without the need to know the correct answer. 13 | 14 | Args: 15 | question_field: The key name/field used for the question/query of the user. Defaults to "question". 16 | model: The model which should be used for grading. Currently, only supports OpenAI chat models. Defaults to "gpt-3.5-turbo-0125".
17 | is_azure: Whether to use the Azure API. Defaults to False. 18 | 19 | Returns: 20 | Callable[[Log], bool]: A function that takes a log as input and returns a boolean indicating if the context has the answer to the given question. 21 | """ 22 | 23 | def context_has_answer(log: Log) -> bool: 24 | question = log.inputs[question_field] 25 | answer = str(log.output) 26 | 27 | formatted_messages = [ 28 | { 29 | "role": "user", 30 | "content": f"""You are given a question and a list of answers. The answers were retrieved from a database which contains the question answer pairs. You need to decide if any of the given answers is the answer to the given question. 31 | 32 | Question: 33 | {question} 34 | 35 | Answers: 36 | {answer} 37 | 38 | Answer in the following JSON format: 39 | {{"thoughts": "<your thoughts>", "final_verdict": "<true or false>"}}""", 40 | } 41 | ] 42 | 43 | response = call_openai(model=model, temperature=0.0, messages=formatted_messages, response_format={"type": "json_object"}, is_azure=is_azure) 44 | final_verdict = json.loads(response).get("final_verdict", "").lower() 45 | return final_verdict == "true" 46 | 47 | return context_has_answer 48 | -------------------------------------------------------------------------------- /parea/evals/rag/context_query_relevancy.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, List, Optional 2 | 3 | from parea.evals.utils import call_openai, get_context, sent_tokenize 4 | from parea.schemas.log import Log 5 | 6 | 7 | def context_query_relevancy_factory( 8 | question_field: str = "question", context_fields: Optional[List[str]] = None, model: Optional[str] = "gpt-3.5-turbo-16k", is_azure: Optional[bool] = False 9 | ) -> Callable[[Log], float]: 10 | """ 11 | This factory creates an evaluation function that measures how relevant the retrieved context is to the given question. 12 | It is based on the paper [RAGAS: Automated Evaluation of Retrieval Augmented Generation](https://arxiv.org/abs/2309.15217) 13 | which suggests using an LLM to extract any sentence from the retrieved context relevant to the query. Then, calculate 14 | the ratio of relevant sentences to the total number of sentences in the retrieved context. 15 | 16 | Args: 17 | is_azure: Whether to use the Azure API. Defaults to False. 18 | model: The model which should be used for grading. Defaults to "gpt-3.5-turbo-16k". 19 | question_field: The key name/field used for the question/query of the user. Defaults to "question". 20 | context_fields: An optional list of key names/fields used for the retrieved contexts in the input to the function. If empty list or None, it will use the output field of the log as context. Defaults to None. 21 | 22 | Returns: 23 | Callable[[Log], float]: A function that takes a log as input and returns a score between 0 and 1 indicating 24 | if the retrieved context is relevant to the query. 25 | """ 26 | 27 | def context_query_relevancy(log: Log) -> float: 28 | """Quantifies how much the retrieved context relates to the query.""" 29 | question = log.inputs[question_field] 30 | context = get_context(log, context_fields) 31 | 32 | extracted_sentences = call_openai( 33 | model=model, 34 | messages=[ 35 | { 36 | "role": "user", 37 | "content": f"""\ 38 | Please extract relevant sentences from the provided context that are absolutely required to answer the following question. If no relevant sentences are found, or if you believe the question cannot be answered from the given context, return the phrase "Insufficient Information". 
While extracting candidate sentences you're not allowed to make any changes to sentences from given context. 39 | 40 | question:{question} 41 | context:\n{context} 42 | candidate sentences:\n""", 43 | } 44 | ], 45 | temperature=0.0, 46 | is_azure=is_azure, 47 | ).strip() 48 | if "insufficient information" in extracted_sentences.lower() and abs(len(extracted_sentences) - len("insufficient information")) < 10: 49 | return 0.0 50 | else: 51 | n_extracted_sentences = len(sent_tokenize(extracted_sentences)) 52 | n_context_sentences = len(sent_tokenize(context)) 53 | return n_extracted_sentences / n_context_sentences 54 | 55 | return context_query_relevancy 56 | -------------------------------------------------------------------------------- /parea/evals/summary/__init__.py: -------------------------------------------------------------------------------- 1 | from .factual_inconsistency_binary import factual_inconsistency_binary_factory 2 | from .factual_inconsistency_scale import factual_inconsistency_scale_factory 3 | from .likert_scale import likert_scale_factory 4 | -------------------------------------------------------------------------------- /parea/evals/summary/factual_inconsistency_binary.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Optional 2 | 3 | from parea.evals.utils import call_openai 4 | from parea.schemas.log import Log 5 | 6 | 7 | def factual_inconsistency_binary_factory(article_field: Optional[str] = "article", model: Optional[str] = "gpt-4", is_azure: Optional[bool] = False) -> Callable[[Log], float]: 8 | """ 9 | This factory creates an evaluation function that classifies if a summary is factually inconsistent with the original text. 10 | It is based on the paper [ChatGPT as a Factual Inconsistency Evaluator for Text Summarization](https://arxiv.org/abs/2303.15621) 11 | which suggests using an LLM to assess the factuality of a summary by measuring how consistent the summary is with 12 | the original text, posed as a binary classification. They find that `gpt-3.5-turbo-0301` outperforms 13 | baseline methods such as SummaC and QuestEval when identifying factually inconsistent summaries. 14 | 15 | Args: 16 | article_field: The key name/field used for the content which should be summarized. Defaults to "article". 17 | model: The model which should be used for grading. Currently, only supports OpenAI chat models. Defaults to "gpt-4". 18 | is_azure: Whether to use the Azure API. Defaults to False. 19 | 20 | Returns: 21 | Callable[[Log], float]: A function that takes a log as input and returns a score between 0 and 1 indicating 22 | if the generated summary is factually consistent with the original text. 23 | """ 24 | 25 | def factual_inconsistency_binary(log: Log) -> float: 26 | article = log.inputs[article_field] 27 | output = log.output 28 | prompt = f"""Decide if the following summary is consistent with the corresponding article. Note that consistency means all information in the summary is supported by the article. 
29 | Article: {article} 30 | Summary: {output} 31 | Explain your reasoning step by step then answer (yes or no) the question:""" 32 | response = call_openai( 33 | model=model, 34 | messages=[ 35 | {"role": "user", "content": prompt}, 36 | ], 37 | temperature=0.0, 38 | is_azure=is_azure, 39 | ) 40 | return float("yes" in response.lower()) 41 | 42 | return factual_inconsistency_binary 43 | -------------------------------------------------------------------------------- /parea/evals/summary/factual_inconsistency_scale.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Optional 2 | 3 | import re 4 | 5 | from parea.evals.utils import call_openai 6 | from parea.schemas.log import Log 7 | 8 | 9 | def factual_inconsistency_scale_factory(article_field: Optional[str] = "article", model: Optional[str] = "gpt-4", is_azure: Optional[bool] = False) -> Callable[[Log], float]: 10 | """ 11 | This factory creates an evaluation function that grades the factual consistency of a summary with the article on a scale from 1 to 10. 12 | It is based on the paper [ChatGPT as a Factual Inconsistency Evaluator for Text Summarization](https://arxiv.org/abs/2303.15621) 13 | which finds that using `gpt-3.5-turbo-0301` leads to a higher correlation with human expert judgment when grading 14 | the factuality of summaries on a scale from 1 to 10 than baseline methods such as SummaC and QuestEval. 15 | 16 | Args: 17 | article_field: The key name/field used for the content which should be summarized. Defaults to "article". 18 | model: The model which should be used for grading. Currently, only supports OpenAI chat models. Defaults to "gpt-4". 19 | is_azure: Whether to use the Azure API. Defaults to False. 20 | 21 | Returns: 22 | Callable[[Log], float]: A function that takes a log as input and returns a score between 0 and 1 indicating 23 | if the generated summary is factually consistent with the original text. 24 | """ 25 | 26 | def factual_inconsistency_scale(log: Log) -> float: 27 | article = log.inputs[article_field] 28 | output = log.output 29 | prompt = f"""Score the following summary given the corresponding article with respect to consistency from 1 to 10. Note that consistency measures how much information included in the summary is present in the source article. 10 points indicate the summary contains only statements that are entailed by the source document. 
30 | Article: {article} 31 | Summary: {output} 32 | Marks: """ 33 | response = call_openai( 34 | model=model, 35 | messages=[ 36 | {"role": "user", "content": prompt}, 37 | ], 38 | temperature=0.0, 39 | is_azure=is_azure, 40 | ) 41 | 42 | pattern = re.compile(r"\d+") 43 | match = pattern.search(response) 44 | if match: 45 | score = match.group() 46 | else: 47 | score = 0 48 | 49 | return float(score) / 10.0 50 | 51 | return factual_inconsistency_scale 52 | -------------------------------------------------------------------------------- /parea/evals/summary/likert_scale.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Optional 2 | 3 | import re 4 | 5 | from parea.evals.utils import call_openai 6 | from parea.schemas.log import Log 7 | 8 | 9 | def likert_scale_factory(article_field: Optional[str] = "article", model: Optional[str] = "gpt-4", is_azure: Optional[bool] = False) -> Callable[[Log], float]: 10 | """ 11 | This factory creates an evaluation function that grades the quality of a summary on a Likert scale from 1-5 along 12 | the dimensions of relevance, consistency, fluency, and coherence. It is based on the paper 13 | [Human-like Summarization Evaluation with ChatGPT](https://arxiv.org/abs/2304.02554) which finds that using `gpt-3.5-0301` 14 | leads to a higher correlation with human expert judgment when grading summaries on a Likert scale from 1-5 than baseline 15 | methods. Noteworthy is that [BARTScore](https://arxiv.org/abs/2106.11520) was very competitive with `gpt-3.5-0301`. 16 | 17 | Args: 18 | is_azure: Whether to use the Azure API. Defaults to False. 19 | article_field: The key name/field used for the content which should be summarized. Defaults to "article". 20 | model: The model which should be used for grading. Currently, only supports OpenAI chat models. Defaults to "gpt-4". 21 | 22 | Returns: 23 | Callable[[Log], float]: A function that takes a log as input and returns a score between 0 and 1 indicating 24 | the quality of the summary on a Likert scale from 1-5 along the dimensions of relevance, consistency, fluency, and coherence. 25 | """ 26 | 27 | def likert_scale(log: Log) -> float: 28 | article = log.inputs[article_field] 29 | output = log.output 30 | prompt = f"""Evaluate the quality of summaries written for a news article. Rate each summary on four dimensions: relevance, consistency, fluency, and coherence. You should rate on a scale from 1 (worst) to 5 (best). 31 | 32 | Definitions are as follows: 33 | Relevance: The rating measures how well the summary captures the key points of the article. Consider whether all and only the important aspects are contained in the summary. 34 | Consistency: The rating measures whether the facts in the summary are consistent with the facts in the original article. Consider whether the summary does reproduce all facts accurately and does not make up untrue information. 35 | Fluency: This rating measures the quality of individual sentences, whether they are well-written and grammatically correct. Consider the quality of individual sentences. 36 | Coherence: The rating measures the quality of all sentences collectively, to fit together and sound natural. Consider the quality of the summary as a whole. 
37 | 38 | The article and the summary are given below: 39 | Article: {article} 40 | Summary: {output}""" 41 | response = call_openai( 42 | model=model, 43 | messages=[ 44 | {"role": "user", "content": prompt}, 45 | ], 46 | temperature=0.0, 47 | is_azure=is_azure, 48 | ) 49 | 50 | # extract the scores 51 | pattern = re.compile(r"\d+") 52 | matches = pattern.findall(response) 53 | if matches: 54 | scores = matches 55 | else: 56 | scores = [0, 0, 0, 0] 57 | 58 | # normalize the scores 59 | scores = [float(score) / 5.0 for score in scores] 60 | 61 | # average the scores 62 | return sum(scores) / len(scores) 63 | 64 | return likert_scale 65 | -------------------------------------------------------------------------------- /parea/experiment/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parea-ai/parea-sdk-py/968486f9bf7aa4741bb307739f9dec573b30bc95/parea/experiment/__init__.py -------------------------------------------------------------------------------- /parea/experiment/cli.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import argparse 4 | import csv 5 | import os 6 | import sys 7 | import traceback 8 | from importlib import util 9 | 10 | from .experiment import _experiments 11 | 12 | 13 | def load_from_path(module_path): 14 | # Ensure the directory of user-provided script is in the system path 15 | dir_name = os.path.dirname(module_path) 16 | if dir_name not in sys.path: 17 | sys.path.insert(0, dir_name) 18 | 19 | module_name = os.path.basename(module_path) 20 | spec = util.spec_from_file_location(module_name, module_path) 21 | module = util.module_from_spec(spec) 22 | spec.loader.exec_module(module) 23 | 24 | if spec.name not in sys.modules: 25 | sys.modules[spec.name] = module 26 | 27 | 28 | def read_input_file(file_path) -> List[dict]: 29 | with open(file_path) as file: 30 | reader = csv.DictReader(file) 31 | inputs = list(reader) 32 | return inputs 33 | 34 | 35 | def experiment(args): 36 | parser = argparse.ArgumentParser() 37 | parser.add_argument("file", help="Path to the experiment", type=str) 38 | parser.add_argument("--run_name", help="Name of the experiment run", type=str, default=None) 39 | 40 | parsed_args = parser.parse_args(args) 41 | 42 | try: 43 | load_from_path(parsed_args.file) 44 | except Exception as e: 45 | print(f"Error loading function: {e}\n", file=sys.stderr) 46 | traceback.print_exc() 47 | sys.exit(1) 48 | 49 | for _experiment in _experiments: 50 | _experiment.run(parsed_args.run_name) 51 | -------------------------------------------------------------------------------- /parea/experiment/datasets.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional 2 | 3 | from parea.helpers import gen_random_name 4 | from parea.schemas.models import CreateTestCase, CreateTestCaseCollection 5 | from parea.utils.universal_encoder import json_dumps 6 | 7 | 8 | def create_test_collection(data: List[Dict[str, Any]], name: Optional[str] = None) -> CreateTestCaseCollection: 9 | """Create a test case collection from a dictionary of test cases. 10 | Args: 11 | data: list of key-value pairs where keys represent input names. 12 | Each item in the list represent a test case row. 13 | Target and Tags are reserved keys. There can only be one target and tags key per dict item. 14 | If target is present it will represent the target/expected response for the inputs. 
15 | If tags are present they must be a list of json_serializable values. 16 | name: A unique name for the test collection. If not provided a random name will be generated. 17 | 18 | Returns: CreateTestCaseCollection 19 | """ 20 | if not name: 21 | name = gen_random_name() 22 | 23 | column_names = list({k for row in data for k in row.keys() if k not in ["target", "tags"]}) 24 | test_cases = create_test_cases(data) 25 | 26 | return CreateTestCaseCollection(name=name, column_names=column_names, test_cases=test_cases) 27 | 28 | 29 | def create_test_cases(data: List[Dict[str, Any]]) -> List[CreateTestCase]: 30 | """Create a list of test cases from a dictionary. 31 | Args: 32 | data: list of key-value pairs where keys represent input names. 33 | Each item in the list represent a test case row. 34 | Target and Tags are reserved keys. There can only be one target and tags key per dict item. 35 | If target is present it will represent the target/expected response for the inputs. 36 | If tags are present they must be a list of json_serializable values. 37 | 38 | Returns: List[CreateTestCase] 39 | """ 40 | test_cases: List[CreateTestCase] = [] 41 | for row in data: 42 | inputs: Dict[str, str] = {} 43 | target: Optional[str] = None 44 | tags: list = [] 45 | for k, v in row.items(): 46 | if k == "target": 47 | if target is not None: 48 | print("There can only be one target key per test case. Only the first target will be used.") 49 | target = json_dumps(v) 50 | elif k == "tags": 51 | if not isinstance(v, list): 52 | raise ValueError("Tags must be a list of json serializable values.") 53 | if tags: 54 | print("There can only be one tags key per test case. Only the first set of tags will be used.") 55 | tags = [tag if isinstance(tag, str) else json_dumps(tag) for tag in v] 56 | else: 57 | inputs[k] = v if isinstance(v, str) else json_dumps(v) 58 | test_cases.append(CreateTestCase(inputs=inputs, target=target, tags=tags)) 59 | 60 | return test_cases 61 | -------------------------------------------------------------------------------- /parea/experiment/dvc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | 4 | from parea.constants import PAREA_DVC_DIR, PAREA_DVC_METRICS_FILE, PAREA_DVC_YAML_FILE 5 | from parea.utils.universal_encoder import json_dumps 6 | 7 | 8 | def is_git_repo(): 9 | try: 10 | subprocess.check_output(["git", "branch"], stderr=subprocess.STDOUT) 11 | return True 12 | except: 13 | return False 14 | 15 | 16 | def save_results_to_dvc_if_init(experiment_name: str, metrics: dict): 17 | if not parea_dvc_initialized(only_check=True): 18 | return 19 | write_metrics_to_dvc(metrics) 20 | try: 21 | subprocess.run(["dvc", "exp", "save", "-n", experiment_name], check=True) 22 | except subprocess.CalledProcessError as e: 23 | print(f"Failed to save results to DVC: {e}") 24 | 25 | 26 | def write_metrics_to_dvc(metrics: dict): 27 | git_root = subprocess.check_output(["git", "rev-parse", "--show-toplevel"], text=True, stderr=subprocess.STDOUT).strip() 28 | with open(os.path.join(git_root, PAREA_DVC_METRICS_FILE), "w") as f: 29 | f.write(json_dumps(metrics, indent=2)) 30 | 31 | 32 | def _check_has_been_committed(git_root: str, file: str) -> bool: 33 | output = subprocess.check_output(["git", "log", "--", file], cwd=git_root, text=True, stderr=subprocess.STDOUT) 34 | return output and len(output) > 0 35 | 36 | 37 | def parea_dvc_initialized(only_check: bool) -> bool: 38 | print_fn = print if not only_check else lambda *args, 
**kwargs: None 39 | 40 | if not is_git_repo(): 41 | print_fn("Git repository is not found. Please run `git init` to initialize a git repository.") 42 | return False 43 | 44 | git_root = subprocess.check_output(["git", "rev-parse", "--show-toplevel"], text=True, stderr=subprocess.STDOUT).strip() 45 | 46 | # make sure DVC is initialized 47 | if not os.path.exists(os.path.join(git_root, ".dvc")): 48 | print_fn("DVC is not initialized. Please run `dvc init` to initialize DVC.") 49 | return False 50 | 51 | # make sure dvc.yaml and metrics.json exist in .parea directory 52 | if not os.path.exists(os.path.join(git_root, PAREA_DVC_YAML_FILE)): 53 | if only_check: 54 | return False 55 | else: 56 | print_fn(f"{PAREA_DVC_YAML_FILE} is not found. Creating the file.") 57 | if not os.path.exists(os.path.join(git_root, PAREA_DVC_DIR)): 58 | os.mkdir(os.path.join(git_root, PAREA_DVC_DIR)) 59 | with open(os.path.join(git_root, PAREA_DVC_YAML_FILE), "w") as f: 60 | f.write("metrics:\n - metrics.json\n") 61 | subprocess.run(["git", "add", PAREA_DVC_YAML_FILE], cwd=git_root, check=True) 62 | if not os.path.exists(os.path.join(git_root, PAREA_DVC_METRICS_FILE)): 63 | if only_check: 64 | return False 65 | else: 66 | print_fn(f"{PAREA_DVC_METRICS_FILE} is not found. Creating the file.") 67 | if not os.path.exists(os.path.join(git_root, PAREA_DVC_DIR)): 68 | os.mkdir(os.path.join(git_root, PAREA_DVC_DIR)) 69 | write_metrics_to_dvc({}) 70 | subprocess.run(["git", "add", PAREA_DVC_METRICS_FILE], cwd=git_root, check=True) 71 | 72 | # make sure dvc.yaml and metrics.json are committed 73 | dvc_yaml_file_missing = not _check_has_been_committed(git_root, PAREA_DVC_YAML_FILE) 74 | dvc_metrics_file_missing = not _check_has_been_committed(git_root, PAREA_DVC_METRICS_FILE) 75 | if dvc_metrics_file_missing: 76 | print_fn(f"{PAREA_DVC_METRICS_FILE} is not committed. Please commit the file to your git history.") 77 | if dvc_yaml_file_missing: 78 | print_fn(f"{PAREA_DVC_YAML_FILE} is not committed. 
Please to commit the file to your git history.") 79 | if dvc_metrics_file_missing or dvc_yaml_file_missing: 80 | return False 81 | 82 | print_fn("Parea's DVC integration is initialized.") 83 | return True 84 | -------------------------------------------------------------------------------- /parea/parea_logger.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional 2 | 3 | import json 4 | import logging 5 | import os 6 | 7 | from attrs import asdict, define, field 8 | from cattrs import structure 9 | 10 | from parea.api_client import HTTPClient 11 | from parea.constants import PAREA_OS_ENV_EXPERIMENT_UUID 12 | from parea.helpers import serialize_metadata_values 13 | from parea.schemas.log import TraceIntegrations 14 | from parea.schemas.models import CreateGetProjectResponseSchema, TraceLog, UpdateLog 15 | from parea.utils.trace_integrations.langchain_utils import _dumps_json 16 | from parea.utils.universal_encoder import json_dumps 17 | 18 | logger = logging.getLogger() 19 | 20 | LOG_ENDPOINT = "/trace_log" 21 | VENDOR_LOG_ENDPOINT = "/trace_log/{vendor}" 22 | 23 | 24 | @define 25 | class PareaLogger: 26 | _client: HTTPClient = field(init=False, default=None) 27 | _project_uuid: str = field(init=False, default=None) 28 | _project_name: str = field(init=False, default=None) 29 | 30 | def set_client(self, client: HTTPClient) -> None: 31 | self._client = client 32 | 33 | def set_project_uuid(self, project_uuid: str, project_name: str) -> None: 34 | self._project_uuid = project_uuid 35 | self._project_name = project_name 36 | 37 | def _get_project_uuid(self) -> str: 38 | try: 39 | if not self._project_uuid: 40 | self._project_uuid = self._create_or_get_project(self._project_name or "default").uuid 41 | return self._project_uuid 42 | except Exception as e: 43 | logger.error(f"PareaLogger: Error getting project uuid for project {self._project_name}: {e}") 44 | raise 45 | 46 | def _create_or_get_project(self, name: str) -> CreateGetProjectResponseSchema: 47 | r = self._client.request( 48 | "POST", 49 | "/project", 50 | data={"name": name}, 51 | ) 52 | return structure(r.json(), CreateGetProjectResponseSchema) 53 | 54 | def update_log(self, data: UpdateLog) -> None: 55 | data = serialize_metadata_values(data) 56 | self._client.request( 57 | "PUT", 58 | LOG_ENDPOINT, 59 | data=asdict(data), 60 | ) 61 | 62 | def record_log(self, data: TraceLog) -> None: 63 | data = serialize_metadata_values(data) 64 | data.project_uuid = self._get_project_uuid() 65 | self._client.request( 66 | "POST", 67 | LOG_ENDPOINT, 68 | data=asdict(data), 69 | ) 70 | 71 | async def arecord_log(self, data: TraceLog) -> None: 72 | data = serialize_metadata_values(data) 73 | data.project_uuid = self._get_project_uuid() 74 | await self._client.request_async( 75 | "POST", 76 | LOG_ENDPOINT, 77 | data=asdict(data), 78 | ) 79 | 80 | def default_log(self, data: TraceLog) -> None: 81 | if self._client: 82 | if data.target: 83 | data.target = json_dumps(data.target) 84 | self.record_log(data) 85 | 86 | def record_vendor_log(self, data: Dict[str, Any], vendor: TraceIntegrations) -> None: 87 | data["project_uuid"] = self._get_project_uuid() 88 | if experiment_uuid := os.getenv(PAREA_OS_ENV_EXPERIMENT_UUID, None): 89 | data["experiment_uuid"] = experiment_uuid 90 | self._client.add_integration("langchain") 91 | self._client.request( 92 | "POST", 93 | VENDOR_LOG_ENDPOINT.format(vendor=vendor.value), 94 | data=json.loads(_dumps_json(data)), # uuid is not serializable 95 | 
) 96 | 97 | async def arecord_vendor_log(self, data: Dict[str, Any], vendor: TraceIntegrations) -> None: 98 | data["project_uuid"] = self._get_project_uuid() 99 | if experiment_uuid := os.getenv(PAREA_OS_ENV_EXPERIMENT_UUID, None): 100 | data["experiment_uuid"] = experiment_uuid 101 | self._client.add_integration("langchain") 102 | await self._client.request_async( 103 | "POST", 104 | VENDOR_LOG_ENDPOINT.format(vendor=vendor.value), 105 | data=json.loads(_dumps_json(data)), # uuid is not serializable 106 | ) 107 | 108 | 109 | parea_logger = PareaLogger() 110 | -------------------------------------------------------------------------------- /parea/schemas/__init__.py: -------------------------------------------------------------------------------- 1 | from .log import * 2 | from .models import * 3 | -------------------------------------------------------------------------------- /parea/types.py: -------------------------------------------------------------------------------- 1 | from openai import AsyncStream, Stream 2 | 3 | 4 | class OpenAIAsyncStreamWrapper: 5 | def __init__(self, async_stream: AsyncStream, accumulator, info_from_response, update_accumulator_streaming, final_processing_and_logging): 6 | self._async_stream = async_stream 7 | self._final_processing_and_logging = final_processing_and_logging 8 | self._update_accumulator_streaming = update_accumulator_streaming 9 | self._accumulator = accumulator 10 | self._info_from_response = info_from_response 11 | 12 | def __getattr__(self, attr): 13 | # delegate attribute access to the original async_stream 14 | return getattr(self._async_stream, attr) 15 | 16 | async def __aiter__(self): 17 | async for chunk in self._async_stream: 18 | self._update_accumulator_streaming(self._accumulator, self._info_from_response, chunk) 19 | yield chunk 20 | 21 | self._final_processing_and_logging(self._accumulator, self._info_from_response) 22 | 23 | 24 | class OpenAIStreamWrapper: 25 | def __init__(self, stream: Stream, accumulator, info_from_response, update_accumulator_streaming, final_processing_and_logging): 26 | self._stream = stream 27 | self._final_processing_and_logging = final_processing_and_logging 28 | self._update_accumulator_streaming = update_accumulator_streaming 29 | self._accumulator = accumulator 30 | self._info_from_response = info_from_response 31 | 32 | def __getattr__(self, attr): 33 | # delegate attribute access to the original async_stream 34 | return getattr(self._stream, attr) 35 | 36 | def __iter__(self): 37 | for chunk in self._stream: 38 | self._update_accumulator_streaming(self._accumulator, self._info_from_response, chunk) 39 | yield chunk 40 | 41 | self._final_processing_and_logging(self._accumulator, self._info_from_response) 42 | -------------------------------------------------------------------------------- /parea/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parea-ai/parea-sdk-py/968486f9bf7aa4741bb307739f9dec573b30bc95/parea/utils/__init__.py -------------------------------------------------------------------------------- /parea/utils/trace_integrations/langchain.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional, Union 2 | 3 | import logging 4 | from uuid import UUID 5 | 6 | from langchain_core.tracers import BaseTracer 7 | from langchain_core.tracers.schemas import ChainRun, LLMRun, Run, ToolRun 8 | 9 | from parea.helpers import 
is_logging_disabled 10 | from parea.parea_logger import parea_logger 11 | from parea.schemas import UpdateTraceScenario 12 | from parea.schemas.log import TraceIntegrations 13 | from parea.utils.trace_utils import fill_trace_data, get_current_trace_id, get_root_trace_id 14 | 15 | logger = logging.getLogger() 16 | 17 | 18 | class PareaAILangchainTracer(BaseTracer): 19 | parent_trace_id: UUID 20 | _parea_root_trace_id: str = None 21 | _parea_parent_trace_id: str = None 22 | _session_id: Optional[str] = None 23 | _tags: List[str] = [] 24 | _metadata: Dict[str, Any] = {} 25 | _end_user_identifier: Optional[str] = None 26 | _deployment_id: Optional[str] = None 27 | _log_sample_rate: Optional[float] = 1.0 28 | 29 | def __init__( 30 | self, 31 | session_id: Optional[str] = None, 32 | tags: Optional[List[str]] = None, 33 | metadata: Optional[Dict[str, Any]] = None, 34 | end_user_identifier: Optional[str] = None, 35 | deployment_id: Optional[str] = None, 36 | log_sample_rate: Optional[float] = 1.0, 37 | **kwargs: Any, 38 | ): 39 | super().__init__(**kwargs) 40 | self._session_id = session_id 41 | self._end_user_identifier = end_user_identifier 42 | self._deployment_id = deployment_id 43 | self._log_sample_rate = log_sample_rate 44 | if tags: 45 | self._tags = tags 46 | if metadata: 47 | self._metadata = metadata 48 | 49 | def _persist_run(self, run: Union[Run, LLMRun, ChainRun, ToolRun]) -> None: 50 | if is_logging_disabled(): 51 | return 52 | try: 53 | self.parent_trace_id = run.id 54 | # using .dict() since langchain Run class currently set to Pydantic v1 55 | data = run.dict() 56 | data["_parea_root_trace_id"] = self._parea_root_trace_id or None 57 | data["_session_id"] = self._session_id 58 | data["_tags"] = self._tags 59 | data["_metadata"] = self._metadata 60 | data["_end_user_identifier"] = self._end_user_identifier 61 | data["_deployment_id"] = self._deployment_id 62 | data["_log_sample_rate"] = self._log_sample_rate 63 | # check if run has an attribute execution order 64 | if (hasattr(run, "execution_order") and run.execution_order == 1) or run.parent_run_id is None: 65 | data["_parea_parent_trace_id"] = self._parea_parent_trace_id or None 66 | parea_logger.record_vendor_log(data, TraceIntegrations.LANGCHAIN) 67 | except Exception as e: 68 | logger.exception(f"Error occurred while logging langchain run: {e}", stack_info=True) 69 | 70 | def get_parent_trace_id(self) -> UUID: 71 | return self.parent_trace_id 72 | 73 | def _on_run_create(self, run: Run) -> None: 74 | if (hasattr(run, "execution_order") and run.execution_order == 1) or run.parent_run_id is None: 75 | # need to check if any traces already exist 76 | self._parea_root_trace_id = get_root_trace_id() 77 | if parent_trace_id := get_current_trace_id(): 78 | self._parea_parent_trace_id = parent_trace_id 79 | fill_trace_data(str(run.id), {"parent_trace_id": parent_trace_id}, UpdateTraceScenario.LANGCHAIN_CHILD) 80 | 81 | def _on_llm_end(self, run: Run) -> None: 82 | self._persist_run(run) 83 | 84 | def _on_chain_end(self, run: Run) -> None: 85 | self._persist_run(run) 86 | -------------------------------------------------------------------------------- /parea/utils/trace_integrations/wrapt_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | from copy import copy, deepcopy 4 | 5 | from wrapt import BoundFunctionWrapper, FunctionWrapper 6 | 7 | 8 | class CopyableBoundFunctionWrapper(BoundFunctionWrapper): # type: ignore 9 | """ 10 | A bound function wrapper 
that can be copied and deep-copied. When used to 11 | wrap a class method, this allows the entire class to be copied and 12 | deep-copied. 13 | 14 | For reference, see 15 | https://github.com/GrahamDumpleton/wrapt/issues/86#issuecomment-426161271 16 | and 17 | https://wrapt.readthedocs.io/en/master/wrappers.html#custom-function-wrappers 18 | """ 19 | 20 | def __copy__(self) -> "CopyableBoundFunctionWrapper": 21 | return CopyableBoundFunctionWrapper(copy(self.__wrapped__), self._self_instance, self._self_wrapper) 22 | 23 | def __deepcopy__(self, memo: Dict[Any, Any]) -> "CopyableBoundFunctionWrapper": 24 | return CopyableBoundFunctionWrapper(deepcopy(self.__wrapped__, memo), self._self_instance, self._self_wrapper) 25 | 26 | 27 | class CopyableFunctionWrapper(FunctionWrapper): # type: ignore 28 | """ 29 | A function wrapper that can be copied and deep-copied. When used to wrap a 30 | class method, this allows the entire class to be copied and deep-copied. 31 | 32 | For reference, see 33 | https://github.com/GrahamDumpleton/wrapt/issues/86#issuecomment-426161271 34 | and 35 | https://wrapt.readthedocs.io/en/master/wrappers.html#custom-function-wrappers 36 | """ 37 | 38 | __bound_function_wrapper__ = CopyableBoundFunctionWrapper 39 | 40 | def __copy__(self) -> "CopyableFunctionWrapper": 41 | return CopyableFunctionWrapper(copy(self.__wrapped__), self._self_wrapper) 42 | 43 | def __deepcopy__(self, memo: Dict[Any, Any]) -> "CopyableFunctionWrapper": 44 | return CopyableFunctionWrapper(deepcopy(self.__wrapped__, memo), self._self_wrapper) 45 | -------------------------------------------------------------------------------- /parea/wrapper/__init__.py: -------------------------------------------------------------------------------- 1 | from parea.wrapper.openai.openai import OpenAIWrapper 2 | 3 | from .openai_raw_api_tracer import get_formatted_openai_response 4 | from .wrapper import Wrapper 5 | -------------------------------------------------------------------------------- /parea/wrapper/anthropic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parea-ai/parea-sdk-py/968486f9bf7aa4741bb307739f9dec573b30bc95/parea/wrapper/anthropic/__init__.py -------------------------------------------------------------------------------- /parea/wrapper/anthropic/stream_wrapper.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from types import TracebackType 4 | from typing import Callable 5 | 6 | from anthropic import AsyncMessageStreamManager, AsyncStream, MessageStreamManager, Stream 7 | from anthropic.types import Message 8 | 9 | 10 | class AnthropicStreamWrapper: 11 | def __init__(self, stream: Stream, accumulator, info_from_response, update_accumulator_streaming, final_processing_and_logging): 12 | self._stream = stream 13 | self._final_processing_and_logging = final_processing_and_logging 14 | self._update_accumulator_streaming = update_accumulator_streaming 15 | self._accumulator = accumulator 16 | self._info_from_response = info_from_response 17 | 18 | def __getattr__(self, attr): 19 | # delegate attribute access to the original stream 20 | return getattr(self._stream, attr) if hasattr(self._stream, attr) else None 21 | 22 | def __iter__(self): 23 | for chunk in self._stream: 24 | self._update_accumulator_streaming(self._accumulator, self._info_from_response, chunk) 25 | yield chunk 26 | 27 | 
self._final_processing_and_logging(self._accumulator, self._info_from_response) 28 | 29 | 30 | class AnthropicAsyncStreamWrapper: 31 | def __init__(self, stream: AsyncStream, accumulator, info_from_response, update_accumulator_streaming, final_processing_and_logging): 32 | self._stream = stream 33 | self._final_processing_and_logging = final_processing_and_logging 34 | self._update_accumulator_streaming = update_accumulator_streaming 35 | self._accumulator = accumulator 36 | self._info_from_response = info_from_response 37 | 38 | def __getattr__(self, attr): 39 | # delegate attribute access to the original async_stream 40 | return getattr(self._stream, attr) if hasattr(self._stream, attr) else None 41 | 42 | async def __aiter__(self): 43 | async for chunk in self._stream: 44 | self._update_accumulator_streaming(self._accumulator, self._info_from_response, chunk) 45 | yield chunk 46 | 47 | self._final_processing_and_logging(self._accumulator, self._info_from_response) 48 | 49 | 50 | class MessageStreamManagerWrapper(MessageStreamManager): 51 | def __init__(self, msm_instance: MessageStreamManager, resolve_and_log: Callable): 52 | self._msm_instance = msm_instance 53 | self._resolve_and_log = resolve_and_log 54 | 55 | def __getattr__(self, attr): 56 | if attr != "_private_stream": 57 | return getattr(self._msm_instance, attr) 58 | else: 59 | return self._private_stream 60 | 61 | def __enter__(self): 62 | self._private_stream = self._msm_instance.__enter__() 63 | return self._private_stream 64 | 65 | def __exit__( 66 | self, 67 | exc_type: type[BaseException] | None, 68 | exc: BaseException | None, 69 | exc_tb: TracebackType | None, 70 | ) -> None: 71 | m: Message = self._private_stream.get_final_message() 72 | self._resolve_and_log(m) 73 | return super().__exit__(exc_type, exc, exc_tb) 74 | 75 | 76 | class MessageAsyncStreamManagerWrapper(AsyncMessageStreamManager): 77 | def __init__(self, msm_instance: AsyncMessageStreamManager, resolve_and_log: Callable): 78 | self._msm_instance = msm_instance 79 | self._resolve_and_log = resolve_and_log 80 | 81 | def __getattr__(self, attr): 82 | if attr != "_private_stream": 83 | return getattr(self._msm_instance, attr) 84 | else: 85 | return self._private_stream 86 | 87 | async def __aenter__(self): 88 | self._private_stream = await self._msm_instance.__aenter__() 89 | return self._private_stream 90 | 91 | async def __aexit__( 92 | self, 93 | exc_type: type[BaseException] | None, 94 | exc: BaseException | None, 95 | exc_tb: TracebackType | None, 96 | ) -> None: 97 | m: Message = await self._private_stream.get_final_message() 98 | self._resolve_and_log(m) 99 | return await super().__aexit__(exc_type, exc, exc_tb) 100 | -------------------------------------------------------------------------------- /parea/wrapper/openai/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parea-ai/parea-sdk-py/968486f9bf7aa4741bb307739f9dec573b30bc95/parea/wrapper/openai/__init__.py -------------------------------------------------------------------------------- /parea/wrapper/openai_raw_api_tracer.py: -------------------------------------------------------------------------------- 1 | from typing import Any, AsyncGenerator, Generator 2 | 3 | import json 4 | from collections import defaultdict 5 | 6 | from parea.constants import CHUNK_DONE_SENTINEL 7 | from parea.utils.trace_utils import get_current_trace_id 8 | from parea.utils.universal_encoder import json_dumps 9 | from parea.wrapper.utils 
import convert_openai_raw_stream_to_log 10 | 11 | 12 | def process_stream_and_yield(response, data: dict) -> Generator: 13 | trace_id = get_current_trace_id() 14 | accumulated_content = [] 15 | accumulated_tools = defaultdict(lambda: {"function": {"arguments": [], "name": ""}}) 16 | 17 | for chunk in response.iter_lines(): 18 | format_and_accumulate_streaming_chunk(trace_id, accumulated_content, accumulated_tools, data, chunk) 19 | yield chunk 20 | 21 | 22 | async def aprocess_stream_and_yield(response, data: dict) -> AsyncGenerator: 23 | trace_id = get_current_trace_id() 24 | accumulated_content = [] 25 | accumulated_tools = defaultdict(lambda: {"function": {"arguments": [], "name": ""}}) 26 | 27 | async for chunk in response.aiter_lines(): 28 | format_and_accumulate_streaming_chunk(trace_id, accumulated_content, accumulated_tools, data, chunk) 29 | yield chunk 30 | 31 | 32 | def format_and_accumulate_streaming_chunk(trace_id: str, accumulated_content: list, accumulated_tools: dict, data: dict, chunk: Any) -> None: 33 | from openai.types.chat import ChatCompletionChunk 34 | 35 | try: 36 | chunk = chunk.decode("utf-8") 37 | except AttributeError: 38 | pass 39 | if chunk == CHUNK_DONE_SENTINEL: 40 | # when done send accumulated content to be logged in background thread 41 | convert_openai_raw_stream_to_log(accumulated_content, accumulated_tools, data, trace_id) 42 | else: 43 | chunk_data = raw_chunk_to_chat_completion_chunk(chunk) 44 | if isinstance(chunk_data, ChatCompletionChunk): 45 | for choice in chunk_data.choices or []: 46 | delta = choice.delta 47 | 48 | if delta.content: 49 | accumulated_content.append(delta.content) 50 | 51 | if delta.function_call: 52 | accumulated_tools[0]["function"]["name"] = delta.function_call.name or accumulated_tools[0]["function"]["name"] 53 | if delta.function_call.arguments: 54 | accumulated_tools[0]["function"]["arguments"].append(delta.function_call.arguments) 55 | 56 | for tool_call in delta.tool_calls or []: 57 | tool_id = tool_call.index 58 | accumulated_tools[tool_id]["function"]["name"] = tool_call.function.name or accumulated_tools[tool_id]["function"]["name"] 59 | if tool_call.function.arguments: 60 | accumulated_tools[tool_id]["function"]["arguments"].append(tool_call.function.arguments) 61 | 62 | 63 | def raw_chunk_to_chat_completion_chunk(chunk: str): 64 | from openai.types.chat import ChatCompletionChunk 65 | 66 | try: 67 | return ChatCompletionChunk(**json.loads(chunk[6:].strip())) 68 | except json.JSONDecodeError: 69 | return chunk 70 | 71 | 72 | def get_formatted_openai_response(r): 73 | # helper function to format the response from OpenAI 74 | if r["choices"][0]["message"].get("content"): 75 | return r["choices"][0]["message"]["content"].strip() 76 | elif r["choices"][0]["message"].get("function_call"): 77 | function_call = r["choices"][0]["message"]["function_call"] 78 | formatted_function_call = { 79 | "name": function_call["name"], 80 | "arguments": json.loads(function_call["arguments"]), 81 | } 82 | return json_dumps(formatted_function_call, indent=4) 83 | elif r["choices"][0]["message"].get("tool_calls"): 84 | formatted_tool_calls = [] 85 | tool_calls = r["choices"][0]["message"]["tool_calls"] 86 | for tool_call in tool_calls: 87 | formatted_tool_call = { 88 | "name": tool_call["function"]["name"], 89 | "arguments": json.loads(tool_call["function"]["arguments"]), 90 | } 91 | formatted_tool_calls.append(formatted_tool_call) 92 | return json_dumps(formatted_tool_calls, indent=4) 93 | return json_dumps(r, indent=4) 94 | 
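# Usage sketch (added for illustration; not part of the original module): given a raw
# /chat/completions response that has already been parsed into a dict, the helper returns
# the stripped message content, or a JSON dump of any function/tool calls. The payload
# below is a made-up minimal example, not taken from the source.
#
#   r = {"choices": [{"message": {"content": "  Hello!  "}}]}
#   get_formatted_openai_response(r)  # -> "Hello!"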
-------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [darglint] 2 | # https://github.com/terrencepreilly/darglint 3 | strictness = long 4 | docstring_style = google 5 | -------------------------------------------------------------------------------- /tests/test_import.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import pkgutil 3 | 4 | import pytest 5 | 6 | 7 | def test_imports(): 8 | try: 9 | package = importlib.import_module("parea") 10 | for _, module_name, _ in pkgutil.iter_modules(package.__path__): 11 | importlib.import_module(f"parea.{module_name}") 12 | except ImportError: 13 | pytest.fail("Import failed", pytrace=False) 14 | --------------------------------------------------------------------------------