├── tests ├── __init__.py ├── conftest.py ├── show_coverage.py ├── test_json_encoder.py └── test_plugin.py ├── .python-version ├── .gitignore ├── src └── pytest_evals │ ├── __init__.py │ ├── models.py │ ├── json_encoder.py │ ├── ipython_extension.py │ └── plugin.py ├── .pre-commit-config.yaml ├── example ├── t.ipynb ├── example_test.py ├── example_judge_test.py ├── example_notebook.ipynb └── example_notebook_advanced.ipynb ├── LICENSE ├── .github └── workflows │ ├── test.yaml │ ├── pr-triage.yaml │ └── publish.yaml ├── pyproject.toml ├── CHANGELOG.md ├── CONTRIBUTING.md └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.11 2 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | pytest_plugins = ["pytester"] 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python-generated files 2 | __pycache__/ 3 | *.py[oc] 4 | build/ 5 | dist/ 6 | wheels/ 7 | *.egg-info 8 | 9 | coverage/ 10 | .coverage 11 | *.py,cover 12 | coverage.xml 13 | junit/ 14 | test-out/ 15 | 16 | .ipynb_checkpoints/ 17 | 18 | # Virtual environments 19 | .venv 20 | 21 | # IDE and system files 22 | .idea/ 23 | .vscode/ 24 | .DS_Store 25 | 26 | 27 | .env 28 | example/experiment_results.csv -------------------------------------------------------------------------------- /src/pytest_evals/__init__.py: -------------------------------------------------------------------------------- 1 | """A pytest plugin for running and analyzing LLM evaluation tests.""" 2 | 3 | from .plugin import ( 4 | eval_bag, 5 | eval_bag_results, 6 | eval_results, 7 | eval_analysis_marker, 8 | eval_marker, 9 | out_path, 10 | ) 11 | from .models import EvalResults, EvalBag 12 | from .ipython_extension import load_ipython_extension 13 | 14 | __all__ = [ 15 | # Core functionality 16 | "EvalResults", 17 | "EvalBag", 18 | "eval_bag", 19 | "eval_bag_results", 20 | "eval_results", 21 | "out_path", 22 | # Marker utilities 23 | "eval_analysis_marker", 24 | "eval_marker", 25 | # Extensions 26 | "load_ipython_extension", 27 | ] 28 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/astral-sh/ruff-pre-commit 3 | rev: v0.3.0 4 | hooks: 5 | - id: ruff 6 | args: [--fix] 7 | - id: ruff-format 8 | 9 | - repo: https://github.com/RobertCraigie/pyright-python 10 | rev: v1.1.392.post0 11 | hooks: 12 | - id: pyright 13 | 14 | - repo: local 15 | hooks: 16 | - id: uv-sync 17 | name: UV sync dependencies 18 | entry: uv sync --all-extras --dev 19 | language: system 20 | pass_filenames: false 21 | 22 | - id: pytest 23 | name: Run tests with coverage 24 | entry: coverage run --source=pytest_evals -m pytest 25 | language: system 26 | pass_filenames: false 27 | stages: [pre-commit] 28 | types: [python] 29 | 30 | default_install_hook_types: [pre-commit, commit-msg] 31 | default_stages: [pre-commit, pre-push] -------------------------------------------------------------------------------- 
/example/t.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "metadata": {}, 5 | "cell_type": "code", 6 | "outputs": [], 7 | "execution_count": null, 8 | "source": "%load_ext pytest_evals", 9 | "id": "ec80ea2a86a4bd55" 10 | }, 11 | { 12 | "metadata": {}, 13 | "cell_type": "code", 14 | "outputs": [], 15 | "execution_count": null, 16 | "source": [ 17 | "%%ipytest_evals --run-eval\n", 18 | "from time import sleep\n", 19 | "\n", 20 | "import pytest\n", 21 | "\n", 22 | "\n", 23 | "@pytest.mark.eval(name=\"computer_classifier\")\n", 24 | "@pytest.mark.parametrize(\"n\", range(1,5))\n", 25 | "def test_classifier(n):\n", 26 | " sleep(10*n)" 27 | ], 28 | "id": "7e540c467649a289" 29 | }, 30 | { 31 | "metadata": {}, 32 | "cell_type": "code", 33 | "outputs": [], 34 | "execution_count": null, 35 | "source": "", 36 | "id": "b7e2bf97100392a3" 37 | } 38 | ], 39 | "metadata": {}, 40 | "nbformat": 5, 41 | "nbformat_minor": 9 42 | } 43 | -------------------------------------------------------------------------------- /src/pytest_evals/models.py: -------------------------------------------------------------------------------- 1 | import dataclasses 2 | from typing import Any, Mapping 3 | 4 | from pytest_harvest import ResultsBag 5 | 6 | 7 | class EvalBag(ResultsBag): 8 | pass 9 | 10 | 11 | @dataclasses.dataclass 12 | class EvalResults: 13 | """Data class representing an evaluation result.""" 14 | 15 | eval_name: str 16 | status: str 17 | duration_ms: float 18 | test_params: dict[str, Any] 19 | test_name: str 20 | result: EvalBag 21 | 22 | @classmethod 23 | def from_result_bag(cls, item: Mapping[str, Any]) -> "EvalResults": 24 | """Create an EvalResult instance from a result bag item.""" 25 | return cls( 26 | eval_name=item["eval_name"], 27 | status=item["status"], 28 | duration_ms=item["duration_ms"], 29 | test_params=item["params"], 30 | test_name=item["pytest_obj_name"], 31 | result=EvalBag(item["fixtures"]["eval_bag"]) 32 | if "eval_bag" in item["fixtures"] 33 | else EvalBag(), 34 | ) 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Almog Baku 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | pull_request: 5 | paths: 6 | - '**.py' 7 | workflow_call: 8 | 9 | permissions: 10 | contents: write 11 | issues: read 12 | checks: write 13 | pull-requests: write 14 | 15 | jobs: 16 | tests: 17 | name: "Run tests" 18 | runs-on: ubuntu-latest 19 | steps: 20 | - name: Checkout repository 21 | uses: actions/checkout@v4 22 | - name: Install uv 23 | uses: astral-sh/setup-uv@v5 24 | with: 25 | enable-cache: true 26 | - name: "Set up Python" 27 | uses: actions/setup-python@v5 28 | with: 29 | python-version-file: ".python-version" 30 | - name: Install the project 31 | run: uv sync --all-extras --dev 32 | - name: Test with pytest 33 | run: | 34 | uv run coverage run --source=pytest_evals -m pytest --junitxml=junit/test-results.xml 35 | uv run coverage xml 36 | - name: Get Cover 37 | uses: orgoro/coverage@v3 38 | if: github.event_name == 'pull_request' 39 | with: 40 | coverageFile: coverage.xml 41 | token: ${{ secrets.GITHUB_TOKEN }} 42 | - name: Publish Test Results 43 | uses: EnricoMi/publish-unit-test-result-action/macos@v2 44 | if: always() 45 | with: 46 | files: | 47 | junit/**/*.xml 48 | junit/**/*.trx 49 | junit/**/*.json 50 | -------------------------------------------------------------------------------- /src/pytest_evals/json_encoder.py: -------------------------------------------------------------------------------- 1 | import dataclasses 2 | import json 3 | from enum import Enum 4 | from typing import Callable 5 | 6 | try: 7 | from pydantic import BaseModel 8 | 9 | HAVE_PYDANTIC = True 10 | except ImportError: 11 | HAVE_PYDANTIC = False 12 | BaseModel = type(None) # Create a dummy type that won't match anything 13 | 14 | try: 15 | import pandas as pd 16 | 17 | def is_series(obj): # pyright: ignore [reportRedeclaration] 18 | return isinstance(obj, pd.Series) 19 | 20 | def is_dataframe(obj): # pyright: ignore [reportRedeclaration] 21 | return isinstance(obj, pd.DataFrame) 22 | 23 | HAVE_PANDAS = True 24 | except ImportError: 25 | HAVE_PANDAS = False 26 | 27 | def is_series(obj): 28 | return False 29 | 30 | def is_dataframe(obj): 31 | return False 32 | 33 | 34 | class AdvancedJsonEncoder(json.JSONEncoder): 35 | """JSON encoder that handles Pydantic models (if installed) and other special types.""" 36 | 37 | # noinspection PyBroadException 38 | def default(self, o): 39 | if HAVE_PYDANTIC and isinstance(o, BaseModel): 40 | return json.loads(o.model_dump_json()) # type: ignore 41 | if dataclasses.is_dataclass(o): 42 | return dataclasses.asdict(o) # type: ignore 43 | if isinstance(o, Enum): 44 | return o.value 45 | if isinstance(o, Callable): 46 | try: 47 | return f"<{o.__module__}.{o.__name__}>" 48 | except Exception: 49 | try: 50 | return f"<{o.__module__}.{o.__class__.__name__}>" 51 | except Exception: 52 | return repr(o) 53 | if isinstance(o, type(None)): 54 | return None 55 | if HAVE_PANDAS and is_series(o): 56 | return o.to_dict() 57 | if HAVE_PANDAS and is_dataframe(o): 58 | return o.to_dict(orient="records") 59 | if hasattr(o, "__repr__"): 60 | return repr(o) 61 | return super().default(o) 62 | -------------------------------------------------------------------------------- /.github/workflows/pr-triage.yaml: -------------------------------------------------------------------------------- 1 | name: "Pull Request Triage" 2 | on: 3 | # NB: using `pull_request_target` 
runs this in the context of 4 | # the base repository, so it has permission to upload to the checks API. 5 | # This means changes won't kick in to this file until merged onto the 6 | # main branch. 7 | pull_request_target: 8 | types: [ opened, edited, reopened, synchronize ] 9 | 10 | permissions: 11 | contents: read 12 | pull-requests: write 13 | issues: write 14 | 15 | jobs: 16 | triage: 17 | name: "Triage Pull Request" 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: codelytv/pr-size-labeler@v1 21 | with: 22 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 23 | xs_label: 'size/xs' 24 | xs_max_size: '15' 25 | s_label: 'size/s' 26 | s_max_size: '100' 27 | m_label: 'size/m' 28 | m_max_size: '500' 29 | l_label: 'size/l' 30 | l_max_size: '1000' 31 | xl_label: 'size/xl' 32 | fail_if_xl: 'false' 33 | message_if_xl: | 34 | This PR exceeds the recommended size of 1000 lines. 35 | Please make sure you are NOT addressing multiple issues with one PR. 36 | Note this PR might be rejected due to its size. 37 | files_to_ignore: '' 38 | # - name: "Check for PR body length" 39 | # shell: bash 40 | # env: 41 | # PR_BODY: ${{ github.event.pull_request.body }} 42 | # run: | 43 | # if [ ${#PR_BODY} -lt 80 ]; then 44 | # echo "::error title=PR body is too short::Your PR is probably isn't descriptive enough.\nYou should give a description that highlights both what you're doing it and *why* you're doing it. Someone reading the PR description without clicking any issue links should be able to roughly understand what's going on." 45 | # exit 1 46 | # fi 47 | - uses: amannn/action-semantic-pull-request@v5 48 | env: 49 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 50 | with: 51 | disallowScopes: | 52 | release -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "pytest-evals" 3 | version = "0.0.0-dev" 4 | description = "A pytest plugin for running and analyzing LLM evaluation tests" 5 | authors = [ 6 | { name = "Almog Baku", email = "almog.baku@gmail.com" }, 7 | ] 8 | dependencies = [ 9 | "pytest>=7.0.0", 10 | "pytest-harvest>=1.0.0", 11 | "cloudpickle>=2.0.0", 12 | ] 13 | requires-python = ">=3.9" 14 | readme = "README.md" 15 | license = "MIT" 16 | keywords = ["pytest", "evaluations", "llm", "eval", "openai", "anthropic", "gpt", "pytest-evals"] 17 | 18 | [project.urls] 19 | Homepage = "https://github.com/AlmogBaku/pytest-evals" 20 | Repository = "https://github.com/AlmogBaku/pytest-evals" 21 | Issues = "https://github.com/AlmogBaku/pytest-evals/issues" 22 | 23 | [build-system] 24 | requires = ["hatchling"] 25 | build-backend = "hatchling.build" 26 | 27 | [project.entry-points.pytest11] 28 | evals = "pytest_evals.plugin" 29 | 30 | [tool.hatch.build.targets.wheel] 31 | packages = ["src/pytest_evals"] 32 | 33 | [tool.pytest.ini_options] 34 | addopts = "-ra -q --cov-report=term-missing" 35 | testpaths = ["tests"] 36 | 37 | filterwarnings = [ 38 | "ignore::pytest.PytestDeprecationWarning:pytest_harvest.*", 39 | ] 40 | 41 | [dependency-groups] 42 | dev = [ 43 | "ipytest>=0.14.2", 44 | "matplotlib>=3.9.4", 45 | "notebook>=7.3.2", 46 | "openai>=1.59.6", 47 | "pandas>=2.2.3", 48 | "pytest-cov>=6.0.0", 49 | "pytest-xdist>=3.6.1", 50 | "seaborn>=0.13.2", 51 | ] 52 | 53 | [tool.coverage.report] 54 | exclude_also = [ 55 | "def __repr__", 56 | "raise AssertionError", 57 | "raise NotImplementedError", 58 | "if __name__ == .__main__.:", 59 | "if TYPE_CHECKING:", 60 | "class 
.*\\bProtocol\\):", 61 | "@(abc\\.)?abstractmethod", 62 | "pragma: no cover", 63 | "import *", 64 | # 1. Exclude an except clause of a specific form: 65 | "except ValueError:\\n\\s*assume\\(False\\)", 66 | # 2. Comments to turn coverage on and off: 67 | "no cover: start(?s:.)*?no cover: stop", 68 | # 3. A pragma comment that excludes an entire file: 69 | "\\A(?s:.*# pragma: exclude file.*)\\Z", 70 | "@pytest\\.fixture.*", 71 | "@pytest\\.mark.*", 72 | ] 73 | 74 | omit = [ 75 | "tests/*", 76 | "/tmp/*", 77 | "/private/*", 78 | "*/temporary/*" 79 | ] 80 | 81 | [tool.pyright] 82 | venvPath = "." 83 | venv = ".venv" -------------------------------------------------------------------------------- /example/example_test.py: -------------------------------------------------------------------------------- 1 | import openai 2 | import pytest 3 | 4 | # Simple test data 5 | TEST_DATA = [ 6 | {"text": "I need to debug this Python code", "label": True}, 7 | {"text": "The cat jumped over the lazy dog", "label": False}, 8 | {"text": "My monitor keeps flickering", "label": True}, 9 | ] 10 | 11 | 12 | @pytest.fixture 13 | def classifier(): 14 | def _classify(text: str) -> bool: 15 | """Simple LLM agent that classifies text as computer-related or not.""" 16 | resp = openai.chat.completions.create( 17 | model="gpt-4o-mini", 18 | messages=[ 19 | { 20 | "role": "system", 21 | "content": "Is this text about a computer-related subject?" 22 | "Reply ONLY with either true or false.", 23 | }, 24 | {"role": "user", "content": text}, 25 | ], 26 | ) 27 | return resp.choices[0].message.content.lower() == "true" # type: ignore 28 | 29 | return _classify 30 | 31 | 32 | @pytest.mark.eval(name="computer_classifier") 33 | @pytest.mark.parametrize("case", TEST_DATA) 34 | def test_classifier(case: dict, eval_bag, classifier) -> None: 35 | # Store input and prediction for analysis 36 | eval_bag.input_text = case["text"] 37 | eval_bag.label = case["label"] 38 | eval_bag.prediction = classifier(case["text"]) 39 | 40 | # Basic assertion 41 | assert eval_bag.prediction == eval_bag.label 42 | 43 | 44 | @pytest.mark.eval_analysis(name="computer_classifier") 45 | def test_analysis(eval_results): 46 | # Calculate true positives, false positives, and false negatives 47 | true_positives = sum( 48 | 1 for r in eval_results if r.result.prediction and r.result.label 49 | ) 50 | false_positives = sum( 51 | 1 for r in eval_results if r.result.prediction and not r.result.label 52 | ) 53 | false_negatives = sum( 54 | 1 for r in eval_results if not r.result.prediction and r.result.label 55 | ) 56 | 57 | total_predictions = len(eval_results) 58 | correct_predictions = sum( 59 | 1 for r in eval_results if r.result.prediction == r.result.label 60 | ) 61 | 62 | # Calculate metrics 63 | accuracy = correct_predictions / total_predictions if total_predictions else 0 64 | precision = ( 65 | true_positives / (true_positives + false_positives) 66 | if (true_positives + false_positives) 67 | else 0 68 | ) 69 | recall = ( 70 | true_positives / (true_positives + false_negatives) 71 | if (true_positives + false_negatives) 72 | else 0 73 | ) 74 | f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0 75 | 76 | print(f"Accuracy: {accuracy:.2%}") 77 | print(f"Precision: {precision:.2%}") 78 | print(f"Recall: {recall:.2%}") 79 | print(f"F1: {f1:.2%}") 80 | 81 | assert f1 >= 0.7 82 | -------------------------------------------------------------------------------- /src/pytest_evals/ipython_extension.py: 
-------------------------------------------------------------------------------- 1 | # pragma: exclude file 2 | import shlex 3 | from pathlib import Path 4 | 5 | try: 6 | from IPython.core.magic import Magics, magics_class, cell_magic # type: ignore 7 | except ImportError: 8 | 9 | def magics_class(cls): 10 | pass 11 | 12 | class Magics: 13 | def __init__(self, shell): 14 | pass 15 | 16 | def cell_magic(func): 17 | pass 18 | 19 | 20 | @magics_class 21 | class EvalsMagics(Magics): 22 | def __init__(self, shell): 23 | super().__init__(shell) 24 | try: 25 | import ipytest 26 | 27 | ipytest.autoconfig( 28 | run_in_thread=True, # pyright: ignore [reportArgumentType] 29 | addopts=[ # pyright: ignore [reportArgumentType] 30 | "--assert=plain", 31 | "-s", # Don't capture output 32 | "--log-cli-level=ERROR", 33 | ], 34 | ) 35 | except ImportError: 36 | raise ImportError( 37 | "⚠️ `ipytest` is required to use `pytest-evals` in notebooks.\n" 38 | " ↳ Please install it with: `pip install ipytest`" 39 | ) 40 | 41 | # noinspection PyProtectedMember 42 | @staticmethod 43 | def cleanup_ipytest_env(): 44 | import ipytest 45 | 46 | if getattr(ipytest._impl.random_module_path, "_filename", None): 47 | if Path(ipytest._impl.random_module_path._filename).exists(): # pyright: ignore [reportFunctionMemberAccess] 48 | try: 49 | Path(ipytest._impl.random_module_path._filename).unlink() # pyright: ignore [reportFunctionMemberAccess] 50 | del ipytest._impl.random_module_path._filename # pyright: ignore [reportFunctionMemberAccess] 51 | except Exception: 52 | pass 53 | ipytest.clean() 54 | 55 | @cell_magic 56 | def ipytest_evals(self, line, cell): 57 | """ 58 | Execute pytest evaluations in the current IPython cell. 59 | 60 | Usage: 61 | %%pytest_evals [optional arguments] 62 | def test_something(): 63 | assert True 64 | """ 65 | # Force reload to ensure fresh test environment 66 | from pytest_harvest import FIXTURE_STORE 67 | from IPython.core.getipython import get_ipython 68 | import ipytest 69 | 70 | FIXTURE_STORE.clear() 71 | 72 | run_args = shlex.split(line) 73 | 74 | if "--run-eval" not in run_args and "--run-eval-analysis" not in run_args: 75 | run_args.append("--run-eval") 76 | run_args.append("--run-eval-analysis") 77 | 78 | if "-n" in run_args: 79 | raise ValueError( 80 | "The `-n` flag is not supported with `ipytest` (in notebooks)." 81 | ) 82 | 83 | self.cleanup_ipytest_env() 84 | 85 | try: 86 | get_ipython().run_cell(cell) # pyright: ignore [reportOptionalMemberAccess] 87 | 88 | except TypeError as e: 89 | if "raw_cell" in str(e): 90 | raise RuntimeError( 91 | "The ipytest magic cannot evaluate the cell. Most likely you " 92 | "are running a modified ipython version. Consider using " 93 | "`ipytest.run` and `ipytest.clean` directly.", 94 | ) from e 95 | 96 | raise e 97 | 98 | try: 99 | ipytest.run(*run_args) 100 | except KeyboardInterrupt: 101 | self.cleanup_ipytest_env() 102 | raise 103 | 104 | 105 | def load_ipython_extension(ipython): 106 | """ 107 | Register the magic when the extension loads. 
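    Loading the extension (for example with `%load_ext pytest_evals`) registers the `%%ipytest_evals` cell magic for running evaluation tests inside a notebook.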
108 | """ 109 | ipython.register_magics(EvalsMagics) 110 | -------------------------------------------------------------------------------- /.github/workflows/publish.yaml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPi 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | paths: 8 | - '**.py' 9 | workflow_dispatch: 10 | inputs: 11 | skip_on_empty_changelog: 12 | description: "Skip the release if the changelog is empty" 13 | required: false 14 | default: false 15 | type: boolean 16 | 17 | permissions: 18 | contents: write 19 | issues: read 20 | checks: write 21 | pull-requests: write 22 | 23 | jobs: 24 | tests: 25 | uses: ./.github/workflows/test.yaml 26 | version: 27 | runs-on: ubuntu-latest 28 | outputs: 29 | version: ${{ steps.changelog.outputs.version }} 30 | tag: ${{ steps.changelog.outputs.tag }} 31 | changelog: ${{ steps.changelog.outputs.changelog }} 32 | clean_changelog: ${{ steps.changelog.outputs.clean_changelog }} 33 | skipped: ${{ steps.changelog.outputs.skipped }} 34 | steps: 35 | - name: Checkout repository 36 | uses: actions/checkout@v4 37 | with: 38 | fetch-depth: 0 39 | - name: Conventional Changelog Action 40 | id: changelog 41 | uses: TriPSs/conventional-changelog-action@v6 42 | with: 43 | release-count: '1' 44 | output-file: "false" 45 | skip-version-file: 'true' 46 | skip-commit: 'true' 47 | skip-git-pull: 'true' 48 | git-push: 'false' 49 | skip-on-empty: ${{ github.event_name == 'workflow_dispatch' && inputs.skip_on_empty_changelog || false }} 50 | fallback-version: '0.1.0' 51 | release: 52 | name: "Release and publish the version" 53 | needs: [ tests, version ] 54 | if: ${{ needs.version.outputs.skipped != 'true' }} 55 | runs-on: ubuntu-latest 56 | environment: 57 | name: pypi 58 | url: https://pypi.org/p/pytest-evals 59 | permissions: 60 | id-token: write # IMPORTANT: this permission is mandatory for trusted publishing 61 | contents: write 62 | steps: 63 | - name: Checkout repository 64 | uses: actions/checkout@v4 65 | - name: Install uv 66 | uses: astral-sh/setup-uv@v5 67 | with: 68 | enable-cache: true 69 | - name: "Set up Python" 70 | uses: actions/setup-python@v5 71 | with: 72 | python-version-file: ".python-version" 73 | - name: Set version 74 | env: 75 | BUILD_VERSION: "${{ needs.version.outputs.version }}" 76 | run: sed -i "s/version = \"0.0.0-dev\"/version = \"${BUILD_VERSION}\"/g" pyproject.toml 77 | - name: Update changelog 78 | shell: bash 79 | run: | 80 | touch CHANGELOG.md 81 | echo -e "${{ needs.version.outputs.changelog }}\n\n$(cat CHANGELOG.md)" > CHANGELOG.md 82 | - name: Build 83 | run: uv build 84 | - name: Publish 85 | run: uv publish 86 | - name: Update changelog 87 | shell: bash 88 | run: | 89 | git config user.name github-actions 90 | git config user.email github-actions@github.com 91 | 92 | git add CHANGELOG.md 93 | git commit -m "chore(release): ${{ needs.version.outputs.version }}" CHANGELOG.md 94 | git push 95 | - name: Tag 96 | uses: actions/github-script@v7 97 | with: 98 | script: | 99 | github.rest.git.createRef({ 100 | owner: context.repo.owner, 101 | repo: context.repo.repo, 102 | ref: 'refs/tags/${{ needs.version.outputs.tag }}', 103 | sha: context.sha 104 | }) 105 | - name: Release on GitHub 106 | uses: softprops/action-gh-release@v1 107 | with: 108 | tag_name: ${{ needs.version.outputs.tag }} 109 | files: dist/* 110 | body: | 111 | Released to https://pypi.org/project/pytest-evals/${{ needs.version.outputs.version }}/ 112 | --- 113 | ${{ 
needs.version.outputs.clean_changelog }} 114 | prerelease: ${{ inputs.prerelease }} 115 | name: Version ${{ needs.version.outputs.version }} 116 | generate_release_notes: false -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## [0.3.4](https://github.com/AlmogBaku/pytest-evals/compare/v0.3.3...v0.3.4) (2025-02-02) 2 | 3 | 4 | ### Bug Fixes 5 | 6 | * minor bug with model ([ea01570](https://github.com/AlmogBaku/pytest-evals/commit/ea015706e6ba7f9e3e9761f6c289417f34dfd217)) 7 | 8 | 9 | 10 | 11 | 12 | ## [0.3.3](https://github.com/AlmogBaku/pytest-evals/compare/v0.3.2...v0.3.3) (2025-01-29) 13 | 14 | 15 | ### Bug Fixes 16 | 17 | * minor bug ([8bfa9ae](https://github.com/AlmogBaku/pytest-evals/commit/8bfa9aed673e2133949d24333d4ad49635c2596b)) 18 | 19 | 20 | 21 | 22 | 23 | ## [0.3.2](https://github.com/AlmogBaku/pytest-evals/compare/v0.3.1...v0.3.2) (2025-01-28) 24 | 25 | 26 | ### Bug Fixes 27 | 28 | * rare notebook race condition ([b56a3a7](https://github.com/AlmogBaku/pytest-evals/commit/b56a3a73758bd27428f5959f6a296c622b4ae5e6)) 29 | 30 | 31 | 32 | 33 | 34 | ## [0.3.1](https://github.com/AlmogBaku/pytest-evals/compare/v0.3.0...v0.3.1) (2025-01-27) 35 | 36 | 37 | ### Bug Fixes 38 | 39 | * ipytest bugs recovery ([0e47bfd](https://github.com/AlmogBaku/pytest-evals/commit/0e47bfd5716f660b8061e546bca4ef90ee544c3d)) 40 | * json encoder bu ([5df0582](https://github.com/AlmogBaku/pytest-evals/commit/5df05822b53d6218c5fbac077fa6b3042375c6e4)) 41 | 42 | 43 | 44 | 45 | 46 | # [0.3.0](https://github.com/AlmogBaku/pytest-evals/compare/v0.2.7...v0.3.0) (2025-01-21) 47 | 48 | 49 | ### Bug Fixes 50 | 51 | * more tests for encoding pd ([281d3f2](https://github.com/AlmogBaku/pytest-evals/commit/281d3f2a0989698136989fdc4b3a6095ed039d8b)) 52 | 53 | 54 | ### Features 55 | 56 | * save eval cases as csv ([23e5c6c](https://github.com/AlmogBaku/pytest-evals/commit/23e5c6c7cce8b1a4214a1ca5f8969c7da9d06005)) 57 | 58 | 59 | 60 | 61 | 62 | ## [0.2.7](https://github.com/AlmogBaku/pytest-evals/compare/v0.2.6...v0.2.7) (2025-01-21) 63 | 64 | 65 | ### Bug Fixes 66 | 67 | * allow splitting tests in notebooks ([ba4b450](https://github.com/AlmogBaku/pytest-evals/commit/ba4b450ded4a7123ceb943bb6876d5d82d3454a3)) 68 | 69 | 70 | 71 | 72 | 73 | ## [0.2.6](https://github.com/AlmogBaku/pytest-evals/compare/v0.2.5...v0.2.6) (2025-01-20) 74 | 75 | 76 | ### Bug Fixes 77 | 78 | * json encoding of some objects ([46d017e](https://github.com/AlmogBaku/pytest-evals/commit/46d017ef1aa2c1b3fbca3e4fdddb5911b1268bf7)) 79 | * use EvalBag type ([5537e4d](https://github.com/AlmogBaku/pytest-evals/commit/5537e4d0a4f92b130013b596c2f19b3796265bbe)) 80 | 81 | 82 | 83 | 84 | 85 | ## [0.2.5](https://github.com/AlmogBaku/pytest-evals/compare/v0.2.4...v0.2.5) (2025-01-16) 86 | 87 | 88 | ### Bug Fixes 89 | 90 | * add comment ([faa48a4](https://github.com/AlmogBaku/pytest-evals/commit/faa48a4fbd5affe1fb21c13461a2330632dc969a)) 91 | 92 | 93 | 94 | 95 | 96 | ## [0.2.4](https://github.com/AlmogBaku/pytest-evals/compare/v0.2.3...v0.2.4) (2025-01-15) 97 | 98 | 99 | ### Bug Fixes 100 | 101 | * better import and error ([4b981a8](https://github.com/AlmogBaku/pytest-evals/commit/4b981a8654f429b09292426986697feb8eeed72a)) 102 | 103 | 104 | 105 | 106 | 107 | ## [0.2.3](https://github.com/AlmogBaku/pytest-evals/compare/v0.2.2...v0.2.3) (2025-01-14) 108 | 109 | 110 | ### Bug Fixes 111 | 112 | * add custom ipython extension to make it work 
smoothly in notebooks :) ([9e5c897](https://github.com/AlmogBaku/pytest-evals/commit/9e5c897a47971e36ca9b1c41c89674301de995fe)) 113 | * better tests + coverage ([f87d99f](https://github.com/AlmogBaku/pytest-evals/commit/f87d99f7a50a2630a421a39e3e9927b65a75a2e4)) 114 | 115 | 116 | 117 | 118 | 119 | ## [0.2.2](https://github.com/AlmogBaku/pytest-evals/compare/v0.2.1...v0.2.2) (2025-01-14) 120 | 121 | 122 | ### Bug Fixes 123 | 124 | * add tests, and fix bugs :) ([7d29898](https://github.com/AlmogBaku/pytest-evals/commit/7d2989838a8f0010f4f97c58e9ad3b0f5735c1fc)) 125 | 126 | 127 | 128 | 129 | 130 | ## [0.2.1](https://github.com/AlmogBaku/pytest-evals/compare/v0.2.0...v0.2.1) (2025-01-13) 131 | 132 | 133 | ### Bug Fixes 134 | 135 | * pypi links and metadata ([61adaaa](https://github.com/AlmogBaku/pytest-evals/commit/61adaaaeb8487a68609374f7cc9a77b16d9727e6)) 136 | 137 | 138 | 139 | 140 | 141 | # [0.2.0](https://github.com/AlmogBaku/pytest-evals/compare/v0.1.0...v0.2.0) (2025-01-13) 142 | 143 | 144 | ### Features 145 | 146 | * readme in pypi ([ab87991](https://github.com/AlmogBaku/pytest-evals/commit/ab8799158c256daeb47c4f7e7e3f26471b926dab)) 147 | 148 | 149 | 150 | 151 | 152 | # 0.1.0 (2025-01-13) 153 | -------------------------------------------------------------------------------- /example/example_judge_test.py: -------------------------------------------------------------------------------- 1 | import openai 2 | import pytest 3 | 4 | # Simple test data 5 | TEST_DATA = [ 6 | { 7 | "text": "I am experiencing a frustrating issue with my Python code where the variables keep returning undefined values and the loops aren't terminating properly. I need to debug this to find the root cause.", 8 | "label": "debugging Python code with undefined variables and non-terminating loops", 9 | }, 10 | { 11 | "text": "In a display of remarkable agility, the swift orange cat gracefully propelled itself over the sleeping brown dog, who remained completely undisturbed by this acrobatic feat.", 12 | "label": "agile orange cat jumping over a sleeping brown dog", 13 | }, 14 | { 15 | "text": "The LCD display on my desktop computer has been exhibiting concerning behavior lately - the screen keeps flickering intermittently and displaying random artifacts, making it very difficult to work.", 16 | "label": "LCD monitor displaying intermittent flickering and artifacts", 17 | }, 18 | ] 19 | 20 | 21 | @pytest.fixture 22 | def summarizer(): 23 | def _summarize(text: str) -> str: 24 | """Simple LLM agent that summarizes text""" 25 | res = openai.chat.completions.create( 26 | model="gpt-4o-mini", 27 | messages=[ 28 | { 29 | "role": "system", 30 | "content": "Write a concise summary of the text.", 31 | }, 32 | {"role": "user", "content": text}, 33 | ], 34 | ) 35 | return res.choices[0].message.content # type: ignore 36 | 37 | return _summarize 38 | 39 | 40 | @pytest.fixture 41 | def judge(): 42 | def _judge(text, summary, main_subject) -> bool: 43 | """LLM as a judge that determines if the summary is about the main subject""" 44 | resp = openai.chat.completions.create( 45 | model="gpt-4o", 46 | messages=[ 47 | { 48 | "role": "system", 49 | "content": "Decide whether the summary is about the main subject. 
" 50 | "Reply ONLY with either true or false.", 51 | }, 52 | { 53 | "role": "user", 54 | "content": f"Original Text: {text}\nSummary: {summary}\nMain Subject: {main_subject}", 55 | }, 56 | ], 57 | ) 58 | return resp.choices[0].message.content.lower() == "true" # type: ignore 59 | 60 | return _judge 61 | 62 | 63 | @pytest.mark.eval(name="summary") 64 | @pytest.mark.parametrize("case", TEST_DATA) 65 | def test_classifier(case: dict, eval_bag, summarizer, judge) -> None: 66 | # Store input and prediction for analysis 67 | eval_bag.input_text = case["text"] 68 | eval_bag.label = case["label"] # the label is the main subject of the text 69 | eval_bag.prediction = summarizer(case["text"]) 70 | eval_bag.judgement = judge(eval_bag.input_text, eval_bag.prediction, eval_bag.label) 71 | 72 | # Basic assertion 73 | assert eval_bag.judgement # Assert that the summary is about the main subject 74 | 75 | 76 | @pytest.mark.eval_analysis(name="summary") 77 | def test_analysis(eval_results): 78 | # Calculate various metrics 79 | total_samples = len(eval_results) 80 | 81 | # Subject relevance (based on judge's assessment) 82 | relevant_summaries = sum(1 for r in eval_results if r.result.judgement) 83 | subject_accuracy = relevant_summaries / total_samples if total_samples else 0 84 | 85 | # Length analysis 86 | avg_summary_length = ( 87 | sum(len(r.result.prediction.split()) for r in eval_results) / total_samples 88 | if total_samples 89 | else 0 90 | ) 91 | avg_input_length = ( 92 | sum(len(r.result.input_text.split()) for r in eval_results) / total_samples 93 | if total_samples 94 | else 0 95 | ) 96 | compression_ratio = avg_summary_length / avg_input_length if avg_input_length else 0 97 | 98 | # Print metrics 99 | print(f"Subject Accuracy: {subject_accuracy:.2%}") 100 | print(f"Average Summary Length: {avg_summary_length:.1f} words") 101 | print(f"Average Input Length: {avg_input_length:.1f} words") 102 | print(f"Compression Ratio: {compression_ratio:.2f}") 103 | 104 | # Basic quality assertions 105 | assert subject_accuracy >= 0.7, "Subject accuracy below threshold" 106 | assert 0.2 <= compression_ratio <= 0.8, "Compression ratio outside acceptable range" 107 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to pytest-evals 2 | 3 | Thank you for considering contributing to `pytest-evals`! 🎉 4 | 5 | Whether you're reporting bugs, improving docs, or suggesting features - every contribution matters and helps make 6 | testing better for the Python community. No contribution is too small, and we're excited to help you get started! 7 | 8 | ## Show Us How You Use It! 9 | 10 | Share your experiences! Whether it's evaluation patterns, example notebooks, or testing approaches - your real-world 11 | usage helps others get started. Even a simple write-up of how you use pytest-evals makes a difference! 🚀 12 | 13 | ## Prerequisites 14 | 15 | - Python 3.9 or higher ([python.org/downloads](https://www.python.org/downloads/)) 16 | - [uv](https://github.com/astral/uv) for Python package and environment management 17 | - [pre-commit](https://pre-commit.com/) for git hooks management 18 | 19 | ## Development Setup 20 | 21 | 1. Clone your fork: 22 | ```bash 23 | git clone git@github.com:AlmogBaku/pytest-evals.git 24 | ``` 25 | 26 | 2. 
Set up development environment: 27 | ```bash 28 | # Install all dependencies including dev extras 29 | uv sync --all-extras --dev 30 | 31 | # Install pre-commit hooks 32 | pre-commit install 33 | ``` 34 | 35 | ## Before Submitting a PR 36 | 37 | 1. Run pre-commit hooks: 38 | ```bash 39 | pre-commit run --all-files 40 | ``` 41 | 42 | 2. Run tests with coverage: 43 | ```bash 44 | coverage run --source=pytest_evals -m pytest 45 | coverage report 46 | ``` 47 | 48 | ## Testing Guidelines 49 | 50 | We value testing to keep pytest-evals reliable and maintainable. When adding new features or fixing bugs: 51 | 52 | - Include tests that cover the new functionality or reproduce the bug 53 | - Aim for clear, readable test cases that help document behavior 54 | - Consider edge cases and error conditions 55 | - Use the existing test suite as a guide for style and structure 56 | 57 | If you need help with testing, feel free to ask in your PR - we're here to help! 58 | 59 | To run the test suite: 60 | 61 | ```bash 62 | # Run tests with coverage reporting 63 | coverage run --source=pytest_evals -m pytest 64 | coverage report 65 | ``` 66 | 67 | Remember: if you're adding new functionality, including tests helps everyone understand how your code works and ensures 68 | it keeps working as the project evolves. If you're stuck with testing, don't hesitate to ask for help in your PR - we're 69 | here to help! 70 | 71 | ## PR Process 72 | 73 | Individual commits should not be tagged separately, but will generally be assumed to match the PR. For instance, if you 74 | have a bugfix along with a breaking change, it's generally encouraged to submit the bugfix separately, but if you must put 75 | them in one PR, mark the commits separately. 76 | 77 | ### Commit Message Format 78 | 79 | We are using [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) to standardize our commit messages. 80 | This allows us to automatically generate changelogs and release notes, to create a more readable git history and to 81 | automatically trigger semantic versioning. 82 | 83 | Please make sure to follow this format when writing commit messages and PR titles: 84 | 85 | ``` 86 | <type>(<scope>): <summary> 87 | │ │ │ 88 | │ │ └─⫸ Summary in present tense 89 | │ │ 90 | │ └─⫸ [optional] Commit Scope: ipython, eval, analysis, etc. 91 | │ 92 | └─⫸ Commit Type: build|ci|docs|feat|fix|perf|refactor|test 93 | ``` 94 | 95 | We support the following types: 96 | 97 | | Type | Description | 98 | |------------|-----------------------------------------------------------------------| 99 | | `feat` | A new feature (correlates with `MINOR` in semantic versioning) | 100 | | `fix` | A bug fix | 101 | | `docs` | Documentation only changes | 102 | | `style` | Changes that do not affect code meaning (whitespace, formatting, etc) | 103 | | `refactor` | Code change that neither fixes a bug nor adds a feature | 104 | | `perf` | Code change that improves performance | 105 | | `test` | Adding or correcting tests | 106 | | `build` | Changes affecting build system or dependencies | 107 | | `ci` | Changes to CI configuration | 108 | | `chore` | Other changes that don't modify src or test files | 109 | 110 | Examples: 111 | 112 | ``` 113 | fix: correct metric calculation in eval_results 114 | feat(core): add support for parallel evaluation runs 115 | refactor!: change the evaluation API 116 | docs(readme): clarify usage instructions 117 | ``` 118 | 119 | ### Breaking changes 120 | 121 | Breaking changes should be marked with a `!` after the type/scope.
This will trigger a `MAJOR` version bump when the 122 | commit is merged. For example: 123 | 124 | ``` 125 | refactor!: change the evaluation API 126 | ``` 127 | 128 | Breaking changes should be avoided if possible. When necessary, they must be properly documented in the PR description 129 | with: 130 | 131 | - What changed 132 | - Why it was necessary 133 | - Migration instructions for users 134 | 135 | ## Where the CI Tests are configured 136 | 137 | Check the [GitHub Actions workflows](.github/workflows) directory, particularly: 138 | 139 | - `test.yaml` for the main test suite 140 | - `publish.yaml` for the release process 141 | - `pr-triage.yaml` for PR automation 142 | 143 | ## Code of conduct 144 | 145 | Participation in the pytest-evals community is governed by 146 | the [Python Community Code of Conduct](https://www.python.org/psf/conduct/). -------------------------------------------------------------------------------- /tests/show_coverage.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a utility to format the coverage report in markdown format - useful for working with LLMs. 3 | 4 | To use: 5 | ```console 6 | pytest --junitxml=- --cov=./ --cov-report=xml | python tests/show_coverage.py 7 | coverage run --source=pytest_evals -m pytest tests/ && coverage xml && python tests/show_coverage.py 8 | ``` 9 | """ 10 | 11 | import xml.etree.ElementTree as ET 12 | from collections import defaultdict 13 | from dataclasses import dataclass 14 | from typing import List, Tuple 15 | 16 | 17 | @dataclass 18 | class CoverageCase: 19 | """Represents a continuous block of uncovered code""" 20 | 21 | start_line: int 22 | end_line: int 23 | code_lines: List[str] 24 | context: str = "" # Can be used to store function/class name 25 | 26 | 27 | class CoverageReport: 28 | def __init__(self, coverage_xml: str = "coverage.xml"): 29 | self.coverage_xml = coverage_xml 30 | self.files_with_uncovered = 0 31 | self.total_uncovered_lines = 0 32 | 33 | def _group_continuous_lines( 34 | self, lines: List[Tuple[int, str]] 35 | ) -> List[CoverageCase]: 36 | """Group continuous line numbers into cases""" 37 | if not lines: 38 | return [] 39 | 40 | cases = [] 41 | current_case = None 42 | 43 | for line_num, code in lines: 44 | if current_case is None: 45 | current_case = CoverageCase(line_num, line_num, [code]) 46 | elif line_num == current_case.end_line + 1: 47 | current_case.end_line = line_num 48 | current_case.code_lines.append(code) 49 | else: 50 | cases.append(current_case) 51 | current_case = CoverageCase(line_num, line_num, [code]) 52 | 53 | if current_case: 54 | cases.append(current_case) 55 | 56 | return cases 57 | 58 | def _detect_context(self, lines: List[str], start_line: int) -> str: 59 | """Try to detect the context (function/class) for a block of code""" 60 | # Look up to 5 lines before the uncovered block for context 61 | context_range = range(max(0, start_line - 5), start_line) 62 | for i in reversed(context_range): 63 | line = lines[i].strip() 64 | if line.startswith("def ") or line.startswith("class "): 65 | return line.split("(")[0].strip() 66 | return "" 67 | 68 | def format_markdown(self) -> str: 69 | """Format the coverage report in markdown with grouped cases""" 70 | try: 71 | root = ET.parse(self.coverage_xml).getroot() 72 | output = ["# Coverage Report\n"] 73 | 74 | files_report = defaultdict(list) 75 | 76 | for class_elem in root.findall(".//class"): 77 | filename = class_elem.attrib["filename"] 78 | 79 | try: 80 | with 
open(filename, "r") as f: 81 | file_lines = f.readlines() 82 | 83 | # Get uncovered lines with their code 84 | uncovered_lines = [ 85 | ( 86 | int(line.attrib["number"]), 87 | file_lines[int(line.attrib["number"]) - 1].rstrip(), 88 | ) 89 | for line in class_elem.findall('./lines/line[@hits="0"]') 90 | if file_lines[int(line.attrib["number"]) - 1].strip() 91 | ] 92 | 93 | if uncovered_lines: 94 | self.files_with_uncovered += 1 95 | self.total_uncovered_lines += len(uncovered_lines) 96 | 97 | # Group into cases 98 | cases = self._group_continuous_lines(uncovered_lines) 99 | 100 | # Add context to each case 101 | for case in cases: 102 | case.context = self._detect_context( 103 | file_lines, case.start_line 104 | ) 105 | 106 | files_report[filename].extend(cases) 107 | 108 | except FileNotFoundError: 109 | output.append(f"⚠️ Could not find source file: {filename}\n") 110 | 111 | # Format the report 112 | for filename, cases in files_report.items(): 113 | output.append(f"## {filename}\n") 114 | 115 | for i, case in enumerate(cases, 1): 116 | context = f" ({case.context})" if case.context else "" 117 | output.append(f"### Case {i}{context}\n") 118 | 119 | if case.start_line == case.end_line: 120 | output.append(f"Line {case.start_line}:\n") 121 | else: 122 | output.append(f"Lines {case.start_line}-{case.end_line}:\n") 123 | 124 | output.append("```python") 125 | for line_num, code in zip( 126 | range(case.start_line, case.end_line + 1), case.code_lines 127 | ): 128 | output.append(f"{line_num}: {code}") 129 | output.append("```\n") 130 | 131 | # Add summary 132 | output.extend( 133 | [ 134 | "## Summary\n", 135 | f"- Files with uncovered lines: {self.files_with_uncovered}", 136 | f"- Total uncovered lines: {self.total_uncovered_lines}", 137 | f"- Total cases: {sum(len(cases) for cases in files_report.values())}", 138 | ] 139 | ) 140 | 141 | return "\n".join(output) 142 | 143 | except FileNotFoundError: 144 | return "❌ Error: coverage.xml not found. Run coverage xml first." 
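        # Any other failure while building the report is caught below and returned as the report text instead of being raised.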
145 | except Exception as e: 146 | return f"❌ Error: {str(e)}" 147 | 148 | 149 | def main(): 150 | report = CoverageReport() 151 | print(report.format_markdown()) 152 | 153 | 154 | if __name__ == "__main__": 155 | main() 156 | -------------------------------------------------------------------------------- /tests/test_json_encoder.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | from dataclasses import dataclass 4 | from enum import Enum 5 | from unittest.mock import patch 6 | 7 | import pandas as pd 8 | from pydantic import BaseModel 9 | 10 | from pytest_evals.json_encoder import AdvancedJsonEncoder 11 | 12 | 13 | # Test structures 14 | @dataclass 15 | class Person: 16 | name: str 17 | age: int 18 | 19 | 20 | class Color(Enum): 21 | RED = "red" 22 | BLUE = "blue" 23 | 24 | 25 | class User(BaseModel): 26 | name: str 27 | age: int 28 | 29 | 30 | def test_advanced_json_encoder(): 31 | """Test all AdvancedJsonEncoder functionality""" 32 | # Setup test data 33 | person = Person(name="John", age=30) 34 | data = { 35 | "person": person, 36 | "color": Color.RED, 37 | "basic": {"num": 42, "list": [1, 2]}, 38 | } 39 | 40 | # Test encoding and decoding 41 | encoded = json.dumps(data, cls=AdvancedJsonEncoder) 42 | decoded = json.loads(encoded) 43 | 44 | # Verify results 45 | assert decoded["person"] == {"name": "John", "age": 30} 46 | assert decoded["color"] == "red" 47 | assert decoded["basic"] == {"num": 42, "list": [1, 2]} 48 | 49 | 50 | def test_pydantic_encoding(): 51 | """Test Pydantic model encoding""" 52 | user = User(name="John", age=30) 53 | encoded = json.dumps(user, cls=AdvancedJsonEncoder) 54 | assert json.loads(encoded) == {"name": "John", "age": 30} 55 | 56 | 57 | def test_function_encoding(): 58 | """Test error on unsupported type""" 59 | assert ( 60 | json.dumps(lambda x: x, cls=AdvancedJsonEncoder) 61 | == '">"' 62 | ) 63 | 64 | 65 | def test_dataframe_encoding(): 66 | """Test DataFrame encoding""" 67 | assert ( 68 | json.dumps(pd.DataFrame([{"field": "value"}]), cls=AdvancedJsonEncoder) 69 | == '[{"field": "value"}]' 70 | ) 71 | 72 | 73 | def test_series_encoding(): 74 | """Test Series encoding""" 75 | assert ( 76 | json.dumps(pd.Series([1, 2, 3]), cls=AdvancedJsonEncoder) 77 | == '{"0": 1, "1": 2, "2": 3}' 78 | ) 79 | 80 | 81 | def test_none_encoding(): 82 | """Test None type encoding""" 83 | data = {"null_value": None} 84 | encoded = json.dumps(data, cls=AdvancedJsonEncoder) 85 | assert json.loads(encoded) == {"null_value": None} 86 | 87 | 88 | def test_unsupported_type_fallback(): 89 | """Test fallback to default encoder for unsupported types""" 90 | 91 | class UnsupportedType: 92 | pass 93 | 94 | assert ".UnsupportedType object" in json.dumps( 95 | UnsupportedType(), cls=AdvancedJsonEncoder 96 | ) 97 | 98 | 99 | # Test for json_encoder.py ImportError case 100 | def test_pydantic_import_error(): 101 | with patch.dict(sys.modules, {"pydantic": None}): 102 | # Force reload of the module to trigger ImportError 103 | import importlib 104 | import pytest_evals.json_encoder 105 | 106 | importlib.reload(pytest_evals.json_encoder) 107 | 108 | assert not pytest_evals.json_encoder.HAVE_PYDANTIC 109 | assert pytest_evals.json_encoder.BaseModel is type(None) 110 | 111 | 112 | def test_pandas_import_error(): 113 | """Test the JSON encoder when pandas is not available""" 114 | with patch.dict(sys.modules, {"pandas": None}): 115 | # Force reload of the module to trigger ImportError 116 | import importlib 117 | import 
pytest_evals.json_encoder 118 | 119 | importlib.reload(pytest_evals.json_encoder) 120 | 121 | # Verify pandas-related flags and functions 122 | assert not pytest_evals.json_encoder.HAVE_PANDAS 123 | 124 | # Test is_series function 125 | class MockObject: 126 | pass 127 | 128 | mock_obj = MockObject() 129 | assert not pytest_evals.json_encoder.is_series(mock_obj) 130 | 131 | # Test is_dataframe function 132 | assert not pytest_evals.json_encoder.is_dataframe(mock_obj) 133 | 134 | 135 | def test_none_type_variations(): 136 | """Test different scenarios involving None type""" 137 | # Test None in different contexts 138 | test_cases = [ 139 | {"direct_none": None}, 140 | {"nested_none": {"key": None}}, 141 | {"none_in_list": [1, None, 3]}, 142 | {"multiple_nones": [None, None]}, 143 | None, 144 | ] 145 | 146 | for case in test_cases: 147 | encoded = json.dumps(case, cls=AdvancedJsonEncoder) 148 | decoded = json.loads(encoded) 149 | assert decoded == case 150 | 151 | 152 | def test_mixed_none_with_other_types(): 153 | """Test None combined with other supported types""" 154 | 155 | @dataclass 156 | class DataWithNone: 157 | value: None 158 | name: str 159 | 160 | data = DataWithNone(value=None, name="test") 161 | encoded = json.dumps(data, cls=AdvancedJsonEncoder) 162 | decoded = json.loads(encoded) 163 | 164 | assert decoded == {"value": None, "name": "test"} 165 | 166 | # Test with enum 167 | class StatusEnum(Enum): 168 | NONE = None 169 | ACTIVE = "active" 170 | 171 | data = {"status": StatusEnum.NONE} 172 | encoded = json.dumps(data, cls=AdvancedJsonEncoder) 173 | decoded = json.loads(encoded) 174 | 175 | assert decoded == {"status": None} 176 | 177 | 178 | def test_explicit_none_handling(): 179 | """Test the explicit None handling in the default method of AdvancedJsonEncoder""" 180 | 181 | class CustomNone: 182 | """A custom class that returns None from its default encoding""" 183 | 184 | def __repr__(self): 185 | return "None" 186 | 187 | # Create an instance and encode it directly to trigger the default method 188 | encoder = AdvancedJsonEncoder() 189 | result = encoder.default( 190 | type(None)() 191 | ) # This explicitly calls default() with None type 192 | 193 | assert result is None 194 | 195 | # Test in context 196 | data = {"null_value": type(None)()} 197 | encoded = json.dumps(data, cls=AdvancedJsonEncoder) 198 | decoded = json.loads(encoded) 199 | 200 | assert decoded == {"null_value": None} 201 | 202 | 203 | def test_callable_encoding_edge_cases(): 204 | """Test various edge cases in callable encoding""" 205 | 206 | def simple_callable(): 207 | pass 208 | 209 | encoded = json.dumps(simple_callable, cls=AdvancedJsonEncoder) 210 | assert '""' == encoded 211 | 212 | # Test case for when o.__module__ exists but o.__name__ raises an exception 213 | class ComplexCallable: 214 | def __call__(self, *args, **kwargs): 215 | pass 216 | 217 | complex_callable = ComplexCallable() 218 | encoded = json.dumps(complex_callable, cls=AdvancedJsonEncoder) 219 | assert '""' == encoded 220 | -------------------------------------------------------------------------------- /example/example_notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "367182cdbb25b3e5", 6 | "metadata": {}, 7 | "source": [ 8 | "# Computer Topic Classifier Evaluation\n", 9 | "\n", 10 | "This notebook demonstrates how to evaluate a simple LLM-based classifier that determines whether a piece of text is about computer-related 
topics or not. We use `pytest-evals` to run our evaluation and analyze the results.\n", 11 | "\n", 12 | "## Setup\n", 13 | "First, we'll load the required extensions and import necessary libraries." 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "id": "ca67ed74e03caa26", 19 | "metadata": { 20 | "ExecuteTime": { 21 | "end_time": "2025-01-21T08:33:23.260489Z", 22 | "start_time": "2025-01-21T08:33:23.177044Z" 23 | } 24 | }, 25 | "source": [ 26 | "%load_ext pytest_evals" 27 | ], 28 | "outputs": [], 29 | "execution_count": 1 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "id": "aa0e586aef4e19e0", 34 | "metadata": {}, 35 | "source": [ 36 | "## Classifier Implementation\n", 37 | "\n", 38 | "Below is our classifier implementation that uses GPT-4 to determine if text is computer-related. The classifier returns a boolean value:\n", 39 | "- `True`: Text is computer-related\n", 40 | "- `False`: Text is not computer-related" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "id": "c957edc789a1fda2", 46 | "metadata": { 47 | "ExecuteTime": { 48 | "end_time": "2025-01-21T08:33:24.345743Z", 49 | "start_time": "2025-01-21T08:33:24.129214Z" 50 | } 51 | }, 52 | "source": [ 53 | "import openai\n", 54 | "\n", 55 | "\n", 56 | "def classify(text: str) -> bool:\n", 57 | " \"\"\"Classify text as computer-related or not using GPT-4.\n", 58 | "\n", 59 | " Args:\n", 60 | " text (str): The input text to classify\n", 61 | "\n", 62 | " Returns:\n", 63 | " bool: True if the text is computer-related, False otherwise\n", 64 | " \"\"\"\n", 65 | " resp = openai.chat.completions.create(\n", 66 | " model=\"gpt-4o-mini\",\n", 67 | " messages=[\n", 68 | " {\n", 69 | " \"role\": \"system\",\n", 70 | " \"content\": \"Is this text about a computer-related subject? \"\n", 71 | " \"Reply ONLY with either true or false.\",\n", 72 | " },\n", 73 | " {\"role\": \"user\", \"content\": text},\n", 74 | " ],\n", 75 | " )\n", 76 | " return resp.choices[0].message.content.lower() == \"true\"" 77 | ], 78 | "outputs": [], 79 | "execution_count": 2 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "id": "132870eda457b817", 84 | "metadata": {}, 85 | "source": [ 86 | "## Test Data\n", 87 | "\n", 88 | "We define a set of test cases to evaluate our classifier. Each test case contains:\n", 89 | "- `text`: The input text to classify\n", 90 | "- `label`: The expected classification (True for computer-related, False otherwise)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "id": "f071596bb3a7d1d5", 96 | "metadata": { 97 | "ExecuteTime": { 98 | "end_time": "2025-01-21T08:33:25.289251Z", 99 | "start_time": "2025-01-21T08:33:25.286883Z" 100 | } 101 | }, 102 | "source": [ 103 | "TEST_DATA = [\n", 104 | " {\"text\": \"I need to debug this Python code\", \"label\": True},\n", 105 | " {\"text\": \"The cat jumped over the lazy dog\", \"label\": False},\n", 106 | " {\"text\": \"My monitor keeps flickering\", \"label\": True},\n", 107 | "]" 108 | ], 109 | "outputs": [], 110 | "execution_count": 3 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "id": "3f5ff914f7ba5295", 115 | "metadata": {}, 116 | "source": [ 117 | "## Evaluation Tests\n", 118 | "\n", 119 | "We use pytest-evals to:\n", 120 | "1. Run individual test cases and collect results\n", 121 | "2. 
Analyze the overall performance of our classifier\n", 122 | "\n", 123 | "The evaluation requires:\n", 124 | "- Accuracy >= 70%\n", 125 | "- All test cases must match their expected labels" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "id": "4caa983f934e9d8f", 131 | "metadata": { 132 | "ExecuteTime": { 133 | "end_time": "2025-01-21T08:33:30.840853Z", 134 | "start_time": "2025-01-21T08:33:29.078515Z" 135 | } 136 | }, 137 | "source": [ 138 | "%%ipytest_evals\n", 139 | "import pytest\n", 140 | "\n", 141 | "@pytest.mark.eval(name=\"computer_classifier\")\n", 142 | "@pytest.mark.parametrize(\"case\", TEST_DATA)\n", 143 | "def test_classifier(case: dict, eval_bag):\n", 144 | " \"\"\"Test individual classification cases.\n", 145 | " \n", 146 | " Args:\n", 147 | " case (dict): Test case containing text and expected label\n", 148 | " eval_bag: Container for test results\n", 149 | " \"\"\"\n", 150 | " # Store inputs and results in eval_bag for analysis\n", 151 | " eval_bag.input_text = case[\"text\"]\n", 152 | " eval_bag.label = case[\"label\"]\n", 153 | " eval_bag.prediction = classify(case[\"text\"])\n", 154 | "\n", 155 | " # Log results for visibility\n", 156 | " print(f\"Input: {eval_bag.input_text}\")\n", 157 | " print(f\"Prediction: {eval_bag.prediction}\")\n", 158 | "\n", 159 | " assert eval_bag.prediction == eval_bag.label\n", 160 | "\n", 161 | "\n", 162 | "@pytest.mark.eval_analysis(name=\"computer_classifier\")\n", 163 | "def test_analysis(eval_results):\n", 164 | " \"\"\"Analyze overall classifier performance.\n", 165 | " \n", 166 | " Args:\n", 167 | " eval_results: Collection of all test results\n", 168 | " \"\"\"\n", 169 | " total = len(eval_results)\n", 170 | " correct = sum(1 for r in eval_results if r.result.prediction == r.result.label)\n", 171 | " accuracy = correct / total\n", 172 | "\n", 173 | " print(f\"Accuracy: {accuracy:.2%}\")\n", 174 | " assert accuracy >= 0.7 # Require at least 70% accuracy" 175 | ], 176 | "outputs": [ 177 | { 178 | "name": "stdout", 179 | "output_type": "stream", 180 | "text": [ 181 | "\n", 182 | "t_fe596c0d68894784969f18775cec634a.py::test_classifier[case0] Input: I need to debug this Python code\n", 183 | "Prediction: True\n", 184 | "\u001B[32mPASSED\u001B[0m\n", 185 | "t_fe596c0d68894784969f18775cec634a.py::test_classifier[case1] Input: The cat jumped over the lazy dog\n", 186 | "Prediction: False\n", 187 | "\u001B[32mPASSED\u001B[0m\n", 188 | "t_fe596c0d68894784969f18775cec634a.py::test_classifier[case2] Input: My monitor keeps flickering\n", 189 | "Prediction: True\n", 190 | "\u001B[32mPASSED\u001B[0m\n", 191 | "t_fe596c0d68894784969f18775cec634a.py::test_analysis Accuracy: 100.00%\n", 192 | "\u001B[32mPASSED\u001B[0m\n", 193 | "\n", 194 | "\u001B[32m======================================== \u001B[32m\u001B[1m4 passed\u001B[0m\u001B[32m in 1.64s\u001B[0m\u001B[32m =========================================\u001B[0m\n" 195 | ] 196 | } 197 | ], 198 | "execution_count": 4 199 | } 200 | ], 201 | "metadata": { 202 | "kernelspec": { 203 | "display_name": "Python 3", 204 | "language": "python", 205 | "name": "python3" 206 | }, 207 | "language_info": { 208 | "codemirror_mode": { 209 | "name": "ipython", 210 | "version": 2 211 | }, 212 | "file_extension": ".py", 213 | "mimetype": "text/x-python", 214 | "name": "python", 215 | "nbconvert_exporter": "python", 216 | "pygments_lexer": "ipython2", 217 | "version": "2.7.6" 218 | } 219 | }, 220 | "nbformat": 4, 221 | "nbformat_minor": 5 222 | } 223 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | # `pytest-evals` 🚀 4 | 5 | Test your LLM outputs against examples - no more manual checking! A (minimalistic) pytest plugin that helps you 6 | evaluate whether your LLM is giving good answers. 7 | 8 | [![PyPI version](https://img.shields.io/pypi/v/pytest-evals.svg)](https://pypi.org/p/pytest-evals) 9 | [![License](https://img.shields.io/github/license/AlmogBaku/pytest-evals.svg)](https://github.com/AlmogBaku/pytest-evals/blob/main/LICENSE) 10 | [![Issues](https://img.shields.io/github/issues/AlmogBaku/pytest-evals.svg)](https://github.com/AlmogBaku/pytest-evals/issues) 11 | [![Stars](https://img.shields.io/github/stars/AlmogBaku/pytest-evals.svg)](https://github.com/AlmogBaku/pytest-evals/stargazers) 12 | 13 | # 🧐 Why pytest-evals? 14 | 15 | Building LLM applications is exciting, but how do you know they're actually working well? `pytest-evals` helps you: 16 | 17 | - 🎯 **Test & Evaluate:** Run your LLM prompt against many cases 18 | - 📈 **Track & Measure:** Collect metrics and analyze the overall performance 19 | - 🔄 **Integrate Easily:** Works with pytest, Jupyter notebooks, and CI/CD pipelines 20 | - ✨ **Scale Up:** Run tests in parallel with [`pytest-xdist`](https://pytest-xdist.readthedocs.io/) and 21 | asynchronously with [`pytest-asyncio`](https://pytest-asyncio.readthedocs.io/). 22 | 23 | # 🚀 Getting Started 24 | 25 | To get started, install `pytest-evals` and write your tests: 26 | 27 | ```bash 28 | pip install pytest-evals 29 | ``` 30 | 31 | #### ⚡️ Quick Example 32 | 33 | For example, say you're building a support ticket classifier. You want to test cases like: 34 | 35 | | Input Text | Expected Classification | 36 | |--------------------------------------------------------|-------------------------| 37 | | My login isn't working and I need to access my account | account_access | 38 | | Can I get a refund for my last order? | billing | 39 | | How do I change my notification settings? | settings | 40 | 41 | `pytest-evals` helps you automatically test how your LLM performs against these cases, track accuracy, and ensure it 42 | keeps working as expected over time. The `classifier` argument in the example is an ordinary pytest fixture you define yourself (see "Where does `classifier` come from?" below). 43 | 44 | ```python 45 | # Predict the LLM performance for each case 46 | @pytest.mark.eval(name="my_classifier") 47 | @pytest.mark.parametrize("case", TEST_DATA) 48 | def test_classifier(case: dict, eval_bag, classifier): 49 | # Run predictions and store results 50 | eval_bag.prediction = classifier(case["Input Text"]) 51 | eval_bag.expected = case["Expected Classification"] 52 | eval_bag.accuracy = eval_bag.prediction == eval_bag.expected 53 | 54 | 55 | # Now let's see how our app is performing across all cases... 56 | @pytest.mark.eval_analysis(name="my_classifier") 57 | def test_analysis(eval_results): 58 | accuracy = sum(result.result.accuracy for result in eval_results) / len(eval_results) 59 | print(f"Accuracy: {accuracy:.2%}") 60 | assert accuracy >= 0.7 # Ensure our performance is not degrading 🫢 61 | ``` 62 | 63 | Then, run your evaluation tests: 64 | 65 | ```bash 66 | # Run test cases 67 | pytest --run-eval 68 | 69 | # Analyze results 70 | pytest --run-eval-analysis 71 | ``` 72 | 73 | ## 😵‍💫 Why Another Eval Tool? 74 | 75 | **Evaluations are just tests.** No need for complex frameworks or DSLs. `pytest-evals` is minimalistic by design: 76 | 77 | - Use `pytest` - the tool you already know 78 | - Keep tests and evaluations together 79 | - Focus on logic, not infrastructure 80 | 81 | It just collects your results and lets you analyze them as a whole. Nothing more, nothing less. 82 |
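**Where does `classifier` come from?** It isn't provided by `pytest-evals` - it's a regular pytest fixture you define yourself (typically in `conftest.py`) to wrap your model call. The snippet below is a minimal sketch of such a fixture; the prompt, model name, and label set are illustrative assumptions, not part of the plugin.

```python
# conftest.py - a minimal sketch of a `classifier` fixture (illustrative only).
# The prompt, model name, and label set are assumptions - adapt them to your app.
import openai
import pytest


@pytest.fixture
def classifier():
    def _classify(text: str) -> str:
        resp = openai.chat.completions.create(
            model="gpt-4o-mini",  # assumed model; use whatever your app uses
            messages=[
                {
                    "role": "system",
                    "content": "Classify this support ticket as one of: "
                    "account_access, billing, settings. Reply ONLY with the label.",
                },
                {"role": "user", "content": text},
            ],
        )
        return resp.choices[0].message.content.strip()

    return _classify
```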

(back to top)

83 | 84 | # 📚 User Guide 85 | 86 | Check out our detailed guides and examples: 87 | 88 | - [Basic evaluation](example/example_test.py) 89 | - [Basics of LLM-as-a-judge evaluation](example/example_judge_test.py) 90 | - [Notebook example](example/example_notebook.ipynb) 91 | - [Advanced notebook example](example/example_notebook_advanced.ipynb) 92 | 93 | ## 🤔 How It Works 94 | 95 | Built on top of [pytest-harvest](https://smarie.github.io/python-pytest-harvest/), `pytest-evals` splits evaluation into 96 | two phases: 97 | 98 | 1. **Evaluation Phase**: Run all test cases, collecting results and metrics in `eval_bag`. The results are saved in a 99 | temporary file to allow the analysis phase to access them. 100 | 2. **Analysis Phase**: Process all results at once through `eval_results` to calculate final metrics. 101 | 102 | This split allows you to: 103 | 104 | - Run evaluations in parallel (since the analysis test MUST run after all cases are done, we must run them separately) 105 | - Make pass/fail decisions on the overall evaluation results instead of individual test failures (by passing the 106 | `--supress-failed-exit-code --run-eval` flags) 107 | - Collect comprehensive metrics 108 | 109 | **Note**: When running evaluation tests, the rest of your test suite will not run. This is by design to keep the results 110 | clean and focused. 111 | 112 | ## 💾 Saving case results 113 | By default, `pytest-evals` saves the results of each case in a JSON file so the analysis phase can access them. 114 | However, this is not the friendliest format for deeper analysis. To also save the results as a CSV file (which you can 115 | load back with `pandas` - see the snippet at the end of this guide), use the `--save-evals-csv` flag: 116 | 117 | ```bash 118 | pytest --run-eval --save-evals-csv 119 | ``` 120 | 121 | ## 📝 Working with a notebook 122 | 123 | It's also possible to run evaluations from a notebook. To do that, simply 124 | install [ipytest](https://github.com/chmp/ipytest), and load the extension: 125 | 126 | ```python 127 | %load_ext pytest_evals 128 | ``` 129 | 130 | Then, use the `%%ipytest_evals` magic command in your cell to run evaluations. This will run the evaluation phase and 131 | then the analysis phase. By default, using this magic will run both `--run-eval` and `--run-eval-analysis`, but you can 132 | specify your own flags by passing arguments right after the magic command (e.g., `%%ipytest_evals --run-eval`). 133 | 134 | ```python 135 | %%ipytest_evals 136 | import pytest 137 | 138 | @pytest.mark.eval(name="my_eval") 139 | @pytest.mark.parametrize("case", TEST_DATA) 140 | def test_agent(case, eval_bag): 141 | eval_bag.prediction = agent.run(case["input"]) 142 | 143 | 144 | @pytest.mark.eval_analysis(name="my_eval") 145 | def test_analysis(eval_results): 146 | print(f"F1 Score: {calculate_f1(eval_results):.2%}") 147 | ``` 148 | 149 | You can see an example of this in the [`example/example_notebook.ipynb`](example/example_notebook.ipynb) notebook. Or 150 | look at the [advanced example](example/example_notebook_advanced.ipynb) for a more complex example that tracks multiple 151 | experiments. 152 |
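When you pass `--save-evals-csv`, the flattened per-case results are written to `eval-results-raw.csv` under the output path (`./test-out/` by default, configurable with `--out-path`), next to the raw JSON. Here is a minimal sketch of loading it back for deeper analysis; the `eval_bag.*` columns mirror whatever attributes your tests stored on `eval_bag`, so the prediction/expected column names below are assumptions:

```python
# A minimal sketch: load the per-case CSV back into pandas for deeper analysis.
# Columns such as "eval_bag.prediction" / "eval_bag.expected" only exist if your
# tests stored those attributes on eval_bag - adjust to your own field names.
import pandas as pd

df = pd.read_csv("test-out/eval-results-raw.csv", index_col="test_id")

print(df[["eval_name", "status", "duration_ms"]].head())
accuracy = (df["eval_bag.prediction"] == df["eval_bag.expected"]).mean()
print(f"Accuracy: {accuracy:.2%}")
```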

(back to top)

153 | 154 | ## 🏗️ Production Use 155 | 156 | ### 📚 Managing Test Data (Evaluation Set) 157 | 158 | It's recommended to use a CSV file to store test data. This makes it easier to manage large datasets and lets you 159 | share the cases with non-technical stakeholders. 160 | 161 | To do this, you can use `pandas` to read the CSV file and pass the test cases as parameters to your tests using 162 | `@pytest.mark.parametrize` 🙃: 163 | 164 | ```python 165 | import pandas as pd 166 | import pytest 167 | 168 | test_data = pd.read_csv("tests/testdata.csv") 169 | 170 | 171 | @pytest.mark.eval(name="my_eval") 172 | @pytest.mark.parametrize("case", test_data.to_dict(orient="records")) 173 | def test_agent(case, eval_bag, agent): 174 | eval_bag.prediction = agent.run(case["input"]) 175 | ``` 176 | 177 | If you need to select a subset of the test data (e.g., a golden set), you can define an environment variable 178 | to indicate that and filter the data with `pandas` (a minimal sketch is shown at the end of this Production Use section). 179 | 180 | ### 🔀 CI Integration 181 | 182 | Run tests and analysis as separate steps: 183 | 184 | ```yaml 185 | evaluate: 186 | steps: 187 | - run: pytest --run-eval -n auto --supress-failed-exit-code # Run cases in parallel 188 | - run: pytest --run-eval-analysis # Analyze results 189 | ``` 190 | 191 | Use `--supress-failed-exit-code` with `--run-eval` - let the analysis phase determine success/failure. **If all your 192 | cases pass, your evaluation set is probably too small!** 193 | 194 | ### ⚡️ Parallel Testing 195 | 196 | As your evaluation set grows, you may want to run your test cases in parallel. To do this, install 197 | [`pytest-xdist`](https://pytest-xdist.readthedocs.io/). `pytest-evals` supports it out of the box 🚀. 198 | 199 | ```bash 200 | run: pytest --run-eval -n auto 201 | ``` 202 | 203 |
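Below is a minimal sketch of the golden-set selection mentioned above. The `GOLDEN_SET` environment variable and the boolean `golden` column are illustrative assumptions - use whatever convention fits your dataset:

```python
# A minimal sketch: run only the golden subset when GOLDEN_SET=1 is set.
# The GOLDEN_SET variable name and the boolean "golden" CSV column are assumptions.
import os

import pandas as pd
import pytest

test_data = pd.read_csv("tests/testdata.csv")
if os.environ.get("GOLDEN_SET") == "1":
    test_data = test_data[test_data["golden"]]  # keep only the golden-set rows


@pytest.mark.eval(name="my_eval")
@pytest.mark.parametrize("case", test_data.to_dict(orient="records"))
def test_agent(case, eval_bag, agent):
    eval_bag.prediction = agent.run(case["input"])
```

For example, `GOLDEN_SET=1 pytest --run-eval` runs only the golden cases, while a plain `pytest --run-eval` runs the full evaluation set.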

(back to top)

204 | 205 | # 👷 Contributing 206 | 207 | Contributions make the open-source community a fantastic place to learn, inspire, and create. Any contributions you make 208 | are **greatly appreciated** - not only code, but also documentation, blog posts, and feedback 😍. 209 | 210 | Please fork the repo and create a pull request if you have a suggestion. You can also simply open an issue to give us 211 | some feedback. 212 | 213 | **Don't forget to give the project [a star](#top)! ⭐️** 214 | 215 | For more information about contributing code to the project, read the [CONTRIBUTING.md](CONTRIBUTING.md) guide. 216 | 217 | # 📃 License 218 | 219 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 220 |

(back to top)

-------------------------------------------------------------------------------- /src/pytest_evals/plugin.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | from collections import defaultdict 4 | from os.path import isabs 5 | from pathlib import Path 6 | from typing import Any, List, Mapping, cast 7 | 8 | import cloudpickle 9 | import pytest 10 | from pytest_harvest import create_results_bag_fixture, get_session_results_dct 11 | 12 | from .json_encoder import AdvancedJsonEncoder 13 | from .models import EvalResults, EvalBag 14 | 15 | # Constants 16 | EVAL_MARK_NAME = "eval" # pragma: no cover 17 | EVAL_ANALYSIS_MARK_NAME = "eval_analysis" # pragma: no cover 18 | 19 | # Fixtures 20 | eval_bag = create_results_bag_fixture( 21 | "fixture_store", name="eval_bag", bag_type=EvalBag 22 | ) # pragma: no cover 23 | 24 | 25 | @pytest.fixture(scope="function") 26 | def eval_bag_results(request, out_path) -> Mapping[str, Mapping[str, Any]]: 27 | """Fixture that provides access to evaluation results.""" 28 | ret = cast(dict, simple_eval_results(request.session)) 29 | 30 | if not request.session.config.getoption("--run-eval"): 31 | raw = out_path / "eval-results-raw.json" 32 | if raw.exists(): 33 | with open(raw, "r") as f: 34 | ret.update(json.load(f)) 35 | return ret 36 | 37 | 38 | @pytest.fixture(scope="function") 39 | def eval_results(request, eval_bag_results) -> List[EvalResults]: 40 | """Fixture that provides access to evaluation results as EvalResult objects.""" 41 | marker = eval_analysis_marker(request.node.own_markers) 42 | if not marker: 43 | raise ValueError( 44 | f"Only tests marked with {EVAL_ANALYSIS_MARK_NAME} can use the eval_results fixture" 45 | ) 46 | 47 | return [ 48 | EvalResults.from_result_bag(v) 49 | for k, v in eval_bag_results.items() 50 | if v["eval_name"] == marker.kwargs["name"] 51 | ] 52 | 53 | 54 | def pytest_addoption(parser, pluginmanager): 55 | """Add options to the pytest CLI.""" 56 | group = parser.getgroup("Evals", "Evals configuration") 57 | group.addoption( 58 | "--out-path", 59 | action="store", 60 | default="./test-out/", 61 | help="Path to store test artifacts", 62 | ) 63 | group.addoption( 64 | "--supress-failed-exit-code", 65 | action="store_true", 66 | default=False, 67 | help="Supress failed exit code. Useful for CI/CD with a separate step for test analysis", 68 | ) 69 | group.addoption( 70 | "--run-eval", 71 | action="store_true", 72 | default=False, 73 | help="Run evaluation tests(mark with @pytest.mark.eval)", 74 | ) 75 | group.addoption( 76 | "--run-eval-analysis", 77 | action="store_true", 78 | default=False, 79 | help="Run evaluation analysis tests(mark with @pytest.mark.eval_analysis)", 80 | ) 81 | group.addoption( 82 | "--save-evals-csv", 83 | action="store_true", 84 | default=False, 85 | help="Save evaluation cases results to a CSV file", 86 | ) 87 | 88 | 89 | def pytest_configure(config): 90 | """Configure the pytest session with the options.""" 91 | config.addinivalue_line( 92 | "markers", 93 | "eval: mark test as evaluation test. Evaluation tests will only run when --run-eval is passed", 94 | ) 95 | config.addinivalue_line( 96 | "markers", 97 | "eval_analysis: mark test as an evaluation analysis. Analysis tests MUST run after all other tests. 
Analysis tests will only run when --run_eval-analysis is passed", 98 | ) 99 | 100 | out_path = Path(config.getoption("--out-path")) 101 | if not isabs(out_path): 102 | out_path = Path(config.invocation_dir / out_path) 103 | config.out_path = out_path 104 | config.out_path.mkdir(exist_ok=True) 105 | 106 | if config.getoption("--save-evals-csv") and not config.getoption("--run-eval"): 107 | raise ValueError( 108 | "The --save-evals-csv option can only be used with the --run-eval option" 109 | ) 110 | 111 | 112 | @pytest.fixture 113 | def out_path(request) -> Path: 114 | """Get the output storage path. This is useful for storing test artifacts such as results.""" 115 | return request.config.out_path 116 | 117 | 118 | def is_xdist_session(config): 119 | """Check if the session is a xdist session.""" 120 | return ( 121 | hasattr(config, "workerinput") 122 | or hasattr(config, "workerid") 123 | or config.getoption("dist", "no") != "no" 124 | ) 125 | 126 | 127 | def eval_analysis_marker(markers: list[pytest.Mark]) -> pytest.Mark | None: 128 | """Get the eval_analysis marker if present.""" 129 | m = next((m for m in markers if m.name == EVAL_ANALYSIS_MARK_NAME), None) 130 | if m and "name" not in m.kwargs: 131 | raise ValueError( 132 | f"Marker {EVAL_ANALYSIS_MARK_NAME} must have a 'name' argument" 133 | ) 134 | return m 135 | 136 | 137 | def eval_marker(markers: list[pytest.Mark]) -> pytest.Mark | None: 138 | """Get the eval marker if present.""" 139 | m = next((m for m in markers if m.name == EVAL_MARK_NAME), None) 140 | if m and "name" not in m.kwargs: 141 | raise ValueError(f"Marker {EVAL_MARK_NAME} must have a 'name' argument") 142 | return m 143 | 144 | 145 | def pytest_collection_modifyitems(config, items): 146 | """Modify the collection of items.""" 147 | if ( 148 | is_xdist_session(config) 149 | and config.getoption("--run-eval") 150 | and config.getoption("--run-eval-analysis") 151 | ): 152 | raise ValueError( 153 | "In xdist sessions, evaluation analysis must run after the evaluation tests " 154 | "(as a separated execution). Therefore, --run-eval and --run-eval-analysis " 155 | "cannot be used together" 156 | ) 157 | 158 | run_eval = config.getoption("--run-eval") 159 | run_analysis = config.getoption("--run-eval-analysis") 160 | skip_eval = pytest.mark.skip(reason="need --run-eval option to run") 161 | skip_analysis = pytest.mark.skip(reason="need --run-eval-analysis option to run") 162 | 163 | for item in items[:]: 164 | is_eval = eval_marker(item.own_markers) is not None 165 | is_analysis = eval_analysis_marker(item.own_markers) is not None 166 | 167 | if is_analysis and is_eval: 168 | raise ValueError( 169 | f"{item.nodeid} is marked as both `{EVAL_MARK_NAME}` and " 170 | f"`{EVAL_ANALYSIS_MARK_NAME}`." 
171 | ) 172 | 173 | if run_eval or run_analysis: 174 | if is_eval and not run_eval: 175 | item.add_marker(skip_eval) 176 | elif is_analysis and not run_analysis: 177 | item.add_marker(skip_analysis) 178 | elif not is_eval and not is_analysis: 179 | items.remove(item) 180 | else: 181 | if is_eval: 182 | item.add_marker(skip_eval) # pragma: no cover 183 | if is_analysis: 184 | item.add_marker(skip_analysis) # pragma: no cover 185 | 186 | 187 | def pytest_sessionfinish(session): 188 | """Handle session finish.""" 189 | orig_exitstatus = getattr(session, "exitstatus", 0) 190 | if ( 191 | session.config.getoption("--supress-failed-exit-code") 192 | and orig_exitstatus != pytest.ExitCode.INTERNAL_ERROR 193 | ): 194 | session.exitstatus = 0 195 | 196 | if hasattr(session.config, "workerinput"): 197 | return 198 | 199 | if ( 200 | session.config.getoption("--run-eval") 201 | and orig_exitstatus != pytest.ExitCode.INTERNAL_ERROR 202 | ): 203 | res = simple_eval_results(session) 204 | with open(session.config.out_path / "eval-results-raw.json", "w") as f: 205 | json.dump(res, f, cls=AdvancedJsonEncoder) # noqa: ignore 206 | 207 | if session.config.getoption("--save-evals-csv"): 208 | try: 209 | import pandas as pd 210 | except ImportError: 211 | raise ImportError( 212 | "The --save-evals-csv option requires the pandas library" 213 | ) 214 | 215 | results_df = pd.json_normalize( 216 | [ 217 | { 218 | "test_id": name, 219 | "status": data["status"], 220 | "duration_ms": data["duration_ms"], 221 | "pytest_obj_name": data["pytest_obj_name"], 222 | "eval_name": data["eval_name"], 223 | "params": json.loads( 224 | json.dumps(data["params"], cls=AdvancedJsonEncoder) 225 | ), 226 | "eval_bag": json.loads( 227 | json.dumps( 228 | data["fixtures"].get("eval_bag", {}), 229 | cls=AdvancedJsonEncoder, 230 | ) 231 | ), 232 | } 233 | for name, data in res.items() 234 | ] 235 | ) 236 | if not results_df.empty: 237 | results_df = results_df.set_index("test_id") 238 | results_df.to_csv(session.config.out_path / "eval-results-raw.csv") 239 | 240 | 241 | def simple_eval_results(session) -> Mapping[str, Mapping[str, Any]]: 242 | """Get simple evaluation results from the session.""" 243 | res = get_session_results_dct(session, results_bag_fixture_name="eval_bag") 244 | 245 | ret = defaultdict(dict) 246 | for k, v in res.items(): 247 | obj = v.get("pytest_obj", None) 248 | if not obj or not hasattr(obj, "pytestmark"): 249 | continue # pragma: no cover 250 | 251 | e_marker = eval_marker(obj.pytestmark) 252 | if not e_marker: 253 | continue # pragma: no cover 254 | 255 | ret[k] = {k1: v1 for k1, v1 in v.items() if k1 != "pytest_obj"} 256 | ret[k]["pytest_obj_name"] = v["pytest_obj"].__name__ 257 | ret[k]["eval_name"] = e_marker.kwargs["name"] 258 | 259 | return ret 260 | 261 | 262 | # no cover: start 263 | 264 | # XDist harvesting configuration 265 | XDIST_HARVESTED_PATH = Path("./.xdist_harvested/") 266 | 267 | 268 | def pytest_harvest_xdist_worker_dump(worker_id, session_items, fixture_store) -> bool: 269 | """Dump worker results using cloudpickle.""" 270 | with open(XDIST_HARVESTED_PATH / f"{worker_id}.pkl", "wb") as f: 271 | try: 272 | cloudpickle.dump((session_items, fixture_store), f) 273 | except Exception as e: 274 | logging.warning( 275 | f"Error while pickling worker {worker_id}'s harvested results: [{e.__class__}] {e}" 276 | ) 277 | return True 278 | 279 | 280 | def pytest_harvest_xdist_load(): 281 | """Load worker results using cloudpickle.""" 282 | workers_saved_material = dict() 283 | for pkl_file in 
XDIST_HARVESTED_PATH.glob("*.pkl"): 284 | wid = pkl_file.stem 285 | with pkl_file.open("rb") as f: 286 | workers_saved_material[wid] = cloudpickle.load(f) 287 | return workers_saved_material 288 | 289 | 290 | # no cover: stop 291 | -------------------------------------------------------------------------------- /tests/test_plugin.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | from unittest.mock import Mock, patch 4 | 5 | import pytest 6 | 7 | from pytest_evals import eval_analysis_marker 8 | from pytest_evals import plugin 9 | 10 | 11 | def test_eval_marker_configuration(pytester): 12 | """Test basic eval marker functionality 13 | 14 | Verifies that a test with properly configured eval marker: 15 | - Is collected when --run-eval is used 16 | - Successfully executes and passes 17 | """ 18 | pytester.makepyfile(""" 19 | import pytest 20 | 21 | @pytest.mark.eval(name="test_eval") 22 | def test_simple(): 23 | assert True 24 | """) 25 | 26 | result = pytester.runpytest("--run-eval") 27 | result.assert_outcomes(passed=1) 28 | 29 | 30 | def test_eval_analysis_marker_configuration(pytester): 31 | """Test that tests are properly selected/skipped based on eval/eval-analysis markers 32 | 33 | Verifies: 34 | - With --run-eval: eval_analysis tests are skipped 35 | - With --run-eval-analysis: eval tests are skipped 36 | """ 37 | 38 | pytester.makepyfile(""" 39 | import pytest 40 | 41 | @pytest.mark.eval_analysis(name="test_eval") 42 | def test_analysis(eval_results): 43 | assert len(eval_results) == 0 44 | """) 45 | 46 | result = pytester.runpytest("--run-eval-analysis") 47 | result.assert_outcomes(passed=1) 48 | 49 | 50 | def test_missing_name_in_eval_marker(pytester): 51 | """Test that eval marker requires name parameter""" 52 | pytester.makepyfile(""" 53 | import pytest 54 | 55 | @pytest.mark.eval 56 | def test_simple(): 57 | assert True 58 | """) 59 | 60 | result = pytester.runpytest("--run-eval") 61 | assert result.ret != 0 62 | 63 | 64 | # Comprehensive workflow test 65 | def test_complete_evaluation_workflow(pytester): 66 | """Test complete evaluation workflow including fixture behavior""" 67 | pytester.makepyfile(""" 68 | import pytest 69 | 70 | TEST_DATA = [ 71 | {"input": "test1", "expected": True}, 72 | {"input": "test2", "expected": False}, 73 | ] 74 | 75 | @pytest.fixture 76 | def mock_classifier(): 77 | def classify(text: str) -> bool: 78 | return "test1" in text 79 | return classify 80 | 81 | # Evaluation phase with fixture usage 82 | @pytest.mark.eval(name="test_classifier") 83 | @pytest.mark.parametrize("case", TEST_DATA) 84 | def test_classifier(case, eval_bag, mock_classifier): 85 | eval_bag.input = case["input"] 86 | eval_bag.expected = case["expected"] 87 | eval_bag.prediction = mock_classifier(case["input"]) 88 | eval_bag.metadata = {"test_type": "classification"} 89 | assert eval_bag.prediction == case["expected"] 90 | 91 | # Analysis phase with enhanced fixture verification 92 | @pytest.mark.eval_analysis(name="test_classifier") 93 | def test_analysis(eval_results): 94 | assert len(eval_results) == 2 95 | 96 | # Verify fixture data preservation 97 | for result in eval_results: 98 | assert hasattr(result.result, "metadata") 99 | assert result.result.metadata["test_type"] == "classification" 100 | 101 | # Verify analysis results 102 | correct = sum(1 for r in eval_results 103 | if r.result.prediction == r.result.expected) 104 | accuracy = correct / len(eval_results) 105 | assert accuracy == 1.0 
106 | """) 107 | 108 | # Run evaluation phase 109 | result_eval = pytester.runpytest("--run-eval") 110 | result_eval.assert_outcomes(passed=2, skipped=1) 111 | 112 | # Run analysis phase 113 | result_analysis = pytester.runpytest("--run-eval-analysis") 114 | result_analysis.assert_outcomes(passed=1, skipped=2) 115 | 116 | 117 | def test_output_file_creation(pytester, tmp_path): 118 | """Test that results are properly saved to output file""" 119 | out_dir = tmp_path / "test-output" 120 | out_dir.mkdir(exist_ok=True) 121 | 122 | pytester.makepyfile(""" 123 | import pytest 124 | 125 | @pytest.mark.eval(name="test_eval") 126 | def test_simple(eval_bag): 127 | eval_bag.result = "test_value" 128 | assert True 129 | """) 130 | 131 | result = pytester.runpytest("--run-eval", f"--out-path={out_dir}", "-v") 132 | result.assert_outcomes(passed=1) 133 | 134 | results_file = Path(out_dir) / "eval-results-raw.json" 135 | assert results_file.exists() 136 | 137 | with open(results_file) as f: 138 | results = json.load(f) 139 | assert any( 140 | "test_value" in str(v.get("fixtures").get("eval_bag")) 141 | for v in results.values() 142 | ) 143 | 144 | 145 | def test_eval_marker_collection_scenarios(pytester): 146 | """Test different scenarios for eval marker collection""" 147 | pytester.makepyfile(""" 148 | import pytest 149 | from pytest_harvest import get_session_results_dct 150 | 151 | # Case 1: No pytestmark attribute 152 | def test_no_pytestmark(): 153 | assert True 154 | 155 | # Case 2: Has pytestmark but not the eval mark 156 | @pytest.mark.skip 157 | def test_other_mark(): 158 | assert True 159 | 160 | # Case 3: Class without pytestmark 161 | class TestClass: 162 | def test_method(self): 163 | assert True 164 | 165 | # Case 4: Class with non-eval pytestmark 166 | class TestClassWithMark: 167 | pytestmark = [pytest.mark.skip] 168 | def test_no_eval_marker(self): 169 | assert True 170 | 171 | # Case 5: Test with eval mark (should be included) 172 | @pytest.mark.eval(name="test") 173 | def test_with_eval(eval_bag): 174 | eval_bag.value = 42 175 | assert True 176 | """) 177 | 178 | result = pytester.runpytest("--run-eval") 179 | result.assert_outcomes(passed=1) 180 | 181 | 182 | @pytest.mark.parametrize( 183 | "scenario", 184 | [ 185 | # Empty file scenario - expect empty results 186 | ("empty_file", {}), 187 | # Valid data scenario - expect one result with specific values 188 | ( 189 | "valid_data", 190 | { 191 | "test_1": { 192 | "eval_name": "sample_eval", 193 | "fixtures": {"eval_bag": {"value": 42}}, 194 | } 195 | }, 196 | ), 197 | # Missing file scenario - expect empty results 198 | ("missing_file", None), 199 | ], 200 | ) 201 | def test_eval_bag_results_scenarios(pytester, tmp_path, scenario): 202 | """Test eval_bag_results behavior with different results file states 203 | 204 | Parameters: 205 | scenario: Tuple of (scenario_name, file_content) where: 206 | - empty_file: Results file exists but is empty ({}) 207 | - valid_data: Results file exists with valid test data 208 | - missing_file: Results file does not exist (None) 209 | 210 | Each scenario should handle the case gracefully and provide appropriate results. 
211 | """ 212 | scenario_name, file_content = scenario 213 | out_dir = tmp_path / "test-out" 214 | out_dir.mkdir(parents=True) 215 | results_file = out_dir / "eval-results-raw.json" 216 | 217 | if file_content is not None: 218 | results_file.parent.mkdir(exist_ok=True) 219 | results_file.write_text(json.dumps(file_content)) 220 | 221 | pytester.makepyfile(f""" 222 | def test_results(eval_bag_results): 223 | if "{scenario_name}" == "empty_file": 224 | assert len(eval_bag_results) == 0 225 | elif "{scenario_name}" == "valid_data": 226 | assert len(eval_bag_results) == 1 227 | assert "test_1" in eval_bag_results 228 | assert eval_bag_results["test_1"]["eval_name"] == "sample_eval" 229 | else: # missing_file 230 | assert len(eval_bag_results) == 0 231 | """) 232 | 233 | result = pytester.runpytest(f"--out-path={out_dir}") 234 | result.assert_outcomes(passed=1) 235 | 236 | 237 | # Error handling and configuration tests 238 | def test_invalid_marker_combination(pytester): 239 | """Test that a test cannot have both eval and eval_analysis markers""" 240 | pytester.makepyfile(""" 241 | import pytest 242 | 243 | @pytest.mark.eval(name="test") 244 | @pytest.mark.eval_analysis(name="test") 245 | def test_invalid(): 246 | assert True 247 | """) 248 | 249 | result = pytester.runpytest("--run-eval") 250 | assert result.ret != 0 251 | 252 | 253 | def test_suppress_failed_exit_code_scenarios(pytester): 254 | """Test all scenarios related to suppressing failed exit codes""" 255 | pytester.makepyfile(""" 256 | import pytest 257 | 258 | @pytest.mark.eval(name="test_eval") 259 | def test_failing(): 260 | assert False 261 | 262 | @pytest.mark.eval(name="test_eval") 263 | def test_internal_error(): 264 | raise pytest.UsageError("Internal error") 265 | """) 266 | 267 | # Case 1: Without suppress flag - should fail with non-zero exit code 268 | result1 = pytester.runpytest("--run-eval") 269 | result1.assert_outcomes(failed=2) 270 | assert result1.ret != 0 271 | 272 | # Case 2: With suppress flag - expect zero exit code despite failures 273 | result2 = pytester.runpytest("--run-eval", "--supress-failed-exit-code") 274 | result2.assert_outcomes(failed=2) 275 | assert result2.ret == 0 276 | 277 | 278 | def test_xdist_eval_flags_unit(): 279 | """Unit test for xdist session with both eval and eval-analysis flags""" 280 | config = Mock() 281 | config.getoption.side_effect = lambda x: x in ["--run-eval", "--run-eval-analysis"] 282 | 283 | with patch.object(plugin, "is_xdist_session", return_value=True): 284 | with pytest.raises(ValueError, match="cannot be used together"): 285 | plugin.pytest_collection_modifyitems(config, []) 286 | 287 | 288 | def test_xdist_eval_flags_integration(pytester): 289 | """Integration test for xdist compatibility with eval flags 290 | 291 | Verifies that attempting to run eval and eval-analysis tests together 292 | in distributed mode raises appropriate error with explanation message 293 | """ 294 | pytester.makepyfile(""" 295 | import pytest 296 | 297 | @pytest.mark.eval(name="test") 298 | def test_eval(): 299 | assert True 300 | 301 | @pytest.mark.eval_analysis(name="test") 302 | def test_analysis(eval_results): 303 | assert True 304 | """) 305 | 306 | result = pytester.runpytest("--run-eval", "--run-eval-analysis", "-n", "2") 307 | assert result.ret != 0 308 | result.stdout.fnmatch_lines( 309 | "*evaluation analysis must run after the evaluation tests*" 310 | ) 311 | 312 | 313 | def test_marker_basic_cases(): 314 | """Test eval_analysis_marker validation logic 315 | 316 | Tests multiple 
marker scenarios: 317 | - Valid marker with name parameter 318 | - Invalid marker missing name parameter 319 | - No markers present 320 | - Other unrelated markers 321 | - Mixed markers (eval with other marks) 322 | 323 | Verifies proper marker validation and selection in each case. 324 | """ 325 | # Valid marker 326 | valid = pytest.mark.eval_analysis(name="test") 327 | assert eval_analysis_marker([valid.mark]) == valid.mark 328 | 329 | # Missing name param 330 | invalid = pytest.mark.eval_analysis() 331 | with pytest.raises(ValueError, match="must have a 'name' argument"): 332 | eval_analysis_marker([invalid.mark]) 333 | 334 | # No markers 335 | assert eval_analysis_marker([]) is None 336 | 337 | # Other markers 338 | other = pytest.mark.skip(reason="skip") 339 | assert eval_analysis_marker([other.mark]) is None 340 | 341 | # Mixed markers 342 | mixed = [other.mark, valid.mark] 343 | assert eval_analysis_marker(mixed) == valid.mark 344 | 345 | 346 | def test_eval_analysis_marker_selection(pytester): 347 | """Test marker skipping behavior""" 348 | pytester.makepyfile(""" 349 | import pytest 350 | 351 | @pytest.mark.eval(name="test") 352 | def test_eval(): 353 | pass 354 | 355 | @pytest.mark.eval_analysis(name="test") 356 | def test_analysis(): 357 | pass 358 | """) 359 | 360 | # Test with run_eval 361 | result1 = pytester.runpytest("--run-eval") 362 | result1.assert_outcomes(skipped=1, passed=1) 363 | 364 | # Test with run_eval_analysis 365 | result2 = pytester.runpytest("--run-eval-analysis") 366 | result2.assert_outcomes(skipped=1, passed=1) 367 | 368 | 369 | def test_worker_session_finish(pytestconfig): 370 | """Test worker session finish handling""" 371 | 372 | class WorkerSession: 373 | class Config: 374 | workerinput = {} 375 | 376 | def getoption(self, *args, **kwargs): 377 | return False 378 | 379 | config = Config() 380 | exitstatus = 0 381 | 382 | assert plugin.pytest_sessionfinish(WorkerSession()) is None 383 | 384 | 385 | def test_save_evals_csv_option(pytester, tmp_path): 386 | """Test the --save-evals-csv option with various scenarios""" 387 | out_dir = tmp_path / "test-output" 388 | out_dir.mkdir(exist_ok=True) 389 | 390 | # Create test file with evaluation test 391 | pytester.makepyfile(""" 392 | import pytest 393 | 394 | @pytest.mark.eval(name="test_eval") 395 | def test_simple(eval_bag): 396 | eval_bag.result = "test_value" 397 | eval_bag.metadata = {"key": "value"} 398 | assert True 399 | """) 400 | 401 | result1 = pytester.runpytest(f"--out-path={out_dir}", "--save-evals-csv") 402 | assert result1.ret != 0 403 | result1.stderr.fnmatch_lines( 404 | "*--save-evals-csv option can only be used with the --run-eval option*" 405 | ) 406 | 407 | # Case 2: Test with both flags and verify CSV creation 408 | result2 = pytester.runpytest( 409 | "--run-eval", f"--out-path={out_dir}", "--save-evals-csv", "-v" 410 | ) 411 | result2.assert_outcomes(passed=1) 412 | 413 | # Verify both JSON and CSV files exist 414 | csv_file = out_dir / "eval-results-raw.csv" 415 | json_file = out_dir / "eval-results-raw.json" 416 | assert csv_file.exists() 417 | assert json_file.exists() 418 | 419 | 420 | def test_save_evals_csv_missing_pandas(pytester, tmp_path, monkeypatch): 421 | """Test handling of missing pandas when --save-evals-csv is used""" 422 | out_dir = tmp_path / "test-output" 423 | out_dir.mkdir(exist_ok=True) 424 | 425 | # Mock pandas to raise ImportError 426 | import sys 427 | 428 | with patch.dict(sys.modules, {"pandas": None}): 429 | pytester.makepyfile(""" 430 | import pytest 431 | 
432 | @pytest.mark.eval(name="test_eval") 433 | def test_simple(): 434 | assert True 435 | """) 436 | 437 | result = pytester.runpytest( 438 | "--run-eval", f"--out-path={out_dir}", "--save-evals-csv" 439 | ) 440 | assert result.ret != 0 441 | result.stderr.fnmatch_lines( 442 | "*The --save-evals-csv option requires the pandas library*" 443 | ) 444 | 445 | 446 | def test_csv_data_normalization(pytester, tmp_path): 447 | """Test that complex data structures are properly normalized in CSV output""" 448 | out_dir = tmp_path / "test-output" 449 | out_dir.mkdir(exist_ok=True) 450 | 451 | pytester.makepyfile(""" 452 | import pytest 453 | from datetime import datetime 454 | 455 | TEST_DATA = [ 456 | {"input": "test1", "expected": True}, 457 | {"input": "test2", "expected": False} 458 | ] 459 | 460 | @pytest.mark.eval(name="test_eval") 461 | @pytest.mark.parametrize("case", TEST_DATA) 462 | def test_complex_data(eval_bag, case): 463 | eval_bag.nested_data = { 464 | "list": [1, 2, 3], 465 | "dict": {"a": 1, "b": 2}, 466 | "date": str(datetime.now()), 467 | "case": case 468 | } 469 | assert True 470 | """) 471 | 472 | result = pytester.runpytest( 473 | "--run-eval", f"--out-path={out_dir}", "--save-evals-csv", "-v" 474 | ) 475 | result.assert_outcomes(passed=2) # Two test cases due to parametrize 476 | 477 | # Verify CSV was created with normalized data 478 | csv_file = out_dir / "eval-results-raw.csv" 479 | assert csv_file.exists() 480 | 481 | # Read the CSV content to verify structure (if pandas is available) 482 | try: 483 | import pandas as pd 484 | 485 | df = pd.read_csv(csv_file) 486 | assert not df.empty 487 | assert "eval_bag.nested_data.date" in df.columns 488 | except ImportError: 489 | pass # Skip detailed verification if pandas isn't available 490 | -------------------------------------------------------------------------------- /example/example_notebook_advanced.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "a13d6843ef4301", 6 | "metadata": {}, 7 | "source": [ 8 | "# Advanced LLM Evaluation with pytest-evals\n", 9 | "\n", 10 | "This notebook demonstrates advanced techniques for evaluating LLM performance:\n", 11 | "1. Running parallel evaluations across multiple models\n", 12 | "2. Tracking and comparing results across different runs\n", 13 | "3. Visualizing performance trends and model comparisons\n", 14 | "4. Statistical analysis of model performance\n", 15 | "\n", 16 | "## Setup\n", 17 | "First, let's load required extensions and configure our environment." 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "id": "cefdc9561d3ab77a", 23 | "metadata": { 24 | "ExecuteTime": { 25 | "end_time": "2025-01-21T08:33:47.364267Z", 26 | "start_time": "2025-01-21T08:33:47.278694Z" 27 | } 28 | }, 29 | "source": [ 30 | "%load_ext pytest_evals" 31 | ], 32 | "outputs": [], 33 | "execution_count": 1 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "id": "80ea828ddde1c26a", 38 | "metadata": {}, 39 | "source": [ 40 | "## Model Implementation\n", 41 | "\n", 42 | "Define our classifier that leverages different LLM models to determine if text is computer-related." 
43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "id": "6e55ed323e3cc980", 48 | "metadata": { 49 | "ExecuteTime": { 50 | "end_time": "2025-01-21T08:33:49.412049Z", 51 | "start_time": "2025-01-21T08:33:49.211224Z" 52 | } 53 | }, 54 | "source": [ 55 | "import openai\n", 56 | "\n", 57 | "\n", 58 | "def classify(text: str, model=\"gpt-4o-mini\") -> bool:\n", 59 | " \"\"\"Classify text as computer-related or not using specified LLM model.\n", 60 | "\n", 61 | " Args:\n", 62 | " text (str): Input text to classify\n", 63 | " model (str): Model identifier (e.g., \"gpt-4o\", \"gpt-4o-mini\")\n", 64 | "\n", 65 | " Returns:\n", 66 | " bool: True if text is computer-related, False otherwise\n", 67 | " \"\"\"\n", 68 | " resp = openai.chat.completions.create(\n", 69 | " model=model,\n", 70 | " messages=[\n", 71 | " {\n", 72 | " \"role\": \"system\",\n", 73 | " \"content\": \"Is this text about a computer-related subject? \"\n", 74 | " \"Reply ONLY with either true or false.\",\n", 75 | " },\n", 76 | " {\"role\": \"user\", \"content\": text},\n", 77 | " ],\n", 78 | " )\n", 79 | " return resp.choices[0].message.content.lower() == \"true\"" 80 | ], 81 | "outputs": [], 82 | "execution_count": 2 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "id": "51c2d42c6a10f861", 87 | "metadata": {}, 88 | "source": [ 89 | "## Test Data and Results Tracking\n", 90 | "\n", 91 | "Set up our test cases and initialize our experiment tracking DataFrame." 92 | ] 93 | }, 94 | { 95 | "metadata": { 96 | "ExecuteTime": { 97 | "end_time": "2025-01-21T08:33:50.609927Z", 98 | "start_time": "2025-01-21T08:33:50.605050Z" 99 | } 100 | }, 101 | "cell_type": "code", 102 | "source": [ 103 | "import pandas as pd\n", 104 | "\n", 105 | "# Define test cases\n", 106 | "TEST_DATA = [\n", 107 | " {\"text\": \"I need to debug this Python code\", \"label\": True},\n", 108 | " {\"text\": \"The cat jumped over the lazy dog\", \"label\": False},\n", 109 | " {\"text\": \"My monitor keeps flickering\", \"label\": True},\n", 110 | " {\"text\": \"The weather is nice today\", \"label\": False},\n", 111 | " {\"text\": \"Updating system drivers fixed the issue\", \"label\": True},\n", 112 | " {\"text\": \"the new llama can understand bizzare nuanced slang\", \"label\": True},\n", 113 | "]\n", 114 | "\n", 115 | "# Initialize experiment tracking\n", 116 | "experiments_df = pd.DataFrame()" 117 | ], 118 | "id": "58c440f913c3d97", 119 | "outputs": [], 120 | "execution_count": 3 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "id": "959af2e2c4f8670", 125 | "metadata": {}, 126 | "source": [ 127 | "## Run Model Evaluations\n", 128 | "\n", 129 | "Execute evaluations across different models and collect detailed metrics." 
130 | ] 131 | }, 132 | { 133 | "metadata": { 134 | "ExecuteTime": { 135 | "end_time": "2025-01-21T08:34:02.226687Z", 136 | "start_time": "2025-01-21T08:33:52.406707Z" 137 | } 138 | }, 139 | "cell_type": "code", 140 | "source": [ 141 | "%%ipytest_evals --run-eval\n", 142 | "import pytest\n", 143 | "from collections import defaultdict\n", 144 | "\n", 145 | "@pytest.mark.eval(name=\"computer_classifier\")\n", 146 | "@pytest.mark.parametrize(\"case\", TEST_DATA)\n", 147 | "@pytest.mark.parametrize(\"model\", [\"gpt-4o\", \"gpt-4o-mini\", \"gpt-3.5-turbo\"])\n", 148 | "def test_classifier(case: dict, model, eval_bag):\n", 149 | " \"\"\"Test individual classification cases across different models.\n", 150 | " \n", 151 | " Args:\n", 152 | " case (dict): Test case with text and expected label\n", 153 | " eval_bag: Container for test results\n", 154 | " model: Model identifier\n", 155 | " \"\"\"\n", 156 | " eval_bag.input_text = case[\"text\"]\n", 157 | " eval_bag.label = case[\"label\"]\n", 158 | " eval_bag.prediction = classify(case[\"text\"], model)\n", 159 | " eval_bag.precision = 1 if eval_bag.prediction == eval_bag.label else 0\n", 160 | "\n", 161 | " print(f\"Model: {model}\")\n", 162 | " print(f\"Input: {eval_bag.input_text}\")\n", 163 | " print(f\"Expected: {eval_bag.label}, Predicted: {eval_bag.prediction}\\n\")\n", 164 | "\n", 165 | " assert eval_bag.prediction == eval_bag.label" 166 | ], 167 | "id": "4c8575999b45c4b0", 168 | "outputs": [ 169 | { 170 | "name": "stdout", 171 | "output_type": "stream", 172 | "text": [ 173 | "\n", 174 | "t_4373bcce5bc844c49ab1aee5ce828d18.py::test_classifier[gpt-4o-case0] Model: gpt-4o\n", 175 | "Input: I need to debug this Python code\n", 176 | "Expected: True, Predicted: True\n", 177 | "\n", 178 | "\u001B[32mPASSED\u001B[0m\n", 179 | "t_4373bcce5bc844c49ab1aee5ce828d18.py::test_classifier[gpt-4o-case1] Model: gpt-4o\n", 180 | "Input: The cat jumped over the lazy dog\n", 181 | "Expected: False, Predicted: False\n", 182 | "\n", 183 | "\u001B[32mPASSED\u001B[0m\n", 184 | "t_4373bcce5bc844c49ab1aee5ce828d18.py::test_classifier[gpt-4o-case2] Model: gpt-4o\n", 185 | "Input: My monitor keeps flickering\n", 186 | "Expected: True, Predicted: True\n", 187 | "\n", 188 | "\u001B[32mPASSED\u001B[0m\n", 189 | "t_4373bcce5bc844c49ab1aee5ce828d18.py::test_classifier[gpt-4o-case3] Model: gpt-4o\n", 190 | "Input: The weather is nice today\n", 191 | "Expected: False, Predicted: False\n", 192 | "\n", 193 | "\u001B[32mPASSED\u001B[0m\n", 194 | "t_4373bcce5bc844c49ab1aee5ce828d18.py::test_classifier[gpt-4o-case4] Model: gpt-4o\n", 195 | "Input: Updating system drivers fixed the issue\n", 196 | "Expected: True, Predicted: True\n", 197 | "\n", 198 | "\u001B[32mPASSED\u001B[0m\n", 199 | "t_4373bcce5bc844c49ab1aee5ce828d18.py::test_classifier[gpt-4o-case5] Model: gpt-4o\n", 200 | "Input: the new llama can understand bizzare nuanced slang\n", 201 | "Expected: True, Predicted: True\n", 202 | "\n", 203 | "\u001B[32mPASSED\u001B[0m\n", 204 | "t_4373bcce5bc844c49ab1aee5ce828d18.py::test_classifier[gpt-4o-mini-case0] Model: gpt-4o-mini\n", 205 | "Input: I need to debug this Python code\n", 206 | "Expected: True, Predicted: True\n", 207 | "\n", 208 | "\u001B[32mPASSED\u001B[0m\n", 209 | "t_4373bcce5bc844c49ab1aee5ce828d18.py::test_classifier[gpt-4o-mini-case1] Model: gpt-4o-mini\n", 210 | "Input: The cat jumped over the lazy dog\n", 211 | "Expected: False, Predicted: False\n", 212 | "\n", 213 | "\u001B[32mPASSED\u001B[0m\n", 214 | 
"t_4373bcce5bc844c49ab1aee5ce828d18.py::test_classifier[gpt-4o-mini-case2] Model: gpt-4o-mini\n", 215 | "Input: My monitor keeps flickering\n", 216 | "Expected: True, Predicted: True\n", 217 | "\n", 218 | "\u001B[32mPASSED\u001B[0m\n", 219 | "t_4373bcce5bc844c49ab1aee5ce828d18.py::test_classifier[gpt-4o-mini-case3] Model: gpt-4o-mini\n", 220 | "Input: The weather is nice today\n", 221 | "Expected: False, Predicted: False\n", 222 | "\n", 223 | "\u001B[32mPASSED\u001B[0m\n", 224 | "t_4373bcce5bc844c49ab1aee5ce828d18.py::test_classifier[gpt-4o-mini-case4] Model: gpt-4o-mini\n", 225 | "Input: Updating system drivers fixed the issue\n", 226 | "Expected: True, Predicted: True\n", 227 | "\n", 228 | "\u001B[32mPASSED\u001B[0m\n", 229 | "t_4373bcce5bc844c49ab1aee5ce828d18.py::test_classifier[gpt-4o-mini-case5] Model: gpt-4o-mini\n", 230 | "Input: the new llama can understand bizzare nuanced slang\n", 231 | "Expected: True, Predicted: False\n", 232 | "\n", 233 | "\u001B[31mFAILED\u001B[0m\n", 234 | "t_4373bcce5bc844c49ab1aee5ce828d18.py::test_classifier[gpt-3.5-turbo-case0] Model: gpt-3.5-turbo\n", 235 | "Input: I need to debug this Python code\n", 236 | "Expected: True, Predicted: True\n", 237 | "\n", 238 | "\u001B[32mPASSED\u001B[0m\n", 239 | "t_4373bcce5bc844c49ab1aee5ce828d18.py::test_classifier[gpt-3.5-turbo-case1] Model: gpt-3.5-turbo\n", 240 | "Input: The cat jumped over the lazy dog\n", 241 | "Expected: False, Predicted: False\n", 242 | "\n", 243 | "\u001B[32mPASSED\u001B[0m\n", 244 | "t_4373bcce5bc844c49ab1aee5ce828d18.py::test_classifier[gpt-3.5-turbo-case2] Model: gpt-3.5-turbo\n", 245 | "Input: My monitor keeps flickering\n", 246 | "Expected: True, Predicted: False\n", 247 | "\n", 248 | "\u001B[31mFAILED\u001B[0m\n", 249 | "t_4373bcce5bc844c49ab1aee5ce828d18.py::test_classifier[gpt-3.5-turbo-case3] Model: gpt-3.5-turbo\n", 250 | "Input: The weather is nice today\n", 251 | "Expected: False, Predicted: False\n", 252 | "\n", 253 | "\u001B[32mPASSED\u001B[0m\n", 254 | "t_4373bcce5bc844c49ab1aee5ce828d18.py::test_classifier[gpt-3.5-turbo-case4] Model: gpt-3.5-turbo\n", 255 | "Input: Updating system drivers fixed the issue\n", 256 | "Expected: True, Predicted: True\n", 257 | "\n", 258 | "\u001B[32mPASSED\u001B[0m\n", 259 | "t_4373bcce5bc844c49ab1aee5ce828d18.py::test_classifier[gpt-3.5-turbo-case5] Model: gpt-3.5-turbo\n", 260 | "Input: the new llama can understand bizzare nuanced slang\n", 261 | "Expected: True, Predicted: False\n", 262 | "\n", 263 | "\u001B[31mFAILED\u001B[0m\n", 264 | "\n", 265 | "============================================= FAILURES =============================================\n", 266 | "\u001B[31m\u001B[1m________________________________ test_classifier[gpt-4o-mini-case5] ________________________________\u001B[0m\n", 267 | "\n", 268 | "case = {'label': True, 'text': 'the new llama can understand bizzare nuanced slang'}\n", 269 | "model = 'gpt-4o-mini'\n", 270 | "eval_bag = ResultsBag:\n", 271 | "{'input_text': 'the new llama can understand bizzare nuanced slang', 'label': True, 'prediction': False, 'precision': 0}\n", 272 | "\n", 273 | " \u001B[0m\u001B[37m@pytest\u001B[39;49;00m.mark.eval(name=\u001B[33m\"\u001B[39;49;00m\u001B[33mcomputer_classifier\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m)\u001B[90m\u001B[39;49;00m\n", 274 | " \u001B[37m@pytest\u001B[39;49;00m.mark.parametrize(\u001B[33m\"\u001B[39;49;00m\u001B[33mcase\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m, TEST_DATA)\u001B[90m\u001B[39;49;00m\n", 275 | " 
\u001B[37m@pytest\u001B[39;49;00m.mark.parametrize(\u001B[33m\"\u001B[39;49;00m\u001B[33mmodel\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m, [\u001B[33m\"\u001B[39;49;00m\u001B[33mgpt-4o\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m, \u001B[33m\"\u001B[39;49;00m\u001B[33mgpt-4o-mini\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m, \u001B[33m\"\u001B[39;49;00m\u001B[33mgpt-3.5-turbo\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m])\u001B[90m\u001B[39;49;00m\n", 276 | " \u001B[94mdef\u001B[39;49;00m\u001B[90m \u001B[39;49;00m\u001B[92mtest_classifier\u001B[39;49;00m(case: \u001B[96mdict\u001B[39;49;00m, model, eval_bag):\u001B[90m\u001B[39;49;00m\n", 277 | " \u001B[90m \u001B[39;49;00m\u001B[33m\"\"\"Test individual classification cases across different models.\u001B[39;49;00m\n", 278 | " \u001B[33m\u001B[39;49;00m\n", 279 | " \u001B[33m Args:\u001B[39;49;00m\n", 280 | " \u001B[33m case (dict): Test case with text and expected label\u001B[39;49;00m\n", 281 | " \u001B[33m eval_bag: Container for test results\u001B[39;49;00m\n", 282 | " \u001B[33m classifier: Classification function\u001B[39;49;00m\n", 283 | " \u001B[33m model: Model identifier\u001B[39;49;00m\n", 284 | " \u001B[33m \"\"\"\u001B[39;49;00m\u001B[90m\u001B[39;49;00m\n", 285 | " eval_bag.input_text = case[\u001B[33m\"\u001B[39;49;00m\u001B[33mtext\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m]\u001B[90m\u001B[39;49;00m\n", 286 | " eval_bag.label = case[\u001B[33m\"\u001B[39;49;00m\u001B[33mlabel\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m]\u001B[90m\u001B[39;49;00m\n", 287 | " eval_bag.prediction = classify(case[\u001B[33m\"\u001B[39;49;00m\u001B[33mtext\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m], model)\u001B[90m\u001B[39;49;00m\n", 288 | " eval_bag.precision = \u001B[94m1\u001B[39;49;00m \u001B[94mif\u001B[39;49;00m eval_bag.prediction == eval_bag.label \u001B[94melse\u001B[39;49;00m \u001B[94m0\u001B[39;49;00m\u001B[90m\u001B[39;49;00m\n", 289 | " \u001B[90m\u001B[39;49;00m\n", 290 | " \u001B[96mprint\u001B[39;49;00m(\u001B[33mf\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m\u001B[33mModel: \u001B[39;49;00m\u001B[33m{\u001B[39;49;00mmodel\u001B[33m}\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m)\u001B[90m\u001B[39;49;00m\n", 291 | " \u001B[96mprint\u001B[39;49;00m(\u001B[33mf\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m\u001B[33mInput: \u001B[39;49;00m\u001B[33m{\u001B[39;49;00meval_bag.input_text\u001B[33m}\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m)\u001B[90m\u001B[39;49;00m\n", 292 | " \u001B[96mprint\u001B[39;49;00m(\u001B[33mf\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m\u001B[33mExpected: \u001B[39;49;00m\u001B[33m{\u001B[39;49;00meval_bag.label\u001B[33m}\u001B[39;49;00m\u001B[33m, Predicted: \u001B[39;49;00m\u001B[33m{\u001B[39;49;00meval_bag.prediction\u001B[33m}\u001B[39;49;00m\u001B[33m\\n\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m)\u001B[90m\u001B[39;49;00m\n", 293 | " \u001B[90m\u001B[39;49;00m\n", 294 | "> \u001B[94massert\u001B[39;49;00m eval_bag.prediction == eval_bag.label\u001B[90m\u001B[39;49;00m\n", 295 | "\u001B[1m\u001B[31mE AssertionError: assert False == True\u001B[0m\n", 296 | "\u001B[1m\u001B[31mE + where False = ResultsBag:\\n{'input_text': 'the new llama can understand bizzare nuanced slang', 'label': True, 'prediction': False, 'precision': 0}.prediction\u001B[0m\n", 297 | "\u001B[1m\u001B[31mE + and True = ResultsBag:\\n{'input_text': 'the new llama can understand bizzare nuanced slang', 'label': True, 'prediction': False, 'precision': 0}.label\u001B[0m\n", 298 | "\n", 299 | 
"\u001B[1m\u001B[31m/var/folders/cn/zpwdtbhd7ylgt032s2t3tpdw0000gn/T/ipykernel_2511/2061077205.py\u001B[0m:25: AssertionError\n", 300 | "\u001B[31m\u001B[1m_______________________________ test_classifier[gpt-3.5-turbo-case2] _______________________________\u001B[0m\n", 301 | "\n", 302 | "case = {'label': True, 'text': 'My monitor keeps flickering'}, model = 'gpt-3.5-turbo'\n", 303 | "eval_bag = ResultsBag:\n", 304 | "{'input_text': 'My monitor keeps flickering', 'label': True, 'prediction': False, 'precision': 0}\n", 305 | "\n", 306 | " \u001B[0m\u001B[37m@pytest\u001B[39;49;00m.mark.eval(name=\u001B[33m\"\u001B[39;49;00m\u001B[33mcomputer_classifier\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m)\u001B[90m\u001B[39;49;00m\n", 307 | " \u001B[37m@pytest\u001B[39;49;00m.mark.parametrize(\u001B[33m\"\u001B[39;49;00m\u001B[33mcase\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m, TEST_DATA)\u001B[90m\u001B[39;49;00m\n", 308 | " \u001B[37m@pytest\u001B[39;49;00m.mark.parametrize(\u001B[33m\"\u001B[39;49;00m\u001B[33mmodel\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m, [\u001B[33m\"\u001B[39;49;00m\u001B[33mgpt-4o\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m, \u001B[33m\"\u001B[39;49;00m\u001B[33mgpt-4o-mini\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m, \u001B[33m\"\u001B[39;49;00m\u001B[33mgpt-3.5-turbo\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m])\u001B[90m\u001B[39;49;00m\n", 309 | " \u001B[94mdef\u001B[39;49;00m\u001B[90m \u001B[39;49;00m\u001B[92mtest_classifier\u001B[39;49;00m(case: \u001B[96mdict\u001B[39;49;00m, model, eval_bag):\u001B[90m\u001B[39;49;00m\n", 310 | " \u001B[90m \u001B[39;49;00m\u001B[33m\"\"\"Test individual classification cases across different models.\u001B[39;49;00m\n", 311 | " \u001B[33m\u001B[39;49;00m\n", 312 | " \u001B[33m Args:\u001B[39;49;00m\n", 313 | " \u001B[33m case (dict): Test case with text and expected label\u001B[39;49;00m\n", 314 | " \u001B[33m eval_bag: Container for test results\u001B[39;49;00m\n", 315 | " \u001B[33m classifier: Classification function\u001B[39;49;00m\n", 316 | " \u001B[33m model: Model identifier\u001B[39;49;00m\n", 317 | " \u001B[33m \"\"\"\u001B[39;49;00m\u001B[90m\u001B[39;49;00m\n", 318 | " eval_bag.input_text = case[\u001B[33m\"\u001B[39;49;00m\u001B[33mtext\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m]\u001B[90m\u001B[39;49;00m\n", 319 | " eval_bag.label = case[\u001B[33m\"\u001B[39;49;00m\u001B[33mlabel\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m]\u001B[90m\u001B[39;49;00m\n", 320 | " eval_bag.prediction = classify(case[\u001B[33m\"\u001B[39;49;00m\u001B[33mtext\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m], model)\u001B[90m\u001B[39;49;00m\n", 321 | " eval_bag.precision = \u001B[94m1\u001B[39;49;00m \u001B[94mif\u001B[39;49;00m eval_bag.prediction == eval_bag.label \u001B[94melse\u001B[39;49;00m \u001B[94m0\u001B[39;49;00m\u001B[90m\u001B[39;49;00m\n", 322 | " \u001B[90m\u001B[39;49;00m\n", 323 | " \u001B[96mprint\u001B[39;49;00m(\u001B[33mf\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m\u001B[33mModel: \u001B[39;49;00m\u001B[33m{\u001B[39;49;00mmodel\u001B[33m}\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m)\u001B[90m\u001B[39;49;00m\n", 324 | " \u001B[96mprint\u001B[39;49;00m(\u001B[33mf\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m\u001B[33mInput: \u001B[39;49;00m\u001B[33m{\u001B[39;49;00meval_bag.input_text\u001B[33m}\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m)\u001B[90m\u001B[39;49;00m\n", 325 | " \u001B[96mprint\u001B[39;49;00m(\u001B[33mf\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m\u001B[33mExpected: 
\u001B[39;49;00m\u001B[33m{\u001B[39;49;00meval_bag.label\u001B[33m}\u001B[39;49;00m\u001B[33m, Predicted: \u001B[39;49;00m\u001B[33m{\u001B[39;49;00meval_bag.prediction\u001B[33m}\u001B[39;49;00m\u001B[33m\\n\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m)\u001B[90m\u001B[39;49;00m\n", 326 | " \u001B[90m\u001B[39;49;00m\n", 327 | "> \u001B[94massert\u001B[39;49;00m eval_bag.prediction == eval_bag.label\u001B[90m\u001B[39;49;00m\n", 328 | "\u001B[1m\u001B[31mE AssertionError: assert False == True\u001B[0m\n", 329 | "\u001B[1m\u001B[31mE + where False = ResultsBag:\\n{'input_text': 'My monitor keeps flickering', 'label': True, 'prediction': False, 'precision': 0}.prediction\u001B[0m\n", 330 | "\u001B[1m\u001B[31mE + and True = ResultsBag:\\n{'input_text': 'My monitor keeps flickering', 'label': True, 'prediction': False, 'precision': 0}.label\u001B[0m\n", 331 | "\n", 332 | "\u001B[1m\u001B[31m/var/folders/cn/zpwdtbhd7ylgt032s2t3tpdw0000gn/T/ipykernel_2511/2061077205.py\u001B[0m:25: AssertionError\n", 333 | "\u001B[31m\u001B[1m_______________________________ test_classifier[gpt-3.5-turbo-case5] _______________________________\u001B[0m\n", 334 | "\n", 335 | "case = {'label': True, 'text': 'the new llama can understand bizzare nuanced slang'}\n", 336 | "model = 'gpt-3.5-turbo'\n", 337 | "eval_bag = ResultsBag:\n", 338 | "{'input_text': 'the new llama can understand bizzare nuanced slang', 'label': True, 'prediction': False, 'precision': 0}\n", 339 | "\n", 340 | " \u001B[0m\u001B[37m@pytest\u001B[39;49;00m.mark.eval(name=\u001B[33m\"\u001B[39;49;00m\u001B[33mcomputer_classifier\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m)\u001B[90m\u001B[39;49;00m\n", 341 | " \u001B[37m@pytest\u001B[39;49;00m.mark.parametrize(\u001B[33m\"\u001B[39;49;00m\u001B[33mcase\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m, TEST_DATA)\u001B[90m\u001B[39;49;00m\n", 342 | " \u001B[37m@pytest\u001B[39;49;00m.mark.parametrize(\u001B[33m\"\u001B[39;49;00m\u001B[33mmodel\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m, [\u001B[33m\"\u001B[39;49;00m\u001B[33mgpt-4o\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m, \u001B[33m\"\u001B[39;49;00m\u001B[33mgpt-4o-mini\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m, \u001B[33m\"\u001B[39;49;00m\u001B[33mgpt-3.5-turbo\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m])\u001B[90m\u001B[39;49;00m\n", 343 | " \u001B[94mdef\u001B[39;49;00m\u001B[90m \u001B[39;49;00m\u001B[92mtest_classifier\u001B[39;49;00m(case: \u001B[96mdict\u001B[39;49;00m, model, eval_bag):\u001B[90m\u001B[39;49;00m\n", 344 | " \u001B[90m \u001B[39;49;00m\u001B[33m\"\"\"Test individual classification cases across different models.\u001B[39;49;00m\n", 345 | " \u001B[33m\u001B[39;49;00m\n", 346 | " \u001B[33m Args:\u001B[39;49;00m\n", 347 | " \u001B[33m case (dict): Test case with text and expected label\u001B[39;49;00m\n", 348 | " \u001B[33m eval_bag: Container for test results\u001B[39;49;00m\n", 349 | " \u001B[33m classifier: Classification function\u001B[39;49;00m\n", 350 | " \u001B[33m model: Model identifier\u001B[39;49;00m\n", 351 | " \u001B[33m \"\"\"\u001B[39;49;00m\u001B[90m\u001B[39;49;00m\n", 352 | " eval_bag.input_text = case[\u001B[33m\"\u001B[39;49;00m\u001B[33mtext\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m]\u001B[90m\u001B[39;49;00m\n", 353 | " eval_bag.label = case[\u001B[33m\"\u001B[39;49;00m\u001B[33mlabel\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m]\u001B[90m\u001B[39;49;00m\n", 354 | " eval_bag.prediction = classify(case[\u001B[33m\"\u001B[39;49;00m\u001B[33mtext\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m], 
model)\u001B[90m\u001B[39;49;00m\n", 355 | " eval_bag.precision = \u001B[94m1\u001B[39;49;00m \u001B[94mif\u001B[39;49;00m eval_bag.prediction == eval_bag.label \u001B[94melse\u001B[39;49;00m \u001B[94m0\u001B[39;49;00m\u001B[90m\u001B[39;49;00m\n", 356 | " \u001B[90m\u001B[39;49;00m\n", 357 | " \u001B[96mprint\u001B[39;49;00m(\u001B[33mf\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m\u001B[33mModel: \u001B[39;49;00m\u001B[33m{\u001B[39;49;00mmodel\u001B[33m}\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m)\u001B[90m\u001B[39;49;00m\n", 358 | " \u001B[96mprint\u001B[39;49;00m(\u001B[33mf\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m\u001B[33mInput: \u001B[39;49;00m\u001B[33m{\u001B[39;49;00meval_bag.input_text\u001B[33m}\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m)\u001B[90m\u001B[39;49;00m\n", 359 | " \u001B[96mprint\u001B[39;49;00m(\u001B[33mf\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m\u001B[33mExpected: \u001B[39;49;00m\u001B[33m{\u001B[39;49;00meval_bag.label\u001B[33m}\u001B[39;49;00m\u001B[33m, Predicted: \u001B[39;49;00m\u001B[33m{\u001B[39;49;00meval_bag.prediction\u001B[33m}\u001B[39;49;00m\u001B[33m\\n\u001B[39;49;00m\u001B[33m\"\u001B[39;49;00m)\u001B[90m\u001B[39;49;00m\n", 360 | " \u001B[90m\u001B[39;49;00m\n", 361 | "> \u001B[94massert\u001B[39;49;00m eval_bag.prediction == eval_bag.label\u001B[90m\u001B[39;49;00m\n", 362 | "\u001B[1m\u001B[31mE AssertionError: assert False == True\u001B[0m\n", 363 | "\u001B[1m\u001B[31mE + where False = ResultsBag:\\n{'input_text': 'the new llama can understand bizzare nuanced slang', 'label': True, 'prediction': False, 'precision': 0}.prediction\u001B[0m\n", 364 | "\u001B[1m\u001B[31mE + and True = ResultsBag:\\n{'input_text': 'the new llama can understand bizzare nuanced slang', 'label': True, 'prediction': False, 'precision': 0}.label\u001B[0m\n", 365 | "\n", 366 | "\u001B[1m\u001B[31m/var/folders/cn/zpwdtbhd7ylgt032s2t3tpdw0000gn/T/ipykernel_2511/2061077205.py\u001B[0m:25: AssertionError\n", 367 | "\u001B[36m\u001B[1m===================================== short test summary info ======================================\u001B[0m\n", 368 | "\u001B[31mFAILED\u001B[0m t_4373bcce5bc844c49ab1aee5ce828d18.py::\u001B[1mtest_classifier[gpt-4o-mini-case5]\u001B[0m - AssertionError: assert False == True\n", 369 | "\u001B[31mFAILED\u001B[0m t_4373bcce5bc844c49ab1aee5ce828d18.py::\u001B[1mtest_classifier[gpt-3.5-turbo-case2]\u001B[0m - AssertionError: assert False == True\n", 370 | "\u001B[31mFAILED\u001B[0m t_4373bcce5bc844c49ab1aee5ce828d18.py::\u001B[1mtest_classifier[gpt-3.5-turbo-case5]\u001B[0m - AssertionError: assert False == True\n", 371 | "\u001B[31m=================================== \u001B[31m\u001B[1m3 failed\u001B[0m, \u001B[32m15 passed\u001B[0m\u001B[31m in 9.70s\u001B[0m\u001B[31m ===================================\u001B[0m\n" 372 | ] 373 | } 374 | ], 375 | "execution_count": 4 376 | }, 377 | { 378 | "metadata": { 379 | "ExecuteTime": { 380 | "end_time": "2025-01-21T08:34:02.276647Z", 381 | "start_time": "2025-01-21T08:34:02.250710Z" 382 | } 383 | }, 384 | "cell_type": "code", 385 | "source": [ 386 | "%%ipytest_evals --run-eval-analysis\n", 387 | "@pytest.mark.eval_analysis(name=\"computer_classifier\")\n", 388 | "def test_analysis(eval_results):\n", 389 | " \"\"\"Analyze results across all models and compute detailed metrics.\n", 390 | "\n", 391 | " Args:\n", 392 | " eval_results: Collection of all test results\n", 393 | " \"\"\"\n", 394 | " # Group results by model\n", 395 | " res = defaultdict(list)\n", 396 | " for r in eval_results:\n", 
397 | " res[r.test_params['model']].append(r)\n", 398 | "\n", 399 | " global experiments_df\n", 400 | "\n", 401 | " # Calculate metrics for each model\n", 402 | " for model, results in res.items():\n", 403 | " tp = sum(1 for r in results if r.result.prediction == r.result.label and r.result.label)\n", 404 | " fp = sum(1 for r in results if r.result.prediction != r.result.label and not r.result.label)\n", 405 | " tn = sum(1 for r in results if r.result.prediction == r.result.label and not r.result.label)\n", 406 | " fn = sum(1 for r in results if r.result.prediction != r.result.label and r.result.label)\n", 407 | "\n", 408 | " # Calculate metrics\n", 409 | " precision = tp / (tp + fp) if (tp + fp) > 0 else 0\n", 410 | " recall = tp / (tp + fn) if (tp + fn) > 0 else 0\n", 411 | " f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0\n", 412 | " accuracy = (tp + tn) / len(results)\n", 413 | "\n", 414 | " experiments_df = pd.concat([experiments_df, pd.DataFrame([{\n", 415 | " 'timestamp': pd.Timestamp.now(),\n", 416 | " 'model': model,\n", 417 | " 'accuracy': accuracy,\n", 418 | " 'precision': precision,\n", 419 | " 'recall': recall,\n", 420 | " 'f1_score': f1,\n", 421 | " 'true_positives': tp,\n", 422 | " 'false_positives': fp,\n", 423 | " 'true_negatives': tn,\n", 424 | " 'false_negatives': fn,\n", 425 | " 'total_samples': len(results),\n", 426 | " }])], ignore_index=True)\n", 427 | "\n", 428 | " assert any(model_results['accuracy'].iloc[-1] >= 0.7\n", 429 | " for _, model_results in experiments_df.groupby('model'))" 430 | ], 431 | "id": "ca31a4d09acc7dcd", 432 | "outputs": [ 433 | { 434 | "name": "stdout", 435 | "output_type": "stream", 436 | "text": [ 437 | "\n", 438 | "t_4373bcce5bc844c49ab1aee5ce828d18.py::test_analysis \u001B[32mPASSED\u001B[0m\n", 439 | "\n", 440 | "\u001B[32m======================================== \u001B[32m\u001B[1m1 passed\u001B[0m\u001B[32m in 0.01s\u001B[0m\u001B[32m =========================================\u001B[0m\n" 441 | ] 442 | } 443 | ], 444 | "execution_count": 5 445 | }, 446 | { 447 | "cell_type": "markdown", 448 | "id": "b1d45def36cb41b8", 449 | "metadata": {}, 450 | "source": [ 451 | "## Analyze Results\n", 452 | "\n", 453 | "Examine performance metrics and visualize trends across models and runs." 
454 | ] 455 | }, 456 | { 457 | "metadata": { 458 | "ExecuteTime": { 459 | "end_time": "2025-01-21T08:34:08.110500Z", 460 | "start_time": "2025-01-21T08:34:07.915693Z" 461 | } 462 | }, 463 | "cell_type": "code", 464 | "source": [ 465 | "import matplotlib.pyplot as plt\n", 466 | "import seaborn as sns\n", 467 | "\n", 468 | "# Set plotting style\n", 469 | "sns.set_palette(\"husl\")\n", 470 | "\n", 471 | "\n", 472 | "def plot_performance_comparison():\n", 473 | " \"\"\"Create comprehensive performance visualization comparing models.\"\"\"\n", 474 | " if len(experiments_df) > 1:\n", 475 | " fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))\n", 476 | "\n", 477 | " # Plot accuracy trends\n", 478 | " sns.lineplot(\n", 479 | " data=experiments_df,\n", 480 | " x=\"timestamp\",\n", 481 | " y=\"accuracy\",\n", 482 | " hue=\"model\",\n", 483 | " marker=\"o\",\n", 484 | " ax=ax1,\n", 485 | " )\n", 486 | " ax1.set_title(\"Model Accuracy Over Time\")\n", 487 | " ax1.axhline(\n", 488 | " y=0.7, color=\"r\", linestyle=\"--\", alpha=0.5, label=\"Minimum Threshold\"\n", 489 | " )\n", 490 | " ax1.set_ylim(0.5, 1.0)\n", 491 | "\n", 492 | " # Plot error metrics\n", 493 | " error_data = experiments_df.melt(\n", 494 | " id_vars=[\"model\"],\n", 495 | " value_vars=[\"false_positives\", \"false_negatives\"],\n", 496 | " var_name=\"metric\",\n", 497 | " value_name=\"count\",\n", 498 | " )\n", 499 | " sns.barplot(data=error_data, x=\"model\", y=\"count\", hue=\"metric\", ax=ax2)\n", 500 | " ax2.set_title(\"Error Analysis by Model\")\n", 501 | "\n", 502 | " plt.tight_layout()\n", 503 | " plt.show()\n", 504 | "\n", 505 | " # Show summary statistics\n", 506 | " print(\"\\nPerformance Statistics by Model:\")\n", 507 | " display(\n", 508 | " experiments_df.groupby(\"model\")[\n", 509 | " [\"accuracy\", \"true_positives\", \"false_positives\", \"false_negatives\"]\n", 510 | " ].describe()\n", 511 | " )\n", 512 | "\n", 513 | "\n", 514 | "plot_performance_comparison()" 515 | ], 516 | "id": "d4cc96b376881d9a", 517 | "outputs": [ 518 | { 519 | "data": { 520 | "text/plain": [ 521 | "
" 522 | ], 523 | "image/png": "iVBORw0KGgoAAAANSUhEUgAABKUAAAPdCAYAAABba9tpAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAt35JREFUeJzs3Qd0VFUXhuGdToAEQkKVLlKkF8ESVKp0EBXpRRBRiiBKEwREugIKKr1J701ARbEAigiCgnSkSW9SUknmX/vwz5hKEgg3meR91hozc+/JbXMyJh/n7Otis9lsAgAAAAAAAFjI1cqdAQAAAAAAAIpQCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAJEqbNm2kWLFi0rx583jb9OrVy7Tp16/ffe9v+/btZlv69UF8z/jx403bYcOG3eeRpk0HDx6UPn36yNNPPy2lSpWSZ599Vt566y3Zs2dPih6XvmcJPVasWGH6qz4AAEDq5Z7SBwAAAJyHq6ur7N69W86dOye5cuWKti4oKEg2b94sziAyMlJWrVolRYsWldWrV8vbb78t3t7eKX1YqYZek3fffVceffRREzQ+9NBD5j1ftmyZtGjRQt555x3p0KFDihzb4sWLo71++eWX5cUXX5SXXnrJsSx//vxSpkyZFDg6AACQFIRSAAAg0TSkOHLkiGzcuFHat28fbZ0GUhrs+Pr6Smq3ZcsWE7KMGzdOWrduLevWrYsWaqRnf/31lwmkGjdubEaRaRBp16hRIxk+fLiMHj3ajEh68sknLT++cuXKxVqmAWnM5dmyZbPwqAAAwL1g+h4AAEi0jBkzyjPPPGNCqZjWr18vzz33nLi7R/83r9DQUPn000+lTp06Urp0aaldu7ZMnTrVjFaKatGiReb7dYSLBkVnzpyJtQ9dplPIKleuLGXLlpV27dqZECWpli9fbkZJVaxYUapUqRJr9I3dDz/8YKYrauARGBgo7733nly/ft2x/tixY9KtWzdzPI899pi89tprcvTo0btOJYw5rax69eoyYsQIcy567hoIqQMHDphtP/7441KyZEmpWrWqfPDBBxISEuL43rCwMJkwYYLUqFHDfG+DBg1k5cqVZt38+fPN/v/+++9Yo6BKlCghZ8+ejfOcJ0+ebN7ngQMHRguk7HSUVO7cuc17qgYNGiRPPfWURERERGun4ZVe2/DwcPP60KFD5vpUqFDBPLp27SqnTp1ytLdfL+0H1apVM222bt0q9yrmddZtL1y40Ewt1fdd3zP79dSQTa+zHq9ef+2zdtpPtb/WqlXLTGPUPvrFF1/c83EBAID/EEoBAIAkqVevnmMKn93Nmzflxx9/NKFIVDabTbp06SLTp083I5E08NBwSoOUwYMHO9rNmzfPvNbA67PPPjOBk4YdUV25csUERPv27TPrPvroIxMYtGrVyhEEJca1a9fku+++kyZNmpjXzz//vPz5559muzFHfmmI4u/vb45Xp/ht2rTJTGdT58+fN1PHjh8/LkOGDJGxY8fKpUuXTLik+0gKDZA0sNNz16loFy5cMOcVHBwso0aNkmnTpkn9+vVNGDJ37lzH9+kxzZo1y1zbKVOmmOBMQxcd+dWwYUPx8vIyIVRUOm3xiSeeMMFSTHo9NQjS9fFNZ/T09JSaNWvKzp075erVq2ZElZ531PBNt7NhwwZzzB4eHiYY0/fu8uXLJgDSwEoDKZ0KqMuimjRpkvTt29cEgOXLl5fkpO+RHr/uQ99/vZ76VQO6Dz/80IRYOkUxauik7+0nn3xiRonZ+6+GiPZQDgAA3Dum7wEAgCTRgtcaWESdwvfNN9+Y8EZHoESlQdW2bdvMNDkNKJSOqsmQIYN8/PHH0rZtWylSpIgJYzTsGjBggGmj4YoGXTpqxm7OnDkm7NHRLlrjSGkRbv0+3ZYGB4mxdu1aE5pomKJ05Nb7779v9hW16PnEiRPNiCINMFxcXMwyDTR0XxrCzJ4924xU0lAoe/bsZn3x4sVN0KLFwPUcEytPnjwmYIo6vVD3rfvKnDmzWaZT5TQw0vCnc+fOZuTRV199Za6ZBmFKw6R//vnHtNGAUEf3rFmzRt58801zDhok/vLLLyaciYteX73u9usbnwIFCpjAUcMcfc+1vQZh9ul8uv+LFy86rrFeQ+0zes3s56PHquGWBpYaQtm1bNnSBD8PgvY1fa+VjpRaunSpGcmlgZSO8NN+p9d0165dpo2GaUuWLDGj8/SaK22j11JDQD1WPz+/B3KsAACkB4yUAgAASaJhi045izqF78svv5S6des6whu7X3/91fyxHzNk0FEn9vU6BU5Hy+iUrah0e1H9/PPPJqjJmTOn3L592zx0epkGUxp8JWXqnk7T0oBJp+JpKKHno6GKBjJKp3TptEANTaKekwZgGloEBASYkUI6rc8eSNlrG+kIKx3xlRR6XlFp8KGjx3Skk9bw+vbbb+Xzzz83o8U0CFO6f3uoFpWGafZwTUddaUj122+/OUZJZcqUyYRVd6Ojm+7Gzc3NfNVgSq+Pvp86isx+bNofChYsaEa8KQ3CNATSvmN/7zScqlSpUqz3Lua1SE5RR17pOWigpFMjo045zZo1q9y4ccNx3HqO2j/sx60Pfa1T/OzvAQAAuDeMlAIAAEmmgZHWO9KRNxqcaGDUs2fPWO3+/fdf84e/PcSwswc5+se/tlExR5xEDXvso3hOnDhhQoS46FS3hGjQtH//fvNca0DFpKOKdPSLHpOGETr6Kz56PHnz5pXkoDWcotKRXDq6TKf16V0Ndaqd1ozSax11/+pux6h1kvQYNYzS89WvGqxF3U5U+h7osZw+ffqux2uvBWWfAqgjojQ0++mnn0ztq6+//toxest+rFpzTB8xxSxIHvNaJCf7KK3E7s9+je2j/GLSKZwAAODeEUoBAIAk09FJOuJGR0vpH/UafGgR6JiyZMli6g5pEeyowZTWTLKHIPYwKmZtoZh1mXx8fMxomz59+sR5TDryKSErVqwwx6vTBWMW8dYaRlrwXEMpDS90BJCOTIpKR8fo6BkdAaTHE3O90oBOr4d9hFXMgu63bt0y1+5utLC2TnUbOnSoGQml+7KPfLKz3+VQj0FHaNlpfS29djqtTo9Ba2ZpjSSdVqjT0bSmU3y0vY5Y03ApvuPU91JHRWkhcnugVKhQIROaaR0pva46As0+Gk7p8evUvg4dOsTaXszC+KmJ/Rrr1NG4roVOuwQAAPeO6XsAACDJ7MWudSqbvaB1XDRE0ulOMe/WpyOSlAYnOs1LR9zEbKPT4GJuS0MVDUC0KLj9oYW8tTh1zNFYMenUMq0npVOvtJ6RTuGL+tCC13rHOy3irgGETiOLeQxaI0trC2moplPPtHZU1GBKg7VOnTqZu/bZR+VELQivI7ASU5Rdp4Vp/aMXXnjBEUjpqBytI2UPuez1u7Roe1RaH0kLids1
bdrUhEQaRj388MOOKXXx0eLuOupMQ7qYd9RTOoJLR6xpAfuodLSUhlk6dU8Dq3z58kV773Qaol5T+/umIaYGb1qPLLXS91hpsBq1z+l7rvW+klrQHgAARJd6/2kKAACkajoNTAMMHRkzcODAeEdUaeCj6zVU0ULgWkdK7yanI3g0eFFa5Lt3796mndaf0mBIC5pHpUXVNYDSr6+88ooZYaXTwbQQdf/+/RM8Xh3doyFCzDsERg1VNGjQgudaK6pHjx7y+uuvmyLXGlhpcXMNZDSMK1q0qDkOnQ6nIZReB63DpFPYdNSS3vlOQykN2/QubfaRV1ocO7672kWlo450NJeOmNJj0RBIv1eDNfs0Rb2Weq20aLnWwNLAR0MzDdK0sHjU0Tw6SkmLp0ctph6fYsWKmTv+6TXV0VU6ckxHfmkQpyPNtNi6bidm3SztD/p9+p5EvbOieuONN8zd9/Q66TZ1+qCOStP3JLEF6lOCXgsd8aV3e9TaXBqkaTA6fvx4c000UAUAAPeOUAoAANwTDTp0epMGLzoCJy72IEaDBx0VoyNM9I95DXqiTuXSoEjDLQ1iNHjS0Efvkqbt7LTAuQZGH330kQwZMsRMpdNQQEcFRZ3WFh8NVHQ6oRYRj4uGN1p3SUd+aSCj09gmT55sAp6uXbuaqWoaNnXv3t201/NesGCBCYX69etnRo9pAKeBhe5H6XmPGDHCnIcWR9c6S1rYXYONu9HwRkfnzJ0714Raui8NzezXU0c+6bXXfevx6fQyba/vg+5Tg7OYd0zUaYX2u+ElREe+aSCj75luT++kp+evI4c0LNSgLCZdr9dWQ6uYhe01QNP6WHptdPql1uvS91jPrUaNGpKajRw50lxz7Xs66k1reGkApzXUEhqdBwAA7s7Fpr8VAAAAIM3S0Vw6OklDIAAAgNSCkVIAAABplIZQOipLp+7pqC4AAIDUhFAKAAAgjdIi6CdPnjRT5rT4OAAAQGriFHff06KeWmti+/bt8bb566+/5KWXXjJ3lNE71ezdu9fSYwQAAEhtli9fLjt27DCF4QEAAFKbVB9KaRFTLQ56+PDheNsEBQWZ2zNr8U0tYlq+fHlTIFSXAwAAAAAAIPVJ1aHUkSNHpFmzZmbY+d3orYe1eKcOTde7zrz77ruSKVMm2bhxo2XHCgAAAAAAgDQSSv3666/m1sqLFy++a7s9e/ZIxYoVzW2SlX7Vugm7d++26EgBAAAAAACQZgqdt2zZMlHtLl68KEWKFIm2zN/f/65T/gAAAAAAAJByUnUolVjBwcHi6ekZbZm+1gLpSXHlyg2x2cSp6KgwP19vCR0xVSQ4NHYDby/xGtBZrl4PFpsTnZwOesuWzccp3xOA/gtnRv+Fs6LvwpnRf+Gs6LtIqG+ki1BK60nFDKD0dYYMGZK0nchIccIfJJuEhYSJW+XSEvH1tlhr3Z6pZNZHRDjXif1/JqaTvidI7+i/cGb0Xzgr+i6cGf0Xzoq+i4T6RroIpXLmzCmXLl2Ktkxf58iRQ9KDW2GRkrV6FfM84qedd0ZMeXuJW9WK4la9ity4mbQRYwAAAAAAAA9amgilypYtK9OmTTPT03Q6m37dtWuXdOnSRdKDiIhIuXYzTDIFVhSvmk+ILThEXLwzmBFSGkjpegAAAAAAgNQkVd99L6Hi5iEhIeZ5nTp15Pr16zJ8+HA5cuSI+ap1purWrSvphQZP14Nvy+VrwXItXMxXfU0gBQAAAAAAUiOnHSkVGBgoI0eOlKZNm0rmzJllypQpMnjwYFmyZIkUK1ZMpk6dKhkzZpT0RkeJOVv9KAAAAACAc4qMjJTw8DBqSqUzbm7u4up6/+OcXGzOdEu2B+zSJe4YkJqKogUE+PCewCnRf+HM6L9wVvRdODP6L5xVRES4XLt2QcLDb6f0oSAFeHtnFl/fbKaMUnyfa2l2pBQAAAAAAEgZOr7l2rUrYrO5SLZsOcXFxWmrA+Ee3vuwsFC5efOqeZ0li7/cK0IpAAAAAACQJJGRERIeHiLZsmUXT88MKX04sJinp5f5qsGUj4/fPU/lI8oEAAAAAABJriWl3N09UvpQkMLBVETEvU/fJJQCAAAAAABAksRVSyqpCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAIBns2vWbBAZWSlTb9evXyosvNpT0jFAKAAAAAAAAliOUAgAAAAAAgOUIpQAAAAAAQLpx9uwZM8Vu27YtZvpcrVpVZcKED+XYsSPSsWMbqVkzUPr06SlBQbcc0+xatXpRqld/yqzfvXuXY1u3bt2UwYMHSK1aT0vz5k3lwIG/ou3r/Plz0rdvL6lR4ymzr5kzp0pERITl55xauaf0AQAAAAAAAFht3rzZMmrUOPn776MydOhA+eWXrdK7d1/x8sog/fr1lrVrV4mPj6+MHz9G3nqrr5QsWUq+/HKtvPPOm7JgwXLJnj2HjB07Uk6ePC6TJk2Va9euyvDhQxzbt9ls8u67faRIkUdk1qz5cunSJRk7doS4urpK+/adUvTcUwtGSgEAAAAAgHRHgyENjGrVqiN+ftmkZs3n5LHHHpcyZcpJpUqV5cSJ47Js2SJ58cXmUrduA8mfv6C8/np3KVy4iCxfvkRu3rwpmzdvkp4935FixYpLlSpPRAubdu7cIefOnZU+fd4131uhQiXp2rWnLFmyMEXPOzVhpBQAAAAAAEh38uR5yPHcy8tLcuXKHe11eHi4HD9+XDp0eDXa95UqVVpOnPhbTp06YabiPfJIUce6EiUedTzXNtev/yvPPfeMY1lkZKSEhobKv/9ee4Bn5jwIpQAAAAAAQLrj5uYW7bVOq4vJ09Mz1rKIiEjziDpNz87d3SNKuwgzQmrUqI9ibSNTpsz3dexpBdP3AAAAAAAA4pA/fwHZt29vtGX79v1pluvD3d1d9u//r7j54cMHHc/z5StgCp1nzeonefPmM4+zZ/+RGTOmiIuLi6XnkVoRSgEAAAAAAMTh5ZdbyfLli2Xjxi/l5MkT8vnnE+Xo0cPSsGETM9qpTp36MmHCWBNc7dr1m7m7nl3lyo9Lrly55P33B8nRo0dkz57fZcyYEZIhQ4ZYo7TSK6bvAQAAAAAAxKFGjVpy5cplmT59svlapEhRGTdukhQoUNCs79XrHRk/fqz06tVVfHx8TFH0Tz+dYNZp8KR399PQqnPnduLtnVGqVasp3bq9mcJnlXq42KJOfkznLl26IVyN1EFHMgYE+PCewCnRf+HM6L9wVvRdODP6L5xReHiYXL58VnLkyCOurv/VUUL66wP+/rnFw8Mzzs+1hDB9DwAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAACnGxcVF3NxczdfULCjolmzYsC5RbW/fvi3t27eUGTOmPPDjcmaEUgAAAAAAwHIaRPl6u4t/Vm/J6iHmq77W5anRokXz5csv1ySq7cKF8+TIkUMP/JicnXtKHwA
AAAAAAEhfNHjKmtlTIr7dLqFbdooEh4p4e4lb1YqStXoVuXYzTCIiIiU1sdlsiWp3+vQpWbZskRQsWPiBH5OzI5QCAAAAAAD3T0Ob8NuJaprJN4MJpCK+2fbfwuBQifj6zutMT1aQ68HhCW/Iw13n/yXpMP/557SMGTNC9u7dIw89lFfq1GkgK1YskVde6Sxr166SMmXKyYoVS8XHx0c6dHhVGjZsIuvXr5VZs6aZ7w8MrCRbtvwW7/bHjh1htvXNNxtjrdPtzJ8/R86ePSuFChWW7t17SblyFSS9IpQCAAAAAAD3x2aTjF+sErd/ziXcNpO3eA587c4IqThE/LRTvKpVFp+x80VuBd91U7fz5pLg1k0SHUxprae+fXtJwYKFZPr0L+Tw4UMmRMqSJYtZv3//PvH2zihTpsyUv/7aJx9+OFJy5swlNWrUkmPHjsrevX/I8OFj4t2+Tu8LCwuVRo2ejxVKaSA1fvwYeeutvlKyZCn58su18s47b8qCBcsle/Yckh4RSgEAAAAAgPtmS+SAJRffTGK7GXRnyl5cdPmt4DvtEgilkmrXrt/k/PnzMmXKLMmUKbMZrXTs2BHZtOkrs97V1VUGDRoqfn7ZpHDhIrJ79y5Zs2aFVK78uHh7e4u7u7v4+wfEue2rV6/IlCmfyoQJn8ZZtF2n9L34YnOpW7eBef36693N9pcvXyJdunST9IhQCgAAAAAA3B8XlzsjlhIxfc/F1UX8s2Q2NaTiDKZ0uW8mudH6ebFF2pJ1+t6RI4clX778JpCyK1WqtCOU0ul8GkjZFS9eQlatWhFrO3v2/C5vv93D8bpNmw5y9OgRqVevoQmz4nL8+HEzHTCqUqVKy4kTf0t6RSgFAAAAAADun4ZDnh4JNtOYKSwk3BQ1t9eQikqX63qbe/JHFu7ubv8/grgLmOtIqKgiIyPF1TV26KVh1axZCxyvfX19pV69GuLl5SXLly82y0JDQ810v82bv5V585aIp6dnrO1ERESmuoLuViKUAgAAAAAAlroVFmnusmevIRX17ntu1avIjZthD2S/Ol3v1KlTEhR0SzJmzGSWHTx4wLH+9OnTEhQUJBkzZjSvDxzYLw8//Ih5HnVKnpdXBsmbN1+0bS9atDLa66FDB5raUc2btzav8+cvIPv27ZWqVZ91tNm3708pW7a8pFeEUgAAAAAAwFI6OujazTDJFFhRvGo+IbbgEHHxziBhIWEmkHpQo4cqVqwsOXPmlNGjP5AOHTrL338flaVLF4qv751C58HBQaa4ebt2Hc0Uvc2bN8nHH39u1mXI4C2XLl2Ss2fPSO7ceWJtO2ZIpaOmfHx8JVeu3Ob1yy+3klGj3jdF1h99VAudr5GjRw/LwIFDJb0ilAIAAAAAAJbT4Ol6cKS4hESYKXKRocHRptI9CFrIfPjwsf8PpVpK/vwFpV69RvLLL1vN+hw5cppC5p06tTFfBw0aJmXKlDPrnnmmmqxevVxat35Jli1bG632VGLoHfyuXLks06dPNl+LFCkq48ZNkgIFCkp65WJ70O+4E7l06YbexRKpgI6KDAjw4T2BU6L/wpnRf+Gs6LtwZvRfOKPw8DC5fPms5MiRR1xdE64jlVroHfIOHTooVao84Vi2YMFc2bZtiylSPnPmVBM4IfF9wN8/t3h4eMb5uZYQ10TsBwAAAAAAIE3o1+8tWblymZw7d1Z27NguS5YslGrVaqb0YaVLhFIAAAAAACBd0Cl3778/UlatWiYtW74go0YNkxdeaCZNm76U0oeWLlFTCgAAAAAApBt697uod8Cz0+l7+oB1GCkFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAIMW4uLiIm5ur+ZqaBQXdkg0b1iWq7e3bt6V9+5YyY8YUsUq3bp0Tvb8XX2wo69evlZTmntIHAAAAAAAA0h8Norwz2MTLy0vCQ2+Ih5ePhIaGSnCIi0REREpqs2jRfNm16zepW7dBgm0XLpwnR44ckqpVnxGrjBgxVtzdPRLVdtq0uZIxo7ekNEIpAAAAAABgeSDl6+MuJ3bPklN7F8ntsBvi7ukj+Uo1lwLlOsj1G7dTXTBls9kS1e706VOybNkiKViwsFjJ1zdLotv6+flJakAoBQAAAAAAkiW0iYwISVTbTN5ecmL3F/L3rmmOZRpM2V/nLtFCrt8ITXA7rm4Zkjzt759/TsuYMSNk79498tBDeaVOnQayYsUSeeWVzrJ27SopU6acrFixVHx8fKRDh1elYcMmZqrbrFl3ji0wsJJs2fJbvNsfO3aE2dY332yMtU63M3/+HDl79qwUKlRYunfvJeXKVYhzOzoqa8SIodK+fSeZOvUzCQsLkzZt2kvJkqVlzJjhcvHiRXn66Wfl3XeHiKurq5m+V758RenY8TUZPnyI+Pr6mjZbt/4oWbJklc6d35A6deo7pu/pMdar11BSEqEUAAAAAAC470DqyA/dJejK3gTbemTIKoEt15kRUnHR5QXLtZODy5tKeMi1u24ro38pKfL0xEQHU1rrqW/fXlKwYCGZPv0LOXz4kAmRsmS5M8po//594u2dUaZMmSl//bVPPvxwpOTMmUtq1Kglx44dlb17/5Dhw8fEu/0vv1wjYWGh0qjR87FCKQ2kxo8fI2+91VdKliwlX365Vt55501ZsGC5ZM+eI87tXbp0UX788XuZNGmKbNnyk0yePFGKFHlEBgwYIv/+e00GDuwjTz9dTZ55plqs712+fIm8+urr8tprXWXZssXmPAMDn5HMmTNLakGhcwAAAAAAcP8SOWDJM2OAhAVfMSOj4qLLw0KumnbJTUcfnT9/Xvr3f8+MVKpdu4688EIzx3odcTRo0FApXLiINGjQWGrWfE7WrFkhXl4ZxNvbW9zd3cXfP+7junr1ikyZ8qm8886AOEMyndL34ovNTU2q/PkLyuuvdzf70fDobiFat249TfsXXnhJIiMjpWnTZlKqVGl56qmqUqRIUTl58nic36vrWrVqZ0aDder0mqnX9fffRyU1YaQUAAAAAAC4LxrC6IilxEzfc3F1Ec9MWUwNqbiCKV2ugVThpyeJLdKWrNP3jhw5LPny5ZdMmf4bLaQBz6ZNX5nnGuD4+WVzrCtevISsWrUi1nb27Pld3n67h+N1mzYd5OjRI2Y6nAZNcTl+/LiZDhhVqVKl5cSJv+PcXqlSZczzPHkeMl81GFO5c+dxtNMi8TqtLy558+ZzPLefr4ZcqQmhFAAAAAAAuG8aDrm5J+6ObmGhYaaoedSaUna6XNe7umZI9vld7u5uOtkw3gLmOhIqKh2Z5OoaO/TSsGrWrAWO11q/qV69GiYkWr58sVmmI5N0ut/mzd/KvHlLxNPTM9Z2IiIizSOu7WmAFtcxJTaE8/DwuOdi7VYhlAIAAAAAAJYKDnExd9lT8d19L2Z4lBx0yt6pU6ckKOiWZMyYySw7ePCAY/3p06clKChIMmbMaF4fOLBfHn74kVhhkI5aijoSSS1atDLa66FDB5
raUc2btzav8+cvIPv27ZWqVZ91tNm3708pW7Z8nNtLDwilAAAAAACApXR0kAZPeUq2lkIVOkp46A3x8PKR0JBQs1zXPwgVK1aWnDlzyujRH0iHDp1NjaWlSxeKr++dQufBwUGmuHm7dh3NlLrNmzfJxx9/btZlyOAtly5dkrNnz0SbQmcXM1TSUVM+Pr6SK1du8/rll1vJqFHvmyLrjz6qhc7XyNGjh2XgwKGSXhFKAQAAAAAAy2nwdPOWyK2gUHF19ZLIoND/Ty97cFPMtJD58OFj/x9KtTQFxOvVayS//LLVrM+RI6cpZN6pUxvzddCgYVKmTDmzTu9wt3r1cmnd+iVZtmxttNpTiaF38Lty5bJMnz7ZfC1SpKiMGzdJChQoKOmViy21TShMQZcu3RCuRuqgoyIDAnx4T+CU6L9wZvRfOCv6LpwZ/RfOKDw8TC5fPis5cuQRV9fYtYtSK71D3qFDB6VKlSccyxYsmCvbtm0xRcpnzpxqAickvg/4++cWDw/POD/XEpLMJcMAAAAAAABSr3793pKVK5fJuXNnZceO7bJkyUKpVq1mSh9WukQoBQAAAAAA0gWdcvf++yNl1apl0rLlCzJq1DB54YVm0rTpSyl9aOkSNaUAAAAAAEC6oXe/i3oHPDudvqcPWIeRUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAABSjIuLi7i5uZqvqVlQ0C3ZsGHdXdscOPCXdOnyitSo8ZS0aNE0wfbt2rWQwMBK0R7Hjh2Jt/13322Sq1ev3NPxnz17xmxfv6YW7il9AAAAAAAAIP3RIMrD2yYZPL3kVvgN8fHwkZDQUAkPcZGIiEhJbRYtmi+7dv0mdes2iHP9zZs35e23e0jdug1l0KD3Zd++P2XEiKHy0EN5pUyZcrHaR0REyKlTJ2XSpKmSL19+x/IsWbLGuf1z587Ke+/1k6VL10hakapHSoWGhsqAAQOkUiVNCwNl5syZ8bbdsmWLNGrUSMqXLy/t27eXY8eOWXqsAAAAAAAg8YFUJl93WXdinnT4uqbjse7kPLNc16c2NpvtrusvXDgnVao8KW+80cMEUbVr15XChR+WP//cE2f7s2fPyO3b4VKiREnx9w9wPNzd3e9p/84oVY+UGjNmjOzdu1fmzJkjZ86ckb59+0qePHmkTp060dodPnxYXnvtNencubM0bNhQli1bJu3atZONGzdKpkyZUuz4AQAAAABILzQ0CYsMSVRbn0xesuLIXFlyaJpjmY6WWnJoqnleL38LCboemuB2PF0zJHna3z//nJYxY0bI3r17THhUp04DWbFiibzySmdZu3aVGdW0YsVS8fHxkQ4dXpWGDZvI+vVrZdasO8eqU+C2bPkt1nYLFy5iRkipyMhI2bZti5w8eULKlq0Q53EcP35McuTIKV5eXok67pdeauT4OmDAYBNq/f77TjPSyu7FFxua86hXr6F069ZZHn64iGzbtlUiIm7L2LEfmzabN2+SpUsXya1bt6RmzdrSs+c74unpadbt3fuHfPrpx3L48EHx88smrVq1lSZNXpR0F0oFBQXJ0qVLZdq0aVKyZEnz0PBp/vz5sUKphQsXmhFSb775pnn9zjvvyPfffy9r166V5s2bp9AZAAAAAACQfgKpMfu6ydEbexNs6+uZVabU/FK+/HtRnOu//HuhPF+knfTY/bxcD7t212097FNa+pScmOhg6vbt29K3by8pWLCQTJ/+hRw+fEjGjh0hWbJkMev3798n3t4ZZcqUmfLXX/vkww9HSs6cuaRGjVpy7NhRE9oMHz7mrvsIDw+XWrWqmn01afKClCpVOs52x4//Le7uHtKnT085cGC/5M9fwIyyevTRUnG2nzZtjrz6ajvzVUdgzZs3J8Hz1TBt3LhJ4uHhKRkzZjTL1qxZKUOHjjDTB4cNe0+++GKWdOz4mjmeHj1el5dfbin9+w+Sffv2ykcfjRI/P3955plqkq5CqQMHDpg3UMMmu4oVK8rkyZNN4ujq+t9QvlOnTkmZMmUcr7UzFi1aVHbv3p2kUCqV11RLV+zvBe8JnBH9F86M/gtnRd+FM6P/whnF3V8T14mzegXIv6FXzMiouOjy62FXTbuEQqmk0ppQ58+flylTZkmmTJmlUKHCprD4pk1fmfWaNQwaNNSMEtKRT7t375I1a1ZI5cqPi7e3t5lap1PsEjJlymw5ceK4jBs3WvLmzSfNm7eO1ebkyRNy8+Z1adCgiXTs2EXWrl0pb775hsybt8QEYTFlzern+OrllSFR5/vkk4FSunRZ89xe4LxHj96OGlevvvq6fP75RBNK6f6LFi0mr73W1azLn7+gCaoWLJh711BK+0LM/pDYz7NUG0pdvHhR/Pz8HEPIVEBAgKkzde3aNcmWLVu05dqpojp37pwj6Uwsf3+fZDhyJCfeEzgz+i+cGf0Xzoq+C2dG/4UzCQkJkStX7gwWcXe/83VA2U8TNX1PB5Jky+AjmTx84gymdLmfV4AMKPN5gnWUkjp97++/j0j+/PklSxZfx7IyZcqaUMrV1cUESNmz/xc6PfpoSVm5cpk5R12v+9LnGlb16tXd0a5du1ekffuO5rm7u5eULPmoeVy9ekmWLl0srVu3jXUsAwYMktDQEBOO3dlXCVN/6ptvNkiuXLll9OjhjrZ9+77rCJK03lbM44lKl+syXZcnz0OO9fY6XaVLl3YsK1GihFy5clmCgm6aEK1UqVLRtle2bDlZvXp5rH2oyEgXE+L5+WWSDBkSF5I5TSgVHBwcLZBS9tdhYWHRltetW1feeOMNadCggVStWtVM2/vzzz+lSpUqSdrn5cs3JA3WDXNK+pmi/1PmPYEzov/CmdF/4azou3Bm9F84o/DwMDOLSd2+/d+d8twkEfWRbCIhYWFSv1ALRw2pqHR5SGiYuEZGzwTiEhGhPzSJ/8FxcXE1QVfUY9ZpbCoy0iZubm7R1ukMLg13dJmut3/vI48Ul1mzFjja+fr6ysmTp8zd9KpUecKxXEcb/fvvtWjb/I+reHlljLZO258/f0GaNm0Wbfs6MOfff//9//FGmu/Rz4u4zkWP8856m5keaF9vv6Ohfp99WXj4nXN3cXEzU/zs32sXHn7bsb+4rr32gatXb4mHR3iM65y4oD3VhlJa6Ctm+GR/HTOBe/rpp6Vr167SvXt38wZoGNW4cWNzO8akuPOGJsPBI9nwnsCZ0X/hzOi/cFb0XTgz+i+cyf321fBgF2lapIOjhpSOmNIRUhpI6fJb128nKWxKLJ2upyWAgoJuScaMd26MdvDgAcf606dPmxrX9vpLWuvp4YcfMc+jjsjS6XM6qiqqX3/9xdSgWr16o2N6nW67QIFCcR5L9+6vSfnyFU1hcqUBz9Gjh6Vp05fMsdmPz+769evRXnt4eJhjtdPnV69eSfAaHD16xOzXXkNLi63r1EStaaUjwKLat+8Ps/xBfXalvnss/l/OnDnl6tWrJpWMOqVPAylNIGN6/fXXZdeuX
bJlyxaZPXu2qSL/0EMPWXzUAAAAAAAgITr6RoOnBvlby6zamxwPfa3L7aN6klvFipVN3jB69AemXtKdO9EtdAROwcFBJljSqWxaEFzXP//8nbvPZcjgLZcuXXLUZorpySermql4emc/rRf19dcbZf78uWZq351zjpDLly+ZQujqqaeqypIlC2TLlh/k5EmtPzXGDK7RO+fFRfevjhw5ZAKo4sUfNSHWd99tMvsbM2a4uLq6JXgNxo8fY4qY79jxi8yYMVmaNWthlj///Eum8PuUKZ+a7W3YsM7chVBDsgcl1YZSOq9RC4hpsXK7nTt3mrmPUYucq3Xr1snw4cPN9D5/f38zt3X79u1Jnr4HAAAAAACsocFTyC2b/Hs1VCKDvMxXff2gAimlecLw4WPNoJcOHVrK7NkzpF69RiZ/UDpqSAuZd+rUxhT4HjRomKOWkxb7ttkipXXrl+IckaSjq/ROdxpcdezYWqZN+0zefPMtqVr1WbP+woXz0rhxHVM3Sr38citp2bKtjB8/Vtq3bynHjx+TCRM+jTVCyi5r1qzy3HN15b33+su6daukUqXK5k55Gka9/vor5o58WgMrIRo+9ev3lgwa1F+ee66+NGvW0izPlSuXjBkzXrZv3ybt2jWXOXNmSLduvaR+/UbyoLjYEqoaloLee+89M/ppxIgRcuHCBenbt6+MHDlSateubTqQj4+PGTm1d+9eadGihXz00Ufmrntjx46VM2fOyPLly2MFWHdz6RJzuFMLDakDAnx4T+CU6L9wZvRfOCv6LpwZ/RfOWlPq8uWzkiNHHnF19RBnoWHSoUMHo9V90vBp27YtZoTSzJlTZdmytSl6jM7WB/z9c5t6VHF9rjntSCnVv39/KVmypLRr106GDh1qakZpIKUCAwNl/fr15rlWhx8yZIiMGjVKmjZtapZNmTIlSYEUAAAAAABI+3SUkN5R79y5s7Jjx3ZZsmShVKtWM6UPK11K1SOlrMa/TKQe/GsRnBn9F86M/gtnRd+FM6P/whk560gp9dNP38v06ZPNnfL8/LJJkyYvSOvW7U0NJUZKWTtSKtXefQ8AAAAAACC5aY0ne52nqHT6XnxFxvFgML8NAAAAAAAASZIcE+8IpQAAAAAAQJLYazjfvh2e0oeCFBIWFmq+urnd+yQ8pu8BAAAAAIAkcXV1Ew+PDHL9+jXx9XUVFxfGvKQXOkJKA6mbN6+Kt3fm+7rJHKEUAAAAAABIEhcXF8maNZtcu3ZBrlw5n9KHgxSggZSvb7b72gahFAAAAAAASDJ3dw8pWrSonD9/lTtHpjNubu73NULKjlAKAAAAAADcEw0mPDw8CaVwT5j0CQAAAAAAAMsRSgEAAAAAAMByhFIAAAAAAACwHKEUAAAAAAAALEcoBQAAAAAAAMsRSgEAAAAAAMByhFIAAAAAAACwHKEUAAAAAAAALEcoBQAAAAAAAMsRSgEAAAAAAMByhFIAAAAAAACwHKEUAAAAAAAALEcoBQAAAAAAAMsRSgEAAAAAAMByhFIAAAAAAACwHKEUAAAAAAAALEcoBQAAAAAAAMsRSgEAAAAAAMByhFIAAAAAAACwHKEUAAAAAAAALEcoBQAAAAAAAMsRSgEAAAAAAMByhFIAAAAAAACwHKEUAAAAAAAALEcoBQAAAAAAAMsRSgEAAAAAAMByhFIAAAAAAACwHKEUAAAAAAAALEcoBQAAAAAAAMsRSgEAAAAAAMByhFIAAAAAAACwHKEUAAAAAAAALEcoBQAAAAAAAMsRSgEAAAAAAMByhFIAAAAAAACwHKEUAAAAko2Li4u4ubmarwAAAHfjfte1AAAAQCJoEOWdwSZeXl4SHnpDPLx8JDQ0VIJDXCQiIjKlDw8AAKRChFIAAAC470DK18ddTuyeJaf2LpLbYTfE3dNH8pVqLgXKdZDrN24TTAEAgFgIpQAAAHBfdISUBlJ/75rmWKbBlP11npKt5eatFDxAAACQKlFTCgAAAPdMa0fplD0dIRUXXa7rqTEFAABiIpQCAADAPXN1dTE1pHRkVFx0ua7XdgAAAFERSgEAAOCeRUbaTFFzrSEVF12u67UdAABAVIRSAAAAuGc2m83cZU+LmsdFl+t6bQcAABAVhc4BAABwX4JDXMxd9lR8d98TIZQCAADREUoBAADgvkRERJrgSe+yV6hCR1NDSqfshYaEmuW6HgAAICZCKQAAANw3DZ5u3hK5FRQqrq5eEhlkn7LHCCkAABA3QikAAAAkGw2iIiIIogAAQMIodA4AAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACyXqkOp0NBQGTBggFSqVEkCAwNl5syZ8bb95ptvpG7dulK+fHlp0aKF7Nu3z9JjBQAAAAAAQBoJpcaMGSN79+6VOXPmyODBg2XSpEmycePGWO0OHz4svXv3ltdee01Wr14tJUqUMM+Dg4NT5LgBAAAAAADgpKFUUFCQLF26VN59910pWbKk1KpVSzp16iTz58+P1Xbr1q1SpEgRadKkieTPn1/eeustuXjxohw5ciRFjh0AAAAAAABOGkodOHBAbt++babj2VWsWFH27NkjkZGR0dpmzZrVBFA7d+4061asWCGZM2c2ARUAAAAAAABSH3dJpXSkk5+fn3h6ejqWBQQEmDpT165dk2zZsjmW16tXT7777jtp2bKluLm5iaurq0yZMkWyZMmSpH26uCTrKeA+2N8L3hM4I/ovnBn9F86KvgtnRv+Fs6LvIj6J7ROpNpTSelBRAyllfx0WFhZt+dWrV02I9d5770nZsmVl4cKF0r9/f1m5cqX4+/snep/+/j7JdPRILrwncGb0Xzgz+i+cFX0Xzoz+C2dF38W9SrWhlJeXV6zwyf46Q4YM0ZZ/+OGHUrRoUWnVqpV5PWzYMHMnvuXLl0vnzp0Tvc/Ll2+IzZYsh49kSFX1g433BM6I/gtnRv+Fs6LvwpnRf+Gs6LtIqG84bSiVM2dOMwJK60q5u985TB0NpYGUr69vtLb79u2TNm3aOF7r9L3ixYvLmTNnkrRP/SHiByl14T2BM6P/wpnRf+Gs6LtwZvRfOCv6LtJcofMSJUqYMGr37t2OZVrIvHTp0iZ0iipHjhxy9OjRaMv+/vtvyZs3r2XHCwAAAAAAgDQQSnl7e0uTJk1kyJAh8scff8imTZtk5syZ0rZtW8eoqZCQEPO8WbNmsmTJElm1apWcOHHCTOfTUVLPP/98Cp8FAAAAAAAAnGr6ntJi5RpKtWvXTjJnzizdu3eX2rVrm3WBgYEycuRIadq0qbn73q1bt8wd986dO2dGWc2ZMydJRc4BAAAAAABgHRebjZmfdpcuUZwtNRVFCwjw4T2BU6L/wpnRf+Gs6LtwZvRfOCv6LhLqG047
fQ8AAAAAAABpF6EUAAAAAAAALEcoBQAAAAAAAMsRSgEAAAAAAMByhFIAAAAAAACwHKEUAAAAAAAALEcoBQAAAAAAAMsRSgEAAAAAAMByhFIAAAAAAABw/lDq1KlTyb1JAAAAAAAApDHJHkrVqVNHXnrpJZk9e7acP38+uTcPAAAAAACANCDZQ6mffvpJmjZtKt99953UqFFDWrduLQsWLJArV64k964AAAAAAADgpJI9lMqWLZu0aNFC5s6dKz/88IPUr19ffvzxR6lZs6Z07NhRVq5cKcHBwcm9WwAAAAAAADiRB1ro/OLFi+Zx7tw5iYyMlEyZMsmSJUvk2Wefla+//vpB7hoAAAAAAACpmHtyb3D//v2yceNG8/jnn3/kySeflA4dOpiRUhpKqc8++0wGDRoktWvXTu7dAwAAAAAAID2GUlpPqmLFitK+fXtT9NzPzy9WG13PXfoAAAAAAADSr2QPpTZv3iwBAQHy77//OgKp33//XUqWLCmenp7mdZUqVcwDAAAAAAAA6VOy15S6evWquevejBkzHMvefvttM2rq8OHDyb07AAAAAAAAOKFkD6Xef/99qVWrlvTq1cux7JtvvpHq1aubdQAAAAAAAIDrgyh03q5dO/Hw8PhvJ66u0rZtW9m7d29y7w4AAAAAAABOKNlDqdy5c8vPP/8ca/muXbtMrSkAAAAAAAAg2Qudd+nSRd59911T3LxUqVJm2YEDB2TNmjUyePDg5N4dAAAAAAAAnFCyh1KNGzeWbNmyyZIlS2ThwoXi7u4uBQoUMIXPK1WqlNy7AwAAAAAAgBNK9lBKVa1a1TwAAAAAAAAAS0Kp4OBgWbx4sRw5ckQiIiIcy8PCwuSvv/6SDRs2JPcuAQAAAAAAkN4LnQ8cOFCmTp1qwimtIxUeHm4Cqi+//FLq16+f3LsDAAAAAACAE0r2kVI//vijfPzxx/Lkk0/K4cOHpX379qbg+ahRo8xrAAAAAAAAINlHSoWGhkrBggXN80ceeUT27t1rnr/88svy22+/JffuAAAAAAAA4ISSPZR6+OGHZdu2bY5QaufOneb5jRs3TGAFAAAAAAAAJPv0vW7dusmbb74pkZGR0rhxY1NHqkuXLnLw4EHuyAcAAAAAAIAHE0rVqFHD3GFPQ6ncuXPLggULZPXq1VKhQgVp06ZNcu8OAAAAAAAATijZQ6mmTZvKyJEjpVixYuZ18eLFzQMAAAAAAAB4YDWlLly4IG5ubsm9WQAAAAAAAKQhyT5SqkmTJtKpUydp1KiRPPTQQ+Ll5RVrPQAAAAAAANK3ZA+l1q9fL66urrJu3bpY61xcXAilAAAAAAAAkPyh1HfffZfcmwQAAAAAAEAak+yh1I4dO+66/rHHHkvuXQIAAAAAACC9h1Jt2rSJc7mnp6dkz55dvv322+TeJQAAAAAAANJ7KHXgwIForyMiIuTkyZMybNgwadiwYXLvDgAAAAAAAE7I9UHvwM3NTQoVKiT9+vWTjz/++EHvDgAAAAAAAE7ggYdSdpcvX5br169btTsAAAAAAACkp+l7/fv3j7Xs1q1bsm3bNqlTp05y7w4AAAAAAABOKNlDqbhkzZpV+vbtK40bN7ZidwAAAAAAAEhvodTIkSPl9u3b8u+//4q/v79Z9vvvv0vJkiXNHfgAAAAAAACAZK8ptX//fqlRo4bMmDHDseztt982U/cOHz6c3LsDAAAAAACAE0r2UOr999+XWrVqSa9evRzLvvnmG6levbpZBwAAAAAAADyQkVLt2rUTDw+P/3bi6ipt27aVvXv3JvfuAAAAAAAA4ISSPZTKnTu3/Pzzz7GW79q1SwICApJ7dwAAAAAAAHBCyV7ovEuXLvLuu++a4ualSpUyyw4cOCBr1qyRwYMHJ/fuAAAAAAAA4ISSPZRq3LixZMuWTZYsWSILFy4Ud3d3KVCggCl8XqlSpeTeHQAAAAAAAJxQsodS6tFHH5W33npLChUqZF6vX7/eBFMAAAAAAADAA6kppfWk9O57a9eudSybO3eu1KtXT3bu3MlVBwAAAAAAQPKHUqNHjzZ1pXr06OFYtmjRIunUqZOMGDEiuXcHAAAAAAAAJ5TsodTx48elTp06sZbXrVtXjhw5kty7AwAAAAAAgBNK9lCqcOHCsmHDhljLv/vuO8mfP39y7w4AAAAAAABOKNkLnffs2VPeeOMN2bp1q5QsWdIsO3DggPz2228yadKk5N4dAAAAAAAAnFCyj5R6+umnZdWqVeYOfMeOHZNTp06Z53oHvieffDK5dwcAAAAAAAAnlOwjpS5duiSLFy829aMiIiIkKChI/vjjD3PnvaNHj8qOHTuSe5cAAAAAAABI7yOlBgwYID/99JOULl1adu3aJeXKlRN/f38TTHXv3j25dwcAAAAAAAAnlOwjpXQk1MyZM6V8+fKmrtSzzz4rFStWlKlTp8qPP/4obdu2Te5dAgAAAAAAIL2PlLLZbJIzZ07zvEiRIvLXX3+Z53Xr1pU///wzuXcHAAAAAAAAJ5TsoZQWNV+9erV5XqJECTNaSp0+fTq5dwUAAAAAAAAnlezT93r37i1dunQRb29vady4sUyfPl0aNmwoZ86ckUaNGiX37gAAAAAAAOCEkj2U0vpRmzdvlpCQEPHz85Ply5fLpk2bJGvWrGYKHwAAAAAAAJDsoZTKnDmzeSitL9WqVasHsRsAAAAAAAA4qWSvKQUAAAAAAAAkhFAKAAAAAAAAliOUAgAAAAAAgOUIpQAAAAAAAGA5QikAAAAAAABYjlAKAAAAAAAAlkvVoVRoaKgMGDBAKlWqJIGBgTJz5sw427Vp00aKFSsW69G/f3/LjxkAAAAAAAAJc5dUbMyYMbJ3716ZM2eOnDlzRvr27St58uSROnXqRGs3ceJECQ8Pd7zes2eP9OzZU1q2bJkCRw0AAAAAAACnDaWCgoJk6dKlMm3aNClZsqR5HD58WObPnx8rlMqaNavjeUREhIwfP146deokpUuXToEjBwAAAAAAgNOGUgcOHJDbt29L+fLlHcsqVqwokydPlsjISHF1jXvm4YoVK+Tff/+VV199Nek7DQsXsdliL9d9uUe5VGFh8W/DxUXEw+Pe2obHs/8H2VZ5et5b29u3RSIjk6etHq8et72tLfLOtdOHLYG2id1uRMSdR3K01f5g74Opoa1eA70W8XFzu/NILW21j0UZ3XhfbaP+fD6otgn9LMfVNr7+y2fEvbW91597PiPu7Wc5vv4bV1s+I+7tMyI+fEbcW9uYP/cRfEbwe4STfUZ48BkRqy2/R6Setnf7+dRTirodPiOS3jat/x7hrKHUxYsXxc/PTzyjfNgEBASYOlPXrl2TbNmyxfoem80m06dPl7Zt20qmTJmSvM+Mn3+ihaxiLY8o/LCEvviy47X3px+Ly+24O1hEvgIS2qLVf22nfiYuwUFxt82VW0LbdnC8zjBzqrhe/zfOtpH+ARLSsfN/bb+YJa6XL8Xd1jeLhHTp6njttWieuJ07G2dbm3dGCe7e87+2y5eI26kTcbd195Dgt975r+3q5eJ27Kj
EJ6jPAMdzzy/XiPuhA/G37fm2438snt9sFPe9f4hk9JKMQbHfj6Cub4r8//312LxJPHbvine7wa+9IbYsd0bSefz0vXjs2B5/2w6vii179jttt28Tj60/xds2pE17icydxzx337lDPH/4Lv62zVtJZP4Cd9r+8bt4bvo6/rYvvCSRDz9inrvt3ydeG9bF2za00fMSUbzEnbaHD4rXmpXxt63bQCJKlzHPXY8flQzLl8bbNqxmbbldodKdtv+ckgyL5sff9pnqcrvK43faXjgnGb6YHW/b8KeqmodyuXRJvGdNi7/tY1UkvFqNO22v/yveUz6Lv225ChJe+/+jJ4OCJOOnH8fb9napMhJWr8H/vzFcMn78YfxtixaXsCZNHa/v1jbmZ4Q5Bg/XOPsvnxHJ+BkRX1s+I+7rM8K8ZxvWxNl/FZ8R9/8Zwe8RD+YzwsUrymfEn3xG8HuEc31GhL30suNvOT4j/t+W3yOc5jNCWr4sLvkeNk/5jOD3iMj/f0bYM1inDaWCg4OjBVLK/josnrRv+/btcu7cOWnWrNk97TNjRi+R/wem0fh4i0+Az3+vM3mJhMdTI94nQ+y2LvEk0JljtM2cQeR2SLxtM8dsG+yV+LZ6bnHJ6CWZorb1uUtbD48Ybb3jb6ubjtrWNxFt7e93lLbmPYmrrT10zJLx7tv19xHJ6pPItplFAhLZNluUtlmT0jbT3dv6RWnrl1DbTPfW9krmu7fNGqXtzYTaZvyvbejd25pram8bGZz4tu4RiW/r7Xr3ttq37G31cySxbdXd2sb8jNC2+j+iuL6Hz4hk/YyIty2fEff3GRHP569ZzmdE7LZJ/Yzg94gH+hnhm5jt8hnB7xGp7DNCtK+JiL9+5TPi/235PcJZPiMcfVfxGXHnOb9HSGK52HR4USq0YcMG+eCDD2Tr1q2OZUePHpV69eqZ8ClqHSm7oUOHyqVLl0zh83tx+ewVM9oqzQ2Xc8IhtS62SPPBdvnyjdiHw5Da2G1TwzBZhtQ6XrqEh8Xff/mMuLe2DLu/v7ZJ+Pk0n79ZvOPuvzHa8hmRTobdO8lnhIury53P3vNXxcb0PX6PcLLPCBcP9/9+dwjlM8Lg94jU0/YuP596Sv45ssrla0F33lo+I5LeNo3+HmH6hj2sdMaRUjlz5pSrV6+aulLu/38DdEpfhgwZxNfXN87v+emnn6Rbt273vE+bh0f8n5FRl3tEH8GVbG3dPZyrrZt73CPL7qVt1PbaVj+vPT3F5uEZ93tiu4fturrdeaTFti6uie9rqaGtvsFO1fYefu4T03+Tut3U8HOfWj4jHkTb1PCznFo+I+7Wf+9nuyn+s5yKPiMeRNvU8HOfCj4jbG7uYovnH5BjbZvPiNTzuwGfEXe+2PRvEj4jDH6PSD1t7/bzqX+3ubnd6bvans+Ie2grqaPtg/pZTkBC/8tOMSVKlDBh1O7dux3Ldu7cae6oF1eR8ytXrsipU6dMMXQAAAAAAACkbqk2lPL29pYmTZrIkCFD5I8//pBNmzbJzJkzTRFz+6ipkJD/5jsePnxYvLy8JG/evCl41AAAAAAAAHDqUEr1799fSpYsKe3atTP1orp37y61a9c26wIDA2X9+vWOtpcvXzbT+lwSW+IdAAAAAAAAKSbVFjpPCZcuxVPUFZbTbDEgwIf3BE6J/gtnRv+Fs6LvwpnRf+Gs6LtIqG849UgpAAAAAAAApE2EUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAACQCri4uIibm6v5mh64p/QBAAAAAAAApGdubq7i4W2TDJ5eciv8hvh4+EhIaKiEh7hIRESkpFWEUgAAAAAAACkYSGXydZcVR2bJl38vNKFUJg8fqV+ohTQt0kFuXb+dZoMpQikAAAAAAIAU4uFtM4HUkkNTHcs0mLK/bpC/tUTckjSJmlIAAAAAAAApwMXFxUzZ0xFScdHlGby80myNKUIpAAAAAACAFODq6mJGRekjLvZ12i4tIpQCAAAAAABIAZGRNlM/Sh9xsa/TdmkRoRQAAAAAAEAKsNlsEhIWaoqax0WX6134tF1aRKFzAAAAAACAFBIe7GLusqfiu/ueCKEUAAAAAAAAklFERKQJnvQuey8+0tERSukIKV2u69MqQikAAAAAAIAUFBERKRG3REKDQsXV1Uv+jbRP2UubI6TsCKUAAAAAAABSAZvNJhERaTuIiopC5wAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKpOpQKDQ2VAQMGSKVKlSQwMFBmzpwZb9uDBw9KixYtpEyZMtKwYUP55ZdfLD1WAAAAAAAApJFQasyYMbJ3716ZM2eODB48WCZNmiQbN26M1e7GjRvyyiuvSJEiRWTt2rVSq1Yt6datm1y+fDlFjhsAAAAAAABOGkoFBQXJ0qVL5d1335WSJUuaoKlTp04yf/78WG1XrlwpGTNmlCFDhkiBAgWkR48e5qsGWgAAAAAAAEh93CWVOnDggNy+fVvKly/vWFaxYkWZPHmyREZGiqvrf3nar7/+KjVq1BA3NzfHsuXLlyd5ny4uyXDgSBb294L3BM6I/gtnRv+Fs6LvwpnRf+Gs6LuIT2L7RKoNpS5evCh+fn7i6enpWBYQEGDqTF27dk2yZcvmWH7q1ClTS2rQoEHy3XffyUMPPSR9+/Y1IVZS+Pv7JOs54P7xnsCZ0X/hzOi/cFb0XTgz+i+cFX0X9yrVhlLBwcHRAillfx0WFhZrqt/UqVOlbdu2Mm3aNPnyyy+lY8eOsmHDBsmdO3ei93n58g2x2ZLpBHDfqap+sPGewBnRf+HM6L9wVvRdODP6L5wVfRcJ9Q2nDaW8vLxihU/21xkyZIi2XKftlShRwt
SSUo8++qhs3bpVVq9eLV26dEn0PvWHiB+k1IX3BM6M/gtnRv+Fs6LvwpnRf+Gs6LtIc4XOc+bMKVevXjV1paJO6dNAytfXN1rb7NmzS+HChaMtK1iwoJw9e9ay4wUAAAAAAEAaCKV05JO7u7vs3r3bsWznzp1SunTpaEXOVbly5eTgwYPRlh07dszUlgIAAAAAAEDqk2pDKW9vb2nSpIkMGTJE/vjjD9m0aZPMnDnT1I2yj5oKCQkxz5s3b25CqYkTJ8qJEyfk448/NsXPGzdunMJnAQAAAAAAAKcKpVT//v2lZMmS0q5dOxk6dKh0795dateubdYFBgbK+vXrzXMdETV9+nTZvHmzNGjQwHzVwuc6BRAAAAAAAACpj4vNRjkyu0uXuGNAaqrUHxDgw3sCp0T/hTOj/8JZ0XfhzOi/cFb0XSTUN5x6pBQAAAAAAADSJkIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJZL1aFUaGioDBgwQCpVqiSBgYEyc+bMeNu+/vrrUqxYsWiPzZs3W3q8AAAAAAAASBx3ScXGjBkje/fulTlz5siZM2ekb9++kidPHqlTp06stkePHpWxY8fKE0884ViWJUsWi48YAAAAAAAATh1KBQUFydKlS2XatGlSsmRJ8zh8+LDMnz8/VigVFhYmp0+fltKlS0v27NlT7JgBAAAAAADg5KHUgQMH5Pbt21K+fHnHsooVK8rkyZMlMjJSXF3/m3l47NgxcXFxkXz58t3XPnWTNtt9bQLJxMXlzlfeEzgj+i+cGf0Xzoq+C2dG/4Wzou8iob7htKHUxYsXxc/PTzw9PR3LAgICTJ2pa9euSbZs2aKFUpkzZ5Y+ffrIr7/+Krly5ZLu3bvLM888k6R9Zsvmk6zngPvHewJnRv+FM6P/wlnRd+HM6L9wVvRdpLlC58HBwdECKWV/rdP1otJQKiQkxBRDnz59ugmjtPD5n3/+aekxAwAAAAAAwMlHSnl5ecUKn+yvM2TIEG35G2+8IW3atHEUNi9evLjs27dPlixZYupMAQAAAAAAIHVJtSOlcubMKVevXjV1paJO6dNAytfXN1pbrS8V8057hQsXlvPnz1t2vAAAAAAAAEgDoVSJEiXE3d1ddu/e7Vi2c+dOM/IpapFz1a9fP+nfv3+sQukaTAEAAAAAACD1SbWhlLe3tzRp0kSGDBkif/zxh2zatElmzpwpbdu2dYya0jpSqnr16rJ27VpZtWqVnDhxQiZNmmQCrNatW6fwWQAAAAAAACAuLjZb6r1xoxY711Dq66+/NnfX69ixo7Rv396sK1asmIwcOVKaNm1qXi9dutQUOT9z5ow88sgjZuTUY489lsJnAAAAAAAAAKcLpQAAAAAAAJA2pdrpewAAAAAAAEi7CKUAAAAAAABgOUIpAAAAAAAAWI5QKp3SuxRq4fjy5cvLs88+a4rEx3Tjxg2pWrWqrFix4q7bmj17tmmn2xowYIApUG93/vx56dGjh1SuXNm00eL0oaGh8W7r+++/l8aNG5ttNWzYUL799lvHOi1uH9dD77qYmH2dOnXKFMovV66c1KtXT7Zs2RJt39u2bZMGDRpI2bJlzV0etX1izxPWSo/9d/fu3dK8eXOz7eeee87c3CEq+q/zSI/9N6HzWrdundSsWdP0365du8qVK1cc67T05YcffiiPP/642f6YMWMkMjLyrtcFD0Z67Lt6A51XX33V9M1atWrJ+vXro+2bvus80mP//e2338xNofR3X92H/q4QFf3XOaTFvpvQOfF3Wzqjhc6RvkRERNhq165t6927t+3vv/+2ff/997YKFSrY1qxZE63doEGDbEWLFrUtX7483m1t3LjRVrFiRdt3331n27Nnj61evXq2oUOHmnWRkZG2Zs2a2Tp16mQ7dOiQbceOHbZatWrZRo0aFee29u/fbytZsqRtzpw5tuPHj9vmzZtnXutydeHChWiPMWPG2KpVq2a7fv16gvvS9Q0bNjTnfOTIEdvkyZNtZcuWtf3zzz9mvX4tV66cbcaMGeb733zzTVuDBg3M9yV0nrBWeuy/2r5SpUq2jz76yJzzunXrbKVLl7Zt3rzZrKf/Oo/02H8TOi899jJlythWrlxp9te6dWtb586dHeu1Xz/zzDNmuz///LMtMDDQNn369Hu4+rgf6bHvhoeHm8/SLl262I4ePWpbuHCh2fbBgwfNevqu80iP/ffSpUvmOKdNm2Y7efKk7fPPPze/+549e9asp/86h7TYdxM6J/5uS38IpdKh8+fPmx/eGzduOJZ17drVNnjwYMdr+wfRU089ddcPt5YtW9o++eSTaN+n/4MLCgoyHyL64Xjx4kXH+rVr15r/qcVl7Nixto4dO0Zb9sorr9jGjRsXq63+z1X/KN+6dat5ndC+tm3bZj68bt265Vjfrl07x7FPmDDB/M/YTo+/fPnytl9++SXB84S10mP/XbBgga1OnTqxfvl46623zHP6r/NIj/03ofN65513bH379nW8PnPmjK1YsWJmP0r/KIraftWqVeYXW1grPfbdTZs2mT9sop7z66+/blu0aJF5Tt91Humx/3799de2ypUrR9uGvt6wYYN5Tv91Dmmx7yZ0Tvzdlv4wfS8dypEjh0yYMEEyZ85shubu3LlTduzYYYZqqrCwMBk0aJC899574unpG
e17dUioDr1UERER8ueff0qlSpUc63WIZXh4uBw4cECyZ89uhmIGBARE28bNmzfjPK7nn39e3n777TiHo8b0ySefyBNPPCFPPvmkeZ3Qvvbs2SOPPvqoZMyY0bGuYsWKZkqUfX3U8/D29paSJUua9QmdJ6yVHvuvfQh1TFH7N/3XOaTH/pvQecXsv7lz55Y8efKY5TqV4OzZs/LYY49F++z+559/5MKFC3GeCx6M9Nh3f/31V9Nez9nus88+k5dfftk8p+86j/TYf7NmzSrXrl2Tr7/+2pzzpk2b5NatW1K0aFGznv7rHNJi303onPi7Lf1xT+kDQMqqXr26qZdQrVo1U6dGTZ482XwQBAYGxmqvc3r1D2R1/fp1M89YP1js3N3dzf8Ez507Z+bw2tsqnYc+b948Mzc9Lg8//HC014cPH5aff/7Z1NGJSo9X58AvWrTIsczX1/eu+7p48WK041T+/v7mOBNan9B5IuWkl/6bN29e87C7fPmyfPnll9K9e3fzmv7rnNJL/03ovPQPnPj6r/ZtFXW9/RdmXR/z+2CN9NJ3tUbJQw89ZOrqrF69Wvz8/Ey9Fa3Bo+i7zim99F/9o7xVq1amz7q6upo/1vUfuAoXLmzW03+dT1rpuwmdE3+3pT+EUumcJteXLl2SIUOGmP9R6QeJfmisWbMmzvYZMmQwDxUSEmK+xkzl9bWm9jGNHTtW/vrrL1m2bFmCx6WFFvUP7goVKkiNGjWirdPvL1WqlClsF5+Y+9Lidnc7zrutT+p5wjrppf9Gpcet29ZfDu3/Wk//dU7ppf8eOXLkruel55KU/mt/Tv9NOeml7wYFBcnKlSvNH3b6h9/27dvNH/iLFy+W0qVL03edVHrpvzoqSoPVbt26mT/4dcTUBx98YLahgQL91/mkxb4b85wGDhzI323pEKFUOqe/VClNlHUIpg531F+4Yg7djIuXl5f5GvMHXF/rMMqYH2xz5syR8ePHO4YNx0c/mDp06GCGc+oHlf7rTlRfffVVrBQ+oX3pseoQ5pjHaf+g1vVxnYf+K1RSzhPWSi/9105/wXzjjTfk+PHjsmDBAsdx0n+dU3rov7od/QXzbucVX//V84j6R1DMc6b/ppz00HeVm5ub+dd1/WNJt6fTQ/RuZkuWLDHXgL7rnNJL/9WpWLo9DaWU9t8//vhD5s6dK0OHDqX/OqG02HdjnlOfPn34uy0doqZUOqQfHjqvPKoiRYqYubY6F3f06NFmCKc+dDjl4MGDpVOnTrG2o7+o6Q++bs/u9u3b5kNE5yXbDRs2TGbNmmU+4OzDMuOjc9h1qLF+cOj/NLNlyxZtvc5v1391j5nCJ7SvnDlzRjtO+3WwD+2Mb72eR2LPE9ZIj/3XPqdfb52rw6P1F4WCBQs61tF/nUd66796Dr///vtdz+tu/VfXKftUkqjP6b/WSm99V+nvCPpZG/WPrEKFCpntKfqu80iP/Xffvn1SvHjxaG1LlChhzk/Rf51DWuy7dzsn/X2Xv9vSH0KpdOj06dPmX030g8Ru7969kiVLFjO0d9WqVY6H/vBrAj98+PBY29Ff0jTd1uJ0dvrhqPN27f8TnDRpkhlWOm7cOKlfv/5dj0uHyeuHqG5X5zDb/4cYlRa2sxdijOlu+9Iho/o/Z/uQTqXHbR9Kql+jnocOC9Uhq7o8MecJ66TH/qvz+vWc9dy/+OILeeSRR6Ktp/86j/TWf3U7CZ1XzP6rv8TqQ5fr9+v+oq7X57qMmibWSm99V2kf1H8I0Fo8dkePHjV1puzr6bvOIT32Xz0PDQSiOnbsmKNGJf3XOaTFvhvfOWmopQ/+bkuHUvr2f7De7du3bU2bNjW37Tx8+LDt+++/tz355JO22bNnx2qrt36NemvR4OBg24ULFxyv161bZ6tQoYLtm2++se3Zs8dWv35927Bhw8w6vbVoiRIlbOPHjzffE/Vhp891m0pvIaq369TtRG17/fp1R3u9vaced0wJ7UvPuV69eraePXvaDh06ZJsyZYq51eg///xj1p86dcrcqlSX63q9TWnDhg1tkZGRCZ4nrJUe++/ixYttxYsXt23evDnauqtXr5r19F/nkR77b0LntWvXLlvJkiVtS5Ysse3fv9/c5vm1115zrNd+rbek1ls960Ofz5w5M0nXHfcvPfZdvV259rdBgwbZjh8/bps3b57t0Ucfte3du9esp+86j/TYf3///XezftasWbaTJ0+ar9pf9fcERf91Dmmx7yZ0Tvzdlv4QSqVT586ds3Xt2tX8wD711FO2zz//3PGDfLcPN31etGjRaG30A+GJJ56wVaxY0da/f39bSEiIY7m2jethp8/t23/uuefibNu3b19H+/fee8/Wq1evWMeZmH3pL5StWrWylSpVynw4bd26Ndo29AOxdu3a5gO2Xbt25n/giTlPWC+99V/9n3Zc6/QXSDv6r/NIb/03ofOyn9szzzxjfunUa3PlyhXHOv3ldMSIEbZKlSrZqlSpYhs7dmyc1wsPXnrsu/oHk/13B/2M/eqrr6Jtg77rPNJj/920aZOtUaNGpn8+//zzsX73pf86h7TWdxNzTvzdlr646H9SerQWAAAAAAAA0hdqSgEAAAAAAMByhFIAAAAAAACwHKEUAAAAAAAALEcoBQAAAAAAAMsRSgEAAAAAAMByhFIAAAAAAACwHKEUAAAAAAAALEcoBQAAcA/2798vu3btku3bt0uxYsUs3/+pU6fkhx9+sHy/AAAAyYVQCgAA4B507dpVjh8/LuXLl5ctW7ZYvv8BAwbIH3/8Yfl+AQAAkguhFAAAwH3w9PSU7Nmzp/RhAAAAOB1CKQAAgCRq06aN/PPPP9K/f3+pXr26Y/re6dOnzfPvv//eLNdRVB988IEcOnRImjZtKuXKlZPXXntNbt686djWokWLHG11uwcPHnSs+/nnn6Vx48ZSunRpqVGjhmmr+vXrJ7/++qtMmjTJfI/auXOntGjRQsqWLWv28+qrr8qFCxfMuhUrVph2n3/+uTz22GPy1FNPyapVq2Tjxo1SrVo1qVSpkowdO9axXz2e2bNnS8OGDc22OnfuLBcvXrTs+gIAgPSBUAoAACCJJk6cKLly5TJT6PQR09SpU+Wzzz6TYcOGyRdffCHdunWT3r17y4wZM2T37t2ybNky0+67774zwdKgQYNk5cqVUrFiRWnbtq38+++/EhERIT179pQ6derIhg0b5M0335ShQ4fKkSNH5N133zUh1iuvvGKO5caNGybs0rBp3bp1Zj8nT540x2H3+++/mzpUuu/69evLkCFDZO7cuSao0pBr+vTp8tdff0U7x06dOsnixYslODhYunfvbtHVBQAA6QWhFAAAQBJlzZpV3NzcxMfHxzxieuONN6R48eLSoEED8ff3NyGQBkYaOj3xxBNy7Ngx006DIA2TdLRSwYIFTQj10EMPyZo1a0zQdO3aNQkICJC8efNKo0aNZNasWWaqoO7Tw8NDMmbMaI4lJCTE7FPrXOXLl8/sp3bt2nL48GHHMdlsNhk4cKAUKFBAXn75ZUfQpMf54osv
muO0H5d64YUXzCgtHfk1YsQIE2rpiC8AAIDk4p5sWwIAAIChwZBdhgwZTNAU9XVYWJh5fvToUTNtbty4cY71oaGhpoC6hk06HU+DJB11pcGVBkVZsmSJtT8Nqpo0aWKm3OldAXU0lU4DrFChgqONhk4aYikvLy/zVcOuuI5LRf1ePR89Hj3eokWLJss1AgAAIJQCAABIZjqKKipX17gHp+sUPZ3+p6OnosqcObP5qlPsWrVqJZs2bTIPnUqnAdUzzzwTrf358+dNYFWyZEl58sknpVmzZqau1Z49exxt3N1j/9rn4uIS7znEbK/HGt95AAAA3At+swAAAEghhQoVknPnzpkpdfbH5MmTTd0pLSyuNaR02euvvy7Lly+Xxx9/3NShiumbb74xI6imTJki7dq1M4XLtX6UTtm7VwcOHHA8P3HihJlOaC/oDgAAkBwYKQUAAHAPdCqc1mCyj2q6Fx06dDBFy7WelE6X05FQWtRc60xpyKRhkwZLWtBcR0NpUKS1ouz712l+ly9fNlPrzpw5Y+7Wp1PydBtff/21uWvfvdIi6CVKlDBTD7Vgu9bE0uMEAABILoRSAAAA90DrPX344YeyZMmSe95GvXr15NKlS/LJJ5+Yr0WKFDF3w7OHPzpVT4uMa5HzTJkymYLkL730klmnX3Xqn94hT++ot2PHDunRo4eZkqdhVN++fc0d9KLWiUqK559/3tS60rBLpwvqqC0AAIDk5GK7n3HdAAAASHOqV68u3bp1k6ZNm6b0oQAAgDSMmlIAAAAAAACwHKEUAAAAAAAALMf0PQAAAAAAAFiOkVIAAAAAAACwHKEUAAAAAAAALEcoBQAAAAAAAMsRSgEAAAAAAMByhFIAAAAAAACwHKEUAAAAAAAALEcoBQAAAAAAAMsRSgEAAAAAAMByhFIAAAAAAACwHKEUAAAAAAAALEcoBQAAAAAAAMsRSgEAAAAAAMByhFIAAAAAAACwHKEUAAAAAAAALEcoBQAAHoh+/fpJsWLF4n089dRTkpr07t3bHNfMmTMf6H5WrFhh9nP69Olk26Zub+LEianqmOzs7/e4cePiXB8ZGSlVq1Y1bfQ47pdeB93Wg/4eAABw/9yTYRsAAABxyp49u0yaNCnOdR4eHpJa3LhxQzZt2iRFixaVxYsXS4cOHcTFxUWchR5zrly5JLVydXWVjRs3yltvvRVr3Y4dO+TChQspclwAACBlEUoBAIAHxtPTU8qVKyep3bp168zXd999V9q1aye//PKLPPHEE+IsUvs1rlChgvz222/y119/yaOPPhpt3ZdffiklSpSQ/fv3p9jxAQCAlMH0PQAAkOLatGkjb7/9tvTo0cMELDpSSaeS6ZSqWbNmSZ06daRs2bKyfPly0/7PP/+Ujh07SpUqVUzg0aVLFzl8+LBje9u3bzffu2jRIqlWrZpps3Xr1nj3r9vVEOrxxx+XAgUKmO+L6xg1tJo6dao8++yzUrp0aWnevLn88ccf0drpiKuWLVtK+fLlpVSpUubY58+fH+d+v//+e3OcW7ZsibZcAxxdvnPnTvN6zpw5Zju6T53qNmTIELl582a80/cSah+fXbt2SZMmTcxxN2jQQNavX+9Y98ILL5jzjal9+/bm/bqbxx57TAICAsxoqahu374tX3/9tdSvXz/W9+joqf79+8szzzwjZcqUkRdffFG+/fbbaG1CQ0Nl5MiRZiqoXm9tr8ti0uvZunVr04cqV64sffv2lStXriR4PQAAwINFKAUAAB4oDR7iethstmjtNmzYIJkyZZLPP/9cOnXq5FiuYcurr74qY8aMMeGDjmJq0aKFWTdixAj54IMP5OzZsyYwOXr0aLRt6tRBDSDee+89E1rERcMsDbk0jFH6VcOPS5cuxWr71VdfmXUDBw40NZK0Tffu3SUiIsIRMnXt2lVKliwpn332mTn2fPnyyfvvvy979uyJtT0NjHLkyCGrV6+OtnzVqlVSsGBBqVixohnFNXbsWGnVqpXMmDHDbF/bDxs2LM7zSWr7qPQ61a1b1xz7I488Ir169TIhm9JQ6Pfff5cTJ0442ut11wCwadOmd92um5ubPPfcc7FCqZ9//tmESNWrV4+2XK+r7k/DJD0GvY4PPfSQOZc1a9Y42r3zzjuyZMkSee2112TChAny77//yuzZs2NND9TgLEOGDKbNgAED5Ndff5W2bdtKSEhIgtcEAAA8OEzfAwAAD8w///xjApq49OnTx4x2ilpjaujQoWbKn7IX3daQREfp2GkIpKOZdMSShh0qMDBQatWqJZ988ol8/PHHjrY6YklHDN2NjpLKmjWrIxh5/vnnTQiybNkyMwIrKg3TNOjJnDmzeX3r1i0TeunUMx1ddOTIEfP9OqLKTsMwHdGl4Y2O1IlKj1/bf/HFF2ZbGsppUKIBXefOnU0bDVDy5s1rQiatzaQjfTJmzGgCmLgktX1Uem3t78nTTz8tx48fNwFVzZo1zcipUaNGmYBLR7Qpfa7HrNc+IfXq1TMjxqJO4dORWDVq1BAvL69obXV0nI5k0hBQwyilI6Y0XNJwUo9FA0hdr6PA7CGlhnwNGzY074PdRx99JIUKFZIpU6Y4+ou+Dzo6S997vU4AACBlMFIKAAA80ELnGu7E9WjcuHG0toULF3YEUlFpvSG7oKAgM6pJgyp7wKB8fX3NND0NZOL73riEh4ebkTcaumgYdP36dROy6AglHYGjd4aLqkiRIo5ASuXMmdN8DQ4ONl91hJcGNxow7d2714QuGoaosLCwOI9BAzc9r2+++ca81q/62j5yS6cU/v3332Y0ko780vPX4EWnE8Ylqe1jBkdR6XXREEnPx8fHR2rXrh1tpNLKlSvN9+gopIToNdXrZR8tpddDR2FpwBSTvo8a5tkDKbtGjRrJxYsX5dixY2YUlYo6ykpDOB2RZafvi45Q00BLR+bZR+np6LWHH374rlM6AQDAg8dIKQAA8MBoyKR1jRJDw6C46CifqHfJ03BB6xPFpMt0fXzfGxedbnf58mVHUBbTTz/9ZAINO29v72jrNQRR9vBKR/cMHjzYhC169z4d0VWpUiWzLuZ0RTtto6OZdMqeBlH69cknn3QEXhr66PYXLFjgmBKoYY3W4IoZIt1L+6hiXld/f39z3FqPSt8fnVKnoZQGQhoK6kiq0aNHS2Lo9dBRa/a78Om11eunUzLPnz8fra2O6tLgKL7j0/DQPvLLz88vVhBqp+30WkybNs08Yoo5QgsAAFiLUAoAADgNHa2j4UZc9Z50BI1Ow0sKnb6l4cfw4cOjLdcgplu3bqbgedRQKiEa/OgoHq1rpCN9NJTT0To66upudLSU1jrSKWlaZ+nDDz+Mtl5HE+lDQzctiq4Bi9ZTso8+iimp7e006IkaTOl11vApS5Ys5rWGZ/nz5zfBkgZKOrotKXf+01BMi7DrdEcdRaYjr3TaZky6P30/Y7Iv0yDKHkbpMebJk8fR5tq1a47nGqRpf9Fpf3EVU48ZMgIAAGsxfQ8AADgNHfmktZu05pK9uLjS8EVHPWnoklgacOhoHQ0
rtOZT1IdOgdNRPT/88EOsUTx3o3fL06BFt2Gfivjjjz+arzGnAkalU840INH6SBqk6LQ5u549e5oC3/ZQTqcuvvHGG2Yamt6hLqakto9Kr6GdHq+GT1p/yT49TwMenRaoI8G+++47Uw8rKTTA0lFbWotKvz+uoMh+tz4tqq41yaLSUVo6EkpHl+l7pGIWT9+8ebPjuU611PpVGhTqiD37Q4u46wgyrfMFAABSDiOlAADAA6N1g3bv3h3v+mLFiiV5tErv3r1NMW4tBK6FzLUulBY9133Zw5jE0GlyGtTEF4zoVLqlS5eaUU5aADwxypQpI2vXrjXF3XPlyiW7du0yx6Zhjr3uVFz0GuhxLF682BTtjlpbS8MXnRKo0+S0+LhOSdNaUXp3vuLFi8faVlLbR6V3p9OwL3fu3LJw4UJTm0qLjkeloZQGOipmXbDE0LBv7ty5ZlSbjryKS4cOHUwApSOcdMSattX3S++8qHdc1FFaGky9/PLLMn78ePM+av0wDbsOHjwYbVs6VVD7ivYbrUml5zdz5kxTa0rDOgAAkHIIpQAAwAOjo5E0OIiPBg0JFSOP6YknnjBBid5pTwMHDXC0bpOGMDoCJrFWrFhh2hctWjTO9TrqSu9ip8FUYsMLLXI+bNgw81AaBOkdBe11mO7m2WefNaGUhj5RNW/e3ARvOpVQ60TpqCW9BjodL66pb0ltH9XIkSPNOZw4ccJcF532FzM40ul/Gm7pNL+7TQW82xQ+vYOhjuCy1+SKSUdDaSimd8774IMPzPnoPrVGlt6tz07DNz2OefPmmamHevc9vWOihmt2emdG3Z8Gc3rXQL0GGhpqH0rK1EMAAJD8XGzxVd0EAACAZTRg0dE7GtSlZjqdUe90qKFg1GmGAAAAScVIKQAAgBSkU9m05pFOExw7dqykVlqc/Ntvv5WvvvrKjACrXr16Sh8SAABwchQ6BwAASEE6rU9rIbVr187cMS+1Cg0NNVPetCbTuHHj4p16BwAAkFhM3wMAAAAAAIDl+CcuAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWM7d+l2mXpcv3xDuRQgAAAAAAHDvXFxE/P19EmxHKBWFBlKEUgAAAAAAAA8e0/cAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJajplQSREZGSkTE7ZQ+DDghNzd3cXUlAwYAAAAAwI5QKhFsNptcv35FgoNvpvShwIl5e2cWX99s4qK3IQAAAAAAIJ0jlEoEeyCVObOfeHp6ESogyaFmWFio3Lx51bzOksU/pQ8JAAAAAIAURyiVgMjICEcglTmzb0ofDpyUhplKgykfHz+m8gEAAAAA0j3+Mk5AREREtFABuFf2PkRdMgAAAAAACKUSjSl7uF/0IQAAAAAA/kMoBQAAAAAAAMsRSiFZXb16Rb77blO869evXysvvtjQ0mMCAAAAAACpD4XOkaw+/3yiudtc9eo141xfo0YteeKJQMuPCwAAAAAApC4pOlLq/Pnz0qNHD6lcubJUrVpVRo4cKaGhoXG2/euvv+Sll16SsmXLygsvvCB79+6Ntn7dunVSs2ZNs75r165y5coVi84CUWkgdTdeXhnEz8/PsuMBAAAAAACpk2tKhhcaSAUHB8v8+fNl/PjxsnnzZpkwYUKstkFBQdK5c2epVKmSrFixQsqXLy+vvfaaWa7++OMPeffdd6Vbt26yePFiuX79uvTv3z8Fzso5nT17RgIDK8m2bVvM1LpatarKhAkfyrFjR6RjxzZSs2ag9OnTU4KCbpn2q1Ytl5deamTadevWWY4ePWKWz5gxRTZsWGce9il6ut3p0ydL/fo1pG/fXrGm7+3fv09ef72j1KjxlDRv3lQ2bfoqha4CAAAAAABIF9P3jh07Jrt375atW7dKQECAWaYh1ejRo6Vv377R2q5fv168vLykT58+5g5mGkD9+OOPsnHjRmnatKnMmzdP6tatK02aNDHtx4wZI9WqVZNTp05Jvnz5UuT8nNG8ebNl1Khx8vffR2Xo0IHyyy9bpXfvvmZ0U79+vWXt2lXy0EP5ZNasqdKnz0DJn7+AbNz4pfTo8ZosXLhSWrRoIydOHDfb6tWrj2O7W7f+KJ9/PkMiIiJNCBW1/lSvXl2ldu260r//INm7908ZPnyIFChQSB55pGiKXAMAAAAAAJDGR0plz55dpk+f7gik7G7evBmr7Z49e6RixYomkFL6tUKFCibUsq/XUVR2uXPnljx58pjlSLz27TtJkSKPSK1adcTPL5vUrPmcPPbY41KmTDmpVKmyCZwWLJgrbdp0kKeeqir58uWXV199XXLmzC1ff71eMmbMaMJDfUSdote4cVPJn7+gFCpUONr+Nm36Wnx8skjPnu+Y9fXqNZTXXusa7xROAAAAAACQdqTYSClfX19TR8ouMjLSjHh6/PHHY7W9ePGiFClSJNoyf39/OXz4sHl+4cIFyZEjR6z1586dS9Ix/T/zSnBZWpUnz0OO5xos5cqVO9rr8PBwOXHib/nss4kyZcqnjnVhYWFy6tTJeLebK1eeOJefPHlCihYtKq6u/2WjzZu3lrRO+1R66lcAAAAAonN1dXEMugCSq0RSZOTdazxbKbHdO9XcfW/s2LGmmPmyZctirdO6U56entGW6WsNQ1RISMhd1yeWv79PrGW67StXXMXNzUXc3VO0LvwD4+Z257w8PT2inaO7u5vjtf0DMyIiQnr16m1GTkWVKVNm09beLup2vL0zOF7rh699vaenu2mfVq9rTJGRLiaA8/PLJBkyZEjpwwEAAACQQmyREeLi6pbSh4E0xOakfco9tQRSc+bMMcXOdeRMTDpKJ2bApK/tf9jHt97b2ztJx3H58g2JefO48PAwM4orIsImt29HSlqktZ7sX6Oeo6as9tf2u+rly1dAzp07L7lz53W0GzFiqDz99LMSGPiMo23U7UTdrj251dd58uSTrVu3SHh4hCPMeu+9/lK8eAlp2bKtpDXah7QvXb16Szw8wlP6cAAAAACk0KAA/Yfq7RtGyPUr8c84ARLLN1t+qVJ3gPlb0/73fUrTP/HjGviT6kKpYcOGycKFC00w9dxzz8XZJmfOnHLp0qVoy/S1fcpefOu1blVSaO4SM5SK+Tq9a968lYwa9YGpJ1W6dFlZvXqFfPfdN6bOlNKg8Nixo3Lx4gXJnj36lMqYtMC53pnvs88+kUaNnpc//9wjW7b8IG3atJe0LK5+BgAAACB90UDq2sU7dzIHkouz/a2ZovOmJk2aJIsWLZJx48ZJ/fr1421XtmxZ+f333x2jdfTrrl27zHL7+p07dzranz171jzs65F8atSoLZ07v2HCpDZtXpadO3fI6NHjTUilnnuuvpw6dULat2/heL/i4+PjI2PHTpDdu3dJ27Yvy/z5c2Tw4A/kkUeKWXQ2AAAAAAAgpbjYEkoOHpCjR49Kw4YNpXPnztKqVato63SEkxY319BCR97oHflq1aplgqvmzZubIGvjxo3y9ddfmzu+aWDVpk0bGTx4sJ
QuXVqGDx8umTJlksmTJyfpmC5dinv63uXLZ8XfP7d4eESvWwUkBX0JAAAAgNbU1el738zvwkgpJIus2YtIrVaTzfS91FJ2SKfvBQT4pN6RUt9++60pmv35559LYGBgtIfSr+vXrzfPM2fOLFOmTDGjoZo2bSp79uyRqVOnmkBKlS9fXt5//3359NNPpUWLFpIlSxYZOXJkSp0aAAAAAAAAUutIqdSIkVJ4kOhLAAAAABgpheSWlZFSAAAAAAAAQOIRSgEAAAAAAMByhFIAAAAAAACwHKEUAAAAAAAALEcoBQAAAAAAAMsRSgEAAAAAAMByhFIAAAAAAACwHKHUfXB1dRF3d1fLHrq/pNiy5Qd5/vl6UqPGU7J9+8/xtlu/fq28+GJDcVbDhw8xD2Wz2WTFiqVxrgMAAAAAAKmHe0ofgLPSgChb1ozi4mZdrmeLiJQr14IkMtKWqPbTp0+RypUflw4dXhU/Pz9Jq958823H8927d8m4caOladOXYq0DAAAAAACpB6HUfYRSGkhdXfG13L509YHvzz3AT/ya1jb7TWwodevWTSlTppzkypVb0rLMmTM7nutIqfjWAQAAAACA1INQ6j5pIHX73EVJbXQ63rlzZ2XkyPdl1qxp8t57w+TzzyfKoUMHxMXFRcqVqyD9+r0nAQEBsb53ypRPZf36NXLjxk159NGS8tZbfaVw4YfNuj17fpdPPhknf/99TPLmzSuvvNJZnn22RqKPqVmzFvLll2vkn39OS/nyFaVfv0Hi73/nGI4f/9tse+/ePyRjxozSuHFTadeuo7i6usqNGzdk1Kj3ZefOHSLiIk8+GSi9e/eVTJkyO6bn6bH06NHFPA8MrCSffDJZNmxY5xgx1ahRbfnww0+kQoVKZllQ0C1p0KCWjB//mZQtW05++GGzTJv2mZw9e8ac7xtvvGmOUR0+fEg++miUHD58UHx8fM2x6Qg0AAAAAABwb6gplUZNmzZXcuTIKT169JZJk6ZKnz49zVS+L75YIuPGTZLTp0/LvHmzYn2fBjNr1qyQ998fLV98sVj8/f1l5MihZt3ly5fMdurVayBz5y6SVq3ayfDhQ01QlVgzZkyRli3bypQpsyUkJEQGDuxjll+7dk26du1kQrKpU2ebwGn58sWydOlCx/dduXJZPvtshkycONmEQ3PmzIi2bT3f4cPHmOerV2+U0qXLRhsxVaXKE/LDD985lm3d+pNkzeonZcqUNaGThltt23aUOXMWSe3a9eTtt3vI6dOnTNsPPhgsjzxSzFw/DdLmz58jP/+8JYnvCgAAAAAAsGOkVBqlNaR0hJGGMR4eHtKuXSdp3ryVGSWVJ89D8uyz1WX//n2xvu/cuTPi7u4hOXPmkly5cknPnn3k5MkTZp0WEK9UqbK88MLL5nXevPnk0KGDsmTJAilbtnyijqt+/Uby3HP1zPP+/d+TZs0ay7FjR2Tnzt/EyyuD9Onzrri7u0vBgoVMCKajvF5+uZU5Lm/vjObYM2TIIB98MCbWVD03NzcziknZR19FVaNGbfn004+lZ893zHX4/vtvpVq1mub5okVfSMOGTaR27Tqm7UsvNZfdu3fKypXLpHv3Xmb/Vas+Y6ZC6jFMmPCZ5M6dJ8nvCwAAAAAAuINQKh3QgKZu3QayePF8MyJIp8kdOXIo2kgiu5o1n5Ply5dIs2aNpGTJ0lK16rPSoEFjs+7Eib/N6KJatao62t++fVvy5cuf6GOJuk8Nd3x9s8jx48fNtosVK2ECKbtSpcrK5cuXzdS9l15qIf369ZYGDWqaYEynDNaqdSdASqynnnpaRo0aJvv27ZUiRR4xdyScOHGKWafHcOzYJjNKzC48PFwqV37CPG/TpoOZ1rh69QozdVCDtbiCLwAAAAAAkDiEUunAxYsXpFOnNib0qVSpijRq9Lxs27ZF9u37M1ZbDVoWLFguv/76i2zb9pMsXPiFrF27UmbNWiARERFSu3Zdadv2lWjfEzVISkjMtpGREaZ4u6enZ6y2us7+tWLFx2TFii9ly5YfzLGPGTPCHKPWykosb29veeqpqmYK36VLFyRbNn8pUaKkWafnptMR69SpH+17vLy8zNfWrdtL9eq15McfN5tg7s03XzejunR0FQAAAAAASDpqSqUDGqT4+GSRMWMmmELjOtXuzJl/4myrgc/atavMaKC33+4vs2cvkFOnTsrRo0ckX74CpsaSTtuzP3766Qf5+usNiT4WHallp9u6efOmPPzwI5I/fwE5eHC/GXllt3fvn6bmk46m0lFeul5HfA0bNkoGDHhPvv/+v/pQdjoV725q1HjO1IL66afvzXQ+O93/2bP/RDs3HTX1yy/bJDQ0VCZM+NBMg2zevLUZXaXBXlz7BwAAAAAAicNIqfvkHuCX6vejoc758+fkt99+NXWQNm/eZEYLFS/+aKy2kZGR8umnE8wooqJFi8mmTV+ZGk46Ra9p05dk2bLFMnXqZyYc2r//L5k69VNTGyqxli5dZLabK1ceGT9+jDz2WBWzbS2oPmPGVBkzZrgphH7q1AmZOXOKPP/8SyZounDhgqxZs1L69x8sWbJkMfWgdDtxjYZSBw7sl0KFCsda//jjT8qIEUPk/Pnz8tln0x3LmzVraQqtFy9e0gRyW7f+KIsXL5CPP/7cjJb644/dcuHCeenSpasEBQWZ4u46tREAAAAAANwbQql7FBlpE1tEpPg1/W+0zYOm+9P9JpVOO9MQZeDAvibgKVHiUenWrae5o11YWFi0toGBT0vHjl1k4sRx5m53+fMXlJEjPxJfX1/zGD16nHz++UQzrS8gIIfZjk7pSyy9c9/kyZ/K+fNn5Ykn7ozGUhkzZpKPPvpEPv74I3nllVZmhJTWkdJaTurVV1+XW7duSr9+b0lwcJCUK1cxzql7hQsXMUHX66+/IkOGDI+1XqcJapj011975ZFHijqWlypVWgYNel9mzpwqn332sTz0UF4ZPHi4lCtXwax///2RMm7caOnUqZ0pqF69ek1p375jEt4FAAAAAAAQlYst5i3M0rFLl25IzKsRHh4mly+fFX//3OLhEb3ukdZC0odVNJC6l1AqtXjxxYbyyiudpV69hpIe3a0vAQAAAEgf3N1dxc8vk3wzv4tcu3gkpQ8HaUDW7EWkVqvJcvXqLbl9O1JSA62sExDgk2A7Rkql45AIAAAAAAAgpRBKIVn07/+2/Pbb9njXv/POAEuPBwAAAAAApG6EUkgWvXv3k5CQ4HjXZ8uWLUm1pwAAAAAAQNpGKIVkERAQkNKHAAAAAAAAnIhrSh8AAAAAAAAA0h9CKQAAAAAAAFiOUAoAAAAAAACWI5QCAAAAAACA5QilAAAAAAAAYDnuvncfXF1dzMMqkZE28wAAAAAAAHB2hFL3SMMoPz9vcXV1s2yfkZERcvVqcKKDqS1bfpCPPhot16//KyNGfChVqjwRZ7v169fKzJlTZdmytZJWXb16RX7/fZdUr17TvA4MrCSffDJZKlSolNKHBgAAAABAukQodV+jpNxk+4YRcv3KyQe+P99s+aVK3QFmv4kNp
aZPnyKVKz8uHTq8Kn5+fpKeff75RLHZbI5QavXqjeLrmyWlDwsAAAAAgHSLUOo+aSB17eIRSY1u3bopZcqUk1y5ckt6p4FUVP7+ASl2LAAAAAAAgELnadaLLzaUs2fPyMiR75vnf/yxW15/vaPUqPGU1KwZKG+/3UMuXboU5/dOmfKpNG78nFSv/pR069ZZjh076li3Z8/v0rFjG7OubduX5fvvv03SMa1YsVQ6d24v1as/Ke3bt5QDB/Y71p8/f0769u1ljlHb6pTCiIgIx/pff/3F7FP33bt3Dxk/fowMHz7ErAsPD5eJE8dJkyZ15ZlnqpjvX716hVk3Y8YU2bBhnXnocvv0vV27fpNVq5Y5ltnp9zVv/rx5HhYWJhMmfCj169cwj/ffH2SmQ9otXbpIXnihgTkfvS579uxO9PUAAAAAACA9I5RKo6ZNmys5cuSUHj16y6RJU6VPn55mKt8XXyyRceMmyenTp2XevFmxvu+HHzbLmjUr5P33R8sXXywWf39/GTlyqFl3+fIls5169RrI3LmLpFWrdjJ8+FATVCXWzJlTpHXr9jJ79kLJnDmzfPzxWMdIpnff7SN+ftlk1qz5MmDAYPnmm43yxRd3jvGff05Lv35vSfXqtWT27PlSosSjJuCy03bbtm2RDz4YIwsWLJe6dRuY0OrKlcvSooWGaLXMQ69LVM8+W1MuXrwQLRz74YfvTFt7QHfgwF8yduzH8sknU+TmzZsyaFA/s+7QoQPy2WcfS+/e/WT+/GVStmw5ee+9vhIZGZnEdwsAAAAAgPSH6XtplNaQcnV1NcGPh4eHtGvXSZo3byUuLi6SJ89D8uyz1WX//n2xvu/cuTPi7u4hOXPmkly5cknPnn3k5MkTZp2GQJUqVZYXXnjZvM6bN58cOnRQlixZIGXLlk/UcdWt21CefvpZ81yPZ+DAvub5zp075Ny5szJ16mxz3PnzF5SuXXvKiBFDpX37TrJu3WopUaKkea46deoiO3Zsd2y3SJGiUrFiZSlVqrR53aZNB5k1a5qcOnXSHJuXl5fjukSVNWtW830aRBUvXkKuX79uRlDpvkNCQmTFiiUyffoX8vDDRUz7QYPeNyOmjh49ImfPnjXXU69T7tx55NVX35Ann6xqQik9BwAAAAAAED9CqXRA6yfpyKHFi+fL4cOH5Pjxv+XIkUNSunTZWG1r1nxOli9fIs2aNZKSJUtL1arPSoMGjc26Eyf+lq1bf5Jatao62t++fVvy5cuf6GPRIMsuY8ZM5vvt29Zpcc8994xjvYY7oaGh8u+/1+To0cNSvPij0balAZSGSEqDrh07fpGJE8fLyZPHzSgmFXX6X3xq1qwt8+bNltde62ruWJg3b34TQh07dsRMC+zSpUO09npcp06dkMcff0oKFy4ibds2l6JFi0lg4DPSqNHz4u7OjxUAAAAAAAnhr+d0QKenderURooVKyGVKlUxwYlOddu37884Ayyd/qb1m7Zt+0kWLvxC1q5dKbNmLTABT+3adaVt21eifU9SQhgdtRUX3baOjho16qNY6zJlyixubm46yS/e4uVTp34ma9euknr1GkqdOvXNlLqYtaLi8/TT1eTDD0ea2lnff/+d1KhRK1qg9dln08XbO2O078mWLZtkyJDBjOzavXuXbN36o6xfv1ZWrVouM2Z8Idmz50jUvgEAAAAASK9SRSilxaSbNm0qgwYNkipVqsRa36ZNG/n1119jLdfvGTlypPz7779SuXLlWNOytm//b3rXg+KbLfGjhFJqPz/+uFl8fLLImDETHMuWLVscZ1sNq7Tg+PPPvyhPPhkoHTq8Ko0b1zHT1fLlKyB79/4RbbTTwoXzJDw8LFZQlVS6bd1v1qx+Zsqh0pFP69evk4EDh0rBgoXlzz/3RPuegwcPmKmIavXq5dK7d3+pXr2mef3338eitdVpdjHvwGen+6tS5QnZvHmT/Pbbr9KtW0+z/KGH8powTPvXI48UM8uuXr0iI0cOkx493jIhlk47bNeuo1SoUElee62bNGpU2xSVr1Gj9n1dDwAAAAAA0roUD6V0elbv3r3l8OHD8baZOHGimUZlt2fPHunZs6e0bNnSvD5y5IgJodatW+do86Br+kRG2iQyMkKq1B3wQPcTfZ8RZr9J5eubxQQ+Grho7SMNX+7UUHo0jn1EyqefTpBs2fzNlLRNm74yI4J0il7Tpi+ZMEtHJel0wP37/5KpUz+V/v3fu+9z0yLsWptJ726n0+hu3rwhY8aMMDWsNBhq3LipLFo0z0yz05FNetc/LbCuwZH9HHW0UrFixc1dBT/++ENH4Kn0HDRE0lFjcY1i0hBp9OjhUqBAAcmfv4BjemHDhk3kww9HSZ8+A0wRdp0eeP78WXMdg4ODTN0qvVZ6nDpiKjg4WB5++JH7vh4AAAAAAKR1KRpKaZikgVR8I1jsNHCy0ylV48ePl06dOknp0neKWh87dkwKFSok2bNnF6toOHT1arC4urpYus97CaX0TnIa4GhRcR0xpHeu09FAM2ZMcYQ2doGBT0vHjl1k4sRx5s51OqVu5MiPxNfX1zxGjx4nn38+0UzrCwjIYbajU/rulwZPo0aNkwkTxkrnzu3MdLlq1WpKt25vmvW5cuWWYcNGy6RJE8xxP/ZYFala9RnH1EENxj76aJS0afOy6QcaJuk2Dx8+KI8//qQ891x9GTCgt7Rv30LWrdsUa/9PPfW02GzDYo1w6tatl9mnXjutf1WuXHlzJz7dto6e0v3Onj3d3OlPi8NrIfSCBQvd9/UAAAAAACCtc7EllAg9QAsWLJDjx49Lr169pFy5cjJ37tw4p+9FtXTpUvn444/lq6++kkyZMpll9il8o0aNuq/juXTphsS8Gjo17fLls+Lvn1s8PDzva/u4d1p0XEOhokWLO5a9886bZrRXx46viTOgLwEAAABwd3cVP79M8s38LnLt4pGUPhykAVmzF5FarSbL1au35PbtSEkNXFxEAgJ8UvdIKfv0u8TS/Gz69OnStm1bRyCljh49agKLF198Uc6fPy+VKlWS/v37S44cOZJ80RKzDNb755/TppbT0KHDTf2pHTu2m3pOWsfJ2Wifol8BAAAAAJJbavlbM7HHkeI1pZJCC5efO3dOmjVrFm25Tt/Tu6FpEKXBlU7v69KlixlVdeeubYnj7x87xQsJCZErV1zFzc3FJNqIW9++vc0d++Jf/67UqVPvnrdfrVp1OX78mIwa9YEpNq51nz74YJQUL36nALkziIx0MbXO9F9FtMYVAAAAAADJRf/WdDZOFUrplL2nn346Wo0p9eWXX5paSfY/9D/55BMJDAw0BdErVKiQ6O1fvhz39D0t/h0RYUs1w+BSo169+kpISHC86zU0vN/r16bNK+YRlTO9J9qHtC/pkEoPj/8K9wMAAABIP9zc7vxDNZDc9G/NiIjIVDNSKq6BP04dSv3000/SrVvs6Vre3t7RXvv7+5vgSqfyJYUGUjFDKfvrFCy95RQCAgJS+hBSPXsfiqufAQAAAABwv5ztb02nmY925coVOXXqlFSs
WDHa8ps3b8pjjz0mv/zy39QxDaOuXr0qhQsXvu/92qf/hYWF3ve2kL7Z+5Cbm1NlwQAAAAAAPBCp9q/jixcvio+Pj2NK3uHDh8XLy0vy5s0brV3mzJlNUKV34Bs2bJgJkYYPHy5Vq1aVYsXuv96Qq6ubeHtnlps3r5rXnp5eZqogkJQRUhpIaR/SvqR1pQAAAAAASO9SbSilNaE0aGratKl5ffnyZfH19Y0zEBo9erSMGjVKOnfuLGFhYVKjRg0ZOHBgsh2Lr28289UeTAH3QgMpe18CAAAAACC9c7FRLMnh0qXYhc6julPw/LaVh4Q0QqfsMUIKAAAAgN7VXQudfzO/i1y7eCSlDwdpQNbsRaRWq8mm0HlquRmYjicKCEhjhc5TmoYKrq6eKX0YAAAAAAAATo+hGwAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAAEifoVRYWJg0aNBAtm/fHm+b119/XYoVKxbtsXnzZsf62bNnS9WqVaV8+fIyYMAACQ4OtujoAQAAAAAAkFTuksJCQ0Old+/ecvjw4bu2O3r0qIwdO1aeeOIJx7IsWbKYr1999ZVMmjTJrPf395f+/fub5++9994DP34AAAAAAAA42UipI0eOSLNmzeTkyZMJjqQ6ffq0lC5dWrJnz+54eHp6mvVz586Vdu3aSbVq1aRMmTIydOhQWb58OaOlAAAAAAAAUqkUHSn166+/SpUqVaRXr15Srly5eNsdO3ZMXFxcJF++fLHWRUREyJ9//indunVzLNNthYeHy4H/tXcfUFZV5x7Av4GRIkVpoghPQQ0QbAhGfWLsBmISkUSXoljyDJoElh0FI0VjsL2XBIliI7Hw8hRDzBM7lmiKJRglalCKUWOlioiAwLy1j2vumxEwAw5n7mV+v7XuunPq3Yel5878z97fnjkzG85XU2VlG3ERAAAAAEWgrKy02lGnodTAgQNrtF8KpZo3bx7Dhg3Lgqxtt902hg4dGgceeGAsWbIkGwK4zTbbFPYvLy+PrbfeOt59990Nak+bNi02+BoAAAAA6lqrVs2i1NR5TamahlLLly+PPn36xODBg+Phhx/OCp/fcccd0bZt22yfyqF8ldJyGva3IRYs+DAqKmq16QAAAFDQsGGDkgwPKH6LFn0Uq1eviWLpKVWTjj8lEUr94Ac/iEGDBhUKm3fr1i1eeumluPPOO7Ohf8lnA6i03LRp0w36nBRICaUAAACAUlRRYplGnRY6r6kGDRoUAqlKXbp0iffeey8bpte4ceOYP39+YduqVati8eLFWTF0AAAAAIpPSYRSF154YQwfPrzaulTEPAVTKbBKs/JNnz69sO3555/P6kqlHlUAAAAAFJ+iDaXmzZuX1ZFKDjnkkLjnnnvi7rvvjtdffz3Gjx+fhVAnnnhioWD6zTffHNOmTYsZM2bE6NGj49hjj93g4XsAAAAA5KNoa0qlouZjx46NAQMGxBFHHBGjRo2K6667Lt5+++3YZZdd4qabboqOHTtm+x555JHx1ltvxciRI7NaUmn/888/v64vAQAAAID1KKuoKLUyWJvO/Plm3wMAAGDTKS//dPa9hyedEYvnza7r5rAZ2LrdznH4CROy2fdWrSqe2ffatm1RusP3AAAAANh8CaUAAAAAyJ1QCgAAAIDcCaUAAAAAyJ1QCgAAAIDcCaUAAAAAyJ1QCgAAAIDcCaUAAAAAyJ1QCgAAAIDcCaUAAAAAyJ1QCgAAAIDcCaUAAAAAyJ1QCgAAAIDcCaUAAAAAyJ1QCgAAAIDcCaUAAAAAyJ1QCgAAAIDcCaUAAAAAyJ1QCgAAAIDcCaUAAAAAyJ1QCgAAAIDcCaUAAAAAyJ1QCgAAAIDcCaUAAAAAyJ1QCgAAAIDcCaUAAAAAyJ1QCgAAAIDcCaUAAAAAyJ1QCgAAAIDcCaUAAAAAyJ1QCgAAAIDcCaUAAAAAyJ1QCgAAAIDcCaUAAAAAyJ1QCgAAAIDcCaUAAAAAyJ1QCgAAAID6GUqtXLkyvvGNb8TTTz+93n0ef/zxOOqoo6Jnz57xzW9+Mx555JFq23v37h1du3at9vroo49yaD0AAAAAG6o86tiKFSvi3HPPjVmzZq13n5kzZ8aQIUNi2LBhceCBB8Yf/vCHOPPMM+Ouu+6Kbt26xXvvvRcffvhhTJs2LZo0aVI4bsstt8zpKgAAAAAomVBq9uzZWSBVUVHxuftNnTo19t133zjppJOy5R122CEeffTRuP/++7NQas6cOdGuXbvo1KlTTi0HAAAAoGRDqWeeeSb22WefOPvss2PPPfdc735HH310fPLJJ2utT72jKsOtzp07f+H2lJV94VMAAAAA1ImystJqR52GUgMHDqzRfjvttFO15TTU789//nMcd9xx2XLqKfXxxx/HoEGD4rXXXovu3bvHiBEjNjioatOmxQbtDwAAAFAMWrVqFqWmzmtKbaiFCxfG0KFDY6+99opDDz00Wzd37tz44IMP4pxzzonmzZvHjTfeGKecckrce++92XJNLVjwYfyLkYQAAACw0Ro2bFCS4QHFb9Gij2L16jVRLD2latLxp6RCqfnz58epp56a1aAaN25cNGjw6eSBN998cza8r1mzT//Hvvrqq7OC6I899lg2U19NpUBKKAUAAACUoooSyzRKJpRKM+xVFjq/9dZbo3Xr1oVtjRo1yl6VGjduHB07dsyOAQAAAKD4fNrVqMgtW7YsTjvttKxn1O233x7t27cvbEu9pg477LCYMmVKtf1ff/316NKlSx21GAAAAICS7Ck1b968aNGiRTRp0iSuv/76eOONN+K2224rbEvStrTPQQcdFNdcc01sv/32WQ+qn//857HttttmQ/gAAAAAKD5FG0r16dMnxo4dGwMGDIgHH3wwli9fHsccc0y1fY4++ui4/PLL4/zzz4/y8vI499xzY+nSpbHvvvvGDTfcEA0bNqyz9gMAAACwfmUVafwbmfnzzb4HAADAplNe/unsew9POiMWz5td181hM7B1u53j8BMmZLPvrVpVPLPvtW3bYvOoKQUAAADA5kUoBQAAAEDuhFIAAAAA5E4oBQAAAEDuhFIAAAAA5E4oBQAAAEDuhFIAAAAA5E4oBQAAAED
uhFIAAAAA5E4oBQAAAEDuhFIAAAAA5E4oBQAAAEDuhFIAAAAA5E4oBQAAAEDuhFIAAAAA5E4oBQAAAEDuhFIAAAAA5E4oBQAAAEBphFInnXRSLFmyZK31CxcujAEDBtRGuwAAAADYjJXXdMcnnngiZsyYkf387LPPxoQJE2LLLbests/rr78eb731Vu23EgAAAID6GUp17tw5brrppqioqMhezz33XGyxxRaF7WVlZVlIddlll22qtgIAAABQ30KpTp06xa233pr9PHz48LjooouiefPmm7JtAAAAANT3UKqqsWPHZu/z5s2LVatWZT2nqurQoUPttA4AAACAzdJGhVJ//OMf4+KLL4533nknW06hVBq+V/n+97//vbbbCQAAAEB9D6UuueSS2H333eO6664zhA8AAACAfEKpd999Nyt6nupMAQAAAMCGarDBR0RE7969Y/r06RtzKAAAAABsXE+pvffeO8aMGROPP/547LDDDrHFFltU2z5kyJDaah8AAAAAm6GNLnS+6667xoIFC7JXVanQOQAAAADUeih12223bcxhAAAAALDxodTdd9/9udv79++/MacFAAAAoJ7YqFBq3Lhx1ZZXr16dDeMrLy+P3XffXSgFAAAAQO2HUo8++uha6z766KMYOXJkdO3adWNOCQAAAEA90qC2TtSsWbMYOnRo/PKXv6ytUwIAAACwmaq1UCqZOXNmrFmzpjZPCQAAAMBmaKOG7w0aNCjKysrWGr73yiuvxCmnnFJbbQMAAABgM7VRodQ+++yz1rpGjRrFeeedF/vtt19ttAsAAACAzdhGhVJDhgwp/Lx06dJs9r2tttpqoxuxcuXKGDBgQFx88cXrDLySl19+OUaNGhWvvvpq7LzzzjFmzJjYddddC9unTp0aP/vZz2LevHnRp0+fuPTSS6N169Yb3SYAAAAAirCm1C233BIHHHBA7L333rHvvvvG/vvvH+PHj9/g86xYsSLOOeecmDVr1nr3WbZsWQwePDh69+4dU6ZMiZ49e8bpp5+erU9mzJgRF110URaW3XHHHbFkyZIYPnz4xl4aAAAAAMXYU+oXv/hF3H777XHmmWdmAVEqbv7cc89loVQaxpcCpJqYPXt2nHvuuVFRUfG5+913333RuHHjGDZsWFbLKgVQTzzxRDzwwANZD6vUln79+kX//v2z/a+88so4+OCD480334xOnTptzCUCAAAAUGw9pe6888647LLL4rjjjouuXbtG9+7d44QTTsiGzP3617+u8XmeeeaZbLhe6t30eV544YXo1atXobh6et9rr73i+eefL2xPvagqbbfddtGhQ4dsPQAAAACbSU+pVEdqxx13XGt9586dY+HChTU+z8CBA2u0X6oTlepIVdWmTZvCkL/3338/ttlmm7W2v/vuu7EhPjOhIABA0WnQoGytWZDhi0ijFtas+fyRCwCUhrKy0mrHRoVSacjexIkT45JLLokGDT7tbJWKnd98882x++67R237+OOPs2GBVaXlVCA9Wb58+edur6k2bVrUQmsBADadijWro6xBw7puBpsR/00BbB5atWoWpWajQqlURDwN1/vTn/4UPXr0yNa99NJLWQh000031XYbs3pSnw2Y0nKTJk0+d3vTpk036HMWLPgw/kV5KwCAOtOwYYPsF86n7/9JLFn4Rl03h81Ay9b/Fvv0GxGLFn0Uq1evqevmQL26l0NtK6Z7eeopVZOOPxsVSu20004xYsSIWLx4ccydOzcLhR577LEYN25cdOvWLWpb+/btY/78+dXWpeXKIXvr296uXbsN+pwUSAmlAIBilwKpxfNm13Uz2Mz4PRig9FVU1INC57fddluMHj06WrRokb2nnlODBg2K8847LyuCXtv22GOP+Otf/1qYpS+9p9n+0vrK7dOnTy/s/84772Svyu0AAAAAbAah1C9/+cv4z//8zzj66KML6y644IK46qqr4oYbbqiVhqXi5qlWVNK3b99YsmRJNuPf7Nmzs/dUZ6pfv37Z9uOPPz5+97vfxeTJk2PmzJkxbNiwOOigg6JTp0610hYAAAAAiiCUWrRoUfzbv/3bOmff++wwuo3Vp0+fuO+++7KfmzdvHtdff33WG2rAgAHxwgsvZOHXlltuWSi8noqu/+IXv8gCqq222irGjh1bK+0AAAAAoPZtVE2pXr16xTXXXJMFP5XFxFesWBETJkzIAqKN8corr3zucprV77e//e16j09hVXoBAAAAsJmGUiNHjozvfve7WW+mHXfcMVv3xhtvRNu2bePaa6+t7TYCAAAAsJnZqFAqDd1LQ+uefPLJ+Mc//hHl5eVZOJVCqoYNG9Z+KwEAAADYrGxUKJU0atQoDj300NptDQAAAAD1wkYVOgcAAACAL0IoBQAAAEDuhFIAAAAA5E4oBQAAAEDuhFIAAAAA5E4oBQAAAEDuhFIAAAAA5E4oBQAAAEDuhFIAAAAA5E4oBQAAAEDuhFIAAAAA5E4oBQAAAEDuhFIAAAAA5E4oBQAAAEDuhFIAAAAA5E4oBQAAAEDuhFIAAAAA5E4oBQAAAEDuhFIAAAAA5E4oBQAAAEDuhFIAAAAA5E4oBQAAAEDuhFIAAAAA5E4oBQAAAEDuhFIAAAAA5E4oBQAAAEDuhFIAAAAA5E4oBQAAAEDuhFIAAAAA5E4oBQAAAEDuhFIAAAAA5E4oBQAAAEDuhFIAAAAA5E4oBQAAAEDuyqMOrVixIsaMGRMPPfRQNGnSJL773e9mr88aNGhQPPPMM2utHzBgQIwdOzY++OCD+MpXvlJt29Zbbx1PP/30Jm0/AAAAACUYSl155ZXx4osvxi233BJvv/12XHDBBdGhQ4fo27dvtf2uueaa+OSTTwrLL7zwQpx11lkxcODAbHn27NlZCDV16tTCPg0a6AQGAAAAUKzqLJRatmxZTJ48OW688cbo0aNH9po1a1ZMmjRprVAqBU6VVq9eHT/96U/jtNNOi9122y1bN3fu3OjcuXO0a9cu9+sAAAAAYMPVWXeimTNnxqpVq6Jnz56Fdb169cp6Qa1Zs2a9x02ZMiUbrve9732vsC71lNpxxx03eZsBAAAAKPGeUvPmzYtWrVpFo0aNCuvatm2b1ZlavHhxtG7deq1jKioq4qabboqTTjopmjVrVlg/Z86cLOD6zne+E++991707t07hg8fHttss80Gtams7AteFAAAlCi/CwOUvrKy0mpHnYVSH3/8cbVAKqlcXrly5TqPSYXL33333Tj22GOrrU/D91KIlYKoFFyl4X1nnHFGNjywYcOGNW5TmzYtNupaAACglLVq9f8PfAEoTa1K8F5eZ6FU48aN1wqfKpfTTHzr8uCDD8ZXv/rVajWmknvvvTfKysoKx40bNy769OmTDQXca6+9atymBQs+jIqKjbgYAIAcNGzYoCR/4aT4LVr0Uaxevf4SGkDtcS+nPtzLy8pq1vGnzkKp9u3bx6JFi7Jhd+Xl5YUhfSlYatmy5TqPefLJJ2PIkCFrrW/atGm15TZt2mTBVRrKtyFSICWUAgCgPvJ7MEDpqyixe3
mdFTrv3r17FkY9//zzhXXTp0/PZtRr0GDtZi1cuDDefPPNrBh6VUuXLo299947nnrqqcK6FEalwKtLly6b+CoAAAAAKKlQKvVu6t+/f4wePTpmzJgR06ZNi4kTJ2ZFzCt7TS1fvryw/6xZs7Ihfx07dqx2nubNm2dB1dixY7PzvPTSS3H22WfHAQccEF27ds39ugAAAAAo4lAqSYXJe/ToESeffHKMGTMmhg4dGkcccUS2LdWEuu+++wr7LliwIBvWl2pHfdYVV1wRX/7yl2Pw4MExaNCg2H777ePqq6/O9VoAAAAAqLmyijRdHZn58xU6BwCKV3n5p8VxH550RiyeN7uum8NmYOt2O8fhJ0zIiuOuWlUcxXFhc+deTn24l5eVRbRt26K4e0oBAAAAUD8JpQAAAADInVAKAAAAgNwJpQAAAADInVAKAAAAgNwJpQAAAADInVAKAAAAgNwJpQAAAADInVAKAAAAgNwJpQAAAADInVAKAAAAgNwJpQAAAADInVAKAAAAgNwJpQAAAADInVAKAAAAgNwJpQAAAADInVAKAAAAgNwJpQAAAADInVAKAAAAgNwJpQAAAADInVAKAAAAgNwJpQAAAADInVAKAAAAgNwJpQAAAADInVAKAAAAgNwJpQAAAADInVAKAAAAgNwJpQAAAADInVAKAAAAgNwJpQAAAADInVAKAAAAgNwJpQAAAADInVAKAAAAgNwJpQAAAADInVAKAAAAgNwJpQAAAADInVAKAAAAgNwJpQAAAACoX6HUihUrYsSIEdG7d+/o06dPTJw4cb37fv/734+uXbtWez322GOF7b/61a/igAMOiJ49e2bn/Pjjj3O6CgAAAAA2VHnUoSuvvDJefPHFuOWWW+Ltt9+OCy64IDp06BB9+/Zda985c+bEVVddFfvtt19h3VZbbZW9P/jggzF+/Phse5s2bWL48OHZzyNHjsz1egAAAAAo8p5Sy5Yti8mTJ8dFF10UPXr0iMMPPzxOO+20mDRp0lr7rly5Mv75z3/GbrvtFu3atSu8GjVqlG2/9dZb4+STT46DDz44dt999xgzZkz85je/0VsKAAAAoEjVWU+pmTNnxqpVq7LhdpV69eoVEyZMiDVr1kSDBv+fl82dOzfKysqiU6dOa51n9erV8be//S2GDBlSWLfnnnvGJ598kn1G1fP/K2VlX+iSAACgZPldGKD0lZWVVjvqLJSaN29etGrVqtDbKWnbtm1WZ2rx4sXRunXraqFU8+bNY9iwYfHMM8/EtttuG0OHDo0DDzwwlixZkh2zzTbbFPYvLy+PrbfeOt59990NalObNi1q6eoAAKB0tGrVrK6bAEA9vJfXWSiVhtZVDaSSyuU0XK+qFEotX748K4Y+ePDgePjhh7PC53fccUcWZFU9tuq5Pnuef2XBgg+jomIjLwgAYBNr2LBBSf7CSfFbtOijWL16TV03A+oF93Lqw728rKxmHX/qLJRq3LjxWqFR5XKTJk2qrf/BD34QgwYNKhQ279atW7z00ktx5513xtlnn13t2Krnatq06Qa1KQVSQikAAOojvwcDlL6KEruX11mh8/bt28eiRYuyulJVh/SlQKply5bV9k31pSoDqUpdunSJ9957LxumlwKu+fPnF7alc6YhgKkYOgAAAADFp85Cqe7du2e1n55//vnCuunTp2cz7FUtcp5ceOGFMXz48GrrUhHzFEylfdMx6dhK6Zzp3KlHFQAAAADFp85CqTS0rn///jF69OiYMWNGTJs2LSZOnBgnnXRSoddUqiOVHHLIIXHPPffE3XffHa+//nqMHz8+C6FOPPHEbPvAgQPj5ptvzs6RzpXOeeyxx27w8D0AAAAA8lFnNaWS1PspBUgnn3xyNrtemlHviCOOyLalouZjx46NAQMGZOtGjRoV1113Xbz99tuxyy67xE033RQdO3bM9j3yyCPjrbfeipEjR2a1pNL+559/fl1eGgAAAACfo6yiotTKYG068+ebfQ8AKF7l5Z/O2PTwpDNi8bzZdd0cNgNbt9s5Dj9hQjZj06pVxTFjE2zu3MupD/fysrKItm1bFO/wPQAAAADqL6EUAAAAALkTSgEAAACQO6EUAAAAALkTSgEAAACQO6EUAAAAALkTSgEAAACQO6EUAAAAALkTSgEAAACQO6EUAAAAALkTSgEAAACQO6EUAAAAALkTSgEAAACQO6EUAAAAALkTSgEAAACQO6EUAAAAALkTSgEAAACQO6EUAAAAALkTSgEAAACQO6EUAAAAALkTSgEAAACQO6EUAAAAALkTSgEAAACQO6EUAAAAALkTSgEAAACQO6EUAAAAALkTSgEAAACQO6EUAAAAALkTSgEAAACQO6EUAAAAALkTSgEAAACQO6EUAAAAALkTSgEAAACQO6EUAAAAALkTSgEAAACQO6EUAAAAALkTSgEAAABQv0KpFStWxIgRI6J3797Rp0+fmDhx4nr3ffzxx+Ooo46Knj17xje/+c145JFHqm1P5+jatWu110cffZTDVQAAAACwocqjDl155ZXx4osvxi233BJvv/12XHDBBdGhQ4fo27dvtf1mzpwZQ4YMiWHDhsWBBx4Yf/jDH+LMM8+Mu+66K7p16xbvvfdefPjhhzFt2rRo0qRJ4bgtt9yyDq4KAAAAgKINpZYtWxaTJ0+OG2+8MXr06JG9Zs2aFZMmTVorlJo6dWrsu+++cdJJJ2XLO+ywQzz66KNx//33Z6HUnDlzol27dtGpU6c6uhoAAAAASiKUSr2fVq1alQ3Hq9SrV6+YMGFCrFmzJho0+P+RhUcffXR88skna50j9Y5KZs+eHZ07d/7CbSor+8KnAACAkuR3YYDSV1ZWWu2os1Bq3rx50apVq2jUqFFhXdu2bbM6U4sXL47WrVsX1u+0007Vjk09qv785z/Hcccdly2nnlIff/xxDBo0KF577bXo3r17VqtqQ4OqNm1afOHrAgCAUtOqVbO6bgIA9fBeXmehVAqRqgZSSeXyypUr13vcwoULY+jQobHXXnvFoYcemq2bO3dufPDBB3HOOedE8+bNsyGBp5xyStx7773Zck0tWPBhVFRs9CUBAGxSDRs2KMlfOCl+ixZ9FKtXr6nrZkC94F5OfbiXl5XVrONPnYVSjRs3Xit8qlyuWqy8qvnz58epp54aFRUVMW7cuMIQv5tvvjkb3tes2af/Y1999dVZQfTHHnssm6mvplIgJZQCAKA+8nswQOmrKLF7eZ2FUu3bt49FixZldaXKy8sLQ/pSINWyZcu19k8z7FUWOr/11lurDe9LPayq9rpKgVfHjh2zYwAAAAAoPv9fTTxnqe5TCqOef/75wrrp06fHbrvtVq3IeeVMfaeddlq2/vbbb88CrUqp19Rhhx0WU6ZMqbb/66+/Hl26dMnpagAAAAAoiZ5STZs2jf79+8fo0aPjJz/5Sbz//vsxceLEGDt2bKHXVIsWLbKeU9dff3288cYbcdtttxW2JWlb2ueggw6Ka665JrbffvusB9XPf/7z2HbbbbMhfAAAAAAUnzoLpZLhw4dno
dTJJ5+cFSRPBcyPOOKIbFufPn2ygGrAgAHx4IMPxvLly+OYY46pdvzRRx8dl19+eZx//vlZr6tzzz03li5dGvvuu2/ccMMN0bBhwzq6MgAAAAA+T1lFGv9GZv58s+8BAMWrvPzTGZsennRGLJ43u66bw2Zg63Y7x+EnTMhmbFq1qjhmbILNnXs59eFeXlYW0bZti+KtKQUAAABA/SWUAgAAACB3QikAAAAAcieUAgAAACB3QikAAAAAcieUAgAAACB3QikAAAAAcieUAgAAACB3QikAAAAAcieUAgAAACB3QikAAAAAcieUAgAAACB3QikAAAAAcieUAgAAACB3QikAAAAAcieUAgAAACB3QikAAAAAcieUAgAAACB3QikAAAAAcieUAgAAACB3QikAAAAAcieUAgAAACB3QikAAAAAcieUAgAAACB3QikAAAAAcieUAgAAACB3QikAAAAAcieUAgAAACB3QikAAAAAcieUAgAAACB3QikAAAAAcieUAgAAACB3QikAAAAAcieUAgAAACB3QikAAAAAcieUAgAAACB3QikAAAAA6lcotWLFihgxYkT07t07+vTpExMnTlzvvi+//HIcc8wxsccee8S3v/3tePHFF6ttnzp1ahx22GHZ9h/+8IexcOHCHK4AAAAAgJILpa688sosXLrlllti1KhRMX78+HjggQfW2m/ZsmUxePDgLLyaMmVK9OzZM04//fRsfTJjxoy46KKLYsiQIXHHHXfEkiVLYvjw4XVwRQAAAAAUdSiVAqXJkydnYVKPHj3i8MMPj9NOOy0mTZq01r733XdfNG7cOIYNGxY77bRTdkyzZs0KAdbtt98e/fr1i/79+0e3bt2ysOv3v/99vPnmm3VwZQAAAAAUbSg1c+bMWLVqVdbrqVKvXr3ihRdeiDVr1lTbN61L28rKyrLl9L7XXnvF888/X9ieelFV2m677aJDhw7ZegAAAACKT3ldffC8efOiVatW0ahRo8K6tm3bZnWmFi9eHK1bt662784771zt+DZt2sSsWbOyn99///3YZptt1tr+7rvvblCbGjSIqKiIopICuMowDr6o9N+3/5yoLRUVFdmLf829nNrSsOGnzxNbbbNzNCxvUtfNYTPQsnXHar8Ls37u5dQW93Lqw728rKzIQ6mPP/64WiCVVC6vXLmyRvtW7rd8+fLP3V5TrVu32KD9AQDqQu/Dz6vrJrCZadWqWV03Aeod93JqWyney+ssQ0s1oj4bGlUuN2nSpEb7Vu63vu1NmzbdRK0HAAAAoCRDqfbt28eiRYuyulJVh+mloKlly5Zr7Tt//vxq69Jy5ZC99W1v167dJr0GAAAAAEoslOrevXuUl5cXipUn06dPj9122y0afGYQ5B577BF//etfC7VL0vtzzz2Xra/cno6t9M4772Svyu0AAAAAFJc6C6XS0Lr+/fvH6NGjY8aMGTFt2rSYOHFinHTSSYVeU6lWVNK3b99YsmRJXHbZZTF79uzsPdWZ6tevX7b9+OOPj9/97ncxefLkbFa/YcOGxUEHHRSdOnWqq8sDAAAA4HOUVdTh1EkpWEqh1EMPPRTNmzeP//iP/4hTTjkl29a1a9cYO3ZsDBgwIFtOwdWoUaNizpw52bYxY8bEl7/85cK5pkyZEuPGjYsPPvgg9t9//7j00kuz2f0AAAAAKD51GkoBAAAAUD/V2fA9AAAAAOovoRQAAAAAuRNKAQAAAJA7oRRsxpYuXRp33313jfZdtWpVHHXUUXHNNdds8nYB1DfFfj8eNGhQjT/vkEMOySaYAajv9+2//e1vcdxxx8Uee+wRX/va1/7l/t/61reySbuqvl599dX17n///ffHggULNqr9//znP7Pzp3coZkIp2Iz96le/it/85jc12nfixIkxc+bMTd4mgPqo2O/HKZD67ne/W6N977rrrvj617++ydsEUMz37Q8//DC+973vRc+ePWPq1Knxwx/+MH70ox/F9OnT17n/6tWr4x//+Efcfvvt8Yc//KHw6tKlyzr3f+utt+Kss87KZqyHzVl5XTcA2HRqOrnm66+/HrfeemvsvPPOm7xNAPVRsd+Pt9566xrv27p1603aFoBSuG+/88478dWvfjWGDRsWZWVl0alTp/jlL38Zzz33XPTq1Wut/VOPpU8++SR23333aNy48Rf+fNhc6CkFJeDNN9+MU045Jesa/M1vfjNuvvnmwvCJ448/Pq6++ursKc1BBx0UkydPzo5J28aPHx/PPPNM1nX384wcOTKGDh26zj800nn69euXfYEOGDAgnn322U12nQDFrlTux08//XTWrtSraf/994+99947brzxxuyYvn37Zm1Mf0itWbNmreF7F154YYwdOzZ7Qp+u88ADD6w2JMXwPaCUbKr79pe+9KW48sors0Aq3UsfffTReO2117L77brMnj07tttuuxoFUsmhhx5aeE/tSffodK+uqur9OG279NJLs/3TtXz00UfZ+gceeCALz/baa6/sO2blypWF4//6179m/wZ77rlndq5f//rXNWob1CahFBS5VFvk9NNPj5YtW2ZdiAcPHpx9SVYdy/73v/897rjjjhgyZEiMGTMm6wqchlakoRjpSzYtr08654oVK+LYY49da1v6kktfbunz0x8k//7v/559/nvvvbfJrhegWJXa/fj999+PadOmxW233RZnnHFG/Nd//Vf85Cc/icsvvzz7+b777otHHnlkncdOmjQpevTokQ1JOeKII2LUqFHZUBWAUrKp79tJCnnSw4Lvf//7WT3AFPCsy5w5c2KLLbbI2pMeFpx44okxY8aM9Z63MiBL7zUdMp2+K6666qrsGps1a5atu/POO+OnP/1pTJgwIZ544om4/vrrC+05+eSTsxAtHZceiFxxxRXx8MMP1+izoLYIpaDIPfXUU1n34PSHRBrOkZ7wpC+xSunpTHpKk57WfOc734kjjzwy+/Jp0qRJbLnlltmXX7t27dZ57lQ4Mf1hcskll2Tn+az0h0x66tK/f/9svPt5552XfU4aCw9Q35Ta/TgNE7nggguy/U844YTsSX56T38wHXzwwdG9e/eYO3fuOo9NPQNSrZQ0HOXMM8+M5cuXx6xZszbq3w1gc7xvV5VCrdTjKoX9aQjfuqReVB988EEcc8wxccMNN8ROO+2UhUKpfetS2WM2vaf21ETqIZV6RO26666FdSNGjMiGE37lK1/J7uf/8z//k61P1/nlL385zjnnnOx74uijj87+bW666aYafRbUFqEUFLlXXnklOnfuHM2bNy+sq/oEZocddog2bdoUltOXUHry8Vl/+ctfsqc9la/0tOSyyy7LhoCkL+J1SedJT36qSp+9rvMDbO6K9X68rvNVSqFSUvkHzfbbb1/YltZVHcZR1Y477lj4ufJ6U48DgFKyKe/blRo1apT1LE2BV+qVmh4irEvq7Zp6rx522GHZ/qNHj46OHTvG7373u/jf//3faudPyxuj6j2+UtXvjhRCzZ8/PwvH1vW9kj7b7/nkTaFzKHINGzZcq9Bh1eXy8vK1ZvZo
0GDtvDl9yVatCbLVVlvFPvvsk/1RUvmkPT0JT2PL09jze++9d51j3tP5K2uQANQnxXo/Xtf50h9i62rTutqzLql3wGcpuguUmk153061qtJsegcccEBhfeqNtWjRonW2JX1W1XAs9dJKPZTSMOzUQynVvKqUgrLFixdXO35dvWg/+7BgXd8VVa+n8trTPX5d+6bvlPRvAHkSSkGR22WXXbIvvKVLlxa+yF566aVqMzWlQoaV48ZffPHFwpP2ql9e6Y+d9DSoqoceeqjachoOkr4QTz311Gw5PVl64YUXsic6ldJy7969N8m1AhSzYr0fr+t8AGza+/Yf//jHrN5eqjlV2Rs1HZ+CpnVJQ7DTA4hUu6oyAEoPENKw6tS2qoFVknozVZWCpMri5Un6eeHChf/y3+DVV1/Nhu4lqYbVtttumw1NTN8rn50wIz0MSeshT4bvQZHbb7/9spk6Lr744qw7bXpqnqYLr7Rs2bLsCzFtS2PD0/aBAwdm25o2bZoVuk1T0K5L+nKt+kpfqOnJT2XX3zRTSXpqn54MpXHwaaz8zJkzszH3APWN+zFAadmU9+1Uv6lFixbZjHbpvnzPPfdk9ZhSwfMk9TiaN29eYZh0mt3uV7/6VTbBRKrnl2oIpgkkUi2ndUmfn6R7fQqgdtttt+zn+++/P/u89Lk16f2ahg2mhxgpRBs3blz2fZKk60xF3lM9w3S+3/72t/Hf//3fWUgGeRJKQZFLXzZpCtjUtTfN6HHttddmdUcqh1akL9pUgDH9YZK+CNOMG6mYYXL44YdnT2FS0cZURHdDpZk+zj777OwL7Fvf+lY2Le7EiROzwowA9Y37MUBp2ZT37dS7Kh2Tgqt0zjTDXSoqXtmjNRUw79OnT9b7KElh0GmnnRY//vGPs7bMnj07K4r+2R5SlVKB83S/P+uss7IZ+FLAls6Rwqjjjjsu6wVWdcjf+hx//PFZUJbOkz43FVdPOnTokM3E9+STT2b1sK677rq48MIL49vf/vYX+BeHDVdWoUAAFLX0Jfjyyy9XG6+evgB///vfZ09W0pSvjz76aJ22EaA+cD8GKC3u21D89JSCEpCebqTutG+99Vb86U9/iltuuSX69u1b180CqHfcjwFKi/s2FDehFBS5NPvGz372s/j1r3+dfYFedNFF2QwdlePdAciH+zFAaXHfhuJn+B4AAAAAudNTCgAAAIDcCaUAAAAAyJ1QCgAAAIDcCaUAAAAAyJ1QCgAAAIDcCaUAAErc008/HV27dq3RvlOmTIlDDjlkk7cJAOBfEUoBAAAAkDuhFAAAAAC5E0oBAOTgn//8ZzbE7vHHH8+Gz/Xs2TN+/OMfx6uvvhoDBgyIPffcM04//fRYunRpYZhdv379Yvfdd8+2P/vss4VzpX3OOeec7Bxf+9rX4m9/+1u1z3rnnXfijDPOiD322CP7rPHjx8fq1atzv2YAgM9T/rlbAQCoVTfccENce+21MXv27Dj33HPjiSeeiFGjRkWTJk3iBz/4Qdx1113RsmXLuPTSS7P1KZRKAdXgwYPjgQceiPbt22fr586dG7fffnssXLgwLrzwwsL5KyoqYsiQIdGtW7f47W9/G/PmzYuRI0dGWVlZ/PCHP6zTawcAqEpPKQCAHKXgKQVG3/jGN6JNmzZx5JFHxv777x+9evWK/fbbLwubbrvtthg0aFD0798/unTpEuedd1586UtfykKoDz/8MO6///740Y9+FD169IgDDjggO2elp556Kt5+++0s1ErH7rPPPnHBBRfErbfeWqfXDQDwWXpKAQDkqFOnToWfU++o7bffvtryypUrY86cOWv1akrD+9L61157LRuKl4KtSrvttlvh57TP4sWLs5Cr0po1a2L58uWxaNGiTXhlAAAbRigFAJCjhg0bVltu0GDtjuuNGzdea10KolK4tC6NGjUq/Lxq1aqsh1QaIvhZLVq02MhWAwDUPsP3AACKTOfOneOFF16oti4tp/UpcNpiiy2qFTd/+eWXqx2bhu+1bt06dthhh+yViqyPGzcuqysFAFAshFIAAEXmlFNOyepH3X333dlwvauvvjpmzpwZ3/nOd6J58+Zx1FFHZTWjUlD19NNPZ7PrVerTp082JPD888+PV155Jf7yl7/ExRdfHE2bNl2rlxYAQF0yfA8AoMh8/etfj/nz52e9m9Lsed27d4+JEyfGTjvtlG1PIVMKpU499dTYaqutsqLoV1xxRbYtBU/XXXddtv3YY4+NLbfcMvr27ZsVOwcAKCZlFWneYAAAAADIkeF7AAAAAOROKAUAAABA7oRSAAAAAOROKAUAAABA7oRSAAAAAOROKAUAAABA7oRSAAAAAOROKAUAAABA7oRSAAAAAOROKAUAAABA7oRSAAAAAETe/g+q7/ByhtzHdAAAAABJRU5ErkJggg==" 524 | }, 525 | "metadata": {}, 526 | "output_type": "display_data" 527 | }, 528 | { 529 | "name": "stdout", 530 | "output_type": "stream", 531 | "text": [ 532 | "\n", 533 | "Performance Statistics by Model:\n" 534 | ] 535 | }, 536 | { 537 | "data": { 538 | "text/plain": [ 539 | " accuracy \\\n", 540 | " count mean std min 25% 50% 75% \n", 541 | "model \n", 542 | "gpt-3.5-turbo 1.0 0.666667 NaN 0.666667 0.666667 0.666667 0.666667 \n", 543 | "gpt-4o 1.0 1.000000 NaN 1.000000 1.000000 1.000000 1.000000 \n", 544 | "gpt-4o-mini 1.0 0.833333 NaN 0.833333 0.833333 0.833333 0.833333 \n", 545 | "\n", 546 | " true_positives ... false_positives \\\n", 547 | " max count mean ... 75% max \n", 548 | "model ... \n", 549 | "gpt-3.5-turbo 0.666667 1.0 2.0 ... 0.0 0.0 \n", 550 | "gpt-4o 1.000000 1.0 4.0 ... 0.0 0.0 \n", 551 | "gpt-4o-mini 0.833333 1.0 3.0 ... 
0.0 0.0 \n", 552 | "\n", 553 | " false_negatives \n", 554 | " count mean std min 25% 50% 75% max \n", 555 | "model \n", 556 | "gpt-3.5-turbo 1.0 2.0 NaN 2.0 2.0 2.0 2.0 2.0 \n", 557 | "gpt-4o 1.0 0.0 NaN 0.0 0.0 0.0 0.0 0.0 \n", 558 | "gpt-4o-mini 1.0 1.0 NaN 1.0 1.0 1.0 1.0 1.0 \n", 559 | "\n", 560 | "[3 rows x 32 columns]" 561 | ], 562 | "text/html": [ 563 | "
\n", 564 | "\n", 581 | "\n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | "
accuracytrue_positives...false_positivesfalse_negatives
countmeanstdmin25%50%75%maxcountmean...75%maxcountmeanstdmin25%50%75%max
model
gpt-3.5-turbo1.00.666667NaN0.6666670.6666670.6666670.6666670.6666671.02.0...0.00.01.02.0NaN2.02.02.02.02.0
gpt-4o1.01.000000NaN1.0000001.0000001.0000001.0000001.0000001.04.0...0.00.01.00.0NaN0.00.00.00.00.0
gpt-4o-mini1.00.833333NaN0.8333330.8333330.8333330.8333330.8333331.03.0...0.00.01.01.0NaN1.01.01.01.01.0
\n", 715 | "

3 rows × 32 columns

\n", 716 | "
" 717 | ] 718 | }, 719 | "metadata": {}, 720 | "output_type": "display_data" 721 | } 722 | ], 723 | "execution_count": 6 724 | }, 725 | { 726 | "cell_type": "markdown", 727 | "id": "999f79285821072a", 728 | "metadata": {}, 729 | "source": [ 730 | "## Save Results\n", 731 | "\n", 732 | "Optionally save experiment results for future analysis." 733 | ] 734 | }, 735 | { 736 | "metadata": { 737 | "ExecuteTime": { 738 | "end_time": "2025-01-21T08:34:10.119140Z", 739 | "start_time": "2025-01-21T08:34:10.113540Z" 740 | } 741 | }, 742 | "cell_type": "code", 743 | "source": [ 744 | "# Save experiment results\n", 745 | "experiments_df.to_csv(\"experiment_results.csv\", index=False)\n", 746 | "print(\"Results saved to 'experiment_results.csv'\")" 747 | ], 748 | "id": "bc5e706cfa63d8cd", 749 | "outputs": [ 750 | { 751 | "name": "stdout", 752 | "output_type": "stream", 753 | "text": [ 754 | "Results saved to 'experiment_results.csv'\n" 755 | ] 756 | } 757 | ], 758 | "execution_count": 7 759 | } 760 | ], 761 | "metadata": { 762 | "kernelspec": { 763 | "display_name": "Python 3", 764 | "language": "python", 765 | "name": "python3" 766 | }, 767 | "language_info": { 768 | "codemirror_mode": { 769 | "name": "ipython", 770 | "version": 2 771 | }, 772 | "file_extension": ".py", 773 | "mimetype": "text/x-python", 774 | "name": "python", 775 | "nbconvert_exporter": "python", 776 | "pygments_lexer": "ipython2", 777 | "version": "2.7.6" 778 | } 779 | }, 780 | "nbformat": 4, 781 | "nbformat_minor": 5 782 | } 783 | --------------------------------------------------------------------------------