├── .coveragerc
├── .cursor
│   └── rules
│       ├── documentation-sync.mdc
│       ├── followups.mdc
│       ├── new-features-planning.mdc
│       ├── readme.md
│       └── simple-language.mdc
├── .cursorignore
├── .github
│   ├── FUNDING.yml
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.md
│   │   └── feature_request.md
│   ├── PULL_REQUEST_TEMPLATE
│   │   └── pull_request_template.md
│   ├── dependabot.yml
│   └── workflows
│       ├── ai-label.yml
│       ├── claude.yml
│       ├── evals.yml
│       ├── pyright.yml
│       ├── python-publish.yml
│       ├── ruff.yml
│       ├── test.yml
│       └── test_docs.yml
├── .gitignore
├── .grit
│   ├── .gitignore
│   └── grit.yaml
├── .pre-commit-config.yaml
├── .ruff.toml
├── .vscode
│   └── settings.json
├── CLAUDE.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── build_mkdocs.sh
├── docs
│   ├── api.md
│   ├── architecture.md
│   ├── blog
│   │   ├── .authors.yml
│   │   ├── index.md
│   │   └── posts
│   │       ├── aisummit-2023.md
│   │       ├── announcing-gemini-tool-calling-support.md
│   │       ├── announcing-instructor-responses-support.md
│   │       ├── announcing-unified-provider-interface.md
│   │       ├── anthropic-prompt-caching.md
│   │       ├── anthropic-web-search-structured.md
│   │       ├── anthropic.md
│   │       ├── bad-schemas-could-break-llms.md
│   │       ├── best_framework.md
│   │       ├── caching.md
│   │       ├── chain-of-density.md
│   │       ├── chat-with-your-pdf-with-gemini.md
│   │       ├── citations.md
│   │       ├── consistent-stories.md
│   │       ├── course.md
│   │       ├── cursor-rules.md
│   │       ├── distilation-part1.md
│   │       ├── extract-model-looks.md
│   │       ├── extracting-model-metadata.md
│   │       ├── fake-data.md
│   │       ├── full-fastapi-visibility.md
│   │       ├── generating-pdf-citations.md
│   │       ├── generator.md
│   │       ├── google-openai-client.md
│   │       ├── img
│   │       │   ├── Structured_Output_Extraction.gif
│   │       │   ├── agent_mcp_example.png
│   │       │   ├── async_type.png
│   │       │   ├── chain-of-density.png
│   │       │   ├── classification-logfire.png
│   │       │   ├── claude_desktop_mcp.png
│   │       │   ├── claude_desktop_screenshot.png
│   │       │   ├── course.png
│   │       │   ├── cursor_mcp_agent.png
│   │       │   ├── cursor_mcp_support.png
│   │       │   ├── distil_openai.png
│   │       │   ├── downloads.png
│   │       │   ├── dumb_rag.png
│   │       │   ├── gemini_citations.png
│   │       │   ├── generator.png
│   │       │   ├── image-logfire.png
│   │       │   ├── instructor-autocomplete.png
│   │       │   ├── iterable.png
│   │       │   ├── langsmith.png
│   │       │   ├── logfire-asyncio.png
│   │       │   ├── logfire-stream.png
│   │       │   ├── logfire-sync-fastapi-arguments.png
│   │       │   ├── logfire-sync-pydantic-validation.png
│   │       │   ├── mcp_architecture.png
│   │       │   ├── mcp_stars.webp
│   │       │   ├── meta.png
│   │       │   ├── parea
│   │       │   │   ├── form-mode.gif
│   │       │   │   ├── trace.png
│   │       │   │   └── validation-error-chart.png
│   │       │   ├── query_understanding.png
│   │       │   ├── statista-image.jpeg
│   │       │   ├── style_1.png
│   │       │   ├── style_2.png
│   │       │   ├── style_3.png
│   │       │   ├── style_4.png
│   │       │   ├── style_5.png
│   │       │   ├── style_6.png
│   │       │   ├── type.png
│   │       │   ├── untidy_table.png
│   │       │   ├── validation-logfire.png
│   │       │   ├── with_completion.png
│   │       │   ├── youtube-clips.gif
│   │       │   └── youtube-flashcards
│   │       │       ├── annotations.png
│   │       │       ├── flashcards.png
│   │       │       └── telemetry.gif
│   │       ├── introducing-structured-outputs-with-cerebras-inference.md
│   │       ├── introducing-structured-outputs.md
│   │       ├── introduction.md
│   │       ├── jinja-proposal.md
│   │       ├── langsmith.md
│   │       ├── learn-async.md
│   │       ├── llm-as-reranker.md
│   │       ├── llms-txt-adoption.md
│   │       ├── logfire.md
│   │       ├── matching-language.md
│   │       ├── migrating-to-uv.md
│   │       ├── multimodal-gemini.md
│   │       ├── open_source.md
│   │       ├── openai-distilation-store.md
│   │       ├── openai-multimodal.md
│   │       ├── pairwise-llm-judge.md
│   │       ├── parea.md
│   │       ├── pydantic-is-still-all-you-need.md
│   │       ├── rag-and-beyond.md
│   │       ├── rag-timelines.md
│   │       ├── semantic-validation-structured-outputs.md
│   │       ├── situate-context.md
│   │       ├── string-based-init.md
│   │       ├── structured-output-anthropic.md
│   │       ├── tidy-data-from-messy-tables.md
│   │       ├── timestamp.md
│   │       ├── using_json.md
│   │       ├── validation-part1.md
│   │       ├── version-1.md
│   │       ├── why-care-about-mcps.md
│   │       ├── writer-support.md
│   │       ├── youtube-flashcards.md
│   │       └── youtube-transcripts.md
│   ├── cli
│   │   ├── batch.md
│   │   ├── finetune.md
│   │   ├── index.md
│   │   └── usage.md
│   ├── concepts
│   │   ├── alias.md
│   │   ├── caching.md
│   │   ├── dictionary_operations.md
│   │   ├── distillation.md
│   │   ├── enums.md
│   │   ├── error_handling.md
│   │   ├── fastapi.md
│   │   ├── fields.md
│   │   ├── hooks.md
│   │   ├── index.md
│   │   ├── iterable.md
│   │   ├── lists.md
│   │   ├── logging.md
│   │   ├── maybe.md
│   │   ├── models.md
│   │   ├── multimodal.md
│   │   ├── parallel.md
│   │   ├── partial.md
│   │   ├── patching.md
│   │   ├── philosophy.md
│   │   ├── prompt_caching.md
│   │   ├── prompting.md
│   │   ├── raw_response.md
│   │   ├── reask_validation.md
│   │   ├── response.png
│   │   ├── retrying.md
│   │   ├── semantic_validation.md
│   │   ├── templating.md
│   │   ├── typeadapter.md
│   │   ├── typeddicts.md
│   │   ├── types.md
│   │   ├── union.md
│   │   ├── unions.md
│   │   ├── usage.md
│   │   └── validation.md
│   ├── contributing.md
│   ├── examples
│   │   ├── action_items.md
│   │   ├── audio_extraction.md
│   │   ├── batch_classification_langsmith.md
│   │   ├── batch_job_oai.md
│   │   ├── building_knowledge_graphs.md
│   │   ├── bulk_classification.md
│   │   ├── classification.md
│   │   ├── db.png
│   │   ├── document_segmentation.md
│   │   ├── entity_resolution.md
│   │   ├── entity_resolution.png
│   │   ├── exact_citations.md
│   │   ├── examples.md
│   │   ├── extract_contact_info.md
│   │   ├── extract_slides.md
│   │   ├── extracting_receipts.md
│   │   ├── extracting_tables.md
│   │   ├── groq.md
│   │   ├── image_to_ad_copy.md
│   │   ├── index.md
│   │   ├── knowledge_graph.md
│   │   ├── knowledge_graph.png
│   │   ├── local_classification.md
│   │   ├── mistral.md
│   │   ├── moderation.md
│   │   ├── multi_modal_gemini.md
│   │   ├── multiple_classification.md
│   │   ├── ollama.md
│   │   ├── open_source.md
│   │   ├── pandas_df.md
│   │   ├── partial_streaming.md
│   │   ├── pii.md
│   │   ├── planning-tasks.md
│   │   ├── recursive.md
│   │   ├── search.md
│   │   ├── self_critique.md
│   │   ├── single_classification.md
│   │   ├── sqlmodel.md
│   │   ├── tables_from_vision.md
│   │   ├── tracing_with_langfuse.md
│   │   ├── watsonx.md
│   │   └── youtube_clips.md
│   ├── faq.md
│   ├── getting-started.md
│   ├── help.md
│   ├── hooks
│   │   └── hide_lines.py
│   ├── img
│   │   ├── action_items.png
│   │   ├── analogical_prompting.png
│   │   ├── cosp.png
│   │   ├── cosp_entropy.png
│   │   ├── cosp_redundancy.png
│   │   ├── error2.png
│   │   ├── faithful_cot_example.png
│   │   ├── ide_support.png
│   │   ├── more.png
│   │   ├── mrr_eqn.png
│   │   ├── mutual_information.png
│   │   ├── partial.gif
│   │   ├── partial_streaming.gif
│   │   ├── plan_and_solve.png
│   │   ├── pot.jpeg
│   │   ├── recall_eqn.png
│   │   ├── retriever.png
│   │   ├── universal_self_adaptive_prompting.png
│   │   ├── universal_self_consistency.png
│   │   └── youtube.gif
│   ├── index.md
│   ├── installation.md
│   ├── integrations
│   │   ├── anthropic.md
│   │   ├── anyscale.md
│   │   ├── azure.md
│   │   ├── bedrock.md
│   │   ├── cerebras.md
│   │   ├── cohere.md
│   │   ├── cortex.md
│   │   ├── databricks.md
│   │   ├── deepseek.md
│   │   ├── fireworks.md
│   │   ├── genai.md
│   │   ├── google.md
│   │   ├── groq.md
│   │   ├── index.md
│   │   ├── litellm.md
│   │   ├── llama-cpp-python.md
│   │   ├── mistral.md
│   │   ├── ollama.md
│   │   ├── openai-responses.md
│   │   ├── openai.md
│   │   ├── openrouter.md
│   │   ├── perplexity.md
│   │   ├── sambanova.md
│   │   ├── together.md
│   │   ├── vertex.md
│   │   └── writer.md
│   ├── javascripts
│   │   └── katex.js
│   ├── jobs.md
│   ├── learning
│   │   ├── getting_started
│   │   │   ├── client_setup.md
│   │   │   ├── first_extraction.md
│   │   │   ├── installation.md
│   │   │   ├── response_models.md
│   │   │   └── structured_outputs.md
│   │   ├── index.md
│   │   ├── patterns
│   │   │   ├── field_validation.md
│   │   │   ├── list_extraction.md
│   │   │   ├── nested_structure.md
│   │   │   ├── optional_fields.md
│   │   │   ├── prompt_templates.md
│   │   │   └── simple_object.md
│   │   ├── streaming
│   │   │   ├── basics.md
│   │   │   └── lists.md
│   │   └── validation
│   │       ├── basics.md
│   │       ├── custom_validators.md
│   │       ├── field_level_validation.md
│   │       └── retry_mechanisms.md
│   ├── llms.txt
│   ├── modes-comparison.md
│   ├── newsletter.md
│   ├── overrides
│   │   └── main.html
│   ├── prompting
│   │   ├── decomposition
│   │   │   ├── decomp.md
│   │   │   ├── faithful_cot.md
│   │   │   ├── least_to_most.md
│   │   │   ├── plan_and_solve.md
│   │   │   ├── program_of_thought.md
│   │   │   ├── recurs_of_thought.md
│   │   │   ├── skeleton_of_thought.md
│   │   │   └── tree-of-thought.md
│   │   ├── ensembling
│   │   │   ├── cosp.md
│   │   │   ├── dense.md
│   │   │   ├── diverse.md
│   │   │   ├── max_mutual_information.md
│   │   │   ├── meta_cot.md
│   │   │   ├── more.md
│   │   │   ├── prompt_paraphrasing.md
│   │   │   ├── self_consistency.md
│   │   │   ├── universal_self_consistency.md
│   │   │   └── usp.md
│   │   ├── few_shot
│   │   │   ├── cosp.md
│   │   │   ├── example_generation
│   │   │   │   └── sg_icl.md
│   │   │   ├── example_ordering.md
│   │   │   └── exemplar_selection
│   │   │       ├── knn.md
│   │   │       └── vote_k.md
│   │   ├── index.md
│   │   ├── self_criticism
│   │   │   ├── chain_of_verification.md
│   │   │   ├── cumulative_reason.md
│   │   │   ├── reversecot.md
│   │   │   ├── self_calibration.md
│   │   │   ├── self_refine.md
│   │   │   └── self_verification.md
│   │   ├── thought_generation
│   │   │   ├── chain_of_thought_few_shot
│   │   │   │   ├── active_prompt.md
│   │   │   │   ├── auto_cot.md
│   │   │   │   ├── complexity_based.md
│   │   │   │   ├── contrastive.md
│   │   │   │   ├── memory_of_thought.md
│   │   │   │   ├── prompt_mining.md
│   │   │   │   └── uncertainty_routed_cot.md
│   │   │   └── chain_of_thought_zero_shot
│   │   │       ├── analogical_prompting.md
│   │   │       ├── step_back_prompting.md
│   │   │       ├── tab_cot.md
│   │   │       └── thread_of_thought.md
│   │   └── zero_shot
│   │       ├── emotion_prompting.md
│   │       ├── rar.md
│   │       ├── re2.md
│   │       ├── role_prompting.md
│   │       ├── s2a.md
│   │       ├── self_ask.md
│   │       ├── simtom.md
│   │       └── style_prompting.md
│   ├── repository-overview.md
│   ├── start-here.md
│   ├── templates
│   │   ├── concept_template.md
│   │   ├── cookbook_template.md
│   │   └── provider_template.md
│   ├── tutorials
│   │   ├── 1-introduction.ipynb
│   │   ├── 2-tips.ipynb
│   │   ├── 3-0-applications-rag.ipynb
│   │   ├── 3-1-validation-rag.ipynb
│   │   ├── 4-validation.ipynb
│   │   ├── 5-knowledge-graphs.ipynb
│   │   ├── 6-chain-of-density.ipynb
│   │   ├── 7-synthetic-data-generation.ipynb
│   │   └── index.md
│   └── why.md
├── docs_todo.md
├── ellipsis.yaml
├── examples
│   ├── __init__.py
│   ├── anthropic-web-tool
│   │   └── run.py
│   ├── anthropic
│   │   └── run.py
│   ├── auto-ticketer
│   │   ├── run.py
│   │   └── tasks.png
│   ├── automodel
│   │   └── run.py
│   ├── avail
│   │   ├── run.py
│   │   └── run_mixtral.py
│   ├── batch-classification
│   │   ├── run-cache.py
│   │   ├── run.py
│   │   └── run_langsmith.py
│   ├── caching
│   │   ├── example_diskcache.py
│   │   ├── example_redis.py
│   │   └── lru.py
│   ├── chain-of-density
│   │   ├── Readme.md
│   │   ├── chain_of_density.py
│   │   ├── finetune.py
│   │   └── requirements.txt
│   ├── citation_with_extraction
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   ├── citation_fuzzy_match.py
│   │   ├── diagram.py
│   │   ├── main.py
│   │   ├── modal_main.py
│   │   ├── requirements.txt
│   │   └── schema.png
│   ├── citations
│   │   └── run.py
│   ├── classification
│   │   ├── classifiy_with_validation.py
│   │   ├── multi_prediction.py
│   │   └── simple_prediction.py
│   ├── codegen-from-schema
│   │   ├── create_fastapi_app.py
│   │   ├── input.json
│   │   ├── models.py
│   │   ├── readme.md
│   │   └── run.py
│   ├── cohere
│   │   └── cohere.py
│   ├── crm
│   │   └── run.py
│   ├── distilations
│   │   ├── math_finetunes.jsonl
│   │   ├── math_finetunes_val.jsonl
│   │   ├── readme.md
│   │   ├── three_digit_mul.py
│   │   └── three_digit_mul_dispatch.py
│   ├── evals
│   │   ├── eval.py
│   │   ├── models.py
│   │   ├── stats_dict.py
│   │   ├── streamlit.py
│   │   └── test.jsonl
│   ├── extract-table
│   │   ├── run_vision.py
│   │   ├── run_vision_langsmith.py
│   │   ├── run_vision_org.py
│   │   ├── run_vision_org_table.py
│   │   ├── run_vision_receipt.py
│   │   └── test.py
│   ├── extracting-pii
│   │   └── run.py
│   ├── fastapi_app
│   │   ├── __init__.py
│   │   ├── main.py
│   │   └── script.py
│   ├── fizzbuzz
│   │   └── run.py
│   ├── gpt-engineer
│   │   ├── changes.diff
│   │   ├── generate.py
│   │   ├── program.json
│   │   └── refactor.py
│   ├── groq
│   │   ├── groq_example.py
│   │   └── groq_example2.py
│   ├── hooks
│   │   ├── README.md
│   │   └── run.py
│   ├── iterables
│   │   └── run.py
│   ├── knowledge-graph
│   │   ├── final.png
│   │   ├── iteration_0.png
│   │   ├── iteration_1.png
│   │   ├── iteration_2.png
│   │   ├── iteration_3.png
│   │   ├── kg.png
│   │   ├── run.py
│   │   └── run_stream.py
│   ├── learn-async
│   │   └── run.py
│   ├── llm-judge-relevance
│   │   └── run.py
│   ├── logfire-fastapi
│   │   ├── Readme.md
│   │   ├── requirements.txt
│   │   ├── server.py
│   │   └── test.py
│   ├── logfire
│   │   ├── classify.py
│   │   ├── image.py
│   │   ├── requirements.txt
│   │   └── validate.py
│   ├── logging
│   │   └── run.py
│   ├── match_language
│   │   ├── run_v1.py
│   │   └── run_v2.py
│   ├── mistral
│   │   └── mistral.py
│   ├── multi-actions
│   │   └── run.py
│   ├── multiple_search_queries
│   │   ├── diagram.py
│   │   ├── schema.png
│   │   └── segment_search_queries.py
│   ├── open_source_examples
│   │   ├── README.md
│   │   ├── openrouter.py
│   │   ├── perplexity.py
│   │   └── runpod.py
│   ├── openai-audio
│   │   ├── output.wav
│   │   └── run.py
│   ├── parallel
│   │   └── run.py
│   ├── partial_streaming
│   │   ├── benchmark.py
│   │   └── run.py
│   ├── patching
│   │   ├── anyscale.py
│   │   ├── oai.py
│   │   ├── pcalls.py
│   │   └── together.py
│   ├── proscons
│   │   └── run.py
│   ├── query_planner_execution
│   │   ├── diagram.py
│   │   ├── query_planner_execution.py
│   │   └── schema.png
│   ├── recursive_filepaths
│   │   ├── diagram.py
│   │   ├── parse_recursive_paths.py
│   │   └── schema.png
│   ├── reranker
│   │   └── run.py
│   ├── resolving-complex-entities
│   │   ├── entity.png
│   │   └── run.py
│   ├── retry
│   │   └── run.py
│   ├── safer_sql_example
│   │   ├── diagram.py
│   │   ├── safe_sql.py
│   │   └── schema.png
│   ├── simple-extraction
│   │   ├── maybe_user.py
│   │   └── user.py
│   ├── situate_context
│   │   └── run.py
│   ├── sqlmodel
│   │   └── run.py
│   ├── stream_action_items
│   │   └── run.py
│   ├── synethic-data
│   │   └── run.py
│   ├── task_planner
│   │   ├── diagram.py
│   │   ├── schema.png
│   │   └── task_planner_topological_sort.py
│   ├── timestamps
│   │   └── run.py
│   ├── union
│   │   └── run.py
│   ├── validated-multiclass
│   │   ├── output.json
│   │   └── run.py
│   ├── validators
│   │   ├── allm_validator.py
│   │   ├── annotator.py
│   │   ├── chain_of_thought_validator.py
│   │   ├── citations.py
│   │   ├── competitors.py
│   │   ├── field_validator.py
│   │   ├── just_a_guy.py
│   │   ├── llm_validator.py
│   │   ├── moderation.py
│   │   └── readme.md
│   ├── vision
│   │   ├── image_to_ad_copy.py
│   │   ├── run.py
│   │   ├── run_raw.py
│   │   ├── run_table.py
│   │   └── slides.py
│   ├── watsonx
│   │   └── watsonx.py
│   ├── youtube-clips
│   │   └── run.py
│   ├── youtube-flashcards
│   │   └── run.py
│   └── youtube
│       └── run.py
├── instructor
│   ├── __init__.py
│   ├── _types
│   │   ├── __init__.py
│   │   └── _alias.py
│   ├── auto_client.py
│   ├── batch.py
│   ├── cli
│   │   ├── __init__.py
│   │   ├── batch.py
│   │   ├── cli.py
│   │   ├── deprecated_hub.py
│   │   ├── files.py
│   │   ├── jobs.py
│   │   └── usage.py
│   ├── client.py
│   ├── client_anthropic.py
│   ├── client_bedrock.py
│   ├── client_cerebras.py
│   ├── client_cohere.py
│   ├── client_fireworks.py
│   ├── client_gemini.py
│   ├── client_genai.py
│   ├── client_groq.py
│   ├── client_mistral.py
│   ├── client_perplexity.py
│   ├── client_vertexai.py
│   ├── client_writer.py
│   ├── distil.py
│   ├── dsl
│   │   ├── __init__.py
│   │   ├── citation.py
│   │   ├── iterable.py
│   │   ├── maybe.py
│   │   ├── parallel.py
│   │   ├── partial.py
│   │   ├── simple_type.py
│   │   └── validators.py
│   ├── exceptions.py
│   ├── function_calls.py
│   ├── hooks.py
│   ├── mode.py
│   ├── models.py
│   ├── multimodal.py
│   ├── patch.py
│   ├── process_response.py
│   ├── py.typed
│   ├── reask.py
│   ├── retry.py
│   ├── templating.py
│   ├── utils.py
│   └── validators.py
├── make_desc.py
├── make_sitemap.py
├── mkdocs.yml
├── poetry.lock
├── pyproject.toml
├── pyrightconfig.json
├── requirements-doc.txt
├── requirements-examples.txt
├── requirements.txt
├── sitemap.yaml
├── tests
│   ├── __init__.py
│   ├── assets
│   │   ├── gettysburg.wav
│   │   ├── image.jpg
│   │   └── invoice.pdf
│   ├── conftest.py
│   ├── dsl
│   │   ├── test_partial.py
│   │   ├── test_simple_type.py
│   │   └── test_simple_type_fix.py
│   ├── llm
│   │   ├── test_anthropic
│   │   │   ├── __init__.py
│   │   │   ├── conftest.py
│   │   │   ├── evals
│   │   │   │   └── test_simple.py
│   │   │   ├── test_multimodal.py
│   │   │   ├── test_reasoning.py
│   │   │   ├── test_stream.py
│   │   │   ├── test_system.py
│   │   │   └── util.py
│   │   ├── test_cerebras
│   │   │   ├── __init__.py
│   │   │   └── modes.py
│   │   ├── test_cohere
│   │   │   ├── conftest.py
│   │   │   ├── test_json_schema.py
│   │   │   ├── test_none_response.py
│   │   │   └── test_retries.py
│   │   ├── test_fireworks
│   │   │   ├── __init__.py
│   │   │   ├── test_format.py
│   │   │   ├── test_simple.py
│   │   │   ├── test_stream.py
│   │   │   └── util.py
│   │   ├── test_gemini
│   │   │   ├── __init__.py
│   │   │   ├── conftest.py
│   │   │   ├── evals
│   │   │   │   ├── __init__.py
│   │   │   │   ├── test_classification_enums.py
│   │   │   │   ├── test_classification_literals.py
│   │   │   │   ├── test_entities.py
│   │   │   │   ├── test_extract_users.py
│   │   │   │   └── test_sentiment_analysis.py
│   │   │   ├── test_files
│   │   │   │   └── sample.mp3
│   │   │   ├── test_format.py
│   │   │   ├── test_list_content.py
│   │   │   ├── test_modes.py
│   │   │   ├── test_multimodal_content.py
│   │   │   ├── test_patch.py
│   │   │   ├── test_retries.py
│   │   │   ├── test_roles.py
│   │   │   ├── test_simple_types.py
│   │   │   ├── test_stream.py
│   │   │   └── util.py
│   │   ├── test_genai
│   │   │   ├── __init__.py
│   │   │   ├── conftest.py
│   │   │   ├── test_format.py
│   │   │   ├── test_invalid_schema.py
│   │   │   ├── test_long_prompt.py
│   │   │   ├── test_multimodal.py
│   │   │   ├── test_retries.py
│   │   │   ├── test_simple.py
│   │   │   ├── test_stream.py
│   │   │   ├── test_utils.py
│   │   │   └── util.py
│   │   ├── test_litellm.py
│   │   ├── test_mistral
│   │   │   ├── __init__.py
│   │   │   ├── conftest.py
│   │   │   ├── test_modes.py
│   │   │   ├── test_multimodal.py
│   │   │   ├── test_retries.py
│   │   │   ├── test_stream.py
│   │   │   └── util.py
│   │   ├── test_new_client.py
│   │   ├── test_openai
│   │   │   ├── __init__.py
│   │   │   ├── conftest.py
│   │   │   ├── docs
│   │   │   │   ├── test_concepts.py
│   │   │   │   ├── test_docs.py
│   │   │   │   ├── test_examples.py
│   │   │   │   ├── test_hub.py
│   │   │   │   ├── test_mkdocs.py
│   │   │   │   ├── test_posts.py
│   │   │   │   └── test_prompt_tips.py
│   │   │   ├── evals
│   │   │   │   ├── __init__.py
│   │   │   │   ├── readme.md
│   │   │   │   ├── test_classification_enums.py
│   │   │   │   ├── test_classification_literals.py
│   │   │   │   ├── test_entities.py
│   │   │   │   ├── test_extract_users.py
│   │   │   │   └── test_sentiment_analysis.py
│   │   │   ├── test_attr.py
│   │   │   ├── test_hooks.py
│   │   │   ├── test_modes.py
│   │   │   ├── test_multimodal.py
│   │   │   ├── test_multitask.py
│   │   │   ├── test_parallel.py
│   │   │   ├── test_patch.py
│   │   │   ├── test_response.py
│   │   │   ├── test_responses_tools.py
│   │   │   ├── test_retries.py
│   │   │   ├── test_simple_types.py
│   │   │   ├── test_stream.py
│   │   │   ├── test_validation_context.py
│   │   │   ├── test_validators.py
│   │   │   └── util.py
│   │   ├── test_perplexity
│   │   │   ├── __init__.py
│   │   │   ├── conftest.py
│   │   │   ├── test_modes.py
│   │   │   └── util.py
│   │   ├── test_vertexai
│   │   │   ├── __init__.py
│   │   │   ├── test_deprecated_async.py
│   │   │   ├── test_format.py
│   │   │   ├── test_message_parser.py
│   │   │   ├── test_modes.py
│   │   │   ├── test_retries.py
│   │   │   ├── test_simple_types.py
│   │   │   ├── test_stream.py
│   │   │   └── util.py
│   │   └── test_writer
│   │       ├── __init__.py
│   │       ├── conftest.py
│   │       ├── evals
│   │       │   ├── __init__.py
│   │       │   ├── test_classification_enums.py
│   │       │   ├── test_classification_literals.py
│   │       │   ├── test_entities.py
│   │       │   ├── test_extract_users.py
│   │       │   └── test_sentiment_analysis.py
│   │       ├── test_format_common_models.py
│   │       ├── test_format_difficult_models.py
│   │       ├── test_retries.py
│   │       ├── test_streaming.py
│   │       └── util.py
│   ├── test_auto_client.py
│   ├── test_dict_operations.py
│   ├── test_dict_operations_validation.py
│   ├── test_distil.py
│   ├── test_dynamic_model_creation.py
│   ├── test_fizzbuzz_fix.py
│   ├── test_formatting.py
│   ├── test_function_calls.py
│   ├── test_json_extraction.py
│   ├── test_json_extraction_edge_cases.py
│   ├── test_message_processing.py
│   ├── test_multimodal.py
│   ├── test_multitask.py
│   ├── test_patch.py
│   ├── test_process_response.py
│   ├── test_response_model_conversion.py
│   ├── test_schema.py
│   ├── test_simple_types.py
│   └── test_utils.py
└── uv.lock

--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
[run]
source =
    instructor/
omit =
    instructor/cli/*
--------------------------------------------------------------------------------
/.cursor/rules/documentation-sync.mdc:
--------------------------------------------------------------------------------
---
description: when making code changes or adding documentation
globs: ["*.py", "*.md"]
alwaysApply: true
---

- When making code changes:
  - Update related documentation files to reflect the changes
  - Check that docstrings and type hints are up to date
  - Update any example code in markdown files
  - Review README.md if the changes affect installation or usage

- When creating new markdown files:
  - Add the file to mkdocs.yml under the appropriate section
  - Follow the existing hierarchy and indentation
  - Use descriptive nav titles
  - Example:

    ```yaml
    nav:
      - Home: index.md
      - Guides:
          - Getting Started: guides/getting-started.md
          - Your New File: guides/your-new-file.md
    ```

- For API documentation:
  - Ensure new functions/classes are documented
  - Include type hints and docstrings
  - Add usage examples
  - Update API reference docs if auto-generated

- Documentation Quality:
  - Write at a grade 10 reading level (see simple-language.mdc)
  - Include working code examples
  - Add links to related documentation
  - Use consistent formatting and style
--------------------------------------------------------------------------------
/.cursor/rules/followups.mdc:
--------------------------------------------------------------------------------
---
description: when AI agents are collaborating on code
globs: "*"
alwaysApply: true
---

Make sure to come up with follow-up hot keys, using [J], [K], and [L]. They should be thoughtful and actionable, and should result in small additional code changes based on the context that you have available.
--------------------------------------------------------------------------------
/.cursor/rules/simple-language.mdc:
--------------------------------------------------------------------------------
---
description: when writing documentation
globs: *.md
alwaysApply: false
---

- When writing documents and concepts, make sure that you write at a grade 10 reading level
- Make sure every code block has complete imports and makes no references to previous code blocks; each one needs to be self-contained
--------------------------------------------------------------------------------
/.cursorignore:
--------------------------------------------------------------------------------
# Add directories or file patterns to ignore during indexing (e.g. foo/ or *.csv)
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
github: jxnl
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
---
name: Bug report
about: Create a report to help us improve
---

- [ ] This is actually a bug report.
- [ ] I am not getting good LLM results
- [ ] I have tried asking for help in the community on Discord or in discussions and have not received a response.
- [ ] I have tried searching the documentation and have not found an answer.

**What model are you using?**

- [ ] gpt-3.5-turbo
- [ ] gpt-4-turbo
- [ ] gpt-4
- [ ] Other (please specify)

**Describe the bug**
A clear and concise description of what the bug is.

**To Reproduce**
Steps to reproduce the behavior, including code snippets of the model, the input data, and the OpenAI response.

**Expected behavior**
A clear and concise description of what you expected to happen.

**Screenshots**
If applicable, add screenshots to help explain your problem.
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
---
name: Feature request
about: Suggest an idea for this project
---

**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]

**Describe the solution you'd like**
A clear and concise description of what you want to happen.

**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.

**Additional context**
Add any other context or screenshots about the feature request here.
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md:
--------------------------------------------------------------------------------
> Please use conventional commits to describe your changes. For example, `feat: add new feature` or `fix: fix a bug`. If you are unsure, leave the title as `...` and AI will handle it.

## Describe your changes

...

## Issue ticket number and link

## Checklist before requesting a review

- [ ] I have performed a self-review of my code
- [ ] If it is a core feature, I have added thorough tests.
- [ ] If it is a core feature, I have added documentation.
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates

version: 2
updates:
  - package-ecosystem: "pip" # See documentation for possible values
    directory: "/" # Location of package manifests
    schedule:
      interval: "daily"
    groups:
      poetry:
        patterns: ["*"]
--------------------------------------------------------------------------------
/.github/workflows/ai-label.yml:
--------------------------------------------------------------------------------
name: AI Labeler

on:
  issues:
    types: [opened, reopened]
  pull_request:
    types: [opened, reopened]

jobs:
  ai-labeler:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      issues: write
      pull-requests: write
    steps:
      - uses: actions/checkout@v4
      - uses: jlowin/ai-labeler@v0.4.0
        with:
          include-repo-labels: true
          openai-api-key: ${{ secrets.OPENAI_API_KEY }}
--------------------------------------------------------------------------------
/.github/workflows/claude.yml:
--------------------------------------------------------------------------------
name: Claude Code

on:
  issue_comment:
    types: [created]
  pull_request_review_comment:
    types: [created]
  issues:
    types: [opened, assigned]
  pull_request_review:
    types: [submitted]

jobs:
  claude:
    if: |
      (contains(fromJSON('["jxnl","ivanleomk"]'), github.actor)) &&
      ((github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
      (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
      (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
      (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude'))))
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: read
      issues: read
      id-token: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: Run Claude Code
        id: claude
        uses: anthropics/claude-code-action@beta
        with:
          anthropic_api_key: ${{ secrets.CLAUDE_API_KEY }}
--------------------------------------------------------------------------------
/.github/workflows/evals.yml:
--------------------------------------------------------------------------------
name: Weekly Tests

on:
  workflow_dispatch:
  schedule:
    - cron: "0 0 * * 0" # Runs at 00:00 UTC every Sunday
  push:
    branches: [main]
    paths-ignore:
      - "**" # Ignore all paths to ensure it only triggers on schedule

jobs:
  weekly-tests:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v2

      - name: Install Poetry
        uses: snok/install-poetry@v1.3.1

      - name: Set up Python 3.11
        uses: actions/setup-python@v4
        with:
          python-version: 3.11
          cache: "poetry"

      - name: Install dependencies
        run: poetry install --with dev,anthropic

      - name: Run all tests
        run: poetry run pytest tests/
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
--------------------------------------------------------------------------------
/.github/workflows/pyright.yml:
--------------------------------------------------------------------------------
name: Pyright

on:
  pull_request:
    branches: [main]
  push:
    branches: [main]

env:
  WORKING_DIRECTORY: "."
  PYRIGHT_OUTPUT_FILENAME: "pyright.log"

jobs:
  Pyright:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest]
        python-version: ["3.9", "3.10", "3.11"]

    steps:
      - name: Checkout code
        uses: actions/checkout@v3
      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          enable-cache: true
      - name: Set up Python
        run: uv python install ${{ matrix.python-version }}
      - name: Install the project
        run: uv sync --all-extras
      - name: Run pyright
        run: uv run pyright
--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
# This workflow will upload a Python package when a release is created.
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries

# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

name: Upload Python Package

on:
  release:
    types: [published]

permissions:
  contents: read

jobs:
  release:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v2
      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          enable-cache: true
      - name: Set up Python
        run: uv python install 3.10
      - name: Install the project
        run: uv sync --all-extras
      - name: Build the project
        run: uv build
      - name: Build and publish Python package
        run: uv publish
        env:
          UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }}
--------------------------------------------------------------------------------
/.github/workflows/ruff.yml:
--------------------------------------------------------------------------------
name: Ruff

on:
  push:
  pull_request:
    branches: [main]

env:
  WORKING_DIRECTORY: "."
  RUFF_OUTPUT_FILENAME: "ruff.log"
  CUSTOM_FLAGS: ""
  CUSTOM_PACKAGES: "instructor examples tests"

jobs:
  Ruff:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest]
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          enable-cache: true
      - name: Set up Python
        run: uv python install 3.9
      - name: Install the project
        run: uv sync --all-extras
      - name: Run Continuous Integration Action
        uses: astral-sh/ruff-action@v3
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          name: ruff-log
          path: ${{ env.WORKING_DIRECTORY }}/${{ env.RUFF_OUTPUT_FILENAME }}
--------------------------------------------------------------------------------
/.github/workflows/test_docs.yml:
--------------------------------------------------------------------------------
name: Test Docs
on:
  pull_request:
  push:
    branches:
      - master
jobs:
  release:
    runs-on: ubuntu-latest

    strategy:
      matrix:
        python-version: ["3.11"]

    steps:
      - uses: actions/checkout@v2

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y graphviz libcairo2-dev xdg-utils

      - name: Install Poetry
        uses: snok/install-poetry@v1.3.1

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
          cache: "poetry"
      - name: Install uv
        uses: astral-sh/setup-uv@v4
      - name: Install the project
        run: uv sync --all-extras
      - name: Run tests
        run: uv run pytest tests/llm/test_openai/docs
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
--------------------------------------------------------------------------------
/.grit/.gitignore:
--------------------------------------------------------------------------------
.gritmodules
*.log
--------------------------------------------------------------------------------
/.grit/grit.yaml:
--------------------------------------------------------------------------------
version: 0.0.1
patterns:
  - name: github.com/getgrit/python#openai
    level: info
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.9.9 # Ruff version
    hooks:
      - id: ruff # Run the linter.
        name: Run Linter Check (Ruff)
        args: [--fix, --unsafe-fixes]
        files: ^(instructor|tests|examples)/
      - id: ruff-format # Run the formatter.
        name: Run Formatter (Ruff)

  - repo: local
    hooks:
      - id: uv-lock-check
        name: Check uv.lock is up-to-date
        entry: uv
        args: [lock, --check]
        language: system
        files: ^(pyproject\.toml|uv\.lock)$
        pass_filenames: false

      - id: uv-sync-check
        name: Verify dependencies can be installed
        entry: uv
        args: [sync, --check]
        language: system
        files: ^(pyproject\.toml|uv\.lock)$
        pass_filenames: false
--------------------------------------------------------------------------------
/.ruff.toml:
--------------------------------------------------------------------------------
# Exclude a variety of commonly ignored directories.
exclude = [
    ".bzr",
    ".direnv",
    ".eggs",
    ".git",
    ".git-rewrite",
    ".hg",
    ".mypy_cache",
    ".nox",
    ".pants.d",
    ".pytype",
    ".ruff_cache",
    ".svn",
    ".tox",
    ".venv",
    "__pypackages__",
    "_build",
    "buck-out",
    "build",
    "dist",
    "node_modules",
    "venv",
]

# Same as Black.
line-length = 88
output-format = "grouped"

target-version = "py39"

[lint]
select = [
    # bugbear rules
    "B",
    # remove unused imports
    "F401",
    # bare except statements
    "E722",
    # unused arguments
    "ARG",
    # pyupgrade
    "UP",
]
ignore = [
    # mutable defaults
    "B006",
    "B018",
]

unfixable = [
    # disable auto fix for print statements
    "T201",
    "T203",
]
ignore-init-module-imports = true

[lint.extend-per-file-ignores]
"instructor/distil.py" = ["ARG002"]
"tests/test_distil.py" = ["ARG001"]
"tests/test_patch.py" = ["ARG001"]
"examples/task_planner/task_planner_topological_sort.py" = ["ARG002"]
"examples/citation_with_extraction/main.py" = ["ARG001"]
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/.vscode/settings.json
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2023 Jason Liu

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/build_mkdocs.sh:
--------------------------------------------------------------------------------
pip install -r requirements.txt
pip install -r requirements-doc.txt
mkdocs build
--------------------------------------------------------------------------------
/docs/api.md:
--------------------------------------------------------------------------------
---
title: API Reference Guide
description: Explore the comprehensive API reference with details on instructors, validation, iteration, and function calls.
---

# API Reference

::: instructor.from_openai

::: instructor.dsl.validators

::: instructor.dsl.iterable

::: instructor.dsl.partial

::: instructor.dsl.parallel

::: instructor.dsl.maybe

::: instructor.function_calls
--------------------------------------------------------------------------------
/docs/blog/.authors.yml:
--------------------------------------------------------------------------------
authors:
  jxnl:
    name: Jason Liu
    description: Creator
    avatar: https://avatars.githubusercontent.com/u/4852235?v=4
    url: https://twitter.com/intent/follow?screen_name=jxnlco
  ivanleomk:
    name: Ivan Leo
    description: Contributor
    avatar: https://pbs.twimg.com/profile_images/1838778744468836353/utYfioiO_400x400.jpg
    url: https://twitter.com/intent/follow?screen_name=ivanleomk
  anmol:
    name: Anmol Jawandha
    description: Contributor
    avatar: https://pbs.twimg.com/profile_images/1248544843556466693/PgxUIeBs_400x400.jpg
  joschkabraun:
    name: Joschka Braun
    description: Contributor
    avatar: https://pbs.twimg.com/profile_images/1601251353531224065/PYpqKsjL_400x400.jpg
    url: https://twitter.com/joschkabraun
  sarahchieng:
    name: Sarah Chieng
    description: Contributor
    avatar: https://pbs.twimg.com/profile_images/1755455116595834880/Hxh5ceRZ_400x400.jpg
    url: https://twitter.com/sarahchieng
  zilto:
    name: Thierry Jean
    description: Contributor
    avatar: https://avatars.githubusercontent.com/u/68975210?v=4
    url: https://www.linkedin.com/in/thierry-jean/
  yanomaly:
    name: Yan
    description: Contributor
    avatar: https://avatars.githubusercontent.com/u/87994542?v=4
--------------------------------------------------------------------------------
/docs/blog/posts/aisummit-2023.md:
--------------------------------------------------------------------------------
---
authors:
  - jxnl
categories:
  - Pydantic
comments: true
date: 2023-11-02
description: Explore insights on utilizing Pydantic for effective prompt engineering
  in this AI Engineer Summit keynote.
draft: false
tags:
  - Pydantic
  - Prompt Engineering
  - AI Summit
  - Machine Learning
  - Data Validation
---

# AI Engineer Keynote: Pydantic is all you need

[![Pydantic is all you need](https://img.youtube.com/vi/yj-wSRJwrrc/0.jpg)](https://www.youtube.com/watch?v=yj-wSRJwrrc)

[Click here to watch the full talk](https://www.youtube.com/watch?v=yj-wSRJwrrc)

Last month, I ventured back onto the speaking circuit at the inaugural [AI Engineer Summit](https://www.ai.engineer/summit), sharing insights on leveraging [Pydantic](https://docs.pydantic.dev/latest/) for effective prompt engineering. I dove deep into what is covered in our documentation and standard blog posts.

I'd genuinely appreciate any feedback on the talk – every bit helps in refining the art. So, take a moment to check out the [full talk here](https://youtu.be/yj-wSRJwrrc?si=vGMIqtTapbIN8SLz), and let's continue pushing the boundaries of what's possible.
--------------------------------------------------------------------------------
/docs/blog/posts/course.md:
--------------------------------------------------------------------------------
---
authors:
  - jxnl
categories:
  - OpenAI
comments: true
date: 2024-02-14
description: Discover a free one-hour course on Weights and Biases covering essential
  techniques for language models.
draft: false
slug: weights-and-biases-course
tags:
  - Weights and Biases
  - AI course
  - machine learning
  - language models
  - free resources
---

# Free course on Weights and Biases

I just released a free course on Weights and Biases. It goes over the material from the [tutorial](../../tutorials/1-introduction.ipynb). Check it out at [wandb.courses](https://www.wandb.courses/courses/steering-language-models); it's free, open to everyone, and just under an hour long!

[![](img/course.png)](https://www.wandb.courses/courses/steering-language-models)

> Click the image to access the course
--------------------------------------------------------------------------------
/docs/blog/posts/img/ (binary assets):
--------------------------------------------------------------------------------
The blog's image assets (the .png, .gif, .webp, and .jpeg files under
docs/blog/posts/img/, including the parea/ and youtube-flashcards/ subfolders,
as listed in the tree above) are not inlined in this snapshot. Each file
resolves to
https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/blog/posts/img/<filename>
--------------------------------------------------------------------------------
/docs/concepts/alias.md:
--------------------------------------------------------------------------------
---
title: Pydantic Aliases Overview
description: Explore the concept of aliases in Pydantic. Discover the latest documentation and features for better data validation.
---

!!! warning "This page is a work in progress"

    This page is a work in progress. Check out [Pydantic's documentation](https://docs.pydantic.dev/latest/concepts/alias/)
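
In the meantime, here is a minimal sketch of the core idea, using plain Pydantic (nothing here is Instructor-specific, and the field names are illustrative):

```python
from pydantic import BaseModel, Field


class User(BaseModel):
    # The incoming payload uses "Name"; the Python attribute stays snake_case.
    name: str = Field(alias="Name")


print(User.model_validate({"Name": "Jason"}))  # name='Jason'
```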
Check out [Pydantic's documentation](https://docs.pydantic.dev/latest/concepts/alias/) 9 | -------------------------------------------------------------------------------- /docs/concepts/enums.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Using Enums and Literals in Pydantic for Role Management 3 | description: Learn how to implement Enums and Literals in Pydantic to manage standardized user roles with a fallback option. 4 | --- 5 | 6 | To prevent data misalignment, we can use Enums for standardized fields. Always include an "Other" option as a fallback so the model can signal uncertainty. 7 | 8 | ```python hl_lines="7 12" 9 | from pydantic import BaseModel, Field 10 | from enum import Enum 11 | 12 | 13 | class Role(Enum): 14 | PRINCIPAL = "PRINCIPAL" 15 | TEACHER = "TEACHER" 16 | STUDENT = "STUDENT" 17 | OTHER = "OTHER" 18 | 19 | 20 | class UserDetail(BaseModel): 21 | age: int 22 | name: str 23 | role: Role = Field( 24 | description="Correctly assign one of the predefined roles to the user." 25 | ) 26 | ``` 27 | 28 | If you're having a hard time with `Enum`, an alternative is to use `Literal` instead. 29 | 30 | ```python hl_lines="4" 31 | from typing import Literal 32 | from pydantic import BaseModel 33 | 34 | 35 | class UserDetail(BaseModel): 36 | age: int 37 | name: str 38 | role: Literal["PRINCIPAL", "TEACHER", "STUDENT", "OTHER"] 39 | ``` 40 | -------------------------------------------------------------------------------- /docs/concepts/response.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/concepts/response.png -------------------------------------------------------------------------------- /docs/concepts/typeadapter.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Pydantic Type Adapter Overview 3 | description: Explore the ongoing updates of Pydantic's Type Adapter concepts and access the official documentation. 4 | --- 5 | 6 | !!! warning "This page is a work in progress" 7 | 8 | This page is a work in progress. Check out [Pydantic's documentation](https://docs.pydantic.dev/latest/concepts/type_adapter/) 9 | -------------------------------------------------------------------------------- /docs/concepts/typeddicts.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Using TypedDicts with OpenAI API 3 | description: Learn how to utilize TypedDicts in Python with the OpenAI API for structured data responses. 4 | --- 5 | 6 | # TypedDicts 7 | 8 | We also support typed dicts.
9 | 10 | ```python 11 | from typing_extensions import TypedDict 12 | from openai import OpenAI 13 | import instructor 14 | 15 | 16 | class User(TypedDict): 17 | name: str 18 | age: int 19 | 20 | 21 | client = instructor.from_openai(OpenAI()) 22 | 23 | 24 | response = client.chat.completions.create( 25 | model="gpt-3.5-turbo", 26 | response_model=User, 27 | messages=[ 28 | { 29 | "role": "user", 30 | "content": "Timothy is a man from New York who is turning 32 this year", 31 | } 32 | ], 33 | ) 34 | ``` -------------------------------------------------------------------------------- /docs/concepts/union.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Using Union Types in Pydantic Models 3 | description: Learn how to implement Union types in Pydantic models to handle multiple action types in Python. 4 | --- 5 | 6 | !!! note "Redirect Notice" 7 | This page has been consolidated into the comprehensive [Union Types](./unions.md) guide. 8 | Please visit that page for complete information about working with union types in Instructor. 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /docs/examples/db.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/examples/db.png -------------------------------------------------------------------------------- /docs/examples/entity_resolution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/examples/entity_resolution.png -------------------------------------------------------------------------------- /docs/examples/groq.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'Using Groq for Inference: Setup and Example' 3 | description: Learn how to use Groq for inference with the mixtral-8x7b model, including API setup and a practical Python example. 4 | --- 5 | 6 | # Structured Outputs using Groq 7 | Instead of using openai or anthropic, you can now also use groq for inference by using `from_groq`. 8 | 9 | The examples use the mixtral-8x7b model. 10 | 11 | ## GroqCloud API 12 | To use groq, you need to obtain a groq API key. 13 | Go to [groqcloud](https://console.groq.com) and log in. Select API Keys from the left menu and then select Create API key to create a new key.
14 | 15 | ## Usage example 16 | Install the required pip packages to run the example: 17 | ``` 18 | pip install instructor groq pydantic 19 | ``` 20 | You need to export the groq API key: 21 | ``` 22 | export GROQ_API_KEY= 23 | ``` 24 | 25 | An example: 26 | ```python 27 | import os 28 | from pydantic import BaseModel, Field 29 | from typing import List 30 | from groq import Groq 31 | import instructor 32 | 33 | 34 | class Character(BaseModel): 35 | name: str 36 | fact: List[str] = Field(..., description="A list of facts about the subject") 37 | 38 | 39 | client = Groq( 40 | api_key=os.environ.get('GROQ_API_KEY'), 41 | ) 42 | 43 | client = instructor.from_groq(client, mode=instructor.Mode.TOOLS) 44 | 45 | resp = client.chat.completions.create( 46 | model="mixtral-8x7b-32768", 47 | messages=[ 48 | { 49 | "role": "user", 50 | "content": "Tell me about the company Tesla", 51 | } 52 | ], 53 | response_model=Character, 54 | ) 55 | print(resp.model_dump_json(indent=2)) 56 | """ 57 | { 58 | "name": "Tesla", 59 | "fact": [ 60 | "electric vehicle manufacturer", 61 | "solar panel producer", 62 | "based in Palo Alto, California", 63 | "founded in 2003 by Elon Musk" 64 | ] 65 | } 66 | """ 67 | ``` 68 | You can find another example, groq_example2.py, under examples/groq in this repository. 69 | -------------------------------------------------------------------------------- /docs/examples/knowledge_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/examples/knowledge_graph.png -------------------------------------------------------------------------------- /docs/examples/mistral.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Using MistralAI for Structured Outputs 3 | description: Learn how to use MistralAI models for inference, including setup, API key generation, and example code. 4 | --- 5 | 6 | # Structured Outputs using Mistral 7 | You can now also use mistralai models for inference by using `from_mistral`. 8 | 9 | The examples use mistral-large-latest. 10 | 11 | ## MistralAI API 12 | To use mistral, you need to obtain a mistral API key. 13 | Go to [mistralai](https://mistral.ai/), click on Build Now, and log in. Select API Keys from the left menu and then select 14 | Create API key to create a new key.
15 | 16 | ## Usage example 17 | Install the required pip packages to run the example: 18 | ``` 19 | pip install instructor mistralai pydantic 20 | ``` 21 | You need to export the mistral API key: 22 | ``` 23 | export MISTRAL_API_KEY= 24 | ``` 25 | 26 | An example: 27 | ```python 28 | import os 29 | from pydantic import BaseModel 30 | from mistralai import Mistral 31 | from instructor import from_mistral, Mode 32 | 33 | 34 | class UserDetails(BaseModel): 35 | name: str 36 | age: int 37 | 38 | 39 | # enables `response_model` in chat call 40 | client = Mistral(api_key=os.environ.get("MISTRAL_API_KEY")) 41 | 42 | instructor_client = from_mistral( 43 | client=client, 44 | model="mistral-large-latest", 45 | mode=Mode.MISTRAL_TOOLS, 46 | max_tokens=1000, 47 | ) 48 | 49 | resp = instructor_client.messages.create( 50 | response_model=UserDetails, 51 | messages=[{"role": "user", "content": "Jason is 10"}], 52 | temperature=0, 53 | ) 54 | 55 | print(resp) 56 | #> name='Jason' age=10 57 | 58 | # output: UserDetails(name='Jason', age=10) 59 | ``` 60 | -------------------------------------------------------------------------------- /docs/examples/open_source.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Open Source Model Providers for Chat API 3 | description: Explore tested open source models compatible with the OpenAI chat API, including OpenRouter, Perplexity, and RunPod LLMs. 4 | --- 5 | 6 | # Instructor with open source models 7 | Instructor works with open source model providers that support the [OpenAI API chat endpoint](https://platform.openai.com/docs/api-reference/chat). 8 | 9 | See the examples README [here](https://github.com/jxnl/instructor/tree/main/examples/open_source_examples) 10 | 11 | # Currently tested open source model providers 12 | - [OpenRouter](https://openrouter.ai/) 13 | - [Perplexity](https://www.perplexity.ai/) 14 | - [RunPod TheBloke LLMs](https://github.com/TheBlokeAI/dockerLLM/blob/main/README_Runpod_LocalLLMsUI.md) ** 15 | 16 | 17 | ** This utilizes text-generation-webui with the OpenAI plugin under the hood. -------------------------------------------------------------------------------- /docs/help.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Getting Started with Instructor: Help and Resources" 3 | description: Explore key resources for getting help with Instructor, including Discord, blog, concepts, cookbooks, and GitHub discussions. 4 | --- 5 | 6 | # Getting help with Instructor 7 | 8 | If you need help getting started with Instructor or with advanced usage, the following sources may be useful. 9 | 10 | ## :material-discord: Discord 11 | 12 | The [Discord](https://discord.gg/bD9YE9JArw) is a great place to ask questions and get help from the community. 13 | 14 | ## :material-creation: Concepts 15 | 16 | The [concepts](concepts/prompting.md) section explains the core concepts of Instructor and how to prompt with models. 17 | 18 | ## :material-chef-hat: Cookbooks 19 | 20 | The [cookbooks](examples/index.md) are a great place to start. They contain a variety of examples that demonstrate how to use Instructor in different scenarios. 21 | 22 | ## :material-book: Blog 23 | 24 | The [blog](blog/index.md) contains articles that explain how to use Instructor in different scenarios.
25 | 26 | ## :material-github: GitHub Discussions 27 | 28 | [GitHub discussions](https://github.com/jxnl/instructor/discussions) are useful for asking questions; both your question and the answer will help everyone. 29 | 30 | ## :material-github: GitHub Issues 31 | 32 | [GitHub issues](https://github.com/jxnl/instructor/issues) are useful for reporting bugs or requesting new features. 33 | 34 | ## :material-twitter: Twitter 35 | 36 | You can also reach out to me on [Twitter](https://twitter.com/jxnlco) if you have any questions or ideas. 37 | -------------------------------------------------------------------------------- /docs/hooks/hide_lines.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | import mkdocs.plugins 3 | from pymdownx import highlight # type: ignore 4 | 5 | 6 | @mkdocs.plugins.event_priority(0) 7 | # pylint: disable=unused-argument 8 | def on_startup(command: str, dirty: bool) -> None: # noqa: ARG001 9 | """Monkey patch Highlight extension to hide lines in code blocks.""" 10 | original = highlight.Highlight.highlight # type: ignore 11 | 12 | def patched(self: Any, src: str, *args: Any, **kwargs: Any) -> Any: 13 | lines = src.splitlines(keepends=True) 14 | 15 | final_lines = [] 16 | 17 | remove_lines = False 18 | for line in lines: 19 | if line.strip() == "# <%hide%>": 20 | remove_lines = not remove_lines 21 | elif not remove_lines: 22 | final_lines.append(line) 23 | 24 | return original(self, "".join(final_lines), *args, **kwargs) 25 | 26 | highlight.Highlight.highlight = patched 27 | -------------------------------------------------------------------------------- /docs/img/action_items.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/action_items.png -------------------------------------------------------------------------------- /docs/img/analogical_prompting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/analogical_prompting.png -------------------------------------------------------------------------------- /docs/img/cosp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/cosp.png -------------------------------------------------------------------------------- /docs/img/cosp_entropy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/cosp_entropy.png -------------------------------------------------------------------------------- /docs/img/cosp_redundancy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/cosp_redundancy.png -------------------------------------------------------------------------------- /docs/img/error2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/error2.png --------------------------------------------------------------------------------
/docs/img/faithful_cot_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/faithful_cot_example.png -------------------------------------------------------------------------------- /docs/img/ide_support.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/ide_support.png -------------------------------------------------------------------------------- /docs/img/more.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/more.png -------------------------------------------------------------------------------- /docs/img/mrr_eqn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/mrr_eqn.png -------------------------------------------------------------------------------- /docs/img/mutual_information.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/mutual_information.png -------------------------------------------------------------------------------- /docs/img/partial.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/partial.gif -------------------------------------------------------------------------------- /docs/img/partial_streaming.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/partial_streaming.gif -------------------------------------------------------------------------------- /docs/img/plan_and_solve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/plan_and_solve.png -------------------------------------------------------------------------------- /docs/img/pot.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/pot.jpeg -------------------------------------------------------------------------------- /docs/img/recall_eqn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/recall_eqn.png -------------------------------------------------------------------------------- /docs/img/retriever.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/retriever.png -------------------------------------------------------------------------------- /docs/img/universal_self_adaptive_prompting.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/universal_self_adaptive_prompting.png -------------------------------------------------------------------------------- /docs/img/universal_self_consistency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/universal_self_consistency.png -------------------------------------------------------------------------------- /docs/img/youtube.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/youtube.gif -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Installing Instructor with Pip 3 | description: Learn how to install Instructor and its dependencies using pip for Python 3.9+. Simple setup guide included. 4 | --- 5 | 6 | Installation is as simple as: 7 | 8 | ```bash 9 | pip install instructor 10 | ``` 11 | 12 | Instructor has a few dependencies: 13 | 14 | - [`openai`](https://pypi.org/project/openai/): OpenAI's Python client. 15 | - [`typer`](https://pypi.org/project/typer/): Build great CLIs. Easy to code. Based on Python type hints. 16 | - [`docstring-parser`](https://pypi.org/project/docstring-parser/): A parser for Python docstrings, to improve the experience of working with docstrings in jsonschema. 17 | - [`pydantic`](https://pypi.org/project/pydantic/): Data validation and settings management using python type annotations. 18 | 19 | If you've got Python 3.9+ and `pip` installed, you're good to go. 20 | -------------------------------------------------------------------------------- /docs/javascripts/katex.js: -------------------------------------------------------------------------------- 1 | document$.subscribe(({ body }) => { 2 | renderMathInElement(body, { 3 | delimiters: [ 4 | { left: "$$", right: "$$", display: true }, 5 | { left: "$", right: "$", display: false }, 6 | { left: "\\(", right: "\\)", display: false }, 7 | { left: "\\[", right: "\\]", display: true } 8 | ], 9 | }) 10 | }) -------------------------------------------------------------------------------- /docs/jobs.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/jobs.md -------------------------------------------------------------------------------- /docs/learning/index.md: -------------------------------------------------------------------------------- 1 | ## Structured Outputs by Example 2 | 3 | This section contains straightforward examples for using Instructor to extract structured data from language models. Each example progresses from basic to more advanced concepts, with clear Python code and minimal complexity. 
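As a taste of what follows, here is a minimal sketch of a structured extraction (the `User` model and prompt are illustrative; it assumes an OpenAI API key is configured):

```python
import instructor
from openai import OpenAI
from pydantic import BaseModel


class User(BaseModel):
    name: str
    age: int


# Patch the OpenAI client so `response_model` is available
client = instructor.from_openai(OpenAI())

user = client.chat.completions.create(
    model="gpt-4o",
    response_model=User,
    messages=[{"role": "user", "content": "Jason is 25 years old"}],
)
print(user)
#> name='Jason' age=25
```

Each page linked below builds on this basic pattern.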
4 | 5 | ## [Getting Started](#getting-started) 6 | * [Installation](getting_started/installation.md) - Setting up Instructor 7 | * [Your First Extraction](getting_started/first_extraction.md) - Create your first structured output 8 | * [Response Models](getting_started/response_models.md) - Understanding model types 9 | * [Client Setup](getting_started/client_setup.md) - Configure for different providers 10 | ## [Basic Extraction Patterns](#basic-extraction-patterns) 11 | * [Simple Object Extraction](patterns/simple_object.md) - Extract basic objects 12 | * [List Extraction](patterns/list_extraction.md) - Extract lists of items 13 | * [Nested Structure](patterns/nested_structure.md) - Work with nested data 14 | * [Optional Fields](patterns/optional_fields.md) - Handle missing information 15 | * [Field Validation](patterns/field_validation.md) - Add basic validation 16 | * [Prompt Templates](patterns/prompt_templates.md) - Improve extraction with templates 17 | ## [Validation](#validation) 18 | * [Validation Basics](validation/basics.md) - Core validation concepts 19 | * [Field-level Validation](validation/field_level_validation.md) - Validate specific fields 20 | * [Custom Validators](validation/custom_validators.md) - Create your own validators 21 | * [Retry Mechanisms](validation/retry_mechanisms.md) - Handle validation failures 22 | ## [Streaming](#streaming) 23 | * [Streaming Basics](streaming/basics.md) - Get results as they generate 24 | * [Streaming Lists](streaming/lists.md) - Stream collections of data -------------------------------------------------------------------------------- /docs/newsletter.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Subscribe to Instructor Newsletter for AI Updates 3 | description: Get notified about AI tips, blog posts, and research. Stay informed with Instructor's latest features and community insights. 4 | --- 5 | 6 | # Instructor Newsletter 7 | 8 | If you want to be notified of tips, new blog posts, and research, subscribe to our newsletter. Here's what you can expect: 9 | 10 | - Updates on Instructor features and releases 11 | - Blog posts on AI and structured outputs 12 | - Tips and tricks from our community 13 | - Research in the field of LLMs and structured outputs 14 | - Information on AI development skills with Instructor 15 | 16 | Subscribe to our newsletter for updates on AI development. We provide content to keep you informed and help you use Instructor in projects. 
17 | 18 | 19 | -------------------------------------------------------------------------------- /docs/prompting/decomposition/recurs_of_thought.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "" 3 | description: "" 4 | keywords: "" 5 | --- 6 | 7 | [wip] 8 | -------------------------------------------------------------------------------- /docs/prompting/decomposition/tree-of-thought.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "" 3 | description: "" 4 | keywords: "" 5 | --- 6 | 7 | [wip] 8 | -------------------------------------------------------------------------------- /docs/prompting/few_shot/exemplar_selection/vote_k.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "" 3 | description: "" 4 | keywords: "" 5 | --- 6 | 7 | [wip] 8 | -------------------------------------------------------------------------------- /docs/prompting/thought_generation/chain_of_thought_few_shot/memory_of_thought.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "" 3 | description: "" 4 | keywords: "" 5 | --- 6 | 7 | [wip] 8 | -------------------------------------------------------------------------------- /docs/prompting/zero_shot/re2.md: -------------------------------------------------------------------------------- 1 | --- 2 | description: "Re2 (Re-Reading) is a technique that asks the model to read the question again." 3 | --- 4 | 5 | How can we enhance a model's understanding of a query? 6 | 7 | Re2 (**Re**-**R**eading) is a technique that asks the model to read the question again. 8 | 9 | !!! example "Re-Reading Prompting" 10 | **Prompt Template**: Read the question again: <*query*> <*critical thinking prompt*> <sup>1</sup> 11 | 12 | A common critical thinking prompt is: "Let's think step by step." 13 | 14 | ## Implementation 15 | 16 | ```python hl_lines="20" 17 | import instructor 18 | from openai import OpenAI 19 | from pydantic import BaseModel 20 | 21 | 22 | client = instructor.from_openai(OpenAI()) 23 | 24 | 25 | class Response(BaseModel): 26 | answer: int 27 | 28 | 29 | def re2(query, thinking_prompt): 30 | return client.chat.completions.create( 31 | model="gpt-4o", 32 | response_model=Response, 33 | messages=[ 34 | { 35 | "role": "system", 36 | "content": f"Read the question again: {query} {thinking_prompt}", 37 | }, 38 | ], 39 | ) 40 | 41 | 42 | if __name__ == "__main__": 43 | query = """Roger has 5 tennis balls. 44 | He buys 2 more cans of tennis balls. 45 | Each can has 3 tennis balls. 46 | How many tennis balls does he have now? 47 | """ 48 | thinking_prompt = "Let's think step by step." 49 | 50 | response = re2(query=query, thinking_prompt=thinking_prompt) 51 | print(response.answer) 52 | #> 11 53 | ``` 54 | 55 | ## References 56 | 57 | <sup>1</sup>: [Re-Reading Improves Reasoning in Large Language Models](https://arxiv.org/abs/2309.06275) 58 | -------------------------------------------------------------------------------- /docs/repository-overview.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Repository Overview 3 | description: Learn the structure of the Instructor repository and the purpose of each major directory. 4 | --- 5 | 6 | # Repository Overview 7 | 8 | This page explains the layout of the Instructor codebase and what each key directory contains.
9 | 10 | ## Directory Summary 11 | 12 | ### `instructor/` 13 | Core library with clients, adapters, and utilities for structured outputs. 14 | 15 | ### `cli/` 16 | Command-line interface code used for tasks like job management and usage tracking. 17 | 18 | ### `docs/` 19 | Documentation source files for the website built with MkDocs. 20 | 21 | ### `examples/` 22 | Practical examples and cookbooks demonstrating how to use Instructor. 23 | 24 | ### `tests/` 25 | Test suite and evaluation scripts ensuring the library functions correctly. 26 | 27 | -------------------------------------------------------------------------------- /ellipsis.yaml: -------------------------------------------------------------------------------- 1 | # Reference: https://docs.ellipsis.dev 2 | version: 1.1 3 | pr_review: 4 | auto_review_enabled: true 5 | auto_summarize_pr: true 6 | confidence_threshold: 0.85 7 | rules: 8 | # Control what gets flagged during PR review with custom rules. Here are some to get you started: 9 | - "Code should be DRY (Don't Repeat Yourself)" 10 | - "Extremely Complicated Code Needs Comments" 11 | - "Use Descriptive Variable and Constant Names" 12 | - "Function and Method Naming Should Follow Consistent Patterns" 13 | - "If library code changes, expect documentation to be updated" 14 | - "If library code changes, check if tests are updated" 15 | - "If a new `md` file is created in `docs` make sure it's added to mkdocs.yml" 16 | - "Assertions should always have a well-formatted error message." 17 | - "Make sure hub examples are added to mkdocs.yml" 18 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/__init__.py -------------------------------------------------------------------------------- /examples/anthropic-web-tool/run.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | from pydantic import BaseModel 3 | 4 | 5 | # Notice that we use JSON mode, not TOOLS mode 6 | client = instructor.from_provider( 7 | "anthropic/claude-3-7-sonnet-latest", 8 | mode=instructor.Mode.ANTHROPIC_JSON, 9 | async_client=False, 10 | ) 11 | 12 | 13 | class Citation(BaseModel): 14 | id: int 15 | url: str 16 | 17 | 18 | class Response(BaseModel): 19 | citations: list[Citation] 20 | response: str 21 | 22 | 23 | response_data, completion_details = client.messages.create_with_completion( 24 | messages=[ 25 | { 26 | "role": "system", 27 | "content": "You are a helpful assistant that summarizes news articles. Your final response should only contain a single JSON object returned in your final message to the user. Make sure to provide the exact ids for the citations that support the information you provide in the form of inline citations as [1] [2] [3] which correspond to a unique id you generate for a url that you find in the web search tool which is relevant to your final response.", 28 | }, 29 | { 30 | "role": "user", 31 | "content": "What are the latest results for the UFC and who won?
Answer this in a concise response that's under 3 sentences.", 32 | }, 33 | ], 34 | tools=[{"type": "web_search_20250305", "name": "web_search", "max_uses": 3}], 35 | response_model=Response, 36 | ) 37 | 38 | print("Response:") 39 | print(response_data.response) 40 | print("\nCitations:") 41 | for citation in response_data.citations: 42 | print(f"{citation.id}: {citation.url}") 43 | -------------------------------------------------------------------------------- /examples/anthropic/run.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | import anthropic 3 | import instructor 4 | 5 | # Patching the Anthropic client with instructor for enhanced capabilities 6 | client = instructor.from_anthropic(anthropic.Anthropic()) 7 | 8 | 9 | class Properties(BaseModel): 10 | key: str 11 | value: str 12 | 13 | 14 | class User(BaseModel): 15 | name: str 16 | age: int 17 | properties: list[Properties] 18 | 19 | 20 | user = client.messages.create( 21 | model="claude-3-haiku-20240307", 22 | max_tokens=1024, 23 | max_retries=0, 24 | messages=[ 25 | { 26 | "role": "user", 27 | "content": "Create a user for a model with a name, age, and properties.", 28 | } 29 | ], 30 | response_model=User, 31 | ) 32 | 33 | print(user.model_dump_json(indent=2)) 34 | -------------------------------------------------------------------------------- /examples/auto-ticketer/tasks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/auto-ticketer/tasks.png -------------------------------------------------------------------------------- /examples/caching/lru.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | from openai import OpenAI 3 | from pydantic import BaseModel 4 | import functools 5 | 6 | client = instructor.from_openai(OpenAI()) 7 | 8 | 9 | class UserDetail(BaseModel): 10 | name: str 11 | age: int 12 | 13 | 14 | @functools.lru_cache 15 | def extract(data): 16 | return client.chat.completions.create( 17 | model="gpt-3.5-turbo", 18 | response_model=UserDetail, 19 | messages=[ 20 | {"role": "user", "content": data}, 21 | ], 22 | ) 23 | 24 | 25 | def test_extract(): 26 | import time 27 | 28 | start = time.perf_counter() 29 | model = extract("Extract jason is 25 years old") 30 | assert model.name.lower() == "jason" 31 | assert model.age == 25 32 | print(f"Time taken: {time.perf_counter() - start}") 33 | 34 | start = time.perf_counter() 35 | model = extract("Extract jason is 25 years old") 36 | assert model.name.lower() == "jason" 37 | assert model.age == 25 38 | print(f"Time taken: {time.perf_counter() - start}") 39 | 40 | 41 | if __name__ == "__main__": 42 | test_extract() 43 | # Time taken: 0.9267581660533324 44 | # Time taken: 1.2080417945981026e-06 45 | -------------------------------------------------------------------------------- /examples/chain-of-density/finetune.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | from chain_of_density import summarize_article 3 | import csv 4 | import logging 5 | import instructor 6 | from pydantic import BaseModel, Field 7 | 8 | logging.basicConfig(level=logging.INFO) 9 | 10 | client = instructor.from_openai(OpenAI()) 11 | 12 | instructions = instructor.Instructions( 13 | name="Chain Of Density", 14 | finetune_format="messages", 15 | # log handler is
used to save the data to a file 16 | # you can imagine saving it to a database or other storage 17 | # based on your needs! 18 | log_handlers=[logging.FileHandler("generated.jsonl")], 19 | openai_client=client, 20 | ) 21 | 22 | 23 | class GeneratedSummary(BaseModel): 24 | """ 25 | This represents a highly concise summary that includes as many entities as possible from the original source article. 26 | 27 | An Entity is a real-world object that's assigned a name - for example, a person, a country, a product, or a book title. 28 | 29 | Guidelines 30 | - Make every word count 31 | - The new summary should be highly dense and concise yet self-contained, e.g., easily understood without the Article. 32 | - Make space with fusion, compression, and removal of uninformative phrases like "the article discusses" 33 | """ 34 | 35 | summary: str = Field( 36 | ..., 37 | description="This represents the final summary generated that captures the meaning of the original article while being as concise as possible.", 38 | ) 39 | 40 | 41 | @instructions.distil 42 | def distil_summarization(text: str) -> GeneratedSummary: 43 | summary_chain: list[str] = summarize_article(text) 44 | return GeneratedSummary(summary=summary_chain[-1]) 45 | 46 | 47 | with open("test.csv") as file: 48 | reader = csv.reader(file) 49 | next(reader) # Skip the header 50 | for article, _summary in reader: 51 | distil_summarization(article) 52 | -------------------------------------------------------------------------------- /examples/chain-of-density/requirements.txt: -------------------------------------------------------------------------------- 1 | openai 2 | pydantic 3 | instructor 4 | nltk 5 | rich -------------------------------------------------------------------------------- /examples/citation_with_extraction/Dockerfile: -------------------------------------------------------------------------------- 1 | # https://hub.docker.com/_/python 2 | FROM python:3.10-slim-bullseye 3 | 4 | ENV PYTHONUNBUFFERED True 5 | ENV APP_HOME /app 6 | WORKDIR $APP_HOME 7 | COPY requirements.txt ./ 8 | RUN pip install -r requirements.txt 9 | 10 | 11 | COPY . 
./ 12 | 13 | 14 | CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"] -------------------------------------------------------------------------------- /examples/citation_with_extraction/diagram.py: -------------------------------------------------------------------------------- 1 | import erdantic as erd 2 | 3 | from citation_fuzzy_match import QuestionAnswer 4 | 5 | diagram = erd.create(QuestionAnswer) 6 | diagram.draw("examples/citation_fuzzy_match/schema.png") 7 | -------------------------------------------------------------------------------- /examples/citation_with_extraction/modal_main.py: -------------------------------------------------------------------------------- 1 | from main import app 2 | import modal 3 | 4 | stub = modal.Stub("rag-citation") 5 | 6 | image = modal.Image.debian_slim().pip_install("fastapi", "instructor>=0.2.1", "regex") 7 | 8 | 9 | @stub.function(image=image) 10 | @modal.asgi_app() 11 | def fastapi_app(): 12 | return app 13 | -------------------------------------------------------------------------------- /examples/citation_with_extraction/requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi 2 | uvicorn 3 | openai>=1.0.0 4 | pydantic 5 | instructor 6 | regex -------------------------------------------------------------------------------- /examples/citation_with_extraction/schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/citation_with_extraction/schema.png -------------------------------------------------------------------------------- /examples/classification/multi_prediction.py: -------------------------------------------------------------------------------- 1 | import enum 2 | import instructor 3 | 4 | from openai import OpenAI 5 | from pydantic import BaseModel 6 | 7 | client = instructor.from_openai(OpenAI()) 8 | 9 | 10 | # Define new Enum class for multiple labels 11 | class MultiLabels(str, enum.Enum): 12 | BILLING = "billing" 13 | GENERAL_QUERY = "general_query" 14 | HARDWARE = "hardware" 15 | 16 | 17 | # Adjust the prediction model to accommodate a list of labels 18 | class MultiClassPrediction(BaseModel): 19 | predicted_labels: list[MultiLabels] 20 | 21 | 22 | # Modify the classify function 23 | def multi_classify(data: str) -> MultiClassPrediction: 24 | return client.chat.completions.create( 25 | model="gpt-3.5-turbo-0613", 26 | response_model=MultiClassPrediction, 27 | messages=[ 28 | { 29 | "role": "user", 30 | "content": f"Classify the following support ticket: {data}", 31 | }, 32 | ], 33 | ) # type: ignore 34 | 35 | 36 | # Example using a support ticket 37 | ticket = ( 38 | "My account is locked and I can't access my billing info. Phone is also broken." 
39 | ) 40 | prediction = multi_classify(ticket) 41 | print(prediction) 42 | -------------------------------------------------------------------------------- /examples/classification/simple_prediction.py: -------------------------------------------------------------------------------- 1 | import enum 2 | import instructor 3 | from openai import OpenAI 4 | 5 | from pydantic import BaseModel 6 | 7 | client = instructor.from_openai(OpenAI()) 8 | 9 | 10 | class Labels(str, enum.Enum): 11 | SPAM = "spam" 12 | NOT_SPAM = "not_spam" 13 | 14 | 15 | class SinglePrediction(BaseModel): 16 | """ 17 | Correct class label for the given text 18 | """ 19 | 20 | class_label: Labels 21 | 22 | 23 | def classify(data: str) -> SinglePrediction: 24 | return client.chat.completions.create( 25 | model="gpt-3.5-turbo-0613", 26 | response_model=SinglePrediction, 27 | messages=[ 28 | { 29 | "role": "user", 30 | "content": f"Classify the following text: {data}", 31 | }, 32 | ], 33 | ) # type: ignore 34 | 35 | 36 | prediction = classify("Hello there I'm a nigerian prince and I want to give you money") 37 | assert prediction.class_label == Labels.SPAM 38 | -------------------------------------------------------------------------------- /examples/codegen-from-schema/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "type": "object", 4 | "title": "ExtractPerson", 5 | "properties": { 6 | "name": { 7 | "type": "string" 8 | }, 9 | "age": { 10 | "type": "integer" 11 | }, 12 | "phoneNumbers": { 13 | "type": "array", 14 | "items": { 15 | "type": "object", 16 | "properties": { 17 | "type": { 18 | "type": "string", 19 | "enum": ["home", "work", "mobile"] 20 | }, 21 | "number": { 22 | "type": "string" 23 | } 24 | }, 25 | "required": ["type", "number"] 26 | } 27 | } 28 | }, 29 | "required": ["name", "age", "phoneNumbers"] 30 | } 31 | -------------------------------------------------------------------------------- /examples/codegen-from-schema/models.py: -------------------------------------------------------------------------------- 1 | # generated by datamodel-codegen: 2 | # filename: input.json 3 | # timestamp: 2023-09-10T00:33:42+00:00 4 | 5 | from __future__ import annotations 6 | 7 | from enum import Enum 8 | 9 | from pydantic import BaseModel 10 | 11 | 12 | class Type(Enum): 13 | home = "home" 14 | work = "work" 15 | mobile = "mobile" 16 | 17 | 18 | class PhoneNumber(BaseModel): 19 | type: Type 20 | number: str 21 | 22 | 23 | class ExtractPerson(BaseModel): 24 | name: str 25 | age: int 26 | phoneNumbers: list[PhoneNumber] 27 | -------------------------------------------------------------------------------- /examples/codegen-from-schema/readme.md: -------------------------------------------------------------------------------- 1 | # FastAPI Code Generator 2 | 3 | ## Overview 4 | 5 | Generates FastAPI application code from API path, task name, JSON schema path, and Jinja2 prompt template. Also creates a `models.py` file for Pydantic models. 6 | 7 | ## Dependencies 8 | 9 | - FastAPI 10 | - Pydantic 11 | - Jinja2 12 | - datamodel-code-generator 13 | 14 | ## Functions 15 | 16 | ### `create_app(api_path: str, task_name: str, json_schema_path: str, prompt_template: str) -> str` 17 | 18 | Main function to generate FastAPI application code. 19 | 20 | ## Usage 21 | 22 | Run the script with required parameters. 
23 | 24 | Example: 25 | 26 | ```python 27 | fastapi_code = create_app( 28 | api_path="/api/v1/extract_person", 29 | task_name="extract_person", 30 | json_schema_path="./input.json", 31 | prompt_template="Extract the person from the following: {{biography}}", 32 | ) 33 | ``` 34 | 35 | Outputs FastAPI application code to `./run.py` and a Pydantic model to `./models.py`. -------------------------------------------------------------------------------- /examples/codegen-from-schema/run.py: -------------------------------------------------------------------------------- 1 | # This file was generated by instructor 2 | # timestamp: 2023-09-09T20:33:42.572627 3 | # task_name: extract_person 4 | # api_path: /api/v1/extract_person 5 | # json_schema_path: ./input.json 6 | 7 | import instructor 8 | 9 | from fastapi import FastAPI 10 | from pydantic import BaseModel 11 | from jinja2 import Template 12 | from models import ExtractPerson 13 | from openai import AsyncOpenAI 14 | 15 | aclient = instructor.apatch(AsyncOpenAI()) 16 | 17 | app = FastAPI() 18 | 19 | 20 | class TemplateVariables(BaseModel): 21 | biography: str 22 | 23 | 24 | class RequestSchema(BaseModel): 25 | template_variables: TemplateVariables 26 | model: str 27 | temperature: int 28 | 29 | 30 | PROMPT_TEMPLATE = Template( 31 | """Extract the person from the following: {{biography}}""".strip() 32 | ) 33 | 34 | 35 | @app.post("/api/v1/extract_person", response_model=ExtractPerson) 36 | async def extract_person(input: RequestSchema) -> ExtractPerson: 37 | rendered_prompt = PROMPT_TEMPLATE.render(**input.template_variables.model_dump()) 38 | return await aclient.chat.completions.create( 39 | model=input.model, 40 | temperature=input.temperature, 41 | response_model=ExtractPerson, 42 | messages=[{"role": "user", "content": rendered_prompt}], 43 | ) # type: ignore 44 | -------------------------------------------------------------------------------- /examples/cohere/cohere.py: -------------------------------------------------------------------------------- 1 | import cohere 2 | import instructor 3 | from pydantic import BaseModel, Field 4 | 5 | 6 | # Patching the Cohere client with instructor for enhanced capabilities 7 | client = instructor.from_cohere( 8 | cohere.Client(), 9 | max_tokens=1000, 10 | model="command-r-plus", 11 | ) 12 | 13 | 14 | class Person(BaseModel): 15 | name: str = Field(description="name of the person") 16 | country_of_origin: str = Field(description="country of origin of the person") 17 | 18 | 19 | class Group(BaseModel): 20 | group_name: str = Field(description="name of the group") 21 | members: list[Person] = Field(description="list of members in the group") 22 | 23 | 24 | task = """\ 25 | Given the following text, create a Group object for 'The Beatles' band 26 | 27 | Text: 28 | The Beatles were an English rock band formed in Liverpool in 1960. With a line-up comprising John Lennon, Paul McCartney, George Harrison and Ringo Starr, they are regarded as the most influential band of all time. The group were integral to the development of 1960s counterculture and popular music's recognition as an art form.
29 | """ 30 | group = client.messages.create( 31 | response_model=Group, 32 | messages=[{"role": "user", "content": task}], 33 | temperature=0, 34 | ) 35 | 36 | print(group.model_dump_json(indent=2)) 37 | """ 38 | { 39 | "group_name": "The Beatles", 40 | "members": [ 41 | { 42 | "name": "John Lennon", 43 | "country_of_origin": "England" 44 | }, 45 | { 46 | "name": "Paul McCartney", 47 | "country_of_origin": "England" 48 | }, 49 | { 50 | "name": "George Harrison", 51 | "country_of_origin": "England" 52 | }, 53 | { 54 | "name": "Ringo Starr", 55 | "country_of_origin": "England" 56 | } 57 | ] 58 | } 59 | """ 60 | -------------------------------------------------------------------------------- /examples/distilations/readme.md: -------------------------------------------------------------------------------- 1 | # What to Expect 2 | This script demonstrates how to use the `Instructor` library for fine-tuning a Python function that performs three-digit multiplication. It uses Pydantic for type validation and logging features to generate a fine-tuning dataset. 3 | 4 | ## How to Run 5 | 6 | ### Prerequisites 7 | - Python 3.9 8 | - `Instructor` library 9 | 10 | ### Steps 11 | 1. **Install Dependencies** 12 | If you haven't already installed the required libraries, you can do so using pip: 13 | ``` 14 | pip install instructor pydantic 15 | ``` 16 | 17 | 2. **Set Up Logging** 18 | The script uses Python's built-in `logging` module to log the fine-tuning process. Ensure you have write permissions in the directory where the log file `math_finetunes.jsonl` will be saved. 19 | 20 | 3. **Run the Script** 21 | Navigate to the directory containing `script.py` and run it: 22 | ``` 23 | python three_digit_mul.py 24 | ``` 25 | 26 | This will execute the script, running the function ten times with random three-digit numbers for multiplication. The function outputs and logs are saved in `math_finetunes.jsonl`. 27 | 28 | 4. **Fine-Tuning** 29 | Once you have the log file, you can run a fine-tuning job using the following `Instructor` CLI command: 30 | ``` 31 | instructor jobs create-from-file math_finetunes.jsonl 32 | ``` 33 | Wait for the fine-tuning job to complete. 34 | 35 | If you have validation date you can run: 36 | 37 | ``` 38 | instructor jobs create-from-file math_finetunes.jsonl --n-epochs 4 --validation-file math_finetunes_val.jsonl 39 | ``` 40 | 41 | ### Output 42 | 43 | That's it! You've successfully run the script and can now proceed to fine-tune your model. 44 | 45 | ### Dispatch 46 | 47 | Once you have the model you can replace the model in `three_digit_mul_dispatch.py` with the model you just fine-tuned and run the script again. This time, the script will use the fine-tuned model to predict the output of the function. 
-------------------------------------------------------------------------------- /examples/distilations/three_digit_mul_dispatch.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from pydantic import BaseModel, Field 4 | from instructor import Instructions 5 | import instructor 6 | from openai import OpenAI 7 | 8 | client = instructor.from_openai(OpenAI()) 9 | 10 | logging.basicConfig(level=logging.INFO) 11 | 12 | # Usage 13 | instructions = Instructions( 14 | name="three_digit_multiply", 15 | finetune_format="messages", 16 | include_code_body=True, 17 | log_handlers=[ 18 | logging.FileHandler("math_finetunes.jsonl"), 19 | ], 20 | openai_client=client, 21 | ) 22 | 23 | 24 | class Multiply(BaseModel): 25 | a: int 26 | b: int 27 | result: int = Field(..., description="The result of the multiplication") 28 | 29 | 30 | @instructions.distil(mode="dispatch", model="ft:gpt-3.5-turbo-0125:personal::9i1JeuxJ") 31 | def fn(a: int, b: int) -> Multiply: 32 | """Return the result of the multiplication as an integer""" 33 | resp = a * b 34 | return Multiply(a=a, b=b, result=resp) 35 | 36 | 37 | if __name__ == "__main__": 38 | import random 39 | 40 | for _ in range(5): 41 | a = random.randint(100, 999) 42 | b = random.randint(100, 999) 43 | result = fn(a, b) 44 | print(f"{a} * {b} = {result.result}, expected {a * b}") 45 | """ 46 | 972 * 508 = 493056, expected 493776 47 | 145 * 369 = 53505, expected 53505 48 | 940 * 440 = 413600, expected 413600 49 | 114 * 213 = 24282, expected 24282 50 | 259 * 650 = 168350, expected 168350 51 | """ 52 | -------------------------------------------------------------------------------- /examples/evals/models.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from pydantic import BaseModel, Field 3 | from enum import Enum 4 | 5 | 6 | class SourceType(str, Enum): 7 | CRM = "CRM" 8 | WEB = "WEB" 9 | EMAIL = "EMAIL" 10 | SOCIAL_MEDIA = "SOCIAL_MEDIA" 11 | OTHER = "OTHER" 12 | 13 | 14 | class Search(BaseModel): 15 | query: str 16 | source_type: SourceType 17 | results_limit: Optional[int] = Field(10) 18 | is_priority: Optional[bool] = None 19 | tags: Optional[list[str]] = None 20 | 21 | 22 | class MultiSearch(BaseModel): 23 | queries: list[Search] 24 | user_id: Optional[str] 25 | -------------------------------------------------------------------------------- /examples/extract-table/run_vision_receipt.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, model_validator 2 | from openai import OpenAI 3 | import instructor 4 | 5 | 6 | client = instructor.from_openai( 7 | client=OpenAI(), 8 | mode=instructor.Mode.TOOLS, 9 | ) 10 | 11 | 12 | class Item(BaseModel): 13 | name: str 14 | price: float 15 | quantity: int 16 | 17 | 18 | class Receipt(BaseModel): 19 | items: list[Item] 20 | total: float 21 | 22 | @model_validator(mode="after") 23 | def check_total(cls, values: "Receipt"): 24 | items = values.items 25 | total = values.total 26 | calculated_total = sum(item.price * item.quantity for item in items) 27 | if calculated_total != total: 28 | raise ValueError( 29 | f"Total {total} does not match the sum of item prices {calculated_total}" 30 | ) 31 | return values 32 | 33 | 34 | def extract(url: str) -> Receipt: 35 | return client.chat.completions.create( 36 | model="gpt-4o", 37 | max_tokens=4000, 38 | response_model=Receipt, 39 | messages=[ 40 | { 41 | "role": "user", 42 | "content": [ 
43 | { 44 | "type": "image_url", 45 | "image_url": {"url": url}, 46 | }, 47 | { 48 | "type": "text", 49 | "text": "Analyze the image and return the items in the receipt and the total amount.", 50 | }, 51 | ], 52 | } 53 | ], 54 | ) 55 | 56 | 57 | # URLs of images containing receipts. Exhibits the use of the model validator to check the total amount. 58 | urls = [ 59 | "https://templates.mediamodifier.com/645124ff36ed2f5227cbf871/supermarket-receipt-template.jpg", 60 | "https://ocr.space/Content/Images/receipt-ocr-original.jpg", 61 | ] 62 | 63 | for url in urls: 64 | receipt = extract(url) 65 | print(receipt) 66 | -------------------------------------------------------------------------------- /examples/fastapi_app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/fastapi_app/__init__.py -------------------------------------------------------------------------------- /examples/fastapi_app/main.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | from instructor import OpenAISchema 3 | import instructor.dsl as dsl 4 | from pydantic import BaseModel, Field 5 | 6 | app = FastAPI(title="Example Application using instructor") 7 | 8 | 9 | class SearchRequest(BaseModel): 10 | body: str 11 | 12 | 13 | class SearchQuery(OpenAISchema): 14 | title: str = Field(..., description="Question that the query answers") 15 | query: str = Field( 16 | ..., 17 | description="Detailed, comprehensive, and specific query to be used for semantic search", 18 | ) 19 | 20 | 21 | SearchResponse = dsl.MultiTask( 22 | subtask_class=SearchQuery, 23 | description="Correctly segmented set of search queries", 24 | ) 25 | 26 | 27 | @app.post("/search", response_model=SearchResponse) 28 | async def search(request: SearchRequest): 29 | task = ( 30 | dsl.ChatCompletion(name="Segmenting Search requests example") 31 | | dsl.SystemTask(task="Segment search results") 32 | | dsl.TaggedMessage(content=request.body, tag="query") 33 | | dsl.TipsMessage( 34 | tips=[ 35 | "Expand query to contain multiple forms of the same word (SSO -> Single Sign On)", 36 | "Use the title to explain what the query should return, but use the query to complete the search", 37 | "The query should be detailed, specific, and cast a wide net when possible", 38 | ] 39 | ) 40 | | SearchResponse 41 | ) 42 | return await task.acreate() 43 | -------------------------------------------------------------------------------- /examples/fastapi_app/script.py: -------------------------------------------------------------------------------- 1 | from instructor import OpenAISchema, dsl 2 | from pydantic import Field 3 | import json 4 | 5 | 6 | class SearchQuery(OpenAISchema): 7 | query: str = Field( 8 | ..., 9 | description="Detailed, comprehensive, and specific query to be used for semantic search", 10 | ) 11 | 12 | 13 | SearchResponse = dsl.MultiTask( 14 | subtask_class=SearchQuery, 15 | description="Correctly segmented set of search queries", 16 | ) 17 | 18 | 19 | task = ( 20 | dsl.ChatCompletion(name="Segmenting Search requests example") 21 | | dsl.SystemTask(task="Segment search results") 22 | | dsl.TaggedMessage( 23 | content="can you send me the data about the video investment and the one about spot the dog?", 24 | tag="query", 25 | ) 26 | | dsl.TipsMessage( 27 | tips=[ 28 | "Expand query to contain multiple forms of the same word (SSO -> Single Sign On)", 29
| "Use the title to explain what the query should return, but use the query to complete the search", 30 | "The query should be detailed, specific, and cast a wide net when possible", 31 | ] 32 | ) 33 | | SearchResponse 34 | ) 35 | 36 | 37 | print(json.dumps(task.kwargs, indent=1)) 38 | """ 39 | { 40 | "tasks": [ 41 | { 42 | "query": "data about video investment" 43 | }, 44 | { 45 | "query": "data about spot the dog" 46 | } 47 | ] 48 | } 49 | """ 50 | -------------------------------------------------------------------------------- /examples/fizzbuzz/run.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from openai import OpenAI 4 | import instructor 5 | 6 | client = instructor.from_openai(OpenAI()) 7 | 8 | 9 | def fizzbuzz_gpt(n) -> list[int | str]: 10 | return client.chat.completions.create( 11 | model="gpt-3.5-turbo", 12 | response_model=list[int | str], 13 | messages=[ 14 | { 15 | "role": "user", 16 | "content": f"Return the first {n} numbers in fizzbuzz", 17 | }, 18 | ], 19 | ) # type: ignore 20 | 21 | 22 | if __name__ == "__main__": 23 | print(fizzbuzz_gpt(n=15)) 24 | # > [1, 2, 'Fizz', 4, 'Buzz', 'Fizz', 7, 8, 'Fizz', 'Buzz', 11, 'Fizz', 13, 14, 'FizzBuzz'] 25 | -------------------------------------------------------------------------------- /examples/gpt-engineer/program.json: -------------------------------------------------------------------------------- 1 | {"files": [{"file_name": "readme.md", "body": "# FastAPI App\n\nThis is a FastAPI app that provides some basic math functions.\n\n## Usage\n\nTo use this app, follow the instructions below:\n\n1. Install the required dependencies by running `pip install -r requirements.txt`.\n2. Start the app by running `uvicorn main:app --reload`.\n3. 
Open your browser and navigate to `http://localhost:8000/docs` to access the Swagger UI documentation.\n\n## Example\n\nTo perform a basic math operation, you can use the following curl command:\n\n```bash\ncurl -X POST -H \"Content-Type: application/json\" -d '{\"operation\": \"add\", \"operands\": [2, 3]}' http://localhost:8000/calculate\n```\n"}, {"file_name": "main.py", "body": "from fastapi import FastAPI\nfrom pydantic import BaseModel\n\napp = FastAPI()\n\n\nclass Operation(BaseModel):\n operation: str\n operands: list\n\n\n@app.post('/calculate')\nasync def calculate(operation: Operation):\n if operation.operation == 'add':\n result = sum(operation.operands)\n elif operation.operation == 'subtract':\n result = operation.operands[0] - sum(operation.operands[1:])\n elif operation.operation == 'multiply':\n result = 1\n for operand in operation.operands:\n result *= operand\n elif operation.operation == 'divide':\n result = operation.operands[0]\n for operand in operation.operands[1:]:\n result /= operand\n else:\n result = None\n return {'result': result}\n"}, {"file_name": "requirements.txt", "body": "fastapi\nuvicorn\npydantic"}]} -------------------------------------------------------------------------------- /examples/groq/groq_example.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pydantic import BaseModel, Field 3 | from groq import Groq 4 | import instructor 5 | 6 | 7 | class Character(BaseModel): 8 | name: str 9 | fact: list[str] = Field(..., description="A list of facts about the subject") 10 | 11 | 12 | client = Groq( 13 | api_key=os.environ.get("GROQ_API_KEY"), 14 | ) 15 | 16 | client = instructor.from_groq(client, mode=instructor.Mode.TOOLS) 17 | 18 | resp = client.chat.completions.create( 19 | model="mixtral-8x7b-32768", 20 | messages=[ 21 | { 22 | "role": "user", 23 | "content": "Tell me about the company Tesla", 24 | } 25 | ], 26 | response_model=Character, 27 | ) 28 | print(resp.model_dump_json(indent=2)) 29 | """ 30 | { 31 | "name": "Tesla", 32 | "fact": [ 33 | "An American electric vehicle and clean energy company.", 34 | "Co-founded by Elon Musk, JB Straubel, Martin Eberhard, Marc Tarpenning, and Ian Wright in 2003.", 35 | "Headquartered in Austin, Texas.", 36 | "Produces electric vehicles, energy storage solutions, and more recently, solar energy products.", 37 | "Known for its premium electric vehicles, such as the Model S, Model 3, Model X, and Model Y.", 38 | "One of the world's most valuable car manufacturers by market capitalization.", 39 | "Tesla's CEO, Elon Musk, is also the CEO of SpaceX, Neuralink, and The Boring Company.", 40 | "Tesla operates the world's largest global network of electric vehicle supercharging stations.", 41 | "The company aims to accelerate the world's transition to sustainable transport and energy through innovative technologies and products." 
42 | ] 43 | } 44 | """ 45 | -------------------------------------------------------------------------------- /examples/groq/groq_example2.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pydantic import BaseModel 3 | from groq import Groq 4 | import instructor 5 | 6 | client = Groq( 7 | api_key=os.environ.get("GROQ_API_KEY"), 8 | ) 9 | 10 | client = instructor.from_groq(client, mode=instructor.Mode.TOOLS) 11 | 12 | 13 | class UserExtract(BaseModel): 14 | name: str 15 | age: int 16 | 17 | 18 | user: UserExtract = client.chat.completions.create( 19 | model="mixtral-8x7b-32768", 20 | response_model=UserExtract, 21 | messages=[ 22 | {"role": "user", "content": "Extract jason is 25 years old"}, 23 | ], 24 | ) 25 | 26 | assert isinstance(user, UserExtract), "Should be instance of UserExtract" 27 | assert user.name.lower() == "jason" 28 | assert user.age == 25 29 | 30 | print(user.model_dump_json(indent=2)) 31 | """ 32 | { 33 | "name": "jason", 34 | "age": 25 35 | } 36 | """ 37 | -------------------------------------------------------------------------------- /examples/iterables/run.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from collections.abc import Iterable 4 | from openai import OpenAI 5 | from pydantic import BaseModel 6 | 7 | import instructor 8 | 9 | 10 | client = instructor.from_openai(OpenAI()) 11 | 12 | 13 | class User(BaseModel): 14 | name: str 15 | job: str 16 | age: int 17 | 18 | 19 | def stream_extract(input: str) -> Iterable[User]: 20 | return client.chat.completions.create_iterable( 21 | model="gpt-4o", 22 | temperature=0.1, 23 | stream=True, 24 | response_model=User, 25 | messages=[ 26 | { 27 | "role": "system", 28 | "content": "You are a perfect entity extraction system", 29 | }, 30 | { 31 | "role": "user", 32 | "content": ( 33 | f"Consider the data below:\n{input}\n" 34 | "Correctly segment it into entities. " 35 | "Make sure the JSON is correct" 36 | ), 37 | }, 38 | ], 39 | max_tokens=1000, 40 | ) 41 | 42 | 43 | start = time.time() 44 | for user in stream_extract( 45 | input="Create 5 characters from the book Three Body Problem" 46 | ): 47 | delay = round(time.time() - start, 1) 48 | print(f"{delay} s: User({user})") 49 | """ 50 | 0.8 s: User(name='Ye Wenjie' job='Astrophysicist' age=60) 51 | 1.1 s: User(name='Wang Miao' job='Nanomaterials Researcher' age=40) 52 | 1.7 s: User(name='Shi Qiang' job='Detective' age=50) 53 | 1.9 s: User(name='Ding Yi' job='Theoretical Physicist' age=45) 54 | 1.9 s: User(name='Chang Weisi' job='Military Strategist' age=55) 55 | """ 56 | # Notice that the first user arrived at 0.8s while the last only arrived at 1.9s: each entity is yielded as soon as its JSON object completes!
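# A minimal async sketch of the same idea (added for illustration; it assumes
# the same model and that AsyncOpenAI is configured with valid credentials).
# The async client's create_iterable yields each User as soon as its JSON
# object completes in the stream:
#
#   import asyncio
#   from openai import AsyncOpenAI
#
#   aclient = instructor.from_openai(AsyncOpenAI())
#
#   async def astream_extract(input: str) -> None:
#       users = aclient.chat.completions.create_iterable(
#           model="gpt-4o",
#           response_model=User,
#           messages=[{"role": "user", "content": f"Consider the data below:\n{input}\nCorrectly segment it into entities."}],
#       )
#       async for user in users:
#           print(user)
#
#   asyncio.run(astream_extract("Create 5 characters from the book Three Body Problem"))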
57 | -------------------------------------------------------------------------------- /examples/knowledge-graph/final.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/knowledge-graph/final.png -------------------------------------------------------------------------------- /examples/knowledge-graph/iteration_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/knowledge-graph/iteration_0.png -------------------------------------------------------------------------------- /examples/knowledge-graph/iteration_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/knowledge-graph/iteration_1.png -------------------------------------------------------------------------------- /examples/knowledge-graph/iteration_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/knowledge-graph/iteration_2.png -------------------------------------------------------------------------------- /examples/knowledge-graph/iteration_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/knowledge-graph/iteration_3.png -------------------------------------------------------------------------------- /examples/knowledge-graph/kg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/knowledge-graph/kg.png -------------------------------------------------------------------------------- /examples/knowledge-graph/run.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | 3 | from graphviz import Digraph 4 | from pydantic import BaseModel, Field 5 | from openai import OpenAI 6 | 7 | 8 | client = instructor.from_openai(OpenAI()) 9 | 10 | 11 | class Node(BaseModel): 12 | id: int 13 | label: str 14 | color: str 15 | 16 | 17 | class Edge(BaseModel): 18 | source: int 19 | target: int 20 | label: str 21 | color: str = "black" 22 | 23 | 24 | class KnowledgeGraph(BaseModel): 25 | nodes: list[Node] = Field(default_factory=list) 26 | edges: list[Edge] = Field(default_factory=list) 27 | 28 | 29 | def generate_graph(input) -> KnowledgeGraph: 30 | return client.chat.completions.create( 31 | model="gpt-3.5-turbo-16k", 32 | messages=[ 33 | { 34 | "role": "user", 35 | "content": f"Help me understand the following by describing it as a detailed knowledge graph: {input}", 36 | } 37 | ], 38 | response_model=KnowledgeGraph, 39 | ) # type: ignore 40 | 41 | 42 | def visualize_knowledge_graph(kg: KnowledgeGraph): 43 | dot = Digraph(comment="Knowledge Graph") 44 | 45 | # Add nodes 46 | for node in kg.nodes: 47 | dot.node(str(node.id), node.label, color=node.color) 48 | 49 | # Add edges 50 | for edge in kg.edges: 51 | dot.edge(str(edge.source), str(edge.target), label=edge.label, color=edge.color) 52 | 53 | # Render the graph 54 | dot.render("knowledge_graph.gv", view=True) 55 |
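# Note: dot.render shells out to the Graphviz `dot` executable, so the system
# Graphviz package must be installed alongside the `graphviz` Python bindings
# (for example via `apt-get install graphviz` or `brew install graphviz`).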
56 | 57 | graph: KnowledgeGraph = generate_graph("Teach me about quantum mechanics") 58 | visualize_knowledge_graph(graph) 59 | -------------------------------------------------------------------------------- /examples/logfire-fastapi/Readme.md: -------------------------------------------------------------------------------- 1 | # Instructions 2 | 3 | 1. Create a virtual environment and install all of the packages inside `requirements.txt` 4 | 5 | 2. Run the server using 6 | 7 | ``` 8 | uvicorn server:app --reload 9 | ``` 10 | 11 | 3. Open up the documentation at `http://127.0.0.1:8000/docs` to start experimenting with FastAPI! You can print out the streaming example using `test.py`. 12 | -------------------------------------------------------------------------------- /examples/logfire-fastapi/requirements.txt: -------------------------------------------------------------------------------- 1 | pydantic==2.7.1 2 | openai==1.24.1 3 | instructor==1.0.3 4 | logfire==0.28.0 5 | fastapi==0.110.3 6 | uvicorn[standard] 7 | logfire[fastapi] -------------------------------------------------------------------------------- /examples/logfire-fastapi/test.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | response = requests.post( 4 | "http://127.0.0.1:8000/extract",  # uvicorn's default port, matching the Readme 5 | json={ 6 | "query": "Alice and Bob are best friends. They are currently 32 and 43 respectively. " 7 | }, 8 | stream=True, 9 | ) 10 | 11 | for chunk in response.iter_content(chunk_size=1024): 12 | if chunk: 13 | print(str(chunk, encoding="utf-8"), end="\n") 14 | -------------------------------------------------------------------------------- /examples/logfire/classify.py: -------------------------------------------------------------------------------- 1 | import enum 2 | from pydantic import BaseModel 3 | from openai import OpenAI 4 | import instructor 5 | import logfire 6 | 7 | 8 | class Labels(str, enum.Enum): 9 | """Enumeration for single-label text classification.""" 10 | 11 | SPAM = "spam" 12 | NOT_SPAM = "not_spam" 13 | 14 | 15 | class SinglePrediction(BaseModel): 16 | """ 17 | Class for a single class label prediction.
18 | """ 19 | 20 | class_label: Labels 21 | 22 | 23 | openai_client = OpenAI() 24 | logfire.configure(pydantic_plugin=logfire.PydanticPlugin(record="all")) 25 | logfire.instrument_openai(openai_client) 26 | client = instructor.from_openai(openai_client) 27 | 28 | 29 | @logfire.instrument("classification", extract_args=True) 30 | def classify(data: str) -> SinglePrediction: 31 | """Perform single-label classification on the input text.""" 32 | return client.chat.completions.create( 33 | model="gpt-3.5-turbo-0613", 34 | response_model=SinglePrediction, 35 | messages=[ 36 | { 37 | "role": "user", 38 | "content": f"Classify the following text: {data}", 39 | }, 40 | ], 41 | ) 42 | 43 | 44 | if __name__ == "__main__": 45 | emails = [ 46 | "Hello there I'm a Nigerian prince and I want to give you money", 47 | "Meeting with Thomas has been set at Friday next week", 48 | "Here are some weekly product updates from our marketing team", 49 | ] 50 | 51 | for email in emails: 52 | classify(email) 53 | -------------------------------------------------------------------------------- /examples/logfire/requirements.txt: -------------------------------------------------------------------------------- 1 | pydantic==2.7.1 2 | openai==1.24.1 3 | instructor==1.0.3 4 | logfire==0.28.0 -------------------------------------------------------------------------------- /examples/logfire/validate.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | from pydantic import BaseModel, ValidationError 3 | from pydantic.functional_validators import AfterValidator 4 | from instructor import llm_validator 5 | import logfire 6 | import instructor 7 | from openai import OpenAI 8 | 9 | openai_client = OpenAI() 10 | logfire.configure(pydantic_plugin=logfire.PydanticPlugin(record="all")) 11 | logfire.instrument_openai(openai_client) 12 | client = instructor.from_openai(openai_client) 13 | 14 | 15 | class Statement(BaseModel): 16 | message: Annotated[ 17 | str, 18 | AfterValidator( 19 | llm_validator("Don't allow any objectionable content", client=client) 20 | ), 21 | ] 22 | 23 | 24 | messages = [ 25 | "I think we should always treat violence as the best solution", 26 | "There are some great pastries down the road at this bakery I know", 27 | ] 28 | 29 | for message in messages: 30 | try: 31 | Statement(message=message) 32 | except ValidationError as e: 33 | print(e) 34 | -------------------------------------------------------------------------------- /examples/mistral/mistral.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | from mistralai.client import MistralClient 3 | from instructor import from_mistral 4 | from instructor.function_calls import Mode 5 | import os 6 | 7 | 8 | class UserDetails(BaseModel): 9 | name: str 10 | age: int 11 | 12 | 13 | # enables `response_model` in chat call 14 | client = MistralClient(api_key=os.environ.get("MISTRAL_API_KEY")) 15 | instructor_client = from_mistral( 16 | client=client, 17 | model="mistral-large-latest", 18 | mode=Mode.MISTRAL_TOOLS, 19 | max_tokens=1000, 20 | ) 21 | 22 | resp = instructor_client.messages.create( 23 | response_model=UserDetails, 24 | messages=[{"role": "user", "content": "Jason is 10"}], 25 | temperature=0, 26 | ) 27 | 28 | print(resp) 29 | -------------------------------------------------------------------------------- /examples/multiple_search_queries/diagram.py: 
-------------------------------------------------------------------------------- 1 | import erdantic as erd 2 | 3 | from segment_search_queries import MultiSearch 4 | 5 | diagram = erd.create(MultiSearch) 6 | diagram.draw("examples/multiple_search_queries/schema.png") 7 | -------------------------------------------------------------------------------- /examples/multiple_search_queries/schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/multiple_search_queries/schema.png -------------------------------------------------------------------------------- /examples/openai-audio/output.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/openai-audio/output.wav -------------------------------------------------------------------------------- /examples/openai-audio/run.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | from pydantic import BaseModel 3 | import instructor 4 | from instructor.multimodal import Audio 5 | import base64 6 | 7 | client = instructor.from_openai(OpenAI()) 8 | 9 | 10 | class Person(BaseModel): 11 | name: str 12 | age: int 13 | 14 | 15 | with open("./output.wav", "rb") as f: 16 | encoded_string = base64.b64encode(f.read()).decode("utf-8") 17 | 18 | resp = client.chat.completions.create( 19 | model="gpt-4o-audio-preview", 20 | response_model=Person, 21 | modalities=["text"], 22 | audio={"voice": "alloy", "format": "wav"}, 23 | messages=[ 24 | { 25 | "role": "user", 26 | "content": [ 27 | "Extract the following information from the audio", 28 | Audio.from_path("./output.wav"), 29 | ], 30 | }, 31 | ], 32 | ) # type: ignore 33 | 34 | print(resp) 35 | # > Person(name='Jason', age=20) 36 | -------------------------------------------------------------------------------- /examples/parallel/run.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import openai 4 | import instructor 5 | 6 | from typing import Literal 7 | from collections.abc import Iterable 8 | from pydantic import BaseModel 9 | 10 | 11 | class Weather(BaseModel): 12 | location: str 13 | units: Literal["imperial", "metric"] 14 | 15 | 16 | class GoogleSearch(BaseModel): 17 | query: str 18 | 19 | 20 | client = openai.OpenAI() 21 | 22 | client = instructor.from_openai(client, mode=instructor.Mode.PARALLEL_TOOLS) 23 | 24 | resp = client.chat.completions.create( 25 | model="gpt-4-turbo-preview", 26 | messages=[ 27 | {"role": "system", "content": "You must always use tools"}, 28 | { 29 | "role": "user", 30 | "content": "What is the weather in toronto and dallas and who won the super bowl?", 31 | }, 32 | ], 33 | response_model=Iterable[Weather | GoogleSearch], 34 | ) 35 | 36 | for r in resp: 37 | print(r) 38 | -------------------------------------------------------------------------------- /examples/partial_streaming/run.py: -------------------------------------------------------------------------------- 1 | # Part of this code is adapted from the following examples from OpenAI Cookbook: 2 | # https://cookbook.openai.com/examples/how_to_stream_completions 3 | # https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb 4 | import instructor 5 | from openai import
OpenAI 6 | from pydantic import BaseModel 7 | 8 | client = instructor.from_openai(OpenAI(), mode=instructor.Mode.TOOLS) 9 | 10 | 11 | class User(BaseModel): 12 | name: str 13 | role: str 14 | 15 | 16 | extraction_stream = client.chat.completions.create_partial( 17 | model="gpt-4", 18 | response_model=User, 19 | messages=[ 20 | { 21 | "role": "user", 22 | "content": "give me a Harry Potter character in json, name, role, age", 23 | } 24 | ], 25 | ) 26 | 27 | for chunk in extraction_stream: 28 | print(chunk) 29 | -------------------------------------------------------------------------------- /examples/patching/anyscale.py: -------------------------------------------------------------------------------- 1 | import os 2 | import instructor 3 | 4 | from openai import OpenAI 5 | from pydantic import BaseModel 6 | 7 | 8 | # By default, the patch function will patch the ChatCompletion.create and ChatCompletion.acreate methods to support the response_model parameter. 9 | client = instructor.from_openai( 10 | OpenAI( 11 | base_url="https://api.endpoints.anyscale.com/v1", 12 | api_key=os.environ["ANYSCALE_API_KEY"], 13 | ), 14 | mode=instructor.Mode.JSON_SCHEMA, 15 | ) 16 | 17 | 18 | # Now, we can use the response_model parameter using only a base model 19 | # rather than having to use the OpenAISchema class 20 | class UserExtract(BaseModel): 21 | name: str 22 | age: int 23 | 24 | 25 | user: UserExtract = client.chat.completions.create( 26 | model="mistralai/Mixtral-8x7B-Instruct-v0.1", 27 | response_model=UserExtract, 28 | messages=[ 29 | {"role": "user", "content": "Extract jason is 25 years old"}, 30 | ], 31 | ) # type: ignore 32 | 33 | print(user) 34 | # > { 35 | # >     "name": "Jason", 36 | # >     "age": 25, 37 | # > } 38 | -------------------------------------------------------------------------------- /examples/patching/oai.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | 3 | from openai import OpenAI 4 | from pydantic import BaseModel 5 | 6 | 7 | # By default, the patch function will patch the ChatCompletion.create and ChatCompletion.acreate methods to support the response_model parameter. 8 | client = instructor.from_openai( 9 | OpenAI(), 10 | mode=instructor.Mode.TOOLS, 11 | ) 12 | 13 | 14 | # Now, we can use the response_model parameter using only a base model 15 | # rather than having to use the OpenAISchema class 16 | class UserExtract(BaseModel): 17 | name: str 18 | age: int 19 | 20 | 21 | user: UserExtract = client.chat.completions.create( 22 | model="gpt-3.5-turbo", 23 | response_model=UserExtract, 24 | messages=[ 25 | {"role": "user", "content": "Extract jason is 25 years old"}, 26 | ], 27 | ) # type: ignore 28 | 29 | print(user) 30 | # > { 31 | # >     "name": "Jason", 32 | # >     "age": 25, 33 | # > } 34 | -------------------------------------------------------------------------------- /examples/patching/together.py: -------------------------------------------------------------------------------- 1 | import os 2 | import openai 3 | from pydantic import BaseModel 4 | import instructor 5 | 6 | client = openai.OpenAI( 7 | base_url="https://api.together.xyz/v1", 8 | api_key=os.environ["TOGETHER_API_KEY"], 9 | ) 10 | 11 | 12 | # By default, the patch function will patch the ChatCompletion.create and ChatCompletion.acreate methods
to support the response_model parameter. 13 | client = instructor.from_openai(client, mode=instructor.Mode.TOOLS) 14 | 15 | 16 | # Now, we can use the response_model parameter using only a base model 17 | # rather than having to use the OpenAISchema class 18 | class UserExtract(BaseModel): 19 | name: str 20 | age: int 21 | 22 | 23 | user: UserExtract = client.chat.completions.create( 24 | model="mistralai/Mixtral-8x7B-Instruct-v0.1", 25 | response_model=UserExtract, 26 | messages=[ 27 | {"role": "user", "content": "Extract jason is 25 years old"}, 28 | ], 29 | ) # type: ignore 30 | 31 | print(user.model_dump_json(indent=2)) 32 | # > { 33 | # >     "name": "Jason", 34 | # >     "age": 25, 35 | # > } 36 | -------------------------------------------------------------------------------- /examples/proscons/run.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | from pydantic import BaseModel, Field 3 | 4 | import instructor 5 | 6 | 7 | class Character(BaseModel): 8 | name: str 9 | age: int 10 | fact: list[str] = Field(..., description="A list of facts about the character") 11 | 12 | 13 | # enables `response_model` in create call 14 | client = instructor.from_openai( 15 | OpenAI( 16 | base_url="http://localhost:11434/v1", 17 | api_key="ollama",  # required, but unused 18 | ), 19 | mode=instructor.Mode.JSON, 20 | ) 21 | 22 | resp = client.chat.completions.create( 23 | model="llama2", 24 | messages=[ 25 | { 26 | "role": "user", 27 | "content": "Tell me about Harry Potter", 28 | } 29 | ], 30 | response_model=Character, 31 | ) 32 | print(resp.model_dump_json(indent=2)) 33 | """ 34 | { 35 | "name": "Harry James Potter", 36 | "age": 37, 37 | "fact": [ 38 | "He is the chosen one.", 39 | "He has a lightning-shaped scar on his forehead.", 40 | "He is the son of James and Lily Potter.", 41 | "He attended Hogwarts School of Witchcraft and Wizardry.", 42 | "He is a skilled wizard and sorcerer.", 43 | "He fought against Lord Voldemort and his followers.", 44 | "He has a pet owl named Snowy."
45 | ] 46 | } 47 | """ 48 | -------------------------------------------------------------------------------- /examples/query_planner_execution/diagram.py: -------------------------------------------------------------------------------- 1 | import erdantic as erd 2 | 3 | from query_planner_execution import QueryPlan 4 | 5 | diagram = erd.create(QueryPlan) 6 | diagram.draw("examples/query_planner_execution/schema.png") 7 | -------------------------------------------------------------------------------- /examples/query_planner_execution/schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/query_planner_execution/schema.png -------------------------------------------------------------------------------- /examples/recursive_filepaths/diagram.py: -------------------------------------------------------------------------------- 1 | import erdantic as erd 2 | 3 | from parse_recursive_paths import DirectoryTree 4 | 5 | diagram = erd.create(DirectoryTree) 6 | diagram.draw("examples/recursive_filepaths/schema.png") 7 | -------------------------------------------------------------------------------- /examples/recursive_filepaths/schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/recursive_filepaths/schema.png -------------------------------------------------------------------------------- /examples/resolving-complex-entities/entity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/resolving-complex-entities/entity.png -------------------------------------------------------------------------------- /examples/retry/run.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, field_validator 2 | from openai import OpenAI 3 | import instructor 4 | import tenacity 5 | 6 | client = OpenAI() 7 | client = instructor.from_openai(client) 8 | 9 | 10 | class User(BaseModel): 11 | name: str 12 | age: int 13 | 14 | @field_validator("name") 15 | def name_is_uppercase(cls, v: str): 16 | assert v.isupper(), "Name must be uppercase" 17 | return v 18 | 19 | 20 | resp = client.messages.create( 21 | model="gpt-3.5-turbo", 22 | max_tokens=1024, 23 | max_retries=tenacity.Retrying( 24 | stop=tenacity.stop_after_attempt(3), 25 | before=lambda _: print("before:", _), 26 | after=lambda _: print("after:", _), 27 | ), 28 | messages=[ 29 | { 30 | "role": "user", 31 | "content": "Extract John is 18 years old.", 32 | } 33 | ], 34 | response_model=User, 35 | ) # type: ignore 36 | 37 | assert isinstance(resp, User) 38 | assert resp.name == "JOHN" # due to validation 39 | assert resp.age == 18 40 | print(resp) 41 | 42 | """ 43 | before: 44 | after: 48 | before: 49 | 50 | name='JOHN' age=18 51 | """ 52 | -------------------------------------------------------------------------------- /examples/safer_sql_example/diagram.py: -------------------------------------------------------------------------------- 1 | import erdantic as erd 2 | 3 | from safe_sql import SQL 4 | 5 | diagram = erd.create(SQL) 6 | diagram.draw("examples/safer_sql_example/schema.png") 7 | --------------------------------------------------------------------------------
/examples/safer_sql_example/schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/safer_sql_example/schema.png -------------------------------------------------------------------------------- /examples/simple-extraction/maybe_user.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | 3 | from openai import OpenAI 4 | from pydantic import BaseModel, Field 5 | from typing import Optional 6 | 7 | client = instructor.from_openai(OpenAI()) 8 | 9 | 10 | class UserDetail(BaseModel): 11 | age: int 12 | name: str 13 | role: Optional[str] = Field(default=None) 14 | 15 | 16 | MaybeUser = instructor.Maybe(UserDetail) 17 | 18 | 19 | def get_user_detail(string) -> MaybeUser:  # type: ignore 20 | return client.chat.completions.create( 21 | model="gpt-3.5-turbo-0613", 22 | response_model=MaybeUser, 23 | messages=[ 24 | { 25 | "role": "user", 26 | "content": f"Get user details for {string}", 27 | }, 28 | ], 29 | ) # type: ignore 30 | 31 | 32 | user = get_user_detail("Jason is 25 years old") 33 | print(user.model_dump_json(indent=2)) 34 | """ 35 | { 36 | "user": { 37 | "age": 25, 38 | "name": "Jason", 39 | "role": null 40 | }, 41 | "error": false, 42 | "message": null 43 | } 44 | """ 45 | 46 | user = get_user_detail("Jason is a 25 years old scientist") 47 | print(user.model_dump_json(indent=2)) 48 | """ 49 | { 50 | "user": { 51 | "age": 25, 52 | "name": "Jason", 53 | "role": "scientist" 54 | }, 55 | "error": false, 56 | "message": null 57 | } 58 | """ 59 | 60 | # ! notice that the string contains no user details 61 | # ! and Maybe correctly reports an error instead of inventing a user 62 | user = get_user_detail("User not found") 63 | print(user.model_dump_json(indent=2)) 64 | """ 65 | { 66 | "user": null, 67 | "error": true, 68 | "message": "User not found" 69 | } 70 | """ 71 | 72 | # ! due to the __bool__ method, you can use the MaybeUser object as a boolean 73 | 74 | if not user: 75 | print("Detected error") 76 | """ 77 | Detected error 78 | """ 79 | -------------------------------------------------------------------------------- /examples/simple-extraction/user.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | 3 | from openai import OpenAI 4 | from pydantic import BaseModel, Field 5 | from typing import Optional 6 | 7 | client = instructor.from_openai(OpenAI()) 8 | 9 | 10 | class UserDetail(BaseModel): 11 | age: int 12 | name: str 13 | role: Optional[str] = Field(default=None) 14 | 15 | 16 | def get_user_detail(string) -> UserDetail: 17 | return client.chat.completions.create( 18 | model="gpt-3.5-turbo-0613", 19 | response_model=UserDetail, 20 | messages=[ 21 | { 22 | "role": "user", 23 | "content": f"Get user details for {string}", 24 | }, 25 | ], 26 | ) # type: ignore 27 | 28 | 29 | user = get_user_detail("Jason is 25 years old") 30 | print(user.model_dump_json(indent=2)) 31 | """ 32 | { 33 | "age": 25, 34 | "name": "Jason", 35 | "role": null 36 | } 37 | """ 38 | 39 | user = get_user_detail("Jason is a 25 years old scientist") 40 | print(user.model_dump_json(indent=2)) 41 | """ 42 | { 43 | "age": 25, 44 | "name": "Jason", 45 | "role": "scientist" 46 | } 47 | """ 48 | 49 | # ! notice that the string does not contain any user details, 50 | # ! but a name and age were still hallucinated ?!
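# (A mitigation for this hallucination is shown in maybe_user.py above:
# wrapping UserDetail in instructor.Maybe lets the model set error=true and
# return user=null instead of inventing a person.)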
51 | user = get_user_detail("User not found") 52 | print(user.model_dump_json(indent=2)) 53 | """ 54 | { 55 | "age": 25, 56 | "name": "John Doe", 57 | "role": "null" 58 | } 59 | """ 60 | -------------------------------------------------------------------------------- /examples/sqlmodel/run.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | from openai import OpenAI 3 | from typing import Optional 4 | from sqlmodel import Field, SQLModel, create_engine, Session 5 | 6 | 7 | # Define the model that will serve as a Table for the database 8 | class Hero(SQLModel, instructor.OpenAISchema, table=True): 9 | id: Optional[int] = Field(default=None, primary_key=True) 10 | name: str 11 | secret_name: str 12 | age: Optional[int] = None 13 | 14 | 15 | # Function to query OpenAI for a Hero record 16 | client = instructor.from_openai(OpenAI()) 17 | 18 | 19 | def create_hero() -> Hero: 20 | return client.chat.completions.create( 21 | model="gpt-3.5-turbo", 22 | response_model=Hero, 23 | messages=[ 24 | {"role": "user", "content": "Make a new superhero"}, 25 | ], 26 | ) 27 | 28 | 29 | # Insert the response into the database 30 | engine = create_engine("sqlite:///database.db") 31 | SQLModel.metadata.create_all(engine) 32 | 33 | hero = create_hero() 34 | print(hero.model_dump()) 35 | 36 | 37 | with Session(engine) as session: 38 | session.add(hero) 39 | session.commit() 40 | -------------------------------------------------------------------------------- /examples/synethic-data/run.py: -------------------------------------------------------------------------------- 1 | import openai 2 | import instructor 3 | from collections.abc import Iterable 4 | from pydantic import BaseModel, ConfigDict 5 | 6 | client = instructor.from_openai(openai.OpenAI()) 7 | 8 | 9 | class SyntheticQA(BaseModel): 10 | question: str 11 | answer: str 12 | 13 | model_config = ConfigDict( 14 | json_schema_extra={ 15 | "examples": [ 16 | {"question": "What is the capital of France?", "answer": "Paris"}, 17 | { 18 | "question": "What is the largest planet in our solar system?", 19 | "answer": "Jupiter", 20 | }, 21 | { 22 | "question": "Who wrote 'To Kill a Mockingbird'?", 23 | "answer": "Harper Lee", 24 | }, 25 | { 26 | "question": "What element does 'O' represent on the periodic table?", 27 | "answer": "Oxygen", 28 | }, 29 | ] 30 | } 31 | ) 32 | 33 | 34 | def get_synthetic_data() -> Iterable[SyntheticQA]: 35 | return client.chat.completions.create( 36 | model="gpt-3.5-turbo", 37 | messages=[ 38 | {"role": "system", "content": "Generate synthetic examples"}, 39 | { 40 | "role": "user", 41 | "content": "Generate the exact examples you see in the examples of this prompt. ", 42 | }, 43 | ], 44 | response_model=Iterable[SyntheticQA], 45 | ) # type: ignore 46 | 47 | 48 | if __name__ == "__main__": 49 | for example in get_synthetic_data(): 50 | print(example) 51 | """ 52 | question='What is the capital of France?' answer='Paris' 53 | question='What is the largest planet in our solar system?' answer='Jupiter' 54 | question="Who wrote 'To Kill a Mockingbird'?" answer='Harper Lee' 55 | question="What element does 'O' represent on the periodic table?" 
answer='Oxygen' 56 | """ 57 | -------------------------------------------------------------------------------- /examples/task_planner/diagram.py: -------------------------------------------------------------------------------- 1 | import erdantic as erd 2 | 3 | from task_planner_topological_sort import TaskPlan 4 | 5 | diagram = erd.create(TaskPlan) 6 | diagram.draw("examples/task_planner/schema.png") 7 | -------------------------------------------------------------------------------- /examples/task_planner/schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/task_planner/schema.png -------------------------------------------------------------------------------- /examples/union/run.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, Field 2 | from typing import Union 3 | import instructor 4 | from openai import OpenAI 5 | 6 | 7 | class Search(BaseModel): 8 | """Search action class with a 'query' field and a process method.""" 9 | 10 | query: str = Field(description="The search query") 11 | 12 | def process(self): 13 | """Process the search action.""" 14 | return f"Search method called for query: {self.query}" 15 | 16 | 17 | class Lookup(BaseModel): 18 | """Lookup action class with a 'keyword' field and a process method.""" 19 | 20 | keyword: str = Field(description="The lookup keyword") 21 | 22 | def process(self): 23 | """Process the lookup action.""" 24 | return f"Lookup method called for keyword: {self.keyword}" 25 | 26 | 27 | class Finish(BaseModel): 28 | """Finish action class with an 'answer' field and a process method.""" 29 | 30 | answer: str = Field(description="The answer for finishing the process") 31 | 32 | def process(self): 33 | """Process the finish action.""" 34 | return f"Finish method called with answer: {self.answer}" 35 | 36 | 37 | # Union of Search, Lookup, and Finish 38 | class TakeAction(BaseModel): 39 | action: Union[Search, Lookup, Finish] 40 | 41 | def process(self): 42 | """Process the action.""" 43 | return self.action.process() 44 | 45 | 46 | try: 47 | # Enables `response_model` 48 | client = instructor.from_openai(OpenAI()) 49 | action = client.chat.completions.create( 50 | model="gpt-3.5-turbo", 51 | response_model=TakeAction, 52 | messages=[ 53 | {"role": "user", "content": "Please choose one action"}, 54 | ], 55 | ) 56 | assert isinstance(action, TakeAction), "The action is not TakeAction" 57 | print(action.process()) 58 | except Exception as e: 59 | print(f"An error occurred: {e}") 60 | -------------------------------------------------------------------------------- /examples/validated-multiclass/output.json: -------------------------------------------------------------------------------- 1 | { 2 | "texts": [ 3 | "What is your phone number?", 4 | "What is your email address?", 5 | "What is your address?", 6 | "What is your privacy policy?"
7 | ], 8 | "predictions": [ 9 | { 10 | "id": 1, 11 | "name": "phone" 12 | }, 13 | { 14 | "id": 2, 15 | "name": "email" 16 | }, 17 | { 18 | "id": 3, 19 | "name": "address" 20 | }, 21 | { 22 | "id": 4, 23 | "name": "Other" 24 | } 25 | ] 26 | } -------------------------------------------------------------------------------- /examples/validators/allm_validator.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from typing import Annotated 3 | from pydantic import BaseModel, BeforeValidator 4 | from instructor import llm_validator, patch 5 | from openai import AsyncOpenAI 6 | 7 | aclient = AsyncOpenAI() 8 | 9 | patch() 10 | 11 | 12 | class QuestionAnswerNoEvil(BaseModel): 13 | question: str 14 | answer: Annotated[ 15 | str, 16 | BeforeValidator( 17 | llm_validator("don't say objectionable things", allow_override=True) 18 | ), 19 | ] 20 | 21 | 22 | async def main(): 23 | context = "The meaning of life according to the devil is to live a life of sin and debauchery." 24 | question = "What is the meaning of life?" 25 | 26 | try: 27 | qa: QuestionAnswerNoEvil = await aclient.chat.completions.create( 28 | model="gpt-3.5-turbo", 29 | response_model=QuestionAnswerNoEvil, 30 | max_retries=2, 31 | messages=[ 32 | { 33 | "role": "system", 34 | "content": "You are a system that answers questions based on the context. Answer exactly what the question asks using the context.", 35 | }, 36 | { 37 | "role": "user", 38 | "content": f"using the context: {context}\n\nAnswer the following question: {question}", 39 | }, 40 | ], 41 | ) # type: ignore 42 | print(qa) 43 | except Exception as e: 44 | print(e) 45 | 46 | 47 | if __name__ == "__main__": 48 | asyncio.run(main()) 49 | -------------------------------------------------------------------------------- /examples/validators/annotator.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | from pydantic import BaseModel, ValidationError 3 | from pydantic.functional_validators import AfterValidator 4 | 5 | 6 | def name_must_contain_space(v: str) -> str: 7 | if " " not in v: 8 | raise ValueError("name must be a first and last name separated by a space") 9 | return v.lower() 10 | 11 | 12 | class UserDetail(BaseModel): 13 | age: int 14 | name: Annotated[str, AfterValidator(name_must_contain_space)] 15 | 16 | 17 | # Example 1) Valid input, notice that the name is lowercased 18 | person: UserDetail = UserDetail(age=29, name="Jason Liu") 19 | print(person.model_dump_json(indent=2)) 20 | """ 21 | { 22 | "age": 29, 23 | "name": "jason liu" 24 | } 25 | """ 26 | 27 | # Example 2) Invalid input, we'll get a validation error 28 | # In the future this validation error will be raised by the API and 29 | # used by the LLM to generate a better response 30 | try: 31 | person: UserDetail = UserDetail(age=29, name="Jason") 32 | except ValidationError as e: 33 | print(e) 34 | """ 35 | 1 validation error for UserDetail 36 | name 37 | Value error, name must be a first and last name separated by a space [type=value_error, input_value='Jason', input_type=str] 38 | For further information visit https://errors.pydantic.dev/2.3/v/value_error 39 | """ 40 | -------------------------------------------------------------------------------- /examples/validators/citations.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | from pydantic import BaseModel, ValidationError, ValidationInfo, AfterValidator 3 | from openai import
OpenAI 4 | import instructor 5 | 6 | client = instructor.from_openai(OpenAI()) 7 | 8 | 9 | def citation_exists(v: str, info: ValidationInfo): 10 | context = info.context 11 | if context: 12 | context = context.get("text_chunk") 13 | if v not in context: 14 | raise ValueError(f"Citation `{v}` not found in text") 15 | return v 16 | 17 | 18 | Citation = Annotated[str, AfterValidator(citation_exists)] 19 | 20 | 21 | class AnswerWithCitation(BaseModel): 22 | answer: str 23 | citation: Citation 24 | 25 | 26 | try: 27 | q = "Are blue berries high in protein?" 28 | text_chunk = """ 29 | Blueberries are a good source of vitamin K. 30 | They also contain vitamin C, fibre, manganese and other antioxidants (notably anthocyanins). 31 | """ 32 | 33 | resp = client.chat.completions.create( 34 | model="gpt-3.5-turbo", 35 | response_model=AnswerWithCitation, 36 | messages=[ 37 | { 38 | "role": "user", 39 | "content": f"Answer the question `{q}` using the text chunk\n`{text_chunk}`", 40 | }, 41 | ], 42 | validation_context={"text_chunk": text_chunk}, 43 | ) # type: ignore 44 | print(resp.model_dump_json(indent=2)) 45 | except ValidationError as e: 46 | print(e) 47 | -------------------------------------------------------------------------------- /examples/validators/competitors.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | from pydantic import BaseModel, ValidationError, AfterValidator 3 | from openai import OpenAI 4 | 5 | import instructor 6 | 7 | client = instructor.from_openai(OpenAI()) 8 | 9 | 10 | def no_competitors(v: str) -> str: 11 | # does not allow mentions of McDonald's competitors 12 | competitors = ["burger king", "wendy's", "carl's jr", "jack in the box"] 13 | for competitor in competitors: 14 | if competitor in v.lower(): 15 | raise ValueError( 16 | f"""Let them know that you work for McDonalds and are only allowed to talk about McDonalds. 17 | Do not apologize. Do not even mention `{competitor}` since they are a competitor of McDonalds""" 18 | ) 19 | return v 20 | 21 | 22 | class Response(BaseModel): 23 | message: Annotated[str, AfterValidator(no_competitors)] 24 | 25 | 26 | try: 27 | resp = client.chat.completions.create( 28 | model="gpt-3.5-turbo", 29 | response_model=Response, 30 | max_retries=2, 31 | messages=[ 32 | { 33 | "role": "user", 34 | "content": "What is your favourite order at burger king?", 35 | }, 36 | ], 37 | ) # type: ignore 38 | print(resp.model_dump_json(indent=2)) 39 | except ValidationError as e: 40 | print(e) 41 | -------------------------------------------------------------------------------- /examples/validators/field_validator.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, ValidationError, field_validator 2 | 3 | 4 | class UserDetail(BaseModel): 5 | age: int 6 | name: str 7 | 8 | @field_validator("name", mode="before") 9 | def name_must_contain_space(cls, v): 10 | """ 11 | This validator runs before Pydantic's own field validation, 12 | and will raise a validation error if the name does not contain a space;
13 | then it will lowercase the name. 14 | """ 15 | if " " not in v: 16 | raise ValueError("name must be a first and last name separated by a space") 17 | return v.lower() 18 | 19 | 20 | # Example 1) Valid input, notice that the name is lowercased 21 | person = UserDetail(age=29, name="Jason Liu") 22 | print(person.model_dump_json(indent=2)) 23 | """ 24 | { 25 | "age": 29, 26 | "name": "jason liu" 27 | } 28 | """ 29 | 30 | # Example 2) Invalid input, we'll get a validation error 31 | # In the future this validation error will be raised by the API and 32 | # used by the LLM to generate a better response 33 | try: 34 | person = UserDetail(age=29, name="Jason") 35 | except ValidationError as e: 36 | print(e) 37 | """ 38 | 1 validation error for UserDetail 39 | name 40 | Value error, name must be a first and last name separated by a space [type=value_error, input_value='Jason', input_type=str] 41 | For further information visit https://errors.pydantic.dev/2.3/v/value_error 42 | """ 43 | -------------------------------------------------------------------------------- /examples/validators/just_a_guy.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, ValidationError, field_validator, ValidationInfo 2 | 3 | 4 | class AnswerWithCitation(BaseModel): 5 | answer: str 6 | citation: str 7 | 8 | @field_validator("citation") 9 | @classmethod 10 | def citation_must_exist(cls, v: str, info: ValidationInfo): 11 | context = info.context 12 | if context: 13 | text_chunks = context.get("text_chunk") 14 | if v not in text_chunks: 15 | raise ValueError(f"Citation `{v}` not found in text chunks") 16 | return v 17 | 18 | 19 | try: 20 | AnswerWithCitation.model_validate( 21 | {"answer": "Jason is a cool guy", "citation": "Jason is cool"}, 22 | context={"text_chunk": "Jason is just a guy"}, 23 | ) 24 | except ValidationError as e: 25 | print(e) 26 | """ 27 | 1 validation error for AnswerWithCitation 28 | citation 29 | Value error, Citation `Jason is cool` not found in text chunks [type=value_error, input_value='Jason is cool', input_type=str] 30 | For further information visit https://errors.pydantic.dev/2.4/v/value_error 31 | """ 32 | -------------------------------------------------------------------------------- /examples/validators/moderation.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | 3 | from instructor import openai_moderation 4 | 5 | from typing import Annotated 6 | from pydantic import BaseModel, AfterValidator 7 | from openai import OpenAI 8 | 9 | client = instructor.from_openai(OpenAI()) 10 | 11 | 12 | class Response(BaseModel): 13 | message: Annotated[str, AfterValidator(openai_moderation(client=client))] 14 | 15 | 16 | response = Response(message="I want to make them suffer the consequences") 17 | -------------------------------------------------------------------------------- /examples/watsonx/watsonx.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import litellm 4 | from litellm import completion 5 | from pydantic import BaseModel, Field 6 | 7 | import instructor 8 | from instructor import Mode 9 | 10 | litellm.drop_params = True # watsonx.ai doesn't support `json_mode` 11 | 12 | os.environ["WATSONX_URL"] = "https://us-south.ml.cloud.ibm.com" 13 | os.environ["WATSONX_API_KEY"] = "" 14 | os.environ["WATSONX_PROJECT_ID"] = "" 15 | # Additional options: https://docs.litellm.ai/docs/providers/watsonx 16 | 17 | 18 | class Company(BaseModel): 19 |
name: str = Field(description="name of the company") 20 | year_founded: int = Field(description="year the company was founded") 21 | 22 | 23 | client = instructor.from_litellm(completion, mode=Mode.JSON) 24 | 25 | resp = client.chat.completions.create( 26 | model="watsonx/meta-llama/llama-3-8b-instruct", 27 | max_tokens=1024, 28 | messages=[ 29 | { 30 | "role": "user", 31 | "content": """\ 32 | Given the following text, create a Company object: 33 | 34 | IBM was founded in 1911 as the Computing-Tabulating-Recording Company (CTR), a holding company of manufacturers of record-keeping and measuring systems. 35 | """, 36 | } 37 | ], 38 | project_id=os.environ["WATSONX_PROJECT_ID"], 39 | response_model=Company, 40 | ) 41 | 42 | print(resp.model_dump_json(indent=2)) 43 | """ 44 | { 45 | "name": "IBM", 46 | "year_founded": 1911 47 | } 48 | """ 49 | -------------------------------------------------------------------------------- /instructor/_types/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/instructor/_types/__init__.py -------------------------------------------------------------------------------- /instructor/_types/_alias.py: -------------------------------------------------------------------------------- 1 | from typing import Literal 2 | 3 | from typing_extensions import TypeAlias 4 | 5 | ModelNames: TypeAlias = Literal[ 6 | "gpt-4o", 7 | "gpt-4-0125-preview", 8 | "gpt-4-turbo-preview", 9 | "gpt-4-1106-preview", 10 | "gpt-4-vision-preview", 11 | "gpt-4", 12 | "gpt-4-0314", 13 | "gpt-4-0613", 14 | "gpt-4-32k", 15 | "gpt-4-32k-0314", 16 | "gpt-4-32k-0613", 17 | "gpt-3.5-turbo", 18 | "gpt-3.5-turbo-16k", 19 | "gpt-3.5-turbo-0301", 20 | "gpt-3.5-turbo-0613", 21 | "gpt-3.5-turbo-1106", 22 | "gpt-3.5-turbo-0125", 23 | "gpt-3.5-turbo-16k-0613", 24 | "gpt-3.5-turbo-instruct", 25 | "text-embedding-ada-002", 26 | "text-embedding-ada-002-v2", 27 | "text-embedding-3-small", 28 | "text-embedding-3-large", 29 | ] 30 | -------------------------------------------------------------------------------- /instructor/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/instructor/cli/__init__.py -------------------------------------------------------------------------------- /instructor/cli/cli.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | import typer 3 | from typer import Typer, launch 4 | import instructor.cli.jobs as jobs 5 | import instructor.cli.files as files 6 | import instructor.cli.usage as usage 7 | import instructor.cli.deprecated_hub as hub 8 | import instructor.cli.batch as batch 9 | 10 | app: Typer = typer.Typer() 11 | 12 | app.add_typer(jobs.app, name="jobs", help="Monitor and create fine tuning jobs") 13 | app.add_typer(files.app, name="files", help="Manage files on OpenAI's servers") 14 | app.add_typer(usage.app, name="usage", help="Check OpenAI API usage data") 15 | app.add_typer( 16 | hub.app, name="hub", help="[DEPRECATED] The instructor hub is no longer available" 17 | ) 18 | app.add_typer(batch.app, name="batch", help="Manage OpenAI Batch jobs") 19 | 20 | 21 | @app.command() 22 | def docs( 23 | query: Optional[str] = typer.Argument(None, help="Search the documentation"), 24 | ) -> None: 25 | """ 26 | Open the instructor documentation 
website. 27 | """ 28 | if query: 29 | launch(f"https://python.useinstructor.com/?q={query}") 30 | else: 31 | launch("https://python.useinstructor.com/") 32 | 33 | 34 | if __name__ == "__main__": 35 | app() 36 | -------------------------------------------------------------------------------- /instructor/cli/deprecated_hub.py: -------------------------------------------------------------------------------- 1 | from typer import Exit, echo, Typer 2 | 3 | app: Typer = Typer(help="Instructor Hub CLI (Deprecated)") 4 | 5 | 6 | @app.command(name="hub") 7 | def hub() -> None: 8 | """ 9 | This command has been deprecated. The instructor hub is no longer available. 10 | Please refer to our cookbook examples at https://python.useinstructor.com/examples/ 11 | """ 12 | echo( 13 | "The instructor hub has been deprecated. Please refer to our cookbook examples at https://python.useinstructor.com/examples/" 14 | ) 15 | raise Exit(1) 16 | 17 | 18 | if __name__ == "__main__": 19 | app() 20 | -------------------------------------------------------------------------------- /instructor/client_groq.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import overload, Any 4 | 5 | import groq 6 | import instructor 7 | 8 | 9 | @overload 10 | def from_groq( 11 | client: groq.Groq, 12 | mode: instructor.Mode = instructor.Mode.TOOLS, 13 | **kwargs: Any, 14 | ) -> instructor.Instructor: ... 15 | 16 | 17 | @overload 18 | def from_groq( 19 | client: groq.AsyncGroq, 20 | mode: instructor.Mode = instructor.Mode.TOOLS, 21 | **kwargs: Any, 22 | ) -> instructor.AsyncInstructor: ... 23 | 24 | 25 | def from_groq( 26 | client: groq.Groq | groq.AsyncGroq, 27 | mode: instructor.Mode = instructor.Mode.TOOLS, 28 | **kwargs: Any, 29 | ) -> instructor.Instructor | instructor.AsyncInstructor: 30 | valid_modes = { 31 | instructor.Mode.JSON, 32 | instructor.Mode.TOOLS, 33 | } 34 | 35 | if mode not in valid_modes: 36 | from instructor.exceptions import ModeError 37 | 38 | raise ModeError( 39 | mode=str(mode), provider="Groq", valid_modes=[str(m) for m in valid_modes] 40 | ) 41 | 42 | if not isinstance(client, (groq.Groq, groq.AsyncGroq)): 43 | from instructor.exceptions import ClientError 44 | 45 | raise ClientError( 46 | f"Client must be an instance of groq.Groq or groq.AsyncGroq. " 47 | f"Got: {type(client).__name__}" 48 | ) 49 | 50 | if isinstance(client, groq.Groq): 51 | return instructor.Instructor( 52 | client=client, 53 | create=instructor.patch(create=client.chat.completions.create, mode=mode), 54 | provider=instructor.Provider.GROQ, 55 | mode=mode, 56 | **kwargs, 57 | ) 58 | 59 | else: 60 | return instructor.AsyncInstructor( 61 | client=client, 62 | create=instructor.patch(create=client.chat.completions.create, mode=mode), 63 | provider=instructor.Provider.GROQ, 64 | mode=mode, 65 | **kwargs, 66 | ) 67 | -------------------------------------------------------------------------------- /instructor/client_writer.py: -------------------------------------------------------------------------------- 1 | # Future imports to ensure compatibility with Python 3.9 2 | from __future__ import annotations 3 | 4 | 5 | import instructor 6 | from writerai import AsyncWriter, Writer 7 | from typing import overload, Any 8 | 9 | 10 | @overload 11 | def from_writer( 12 | client: Writer, 13 | mode: instructor.Mode = instructor.Mode.WRITER_TOOLS, 14 | **kwargs: Any, 15 | ) -> instructor.Instructor: ... 
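# The paired @overload stubs give type checkers precise return types for
# from_writer: a sync Writer client yields an Instructor, while an AsyncWriter
# yields an AsyncInstructor; the single runtime implementation follows them.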
16 | 17 | 18 | @overload 19 | def from_writer( 20 | client: AsyncWriter, 21 | mode: instructor.Mode = instructor.Mode.WRITER_TOOLS, 22 | **kwargs: Any, 23 | ) -> instructor.AsyncInstructor: ... 24 | 25 | 26 | def from_writer( 27 | client: Writer | AsyncWriter, 28 | mode: instructor.Mode = instructor.Mode.WRITER_TOOLS, 29 | **kwargs: Any, 30 | ) -> instructor.client.Instructor | instructor.client.AsyncInstructor: 31 | valid_modes = {instructor.Mode.WRITER_TOOLS, instructor.Mode.WRITER_JSON} 32 | 33 | if mode not in valid_modes: 34 | from instructor.exceptions import ModeError 35 | 36 | raise ModeError( 37 | mode=str(mode), provider="Writer", valid_modes=[str(m) for m in valid_modes] 38 | ) 39 | 40 | if not isinstance(client, (Writer, AsyncWriter)): 41 | from instructor.exceptions import ClientError 42 | 43 | raise ClientError( 44 | f"Client must be an instance of Writer or AsyncWriter. " 45 | f"Got: {type(client).__name__}" 46 | ) 47 | 48 | if isinstance(client, Writer): 49 | return instructor.Instructor( 50 | client=client, 51 | create=instructor.patch(create=client.chat.chat, mode=mode), 52 | provider=instructor.Provider.WRITER, 53 | mode=mode, 54 | **kwargs, 55 | ) 56 | 57 | return instructor.AsyncInstructor( 58 | client=client, 59 | create=instructor.patch(create=client.chat.chat, mode=mode), 60 | provider=instructor.Provider.WRITER, 61 | mode=mode, 62 | **kwargs, 63 | ) 64 | -------------------------------------------------------------------------------- /instructor/dsl/__init__.py: -------------------------------------------------------------------------------- 1 | from .iterable import IterableModel 2 | from .maybe import Maybe 3 | from .partial import Partial 4 | from .validators import llm_validator, openai_moderation 5 | from .citation import CitationMixin 6 | from .simple_type import is_simple_type, ModelAdapter 7 | 8 | __all__ = [ # noqa: F405 9 | "CitationMixin", 10 | "IterableModel", 11 | "Maybe", 12 | "Partial", 13 | "llm_validator", 14 | "openai_moderation", 15 | "is_simple_type", 16 | "ModelAdapter", 17 | ] 18 | -------------------------------------------------------------------------------- /instructor/py.typed: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /pyrightconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [ 3 | "instructor" 4 | ], 5 | "exclude": [ 6 | "instructor/client_bedrock.py", 7 | "instructor/client_cerebras.py" 8 | ], 9 | "typeCheckingMode": "basic" 10 | } -------------------------------------------------------------------------------- /requirements-doc.txt: -------------------------------------------------------------------------------- 1 | mkdocs 2 | cairosvg 3 | pillow 4 | mkdocs-minify-plugin 5 | mkdocstrings 6 | mkdocstrings-python 7 | mkdocs-jupyter 8 | mkdocs-redirects -------------------------------------------------------------------------------- /requirements-examples.txt: -------------------------------------------------------------------------------- 1 | openai>=1.1.0 2 | pydantic 3 | docstring-parser 4 | rich 5 | aiohttp 6 | ruff==0.11.13 7 | pre-commit==4.2.0 8 | pyright==1.1.401 9 | typer 10 | cohere 11 | datasets 12 | trafilatura -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/__init__.py -------------------------------------------------------------------------------- /tests/assets/gettysburg.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/assets/gettysburg.wav -------------------------------------------------------------------------------- /tests/assets/image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/assets/image.jpg -------------------------------------------------------------------------------- /tests/assets/invoice.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/assets/invoice.pdf -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | 3 | # Support .env for local development 4 | load_dotenv() 5 | -------------------------------------------------------------------------------- /tests/dsl/test_simple_type.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from instructor.dsl.simple_type import is_simple_type 3 | from pydantic import BaseModel 4 | from enum import Enum 5 | import typing 6 | 7 | 8 | class SimpleTypeTests(unittest.TestCase): 9 | def test_is_simple_type_with_base_model(self): 10 | class MyModel(BaseModel): 11 | label: str 12 | 13 | self.assertFalse(is_simple_type(MyModel)) 14 | 15 | def test_is_simple_type_with_str(self): 16 | self.assertTrue(is_simple_type(str)) 17 | 18 | def test_is_simple_type_with_int(self): 19 | self.assertTrue(is_simple_type(int)) 20 | 21 | def test_is_simple_type_with_float(self): 22 | self.assertTrue(is_simple_type(float)) 23 | 24 | def test_is_simple_type_with_bool(self): 25 | self.assertTrue(is_simple_type(bool)) 26 | 27 | def test_is_simple_type_with_enum(self): 28 | class MyEnum(Enum): 29 | VALUE = 1 30 | 31 | self.assertTrue(is_simple_type(MyEnum)) 32 | 33 | def test_is_simple_type_with_annotated(self): 34 | AnnotatedType = typing.Annotated[int, "example"] 35 | self.assertTrue(is_simple_type(AnnotatedType)) 36 | 37 | def test_is_simple_type_with_literal(self): 38 | LiteralType = typing.Literal[1, 2, 3] 39 | self.assertTrue(is_simple_type(LiteralType)) 40 | 41 | def test_is_simple_type_with_union(self): 42 | UnionType = typing.Union[int, str] 43 | self.assertTrue(is_simple_type(UnionType)) 44 | 45 | def test_is_simple_type_with_iterable(self): 46 | IterableType = typing.Iterable[int] 47 | self.assertFalse(is_simple_type(IterableType)) 48 | 49 | 50 | if __name__ == "__main__": 51 | unittest.main() 52 | -------------------------------------------------------------------------------- /tests/dsl/test_simple_type_fix.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import unittest 3 | from typing import Union, List # noqa: UP035 4 | from typing import get_origin, get_args 5 | from instructor.dsl.simple_type import is_simple_type 6 | 7 | 8 | class TestSimpleTypeFix(unittest.TestCase): 9 | def test_list_with_union_type(self): 10 | """Test 
that list[int | str] is correctly identified as a simple type.""" 11 | # This is the type that was failing in Python 3.10 12 | if sys.version_info < (3, 10): 13 | self.skipTest("Union pipe syntax is only available in Python 3.10+") 14 | response_model = list[int | str] 15 | self.assertTrue( 16 | is_simple_type(response_model), 17 | f"list[int | str] should be a simple type in Python {sys.version_info.major}.{sys.version_info.minor}. Instead it was identified as {type(response_model)} with origin {get_origin(response_model)} and args {get_args(response_model)}", 18 | ) 19 | 20 | def test_list_with_union_type_alternative_syntax(self): 21 | """Test that List[Union[int, str]] is correctly identified as a simple type.""" 22 | # Alternative syntax 23 | response_model = List[Union[int, str]] # noqa: UP006 24 | self.assertTrue( 25 | is_simple_type(response_model), 26 | f"List[Union[int, str]] should be a simple type in Python {sys.version_info.major}.{sys.version_info.minor}", 27 | ) 28 | 29 | -------------------------------------------------------------------------------- /tests/llm/test_anthropic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_anthropic/__init__.py -------------------------------------------------------------------------------- /tests/llm/test_anthropic/conftest.py: -------------------------------------------------------------------------------- 1 | # conftest.py 2 | from anthropic import AsyncAnthropic, Anthropic 3 | import pytest 4 | import os 5 | 6 | try: 7 | import braintrust 8 | 9 | wrap_anthropic = braintrust.wrap_anthropic 10 | except ImportError: 11 | 12 | def wrap_anthropic(x): 13 | return x 14 | 15 | 16 | @pytest.fixture(scope="session") 17 | def client(): 18 | if os.environ.get("BRAINTRUST_API_KEY"): 19 | yield wrap_anthropic( 20 | Anthropic( 21 | api_key=os.environ["BRAINTRUST_API_KEY"], 22 | base_url="https://braintrustproxy.com/v1", 23 | ) 24 | ) 25 | else: 26 | yield Anthropic() 27 | 28 | 29 | @pytest.fixture(scope="session") 30 | def aclient(): 31 | if os.environ.get("BRAINTRUST_API_KEY"): 32 | yield wrap_anthropic( 33 | AsyncAnthropic( 34 | api_key=os.environ["BRAINTRUST_API_KEY"], 35 | base_url="https://braintrustproxy.com/v1", 36 | ) 37 | ) 38 | else: 39 | yield AsyncAnthropic() 40 | -------------------------------------------------------------------------------- /tests/llm/test_anthropic/test_reasoning.py: -------------------------------------------------------------------------------- 1 | import anthropic 2 | import pytest 3 | import instructor 4 | from pydantic import BaseModel 5 | 6 | 7 | class Answer(BaseModel): 8 | answer: float 9 | 10 | 11 | modes = [ 12 | instructor.Mode.ANTHROPIC_REASONING_TOOLS, 13 | instructor.Mode.ANTHROPIC_JSON, 14 | ] 15 | 16 | 17 | @pytest.mark.parametrize("mode", modes) 18 | def test_reasoning(mode): 19 | anthropic_client = anthropic.Anthropic() 20 | client = instructor.from_anthropic(anthropic_client, mode=mode) 21 | response = client.chat.completions.create( 22 | model="claude-3-7-sonnet-latest", 23 | response_model=Answer, 24 | messages=[ 25 | { 26 | "role": "user", 27 | "content": "Which is larger, 9.11 or 9.8", 28 | }, 29 | ], 30 | temperature=1, 31 | max_tokens=2000, 32 | thinking={"type": "enabled", "budget_tokens": 1024}, 33 | ) 34 | 35 | # Assertions to validate the response 36 | assert isinstance(response, Answer) 37 | assert response.answer == 9.8 38 | 
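39 | 
40 | # Note: with extended thinking enabled, Anthropic requires `budget_tokens` to be
41 | # lower than `max_tokens` (hence 1024 < 2000 above). An async variant would
42 | # follow the same shape (sketch, assuming the same model and modes):
43 | #
44 | #     aclient = instructor.from_anthropic(anthropic.AsyncAnthropic(), mode=mode)
45 | #     response = await aclient.chat.completions.create(...)  # same kwargs as above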
-------------------------------------------------------------------------------- /tests/llm/test_anthropic/util.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | 3 | models = ["claude-3-5-haiku-20241022"] 4 | modes = [ 5 | instructor.Mode.ANTHROPIC_TOOLS, 6 | ] 7 | -------------------------------------------------------------------------------- /tests/llm/test_cerebras/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_cerebras/__init__.py -------------------------------------------------------------------------------- /tests/llm/test_cohere/conftest.py: -------------------------------------------------------------------------------- 1 | # conftest.py 2 | from cohere import Client, AsyncClient 3 | import pytest 4 | 5 | 6 | @pytest.fixture(scope="session") 7 | def client(): 8 | yield Client() 9 | 10 | 11 | @pytest.fixture(scope="session") 12 | def aclient(): 13 | yield AsyncClient() 14 | -------------------------------------------------------------------------------- /tests/llm/test_cohere/test_none_response.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from instructor import from_cohere 3 | 4 | 5 | def test_none_response_model(client): 6 | client = from_cohere(client, model_name="command-r", max_tokens=1000) 7 | 8 | response = client.messages.create( 9 | messages=[{"role": "user", "content": "Tell me about your day"}], 10 | response_model=None, 11 | temperature=0, 12 | ) 13 | 14 | assert response.text 15 | 16 | 17 | @pytest.mark.asyncio() 18 | async def test_none_response_model_async(aclient): 19 | async_client = from_cohere(aclient, model_name="command-r", max_tokens=1000) 20 | 21 | response = await async_client.messages.create( 22 | messages=[{"role": "user", "content": "Tell me about your day"}], 23 | response_model=None, 24 | temperature=0, 25 | ) 26 | 27 | assert response.text 28 | -------------------------------------------------------------------------------- /tests/llm/test_fireworks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_fireworks/__init__.py -------------------------------------------------------------------------------- /tests/llm/test_fireworks/test_format.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | from fireworks.client import Fireworks, AsyncFireworks 3 | from pydantic import BaseModel 4 | import pytest 5 | from .util import modes 6 | 7 | 8 | @pytest.mark.parametrize("mode, model", modes) 9 | def test_fireworks_sync(mode: instructor.Mode, model: str): 10 | class User(BaseModel): 11 | name: str 12 | age: int 13 | 14 | client = instructor.from_fireworks(Fireworks(), mode=mode) 15 | 16 | resp = client.chat.completions.create( 17 | model=model, 18 | messages=[ 19 | { 20 | "role": "user", 21 | "content": "Extract a user from this sentence : {{ name }} is {{ age }} and lives in Singapore", 22 | }, 23 | ], 24 | context={ 25 | "name": "Ivan", 26 | "age": 27, 27 | }, 28 | response_model=User, 29 | ) 30 | 31 | assert resp.name.lower() == "ivan" 32 | assert resp.age == 27 33 | 34 | 35 | @pytest.mark.parametrize("mode, model", modes) 36 | @pytest.mark.asyncio 37 | async def 
test_fireworks_async(mode: instructor.Mode, model: str): 38 | class User(BaseModel): 39 | name: str 40 | age: int 41 | 42 | client = instructor.from_fireworks(AsyncFireworks(), mode=mode) 43 | 44 | resp = await client.chat.completions.create( 45 | model=model, 46 | messages=[ 47 | { 48 | "role": "user", 49 | "content": "Extract a user from this sentence : {{ name }} is {{ age }} and lives in Singapore", 50 | }, 51 | ], 52 | context={ 53 | "name": "Ivan", 54 | "age": 27, 55 | }, 56 | response_model=User, 57 | ) 58 | 59 | assert resp.name.lower() == "ivan" 60 | assert resp.age == 27 61 | -------------------------------------------------------------------------------- /tests/llm/test_fireworks/util.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | 3 | modes = [ 4 | (instructor.Mode.FIREWORKS_JSON, "accounts/fireworks/models/llama-v3-70b-instruct"), 5 | (instructor.Mode.FIREWORKS_TOOLS, "accounts/fireworks/models/firefunction-v1"), 6 | ] 7 | -------------------------------------------------------------------------------- /tests/llm/test_gemini/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_gemini/__init__.py -------------------------------------------------------------------------------- /tests/llm/test_gemini/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from google import generativeai as genai 4 | 5 | 6 | @pytest.fixture(scope="session", autouse=True) 7 | def configure_genai(): 8 | api_key = os.getenv("GOOGLE_API_KEY") 9 | if not api_key: 10 | pytest.skip("GOOGLE_API_KEY environment variable not set") 11 | genai.configure(api_key=api_key) 12 | -------------------------------------------------------------------------------- /tests/llm/test_gemini/evals/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_gemini/evals/__init__.py -------------------------------------------------------------------------------- /tests/llm/test_gemini/evals/test_extract_users.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from itertools import product 3 | from pydantic import BaseModel 4 | import instructor 5 | import google.generativeai as genai 6 | from ..util import models, modes 7 | 8 | 9 | class UserDetails(BaseModel): 10 | name: str 11 | age: int 12 | 13 | 14 | # Lists for models, test data, and modes 15 | test_data = [ 16 | ("Jason is 10", "Jason", 10), 17 | ("Alice is 25", "Alice", 25), 18 | ("Bob is 35", "Bob", 35), 19 | ] 20 | 21 | 22 | @pytest.mark.parametrize("model, data, mode", product(models, test_data, modes)) 23 | def test_extract(model, data, mode): 24 | sample_data, expected_name, expected_age = data 25 | 26 | client = instructor.from_gemini(genai.GenerativeModel(model), mode=mode) 27 | 28 | # Calling the extract function with the provided model, sample data, and mode 29 | response = client.chat.completions.create( 30 | response_model=UserDetails, 31 | messages=[ 32 | {"role": "user", "content": sample_data}, 33 | ], 34 | ) 35 | 36 | # Assertions 37 | assert ( 38 | response.name == expected_name 39 | ), f"Expected name {expected_name}, got {response.name}" 40 | assert ( 41 | response.age == 
expected_age 42 | ), f"Expected age {expected_age}, got {response.age}" 43 | -------------------------------------------------------------------------------- /tests/llm/test_gemini/evals/test_sentiment_analysis.py: -------------------------------------------------------------------------------- 1 | import enum 2 | from itertools import product 3 | from pydantic import BaseModel 4 | import pytest 5 | import instructor 6 | import google.generativeai as genai 7 | from ..util import models, modes 8 | 9 | 10 | class Sentiment(str, enum.Enum): 11 | POSITIVE = "positive" 12 | NEGATIVE = "negative" 13 | NEUTRAL = "neutral" 14 | 15 | 16 | class SentimentAnalysis(BaseModel): 17 | sentiment: Sentiment 18 | 19 | 20 | test_data = [ 21 | ( 22 | "I absolutely love this product! It has exceeded all my expectations.", 23 | Sentiment.POSITIVE, 24 | ), 25 | ( 26 | "The service was terrible. I will never use this company again.", 27 | Sentiment.NEGATIVE, 28 | ), 29 | ( 30 | "The movie was okay. It had some good moments but overall it was average.", 31 | Sentiment.NEUTRAL, 32 | ), 33 | ] 34 | 35 | 36 | @pytest.mark.parametrize("model, data, mode", product(models, test_data, modes)) 37 | def test_sentiment_analysis(model, data, mode): 38 | sample_data, expected_sentiment = data 39 | 40 | client = instructor.from_gemini(genai.GenerativeModel(model), mode=mode) 41 | 42 | response = client.chat.completions.create( 43 | response_model=SentimentAnalysis, 44 | messages=[ 45 | { 46 | "role": "system", 47 | "content": "You are a sentiment analysis model. Analyze the sentiment of the given text and provide the sentiment (positive, negative, or neutral).", 48 | }, 49 | {"role": "user", "content": sample_data}, 50 | ], 51 | ) 52 | 53 | assert response.sentiment == expected_sentiment 54 | -------------------------------------------------------------------------------- /tests/llm/test_gemini/test_files/sample.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_gemini/test_files/sample.mp3 -------------------------------------------------------------------------------- /tests/llm/test_gemini/test_format.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | import google.generativeai as genai 3 | from pydantic import BaseModel 4 | from .util import models, modes 5 | 6 | 7 | class User(BaseModel): 8 | first_name: str 9 | age: int 10 | 11 | 12 | import pytest 13 | from itertools import product 14 | 15 | 16 | @pytest.mark.parametrize("model, mode, is_list", product(models, modes, [True, False])) 17 | def test_format_string(model: str, mode: instructor.Mode, is_list: bool): 18 | client = instructor.from_gemini( 19 | client=genai.GenerativeModel( 20 | model_name=model, 21 | system_instruction="You are a helpful assistant that excels at extracting user information.", 22 | ), 23 | mode=mode, 24 | ) 25 | 26 | content = ( 27 | ["Extract {{name}} is {{age}} years old."] 28 | if is_list 29 | else "Extract {{name}} is {{age}} years old." 
30 | ) 31 | 32 | # note that client.chat.completions.create will also work 33 | resp = client.messages.create( 34 | messages=[ 35 | { 36 | "role": "user", 37 | "content": content, 38 | } 39 | ], 40 | response_model=User, 41 | context={"name": "Jason", "age": 25}, 42 | ) 43 | 44 | assert isinstance(resp, User) 45 | assert resp.first_name == "Jason" 46 | assert resp.age == 25 47 | -------------------------------------------------------------------------------- /tests/llm/test_gemini/test_list_content.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | import google.generativeai as genai 3 | from pydantic import BaseModel 4 | 5 | 6 | class User(BaseModel): 7 | name: str 8 | age: int 9 | 10 | 11 | class UserList(BaseModel): 12 | items: list[User] 13 | 14 | 15 | def test_list_of_strings(): 16 | client = instructor.from_gemini( 17 | genai.GenerativeModel("gemini-1.5-flash-latest"), 18 | mode=instructor.Mode.GEMINI_JSON, 19 | ) 20 | 21 | users = [ 22 | { 23 | "name": "Jason", 24 | "age": 25, 25 | }, 26 | { 27 | "name": "Elizabeth", 28 | "age": 12, 29 | }, 30 | { 31 | "name": "Chris", 32 | "age": 27, 33 | }, 34 | ] 35 | 36 | prompt = """ 37 | Extract a list of users from the following text: 38 | 39 | {% for user in users %} 40 | - Name: {{ user.name }}, Age: {{ user.age }} 41 | {% endfor %} 42 | """ 43 | 44 | result = client.chat.completions.create( 45 | response_model=UserList, 46 | messages=[ 47 | {"role": "user", "content": prompt}, 48 | ], 49 | context={"users": users}, 50 | ) 51 | 52 | assert isinstance(result, UserList), "Result should be an instance of UserList" 53 | assert isinstance(result.items, list), "items should be a list" 54 | assert len(result.items) == 3, "List should contain 3 items" 55 | 56 | names = [item.name.upper() for item in result.items] 57 | assert "JASON" in names, "'JASON' should be in the list" 58 | assert "ELIZABETH" in names, "'ELIZABETH' should be in the list" 59 | assert "CHRIS" in names, "'CHRIS' should be in the list" 60 | -------------------------------------------------------------------------------- /tests/llm/test_gemini/test_retries.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | from pydantic import AfterValidator, BaseModel, Field 3 | import pytest 4 | import instructor 5 | from itertools import product 6 | import google.generativeai as genai 7 | 8 | from .util import models, modes 9 | 10 | 11 | def uppercase_validator(v): 12 | if v.islower(): 13 | raise ValueError("Name must be ALL CAPS") 14 | return v 15 | 16 | 17 | class UserDetail(BaseModel): 18 | name: Annotated[str, AfterValidator(uppercase_validator)] = Field( 19 | ..., description="The name of the user" 20 | ) 21 | age: int 22 | 23 | 24 | @pytest.mark.parametrize("model, mode", product(models, modes)) 25 | def test_upper_case(model, mode): 26 | client = instructor.from_gemini(genai.GenerativeModel(model), mode=mode) 27 | response = client.chat.completions.create( 28 | response_model=UserDetail, 29 | messages=[ 30 | {"role": "user", "content": "Extract `jason is 12`"}, 31 | ], 32 | max_retries=3, 33 | ) 34 | assert response.name == "JASON" 35 | 36 | 37 | @pytest.mark.parametrize("model, mode", product(models, modes)) 38 | def test_upper_case_tenacity(model, mode): 39 | client = instructor.from_gemini(genai.GenerativeModel(model), mode=mode) 40 | from tenacity import Retrying, stop_after_attempt, wait_fixed 41 | 42 | retries = Retrying( 43 | stop=stop_after_attempt(2), 
44 | wait=wait_fixed(1), 45 | ) 46 | 47 | response = client.chat.completions.create( 48 | response_model=UserDetail, 49 | messages=[ 50 | {"role": "user", "content": "Extract `jason is 12`"}, 51 | ], 52 | max_retries=retries, 53 | ) 54 | assert response.name == "JASON" 55 | -------------------------------------------------------------------------------- /tests/llm/test_gemini/test_roles.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | import google.generativeai as genai 3 | from pydantic import BaseModel 4 | 5 | roles = [ 6 | "system", 7 | "user", 8 | "assistant", 9 | ] 10 | 11 | 12 | def test_roles(): 13 | client = instructor.from_gemini( 14 | client=genai.GenerativeModel( 15 | model_name="models/gemini-1.5-flash-latest", 16 | ), 17 | mode=instructor.Mode.GEMINI_JSON, 18 | ) 19 | 20 | class Description(BaseModel): 21 | description: str 22 | 23 | for role in roles: 24 | resp = client.create( 25 | response_model=Description, 26 | messages=[ 27 | { 28 | "role": role, 29 | "content": "Describe what a sunset in the desert looks like.", 30 | }, 31 | { 32 | "role": "user", 33 | "content": "Please adhere to the instructions", 34 | }, 35 | ], 36 | ) 37 | 38 | assert isinstance(resp, Description) 39 | -------------------------------------------------------------------------------- /tests/llm/test_gemini/test_simple_types.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | import enum 3 | 4 | import google.generativeai as genai 5 | from typing import Literal 6 | 7 | 8 | def test_literal(): 9 | client = instructor.from_gemini( 10 | genai.GenerativeModel("models/gemini-1.5-flash-latest") 11 | ) 12 | 13 | response = client.chat.completions.create( 14 | response_model=Literal["1231", "212", "331"], 15 | messages=[ 16 | { 17 | "role": "user", 18 | "content": "Produce a Random but correct response given the desired output", 19 | }, 20 | ], 21 | ) 22 | assert response in ["1231", "212", "331"] 23 | 24 | 25 | def test_enum(): 26 | class Options(enum.Enum): 27 | A = "A" 28 | B = "B" 29 | C = "C" 30 | 31 | client = instructor.from_gemini( 32 | genai.GenerativeModel("models/gemini-1.5-flash-latest") 33 | ) 34 | 35 | response = client.chat.completions.create( 36 | response_model=Options, 37 | messages=[ 38 | { 39 | "role": "user", 40 | "content": "Produce a Random but correct response given the desired output", 41 | }, 42 | ], 43 | ) 44 | assert response in [Options.A, Options.B, Options.C] 45 | 46 | 47 | def test_bool(): 48 | client = instructor.from_gemini( 49 | genai.GenerativeModel("models/gemini-1.5-flash-latest") 50 | ) 51 | 52 | response = client.chat.completions.create( 53 | response_model=bool, 54 | messages=[ 55 | { 56 | "role": "user", 57 | "content": "Produce a Random but correct response given the desired output", 58 | }, 59 | ], 60 | ) 61 | assert type(response) == bool 62 | -------------------------------------------------------------------------------- /tests/llm/test_gemini/test_stream.py: -------------------------------------------------------------------------------- 1 | from itertools import product 2 | from collections.abc import Iterable 3 | from pydantic import BaseModel 4 | import pytest 5 | import instructor 6 | import google.generativeai as genai 7 | from instructor.dsl.partial import Partial 8 | 9 | from .util import models, modes 10 | 11 | 12 | class UserExtract(BaseModel): 13 | name: str 14 | age: int 15 | 16 | 17 | @pytest.mark.parametrize("model, mode, stream", 
product(models, modes, [True, False])) 18 | def test_iterable_model(model, mode, stream): 19 | client = instructor.from_gemini(genai.GenerativeModel(model), mode=mode) 20 | model = client.chat.completions.create( 21 | response_model=Iterable[UserExtract], 22 | max_retries=2, 23 | stream=stream, 24 | messages=[ 25 | {"role": "user", "content": "Make two up people"}, 26 | ], 27 | ) 28 | for m in model: 29 | assert isinstance(m, UserExtract) 30 | 31 | 32 | @pytest.mark.parametrize("model,mode", product(models, modes)) 33 | def test_partial_model(model, mode): 34 | client = instructor.from_gemini(genai.GenerativeModel(model), mode=mode) 35 | model = client.chat.completions.create( 36 | response_model=Partial[UserExtract], 37 | max_retries=2, 38 | stream=True, 39 | messages=[ 40 | {"role": "user", "content": "{{ name }} is {{ age }} years old"}, 41 | ], 42 | context={"name": "Jason", "age": 12}, 43 | ) 44 | final_model = None 45 | for m in model: 46 | assert isinstance(m, UserExtract) 47 | final_model = m 48 | 49 | assert final_model.age == 12 50 | assert final_model.name == "Jason" 51 | -------------------------------------------------------------------------------- /tests/llm/test_gemini/util.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | 3 | models: list[str] = ["models/gemini-1.5-flash-8b"] 4 | modes = [instructor.Mode.GEMINI_TOOLS, instructor.Mode.GEMINI_JSON] 5 | -------------------------------------------------------------------------------- /tests/llm/test_genai/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_genai/__init__.py -------------------------------------------------------------------------------- /tests/llm/test_genai/conftest.py: -------------------------------------------------------------------------------- 1 | # conftest.py 2 | from google.genai import Client 3 | import pytest 4 | 5 | 6 | @pytest.fixture(scope="function") 7 | def client(): 8 | yield Client() 9 | 10 | 11 | @pytest.fixture(scope="function") 12 | def aclient(): 13 | yield Client() 14 | -------------------------------------------------------------------------------- /tests/llm/test_genai/test_retries.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | from pydantic import AfterValidator, BaseModel, Field 3 | import pytest 4 | import instructor 5 | from itertools import product 6 | from .util import models, modes 7 | 8 | 9 | def uppercase_validator(v): 10 | if v.islower(): 11 | raise ValueError("Name must be ALL CAPS") 12 | return v 13 | 14 | 15 | class UserDetail(BaseModel): 16 | name: Annotated[str, AfterValidator(uppercase_validator)] = Field( 17 | ..., description="The name of the user" 18 | ) 19 | age: int 20 | 21 | 22 | @pytest.mark.parametrize("model, mode", product(models, modes)) 23 | def test_upper_case(model, mode, client): 24 | client = instructor.from_genai(client, mode=mode) 25 | response = client.chat.completions.create( 26 | model=model, 27 | response_model=UserDetail, 28 | messages=[ 29 | {"role": "user", "content": "Extract `jason is 12`"}, 30 | ], 31 | max_retries=3, 32 | ) 33 | assert response.name == "JASON" 34 | 35 | 36 | @pytest.mark.parametrize("model, mode", product(models, modes)) 37 | def test_upper_case_tenacity(model, mode, client): 38 | client = instructor.from_genai(client, mode=mode) 39 | from 
tenacity import Retrying, stop_after_attempt, wait_fixed 40 | 41 | retries = Retrying( 42 | stop=stop_after_attempt(2), 43 | wait=wait_fixed(1), 44 | ) 45 | 46 | response = client.chat.completions.create( 47 | model=model, 48 | response_model=UserDetail, 49 | messages=[ 50 | {"role": "user", "content": "Extract `jason is 12`"}, 51 | ], 52 | max_retries=retries, 53 | ) 54 | assert response.name == "JASON" 55 | -------------------------------------------------------------------------------- /tests/llm/test_genai/test_simple.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pydantic import BaseModel 3 | import instructor 4 | from .util import models, modes 5 | 6 | 7 | class User(BaseModel): 8 | name: str 9 | age: int 10 | 11 | 12 | class Users(BaseModel): 13 | users: list[User] 14 | 15 | 16 | @pytest.mark.parametrize("model", models) 17 | @pytest.mark.parametrize("mode", modes) 18 | def test_simple_extraction(client, model, mode): 19 | client = instructor.from_genai(client, mode=mode) 20 | response = client.chat.completions.create( 21 | model=model, 22 | messages=[ 23 | { 24 | "role": "user", 25 | "content": "Ivan is 28 years old", 26 | }, 27 | ], 28 | response_model=Users, 29 | ) 30 | assert isinstance(response, Users) 31 | assert len(response.users) > 0 32 | assert response.users[0].name == "Ivan" 33 | assert response.users[0].age == 28 34 | 35 | 36 | @pytest.mark.asyncio 37 | @pytest.mark.parametrize("model", models) 38 | @pytest.mark.parametrize("mode", modes) 39 | async def test_simple_extraction_async(aclient, model, mode): 40 | aclient = instructor.from_genai(aclient, mode=mode, use_async=True) 41 | response = await aclient.chat.completions.create( 42 | model=model, 43 | messages=[ 44 | { 45 | "role": "user", 46 | "content": "Ivan is 28 years old", 47 | }, 48 | ], 49 | response_model=Users, 50 | ) 51 | assert isinstance(response, Users) 52 | assert len(response.users) > 0 53 | assert response.users[0].name == "Ivan" 54 | assert response.users[0].age == 28 55 | -------------------------------------------------------------------------------- /tests/llm/test_genai/util.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | 3 | models = ["gemini-2.0-flash"] 4 | modes = [instructor.Mode.GENAI_STRUCTURED_OUTPUTS, instructor.Mode.GENAI_TOOLS] 5 | -------------------------------------------------------------------------------- /tests/llm/test_litellm.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | from litellm import acompletion, completion 3 | 4 | 5 | def test_litellm_create(): 6 | client = instructor.from_litellm(completion) 7 | 8 | assert isinstance(client, instructor.Instructor) 9 | 10 | 11 | def test_async_litellm_create(): 12 | client = instructor.from_litellm(acompletion) 13 | 14 | assert isinstance(client, instructor.AsyncInstructor) 15 | -------------------------------------------------------------------------------- /tests/llm/test_mistral/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_mistral/__init__.py -------------------------------------------------------------------------------- /tests/llm/test_mistral/conftest.py: -------------------------------------------------------------------------------- 1 | # conftest.py 2 | import pytest 3 | import os 4 
| from mistralai import Mistral
 5 | 
 6 | 
 7 | @pytest.fixture(scope="function")
 8 | def client():
 9 |     yield Mistral(api_key=os.environ["MISTRAL_API_KEY"])
10 | 
11 | 
12 | @pytest.fixture(scope="function")
13 | def aclient():
14 |     yield Mistral(api_key=os.environ["MISTRAL_API_KEY"])
15 | 
--------------------------------------------------------------------------------
/tests/llm/test_mistral/test_multimodal.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from pydantic import BaseModel
 3 | import instructor
 4 | from .util import modes, models
 5 | 
 6 | pdf_url = "https://raw.githubusercontent.com/instructor-ai/instructor/main/tests/assets/invoice.pdf"
 7 | 
 8 | 
 9 | class Invoice(BaseModel):
10 |     total: float
11 |     items: list[str]
12 | 
13 | 
14 | @pytest.mark.parametrize("mode", modes)
15 | @pytest.mark.parametrize("model", models)
16 | def test_mistral_multimodal_pdf(client, model, mode):
17 |     client = instructor.from_mistral(client, mode=mode)
18 |     response = client.chat.completions.create(
19 |         model=model,
20 |         messages=[
21 |             {
22 |                 "role": "user",
23 |                 "content": [
24 |                     "Extract information from the invoice.",
25 |                     instructor.multimodal.PDF.from_url(pdf_url),
26 |                 ],
27 |             }
28 |         ],
29 |         response_model=Invoice,
30 |     )
31 |     assert response.total == 220
32 |     assert len(response.items) == 2
33 | 
34 | 
35 | @pytest.mark.parametrize("mode", modes)
36 | @pytest.mark.parametrize("model", models)
37 | @pytest.mark.asyncio
38 | async def test_mistral_multimodal_pdf_async(aclient, model, mode):
39 |     client = instructor.from_mistral(aclient, mode=mode, use_async=True)
40 |     response = await client.chat.completions.create(
41 |         model=model,
42 |         messages=[
43 |             {
44 |                 "role": "user",
45 |                 "content": [
46 |                     "Extract information from the invoice.",
47 |                     instructor.multimodal.PDF.from_url(pdf_url),
48 |                 ],
49 |             }
50 |         ],
51 |         response_model=Invoice,
52 |     )
53 |     assert response.total == 220
54 |     assert len(response.items) == 2
55 | 
--------------------------------------------------------------------------------
/tests/llm/test_mistral/test_retries.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from pydantic import BaseModel, field_validator
 3 | import instructor
 4 | from .util import modes, models
 5 | 
 6 | 
 7 | class User(BaseModel):
 8 |     name: str
 9 |     age: int
10 | 
11 |     @field_validator("age")
12 |     def validate_age(cls, v):
13 |         if v > 0:
14 |             raise ValueError(
15 |                 "Age must be expressed as a negative number (Eg. 
25 is -25 )" 16 | ) 17 | return v 18 | 19 | 20 | @pytest.mark.parametrize("mode", modes) 21 | @pytest.mark.parametrize("model", models) 22 | def test_mistral_retry_validation(client, model, mode): 23 | patched_client = instructor.from_mistral(client, mode=mode) 24 | 25 | # Test extracting structured data with validation that should trigger retry 26 | response = patched_client.chat.completions.create( 27 | model=model, 28 | messages=[{"role": "user", "content": "Ivan is 25 years old"}], 29 | response_model=User, 30 | ) 31 | 32 | # Validate response has correct negative age after retry 33 | assert isinstance(response, User) 34 | assert response.name == "Ivan" 35 | assert response.age == -25 36 | 37 | 38 | @pytest.mark.asyncio 39 | @pytest.mark.parametrize("mode", modes) 40 | @pytest.mark.parametrize("model", models) 41 | async def test_mistral_retry_validation_async(aclient, model, mode): 42 | patched_client = instructor.from_mistral(aclient, mode=mode, use_async=True) 43 | 44 | # Test extracting structured data with validation that should trigger retry 45 | response = await patched_client.chat.completions.create( 46 | model=model, 47 | messages=[{"role": "user", "content": "Jack is 28 years old"}], 48 | response_model=User, 49 | ) 50 | 51 | # Validate response has correct negative age after retry 52 | assert isinstance(response, User) 53 | assert response.name == "Jack" 54 | assert response.age == -28 55 | -------------------------------------------------------------------------------- /tests/llm/test_mistral/util.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | 3 | models: list[str] = ["ministral-8b-latest"] 4 | modes = [instructor.Mode.MISTRAL_STRUCTURED_OUTPUTS, instructor.Mode.MISTRAL_TOOLS] 5 | -------------------------------------------------------------------------------- /tests/llm/test_openai/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_openai/__init__.py -------------------------------------------------------------------------------- /tests/llm/test_openai/conftest.py: -------------------------------------------------------------------------------- 1 | # conftest.py 2 | from openai import AsyncOpenAI, OpenAI 3 | import pytest 4 | import os 5 | 6 | try: 7 | import braintrust 8 | 9 | wrap_openai = braintrust.wrap_openai 10 | except ImportError: 11 | 12 | def wrap_openai(x): 13 | return x 14 | 15 | 16 | @pytest.fixture(scope="function") 17 | def client(): 18 | if os.environ.get("BRAINTRUST_API_KEY"): 19 | yield wrap_openai( 20 | OpenAI( 21 | api_key=os.environ["BRAINTRUST_API_KEY"], 22 | base_url="https://braintrustproxy.com/v1", 23 | ) 24 | ) 25 | else: 26 | yield OpenAI() 27 | 28 | 29 | @pytest.fixture(scope="function") 30 | def aclient(): 31 | if os.environ.get("BRAINTRUST_API_KEY"): 32 | yield wrap_openai( 33 | AsyncOpenAI( 34 | api_key=os.environ["BRAINTRUST_API_KEY"], 35 | base_url="https://braintrustproxy.com/v1", 36 | ) 37 | ) 38 | else: 39 | yield AsyncOpenAI() 40 | -------------------------------------------------------------------------------- /tests/llm/test_openai/docs/test_concepts.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pytest_examples import find_examples, CodeExample, EvalExample 3 | 4 | 5 | @pytest.mark.parametrize("example", find_examples("docs/concepts"), ids=str) 6 | def 
test_format_concepts(example: CodeExample, eval_example: EvalExample): 7 | if eval_example.update_examples: 8 | eval_example.format(example) 9 | eval_example.run_print_update(example) 10 | else: 11 | eval_example.lint(example) 12 | eval_example.run(example) 13 | -------------------------------------------------------------------------------- /tests/llm/test_openai/docs/test_docs.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pytest_examples import find_examples, CodeExample, EvalExample 3 | 4 | 5 | @pytest.mark.parametrize("example", find_examples("README.md"), ids=str) 6 | def test_readme(example: CodeExample, eval_example: EvalExample): 7 | if eval_example.update_examples: 8 | eval_example.format(example) 9 | else: 10 | eval_example.lint(example) 11 | 12 | 13 | @pytest.mark.parametrize("example", find_examples("docs/index.md"), ids=str) 14 | def test_index(example: CodeExample, eval_example: EvalExample): 15 | if eval_example.update_examples: 16 | eval_example.format(example) 17 | else: 18 | eval_example.lint(example) 19 | -------------------------------------------------------------------------------- /tests/llm/test_openai/docs/test_examples.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pytest_examples import find_examples, CodeExample, EvalExample 3 | import glob 4 | import os 5 | 6 | exclusions = ["ollama.md", "watsonx.md", "local_classification.md"] 7 | 8 | markdown_files = [ 9 | file 10 | for file in glob.glob("docs/examples/*.md") 11 | if os.path.basename(file) not in exclusions 12 | ] 13 | 14 | code_examples = [] 15 | 16 | for markdown_file in markdown_files: 17 | code_examples.extend(find_examples(markdown_file)) 18 | 19 | 20 | @pytest.mark.parametrize("example", code_examples, ids=str) 21 | def test_index(example: CodeExample, eval_example: EvalExample): 22 | if eval_example.update_examples: 23 | eval_example.format(example) 24 | eval_example.run_print_update(example) 25 | else: 26 | eval_example.lint(example) 27 | -------------------------------------------------------------------------------- /tests/llm/test_openai/docs/test_hub.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pytest_examples import CodeExample, EvalExample 3 | 4 | 5 | @pytest.mark.skip(reason="Hub functionality is being removed") 6 | def test_format_blog(example: CodeExample, eval_example: EvalExample) -> None: 7 | """This test is being skipped as the hub functionality is being removed.""" 8 | excluded_sources: list[str] = [ 9 | "mistral", 10 | "ollama", 11 | "llama_cpp", 12 | "groq", 13 | "youtube", 14 | "contact", 15 | "langsmith", 16 | ] # sources that are not supported in testing 17 | if any(source in example.source for source in excluded_sources): 18 | return 19 | 20 | if eval_example.update_examples: 21 | eval_example.format(example) 22 | eval_example.run_print_update(example) 23 | else: 24 | eval_example.lint(example) 25 | eval_example.run(example) 26 | -------------------------------------------------------------------------------- /tests/llm/test_openai/docs/test_mkdocs.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import pytest 3 | 4 | 5 | # Note the use of `str`, makes for pretty output 6 | @pytest.mark.parametrize( 7 | "fpath", pathlib.Path("docs/examples").glob("**/*.md"), ids=str 8 | ) 9 | @pytest.mark.skip(reason="This test is not yet 
implemented")
10 | def test_files_good(fpath):
11 |     from mktestdocs import check_md_file
12 | 
13 |     check_md_file(fpath=fpath, memory=True)
14 | 
--------------------------------------------------------------------------------
/tests/llm/test_openai/docs/test_posts.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from pytest_examples import find_examples, CodeExample, EvalExample
 3 | 
 4 | 
 5 | @pytest.mark.parametrize("example", find_examples("docs/blog/posts"), ids=str)
 6 | def test_index(example: CodeExample, eval_example: EvalExample):
 7 |     if eval_example.update_examples:
 8 |         eval_example.format(example)
 9 |         eval_example.run_print_update(example)
10 |     else:
11 |         eval_example.lint(example)
12 | 
--------------------------------------------------------------------------------
/tests/llm/test_openai/docs/test_prompt_tips.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from pytest_examples import find_examples, CodeExample, EvalExample
 3 | 
 4 | 
 5 | @pytest.mark.parametrize("example", find_examples("docs/prompting"), ids=str)
 6 | @pytest.mark.skip(reason="Skipping this for now")
 7 | def test_format_concepts(example: CodeExample, eval_example: EvalExample):
 8 |     if eval_example.update_examples:
 9 |         eval_example.format(example)
10 |         # eval_example.run_print_update(example)
11 |     else:
12 |         eval_example.lint(example)
13 |         # eval_example.run(example)
14 | 
--------------------------------------------------------------------------------
/tests/llm/test_openai/evals/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_openai/evals/__init__.py
--------------------------------------------------------------------------------
/tests/llm/test_openai/evals/readme.md:
--------------------------------------------------------------------------------
 1 | # How to Contribute: Writing and Running Evaluation Tests
 2 | 
 3 | We welcome contributions that expand our suite of evaluation tests for data extraction. This guide explains how to write such tests with `pytest`, `pydantic`, and related tools, with a focus on broad coverage and on understanding failure modes.
 4 | 
 5 | ## Define Test Scenarios
 6 | 
 7 | Identify the data extraction scenarios that matter to you, then create test cases with inputs and expected outputs.
 8 | 
 9 | See `test_extract_users.py` for a reference: it extracts users across all models and modes, with each test parameterized over the model, the mode, and an (input, expected output) pair. A minimal skeleton you can adapt is sketched below.
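10 | 
11 | The following sketch is illustrative only: the `Fruit` model and sample text are placeholders, while `models`, `modes`, and the `client` fixture are the shared ones used throughout this suite.
12 | 
13 | ```python
14 | import pytest
15 | from itertools import product
16 | from pydantic import BaseModel
17 | import instructor
18 | from ..util import models, modes
19 | 
20 | 
21 | class Fruit(BaseModel):  # placeholder response model
22 |     name: str
23 | 
24 | 
25 | test_data = [("I ate an apple", "apple")]
26 | 
27 | 
28 | @pytest.mark.parametrize("model, data, mode", product(models, test_data, modes))
29 | def test_extract_fruit(model, data, mode, client):
30 |     sample, expected = data
31 |     patched = instructor.from_openai(client, mode=mode)
32 |     response = patched.chat.completions.create(
33 |         model=model,
34 |         response_model=Fruit,
35 |         messages=[{"role": "user", "content": sample}],
36 |     )
37 |     assert response.name.lower() == expected
38 | ```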
10 | -------------------------------------------------------------------------------- /tests/llm/test_openai/evals/test_extract_users.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from itertools import product 3 | from pydantic import BaseModel 4 | import instructor 5 | from instructor.function_calls import Mode 6 | from ..util import models, modes 7 | 8 | 9 | class UserDetails(BaseModel): 10 | name: str 11 | age: int 12 | 13 | 14 | # Lists for models, test data, and modes 15 | test_data = [ 16 | ("Jason is 10", "Jason", 10), 17 | ("Alice is 25", "Alice", 25), 18 | ("Bob is 35", "Bob", 35), 19 | ] 20 | 21 | 22 | @pytest.mark.parametrize("model, data, mode", product(models, test_data, modes)) 23 | def test_extract(model, data, mode, client): 24 | sample_data, expected_name, expected_age = data 25 | 26 | if (mode, model) in { 27 | (Mode.JSON, "gpt-3.5-turbo"), 28 | (Mode.JSON, "gpt-4"), 29 | }: 30 | pytest.skip(f"{mode} mode is not supported for {model}, skipping test") 31 | 32 | # Setting up the client with the instructor patch 33 | client = instructor.from_openai(client, mode=mode) 34 | 35 | # Calling the extract function with the provided model, sample data, and mode 36 | response = client.chat.completions.create( 37 | model=model, 38 | response_model=UserDetails, 39 | messages=[ 40 | {"role": "user", "content": sample_data}, 41 | ], 42 | ) 43 | 44 | # Assertions 45 | assert ( 46 | response.name == expected_name 47 | ), f"Expected name {expected_name}, got {response.name}" 48 | assert ( 49 | response.age == expected_age 50 | ), f"Expected age {expected_age}, got {response.age}" 51 | -------------------------------------------------------------------------------- /tests/llm/test_openai/evals/test_sentiment_analysis.py: -------------------------------------------------------------------------------- 1 | import enum 2 | from itertools import product 3 | from pydantic import BaseModel 4 | import pytest 5 | import instructor 6 | from instructor.function_calls import Mode 7 | from ..util import models, modes 8 | 9 | 10 | class Sentiment(str, enum.Enum): 11 | POSITIVE = "positive" 12 | NEGATIVE = "negative" 13 | NEUTRAL = "neutral" 14 | 15 | 16 | class SentimentAnalysis(BaseModel): 17 | sentiment: Sentiment 18 | 19 | 20 | test_data = [ 21 | ( 22 | "I absolutely love this product! It has exceeded all my expectations.", 23 | Sentiment.POSITIVE, 24 | ), 25 | ( 26 | "The service was terrible. I will never use this company again.", 27 | Sentiment.NEGATIVE, 28 | ), 29 | ( 30 | "The movie was okay. It had some good moments but overall it was average.", 31 | Sentiment.NEUTRAL, 32 | ), 33 | ] 34 | 35 | 36 | @pytest.mark.parametrize("model, data, mode", product(models, test_data, modes)) 37 | def test_sentiment_analysis(model, data, mode, client): 38 | sample_data, expected_sentiment = data 39 | 40 | if (mode, model) in { 41 | (Mode.JSON, "gpt-3.5-turbo"), 42 | (Mode.JSON, "gpt-4"), 43 | }: 44 | pytest.skip(f"{mode} mode is not supported for {model}, skipping test") 45 | 46 | client = instructor.from_openai(client, mode=mode) 47 | 48 | response = client.chat.completions.create( 49 | model=model, 50 | response_model=SentimentAnalysis, 51 | messages=[ 52 | { 53 | "role": "system", 54 | "content": "You are a sentiment analysis model. 
Analyze the sentiment of the given text and provide the sentiment (positive, negative, or neutral).", 55 | }, 56 | {"role": "user", "content": sample_data}, 57 | ], 58 | ) 59 | 60 | assert response.sentiment == expected_sentiment 61 | -------------------------------------------------------------------------------- /tests/llm/test_openai/test_attr.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | import openai 3 | import pytest 4 | 5 | 6 | def test_has_embedding(): 7 | oai = openai.OpenAI() 8 | client = instructor.from_openai(oai) 9 | 10 | embedding = client.embeddings.create( 11 | input="Hello world", model="text-embedding-3-small" 12 | ) 13 | assert embedding is not None, "The 'embeddings' attribute is None." 14 | 15 | 16 | @pytest.mark.asyncio 17 | async def test_has_embedding_async(): 18 | oai = openai.AsyncOpenAI() 19 | client = instructor.from_openai(oai) 20 | 21 | # Check if the 'embeddings' attribute can be accessed through the client 22 | embedding = await client.embeddings.create( 23 | input="Hello world", model="text-embedding-3-small" 24 | ) 25 | assert embedding is not None, "The 'embeddings' attribute is None." 26 | -------------------------------------------------------------------------------- /tests/llm/test_openai/util.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | 3 | models = ["gpt-4o-mini"] 4 | modes = [ 5 | instructor.Mode.TOOLS, 6 | instructor.Mode.TOOLS_STRICT, 7 | instructor.Mode.RESPONSES_TOOLS, 8 | instructor.Mode.RESPONSES_TOOLS_WITH_INBUILT_TOOLS, 9 | ] 10 | -------------------------------------------------------------------------------- /tests/llm/test_perplexity/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_perplexity/__init__.py -------------------------------------------------------------------------------- /tests/llm/test_perplexity/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from openai import OpenAI 5 | 6 | 7 | @pytest.fixture(scope="session") 8 | def client(): 9 | if os.environ.get("PERPLEXITY_API_KEY"): 10 | yield OpenAI( 11 | api_key=os.environ["PERPLEXITY_API_KEY"], 12 | base_url="https://api.perplexity.ai", 13 | ) 14 | -------------------------------------------------------------------------------- /tests/llm/test_perplexity/util.py: -------------------------------------------------------------------------------- 1 | from instructor import Mode 2 | 3 | models = ["sonar", "sonar-pro"] 4 | modes = [Mode.PERPLEXITY_JSON] 5 | -------------------------------------------------------------------------------- /tests/llm/test_vertexai/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_vertexai/__init__.py -------------------------------------------------------------------------------- /tests/llm/test_vertexai/test_deprecated_async.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from unittest.mock import patch, MagicMock 3 | from pydantic import BaseModel 4 | from instructor.client_vertexai import from_vertexai 5 | from instructor.exceptions import ConfigurationError 6 | 7 | class 
User(BaseModel): 8 | name: str 9 | age: int 10 | 11 | @patch('instructor.client_vertexai.isinstance', return_value=True) 12 | def test_deprecated_async_warning(_): 13 | """Test that using _async parameter raises a deprecation warning.""" 14 | mock_model = MagicMock() 15 | mock_model.generate_content = MagicMock() 16 | mock_model.generate_content_async = MagicMock() 17 | 18 | with pytest.warns(DeprecationWarning, match="'_async' is deprecated. Use 'use_async' instead."): 19 | client = from_vertexai( 20 | mock_model, 21 | _async=True 22 | ) 23 | 24 | @patch('instructor.client_vertexai.isinstance', return_value=True) 25 | def test_both_async_params_error(_): 26 | """Test that providing both _async and use_async raises an error.""" 27 | mock_model = MagicMock() 28 | mock_model.generate_content = MagicMock() 29 | mock_model.generate_content_async = MagicMock() 30 | 31 | with pytest.raises(ConfigurationError, match="Cannot provide both '_async' and 'use_async'. Use 'use_async' instead."): 32 | client = from_vertexai( 33 | mock_model, 34 | _async=True, 35 | use_async=True 36 | ) 37 | -------------------------------------------------------------------------------- /tests/llm/test_vertexai/test_format.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | from pydantic import BaseModel 3 | from .util import models, modes 4 | import pytest 5 | from itertools import product 6 | import vertexai.generative_models as gm 7 | 8 | 9 | class User(BaseModel): 10 | name: str 11 | age: int 12 | 13 | 14 | @pytest.mark.parametrize("model, mode, is_list", product(models, modes, [True, False])) 15 | def test_format_string(model, mode, is_list): 16 | client = instructor.from_vertexai( 17 | gm.GenerativeModel(model), 18 | mode=mode, 19 | ) 20 | 21 | content = ( 22 | [gm.Part.from_text("Extract {{name}} is {{age}} years old.")] 23 | if is_list 24 | else "Extract {{name}} is {{age}} years old." 25 | ) 26 | 27 | # note that client.chat.completions.create will also work 28 | resp = client.messages.create( 29 | messages=[ 30 | { 31 | "role": "user", 32 | "content": content, 33 | } 34 | ], 35 | response_model=User, 36 | context={"name": "Jason", "age": 25}, 37 | ) 38 | 39 | assert isinstance(resp, User) 40 | assert resp.name == "Jason" 41 | assert resp.age == 25 42 | -------------------------------------------------------------------------------- /tests/llm/test_vertexai/test_message_parser.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import vertexai.generative_models as gm 3 | from instructor.client_vertexai import vertexai_message_parser 4 | 5 | 6 | def test_vertexai_message_parser_string_content(): 7 | message = {"role": "user", "content": "Hello, world!"} 8 | result = vertexai_message_parser(message) 9 | 10 | assert isinstance(result, gm.Content) 11 | assert result.role == "user" 12 | assert len(result.parts) == 1 13 | assert isinstance(result.parts[0], gm.Part) 14 | assert result.parts[0].text == "Hello, world!" 
15 | 16 | 17 | def test_vertexai_message_parser_list_content(): 18 | message = { 19 | "role": "user", 20 | "content": [ 21 | "Hello, ", 22 | gm.Part.from_text("world!"), 23 | gm.Part.from_text(" How are you?"), 24 | ], 25 | } 26 | result = vertexai_message_parser(message) 27 | 28 | assert isinstance(result, gm.Content) 29 | assert result.role == "user" 30 | assert len(result.parts) == 3 31 | assert isinstance(result.parts[0], gm.Part) 32 | assert isinstance(result.parts[1], gm.Part) 33 | assert isinstance(result.parts[2], gm.Part) 34 | assert result.parts[0].text == "Hello, " 35 | assert result.parts[1].text == "world!" 36 | assert result.parts[2].text == " How are you?" 37 | 38 | 39 | def test_vertexai_message_parser_invalid_content(): 40 | message = {"role": "user", "content": 123} # Invalid content type 41 | 42 | with pytest.raises(ValueError, match="Unsupported message content type"): 43 | vertexai_message_parser(message) 44 | 45 | 46 | def test_vertexai_message_parser_invalid_list_item(): 47 | message = {"role": "user", "content": ["Hello", 123, gm.Part.from_text("world!")]} 48 | 49 | with pytest.raises(ValueError, match="Unsupported content type in list"): 50 | vertexai_message_parser(message) 51 | -------------------------------------------------------------------------------- /tests/llm/test_vertexai/test_retries.py: -------------------------------------------------------------------------------- 1 | from itertools import product 2 | from typing import Annotated, cast 3 | from pydantic import AfterValidator, BaseModel, Field 4 | import pytest 5 | import instructor 6 | import vertexai.generative_models as gm # type: ignore 7 | 8 | from .util import models, modes 9 | 10 | 11 | def uppercase_validator(v: str): 12 | if v.islower(): 13 | raise ValueError("Name must be ALL CAPS") 14 | return v 15 | 16 | 17 | class UserDetail(BaseModel): 18 | name: Annotated[str, AfterValidator(uppercase_validator)] = Field( 19 | ..., description="The name of the user" 20 | ) 21 | age: int 22 | 23 | 24 | @pytest.mark.parametrize("model, mode", product(models, modes)) 25 | def test_upper_case(model, mode): 26 | client = instructor.from_vertexai(gm.GenerativeModel(model), mode) 27 | response = client.create( 28 | response_model=UserDetail, 29 | messages=[ 30 | {"role": "user", "content": "Extract `jason is 12`"}, 31 | ], 32 | max_retries=3, 33 | ) 34 | assert response.name == "JASON" 35 | 36 | 37 | @pytest.mark.parametrize("model, mode", product(models, modes)) 38 | def test_upper_case_tenacity(model, mode): 39 | client = instructor.from_vertexai(gm.GenerativeModel(model), mode) 40 | from tenacity import Retrying, stop_after_attempt, wait_fixed 41 | 42 | retries = Retrying( 43 | stop=stop_after_attempt(2), 44 | wait=wait_fixed(1), 45 | ) 46 | 47 | retries = cast(int, retries) 48 | 49 | response = client.create( 50 | response_model=UserDetail, 51 | messages=[ 52 | {"role": "user", "content": "Extract `jason is 12`"}, 53 | ], 54 | max_retries=retries, 55 | ) 56 | assert response.name == "JASON" 57 | -------------------------------------------------------------------------------- /tests/llm/test_vertexai/test_simple_types.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | import pytest 3 | import enum 4 | import vertexai.generative_models as gm # type: ignore 5 | from itertools import product 6 | from typing import Literal 7 | 8 | from .util import models, modes 9 | 10 | 11 | @pytest.mark.parametrize("model, mode", product(models, modes)) 12 | def 
def test_literal(model, mode):
    client = instructor.from_vertexai(gm.GenerativeModel(model), mode)

    response = client.create(
        response_model=Literal["1231", "212", "331"],
        messages=[
            {
                "role": "user",
                "content": "Produce a Random but correct response given the desired output",
            },
        ],
    )
    assert response in ["1231", "212", "331"]


@pytest.mark.parametrize("model, mode", product(models, modes))
def test_enum(model, mode):
    class Options(enum.Enum):
        A = "A"
        B = "B"
        C = "C"

    client = instructor.from_vertexai(gm.GenerativeModel(model), mode)

    response = client.create(
        response_model=Options,
        messages=[
            {
                "role": "user",
                "content": "Produce a Random but correct response given the desired output",
            },
        ],
    )
    assert response in [Options.A, Options.B, Options.C]


@pytest.mark.parametrize("model, mode", product(models, modes))
def test_bool(model, mode):
    client = instructor.from_vertexai(gm.GenerativeModel(model), mode)

    response = client.create(
        response_model=bool,
        messages=[
            {
                "role": "user",
                "content": "Produce a Random but correct response given the desired output",
            },
        ],
    )
    assert isinstance(response, bool)
--------------------------------------------------------------------------------
/tests/llm/test_vertexai/util.py:
--------------------------------------------------------------------------------
import instructor

models = ["gemini-2.0-flash"]
modes = [instructor.Mode.VERTEXAI_TOOLS, instructor.Mode.VERTEXAI_JSON]
--------------------------------------------------------------------------------
/tests/llm/test_writer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_writer/__init__.py
--------------------------------------------------------------------------------
/tests/llm/test_writer/conftest.py:
--------------------------------------------------------------------------------
import os
import pytest


@pytest.fixture(scope="session", autouse=True)
def configure_writer():
    api_key = os.getenv("WRITER_API_KEY")
    if not api_key:
        pytest.skip("WRITER_API_KEY environment variable not set")
--------------------------------------------------------------------------------
/tests/llm/test_writer/evals/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_writer/evals/__init__.py
--------------------------------------------------------------------------------
/tests/llm/test_writer/evals/test_extract_users.py:
--------------------------------------------------------------------------------
import pytest
from itertools import product
from pydantic import BaseModel
from writerai import Writer
import instructor
from ..util import models, modes


class UserDetails(BaseModel):
    first_name: str
    age: int


test_data = [
    ("Jason is 10", "Jason", 10),
    ("Alice is 25", "Alice", 25),
    ("Bob is 35", "Bob", 35),
]


@pytest.mark.parametrize("model, data, mode", product(models, test_data, modes))
def test_writer_extract(
    model: str, data: tuple[str, str, int], mode: instructor.Mode
):
    client = instructor.from_writer(client=Writer(), mode=mode)

    sample_data, expected_name, expected_age = data

    response = client.chat.completions.create(
        model=model,
        response_model=UserDetails,
        messages=[
            {"role": "user", "content": sample_data},
        ],
    )

    assert (
        response.first_name == expected_name
    ), f"Expected name {expected_name}, got {response.first_name}"
    assert (
        response.age == expected_age
    ), f"Expected age {expected_age}, got {response.age}"
--------------------------------------------------------------------------------
/tests/llm/test_writer/evals/test_sentiment_analysis.py:
--------------------------------------------------------------------------------
import enum
from itertools import product

from pydantic import BaseModel
from writerai import Writer
import pytest
import instructor
from ..util import models, modes


class Sentiment(str, enum.Enum):
    POSITIVE = "positive"
    NEGATIVE = "negative"
    NEUTRAL = "neutral"


class SentimentAnalysis(BaseModel):
    sentiment: Sentiment


test_data = [
    (
        "I absolutely love this product! It has exceeded all my expectations.",
        Sentiment.POSITIVE,
    ),
    (
        "The service was terrible. I will never use this company again.",
        Sentiment.NEGATIVE,
    ),
    (
        "The movie was okay. It had some good moments but overall it was average.",
        Sentiment.NEUTRAL,
    ),
]


@pytest.mark.parametrize("model, data, mode", product(models, test_data, modes))
def test_writer_sentiment_analysis(
    model: str, data: tuple[str, Sentiment], mode: instructor.Mode
):
    client = instructor.from_writer(client=Writer(), mode=mode)

    sample_data, expected_sentiment = data

    response = client.chat.completions.create(
        model=model,
        response_model=SentimentAnalysis,
        messages=[
            {
                "role": "system",
                "content": "You are a sentiment analysis model. Analyze the sentiment of the given text and provide the sentiment (positive, negative, or neutral).",
            },
            {"role": "user", "content": sample_data},
        ],
    )

    assert response.sentiment == expected_sentiment
--------------------------------------------------------------------------------
/tests/llm/test_writer/util.py:
--------------------------------------------------------------------------------
import instructor

models: list[str] = ["palmyra-x4", "palmyra-x5"]
modes = [instructor.Mode.WRITER_TOOLS, instructor.Mode.WRITER_JSON]
--------------------------------------------------------------------------------
/tests/test_dynamic_model_creation.py:
--------------------------------------------------------------------------------
from pydantic import BaseModel, create_model, Field
from instructor import openai_schema


def test_dynamic_model_creation_with_field_description():
    """
    Test that dynamic model creation with Field(description) works correctly.
    This verifies the example in the documentation at docs/concepts/models.md.
    """
    types = {
        'string': str,
        'integer': int,
        'email': str,
    }

    mock_cursor = [
        ('name', 'string', 'The name of the user.'),
        ('age', 'integer', 'The age of the user.'),
        ('email', 'email', 'The email of the user.'),
    ]

    DynamicModel = create_model(
        'User',
        **{
            property_name: (types[property_type], Field(description=description))
            for property_name, property_type, description in mock_cursor
        },
        __base__=BaseModel,
    )

    schema = DynamicModel.model_json_schema()

    assert schema['properties']['name']['description'] == 'The name of the user.'
    assert schema['properties']['age']['description'] == 'The age of the user.'
    assert schema['properties']['email']['description'] == 'The email of the user.'

    assert 'default' not in schema['properties']['name']
    assert 'default' not in schema['properties']['age']
    assert 'default' not in schema['properties']['email']

    OpenAISchemaModel = openai_schema(DynamicModel)
    openai_schema_json = OpenAISchemaModel.model_json_schema()

    assert openai_schema_json['properties']['name']['description'] == 'The name of the user.'
    assert openai_schema_json['properties']['age']['description'] == 'The age of the user.'
    assert openai_schema_json['properties']['email']['description'] == 'The email of the user.'
--------------------------------------------------------------------------------
/tests/test_fizzbuzz_fix.py:
--------------------------------------------------------------------------------
import unittest
import sys
from instructor.dsl.simple_type import is_simple_type
from instructor.process_response import prepare_response_model


class TestFizzbuzzFix(unittest.TestCase):
    def test_fizzbuzz_response_model(self):
        """Test that list[int | str] works correctly as a response model."""
        if sys.version_info < (3, 10):
            self.skipTest("Union pipe syntax is only available in Python 3.10+")
        # This is the type used in the fizzbuzz example
        response_model = list[int | str]

        # First check that it's correctly identified as a simple type
        self.assertTrue(
            is_simple_type(response_model),
            f"list[int | str] should be a simple type in Python {sys.version_info.major}.{sys.version_info.minor}",
        )

        # Then check that prepare_response_model handles it correctly
        prepared_model = prepare_response_model(response_model)
        self.assertIsNotNone(
            prepared_model,
            "prepare_response_model should not return None for list[int | str]",
        )
--------------------------------------------------------------------------------
/tests/test_multitask.py:
--------------------------------------------------------------------------------
from instructor import OpenAISchema
from instructor.dsl import IterableModel


def test_multi_task():
    class Search(OpenAISchema):
        """This is the search docstring"""

        id: int
        query: str

    IterableSearch = IterableModel(Search)
    assert IterableSearch.openai_schema["name"] == "IterableSearch"
    assert (
        IterableSearch.openai_schema["description"]
        == "Correct segmentation of `Search` tasks"
    )
--------------------------------------------------------------------------------
/tests/test_patch.py:
--------------------------------------------------------------------------------
import functools

from openai import AsyncOpenAI, OpenAI

import instructor
from instructor.utils import is_async


def test_patch_completes_successfully():
    instructor.patch(OpenAI())


def test_apatch_completes_successfully():
    instructor.apatch(AsyncOpenAI())


def test_is_async_returns_true_if_function_is_async():
    async def async_function():
        pass

    assert is_async(async_function) is True


def test_is_async_returns_false_if_function_is_not_async():
    def sync_function():
        pass

    assert is_async(sync_function) is False


def test_is_async_returns_true_if_wrapped_function_is_async():
    async def async_function():
        pass

    @functools.wraps(async_function)
    def wrapped_function():
        pass

    assert is_async(wrapped_function) is True


def test_is_async_returns_true_if_double_wrapped_function_is_async():
    async def async_function():
        pass

    @functools.wraps(async_function)
    def wrapped_function():
        pass

    @functools.wraps(wrapped_function)
    def double_wrapped_function():
        pass

    assert is_async(double_wrapped_function) is True


def test_is_async_returns_true_if_triple_wrapped_function_is_async():
    async def async_function():
        pass

    @functools.wraps(async_function)
    def wrapped_function():
        pass

    @functools.wraps(wrapped_function)
    def double_wrapped_function():
        pass

    @functools.wraps(double_wrapped_function)
    def triple_wrapped_function():
        pass

    assert is_async(triple_wrapped_function) is True
--------------------------------------------------------------------------------
/tests/test_process_response.py:
--------------------------------------------------------------------------------
from typing_extensions import TypedDict
from pydantic import BaseModel
from instructor.process_response import handle_response_model


def test_typed_dict_conversion() -> None:
    class User(TypedDict):  # type: ignore
        name: str
        age: int

    _, user_tool_definition = handle_response_model(User)

    class User(BaseModel):
        name: str
        age: int

    _, pydantic_user_tool_definition = handle_response_model(User)
    assert user_tool_definition == pydantic_user_tool_definition
--------------------------------------------------------------------------------
/tests/test_response_model_conversion.py:
--------------------------------------------------------------------------------
from instructor.process_response import handle_response_model
from pydantic import BaseModel, Field
import instructor
import pytest

modes = [
    instructor.Mode.ANTHROPIC_JSON,
    instructor.Mode.JSON,
    instructor.Mode.MD_JSON,
    instructor.Mode.GEMINI_JSON,
    instructor.Mode.VERTEXAI_JSON,
]


def get_system_prompt(user_tool_definition, mode):
    if mode == instructor.Mode.ANTHROPIC_JSON:
        return user_tool_definition["system"]
    elif mode == instructor.Mode.GEMINI_JSON:
        return "\n".join(user_tool_definition["contents"][0]["parts"])
    elif mode == instructor.Mode.VERTEXAI_JSON:
        return str(user_tool_definition["generation_config"])
    return user_tool_definition["messages"][0]["content"]


@pytest.mark.parametrize("mode", modes)
def test_json_preserves_description_of_non_english_characters_in_json_mode(
    mode,
) -> None:
    messages = [
        {
            "role": "user",
            "content": "Extract the user from the text : 张三 20岁",
        }
    ]

    class User(BaseModel):
        name: str = Field(description="用户的名字")
        age: int = Field(description="用户的年龄")

    _, user_tool_definition = handle_response_model(User, mode=mode, messages=messages)

    system_prompt = get_system_prompt(user_tool_definition, mode)
    assert "用户的名字" in system_prompt
    assert "用户的年龄" in system_prompt

    _, user_tool_definition = handle_response_model(
        User,
        mode=mode,
        system="你是一个AI助手",
        messages=messages,
    )
    system_prompt = get_system_prompt(user_tool_definition, mode)
    assert "用户的名字" in system_prompt
    assert "用户的年龄" in system_prompt
--------------------------------------------------------------------------------
/tests/test_simple_types.py:
--------------------------------------------------------------------------------
from instructor.dsl import is_simple_type, Partial
from pydantic import BaseModel


def test_enum_simple():
    from enum import Enum

    class Color(Enum):
        RED = 1
        GREEN = 2
        BLUE = 3

    assert is_simple_type(Color), "Failed for type: " + str(Color)


def test_standard_types():
    for t in [str, int, float, bool]:
        assert is_simple_type(t), "Failed for type: " + str(t)


def test_partial_not_simple():
    class SampleModel(BaseModel):
        data: int

    assert not is_simple_type(Partial[SampleModel]), "Failed for type: " + str(
        Partial[SampleModel]
    )


def test_annotated_simple():
    from pydantic import Field
    from typing import Annotated

    new_type = Annotated[int, Field(description="test")]

    assert is_simple_type(new_type), "Failed for type: " + str(new_type)


def test_literal_simple():
    from typing import Literal

    new_type = Literal[1, 2, 3]

    assert is_simple_type(new_type), "Failed for type: " + str(new_type)


def test_union_simple():
    from typing import Union

    new_type = Union[int, str]

    assert is_simple_type(new_type), "Failed for type: " + str(new_type)


def test_iterable_not_simple():
    from collections.abc import Iterable

    new_type = Iterable[int]

    assert not is_simple_type(new_type), "Failed for type: " + str(new_type)
--------------------------------------------------------------------------------