├── .coveragerc
├── .cursor
│   └── rules
│       ├── documentation-sync.mdc
│       ├── followups.mdc
│       ├── new-features-planning.mdc
│       ├── readme.md
│       └── simple-language.mdc
├── .cursorignore
├── .github
│   ├── FUNDING.yml
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.md
│   │   └── feature_request.md
│   ├── PULL_REQUEST_TEMPLATE
│   │   └── pull_request_template.md
│   ├── dependabot.yml
│   └── workflows
│       ├── ai-label.yml
│       ├── claude.yml
│       ├── evals.yml
│       ├── pyright.yml
│       ├── python-publish.yml
│       ├── ruff.yml
│       ├── test.yml
│       └── test_docs.yml
├── .gitignore
├── .grit
│   ├── .gitignore
│   └── grit.yaml
├── .pre-commit-config.yaml
├── .ruff.toml
├── .vscode
│   └── settings.json
├── CLAUDE.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── build_mkdocs.sh
├── docs
│   ├── api.md
│   ├── architecture.md
│   ├── blog
│   │   ├── .authors.yml
│   │   ├── index.md
│   │   └── posts
│   │       ├── aisummit-2023.md
│   │       ├── announcing-gemini-tool-calling-support.md
│   │       ├── announcing-instructor-responses-support.md
│   │       ├── announcing-unified-provider-interface.md
│   │       ├── anthropic-prompt-caching.md
│   │       ├── anthropic-web-search-structured.md
│   │       ├── anthropic.md
│   │       ├── bad-schemas-could-break-llms.md
│   │       ├── best_framework.md
│   │       ├── caching.md
│   │       ├── chain-of-density.md
│   │       ├── chat-with-your-pdf-with-gemini.md
│   │       ├── citations.md
│   │       ├── consistent-stories.md
│   │       ├── course.md
│   │       ├── cursor-rules.md
│   │       ├── distilation-part1.md
│   │       ├── extract-model-looks.md
│   │       ├── extracting-model-metadata.md
│   │       ├── fake-data.md
│   │       ├── full-fastapi-visibility.md
│   │       ├── generating-pdf-citations.md
│   │       ├── generator.md
│   │       ├── google-openai-client.md
│   │       ├── img
│   │       │   ├── Structured_Output_Extraction.gif
│   │       │   ├── agent_mcp_example.png
│   │       │   ├── async_type.png
│   │       │   ├── chain-of-density.png
│   │       │   ├── classification-logfire.png
│   │       │   ├── claude_desktop_mcp.png
│   │       │   ├── claude_desktop_screenshot.png
│   │       │   ├── course.png
│   │       │   ├── cursor_mcp_agent.png
│   │       │   ├── cursor_mcp_support.png
│   │       │   ├── distil_openai.png
│   │       │   ├── downloads.png
│   │       │   ├── dumb_rag.png
│   │       │   ├── gemini_citations.png
│   │       │   ├── generator.png
│   │       │   ├── image-logfire.png
│   │       │   ├── instructor-autocomplete.png
│   │       │   ├── iterable.png
│   │       │   ├── langsmith.png
│   │       │   ├── logfire-asyncio.png
│   │       │   ├── logfire-stream.png
│   │       │   ├── logfire-sync-fastapi-arguments.png
│   │       │   ├── logfire-sync-pydantic-validation.png
│   │       │   ├── mcp_architecture.png
│   │       │   ├── mcp_stars.webp
│   │       │   ├── meta.png
│   │       │   ├── parea
│   │       │   │   ├── form-mode.gif
│   │       │   │   ├── trace.png
│   │       │   │   └── validation-error-chart.png
│   │       │   ├── query_understanding.png
│   │       │   ├── statista-image.jpeg
│   │       │   ├── style_1.png
│   │       │   ├── style_2.png
│   │       │   ├── style_3.png
│   │       │   ├── style_4.png
│   │       │   ├── style_5.png
│   │       │   ├── style_6.png
│   │       │   ├── type.png
│   │       │   ├── untidy_table.png
│   │       │   ├── validation-logfire.png
│   │       │   ├── with_completion.png
│   │       │   ├── youtube-clips.gif
│   │       │   └── youtube-flashcards
│   │       │       ├── annotations.png
│   │       │       ├── flashcards.png
│   │       │       └── telemetry.gif
│   │       ├── introducing-structured-outputs-with-cerebras-inference.md
│   │       ├── introducing-structured-outputs.md
│   │       ├── introduction.md
│   │       ├── jinja-proposal.md
│   │       ├── langsmith.md
│   │       ├── learn-async.md
│   │       ├── llm-as-reranker.md
│   │       ├── llms-txt-adoption.md
│   │       ├── logfire.md
│   │       ├── matching-language.md
│   │       ├── migrating-to-uv.md
│   │       ├── multimodal-gemini.md
│   │       ├── open_source.md
│   │       ├── openai-distilation-store.md
│   │       ├── openai-multimodal.md
│   │       ├── pairwise-llm-judge.md
│   │       ├── parea.md
│   │       ├── pydantic-is-still-all-you-need.md
│   │       ├── rag-and-beyond.md
│   │       ├── rag-timelines.md
│   │       ├── semantic-validation-structured-outputs.md
│   │       ├── situate-context.md
│   │       ├── string-based-init.md
│   │       ├── structured-output-anthropic.md
│   │       ├── tidy-data-from-messy-tables.md
│   │       ├── timestamp.md
│   │       ├── using_json.md
│   │       ├── validation-part1.md
│   │       ├── version-1.md
│   │       ├── why-care-about-mcps.md
│   │       ├── writer-support.md
│   │       ├── youtube-flashcards.md
│   │       └── youtube-transcripts.md
│   ├── cli
│   │   ├── batch.md
│   │   ├── finetune.md
│   │   ├── index.md
│   │   └── usage.md
│   ├── concepts
│   │   ├── alias.md
│   │   ├── caching.md
│   │   ├── dictionary_operations.md
│   │   ├── distillation.md
│   │   ├── enums.md
│   │   ├── error_handling.md
│   │   ├── fastapi.md
│   │   ├── fields.md
│   │   ├── hooks.md
│   │   ├── index.md
│   │   ├── iterable.md
│   │   ├── lists.md
│   │   ├── logging.md
│   │   ├── maybe.md
│   │   ├── models.md
│   │   ├── multimodal.md
│   │   ├── parallel.md
│   │   ├── partial.md
│   │   ├── patching.md
│   │   ├── philosophy.md
│   │   ├── prompt_caching.md
│   │   ├── prompting.md
│   │   ├── raw_response.md
│   │   ├── reask_validation.md
│   │   ├── response.png
│   │   ├── retrying.md
│   │   ├── semantic_validation.md
│   │   ├── templating.md
│   │   ├── typeadapter.md
│   │   ├── typeddicts.md
│   │   ├── types.md
│   │   ├── union.md
│   │   ├── unions.md
│   │   ├── usage.md
│   │   └── validation.md
│   ├── contributing.md
│   ├── examples
│   │   ├── action_items.md
│   │   ├── audio_extraction.md
│   │   ├── batch_classification_langsmith.md
│   │   ├── batch_job_oai.md
│   │   ├── building_knowledge_graphs.md
│   │   ├── bulk_classification.md
│   │   ├── classification.md
│   │   ├── db.png
│   │   ├── document_segmentation.md
│   │   ├── entity_resolution.md
│   │   ├── entity_resolution.png
│   │   ├── exact_citations.md
│   │   ├── examples.md
│   │   ├── extract_contact_info.md
│   │   ├── extract_slides.md
│   │   ├── extracting_receipts.md
│   │   ├── extracting_tables.md
│   │   ├── groq.md
│   │   ├── image_to_ad_copy.md
│   │   ├── index.md
│   │   ├── knowledge_graph.md
│   │   ├── knowledge_graph.png
│   │   ├── local_classification.md
│   │   ├── mistral.md
│   │   ├── moderation.md
│   │   ├── multi_modal_gemini.md
│   │   ├── multiple_classification.md
│   │   ├── ollama.md
│   │   ├── open_source.md
│   │   ├── pandas_df.md
│   │   ├── partial_streaming.md
│   │   ├── pii.md
│   │   ├── planning-tasks.md
│   │   ├── recursive.md
│   │   ├── search.md
│   │   ├── self_critique.md
│   │   ├── single_classification.md
│   │   ├── sqlmodel.md
│   │   ├── tables_from_vision.md
│   │   ├── tracing_with_langfuse.md
│   │   ├── watsonx.md
│   │   └── youtube_clips.md
│   ├── faq.md
│   ├── getting-started.md
│   ├── help.md
│   ├── hooks
│   │   └── hide_lines.py
│   ├── img
│   │   ├── action_items.png
│   │   ├── analogical_prompting.png
│   │   ├── cosp.png
│   │   ├── cosp_entropy.png
│   │   ├── cosp_redundancy.png
│   │   ├── error2.png
│   │   ├── faithful_cot_example.png
│   │   ├── ide_support.png
│   │   ├── more.png
│   │   ├── mrr_eqn.png
│   │   ├── mutual_information.png
│   │   ├── partial.gif
│   │   ├── partial_streaming.gif
│   │   ├── plan_and_solve.png
│   │   ├── pot.jpeg
│   │   ├── recall_eqn.png
│   │   ├── retriever.png
│   │   ├── universal_self_adaptive_prompting.png
│   │   ├── universal_self_consistency.png
│   │   └── youtube.gif
│   ├── index.md
│   ├── installation.md
│   ├── integrations
│   │   ├── anthropic.md
│   │   ├── anyscale.md
│   │   ├── azure.md
│   │   ├── bedrock.md
│   │   ├── cerebras.md
│   │   ├── cohere.md
│   │   ├── cortex.md
│   │   ├── databricks.md
│   │   ├── deepseek.md
│   │   ├── fireworks.md
│   │   ├── genai.md
│   │   ├── google.md
│   │   ├── groq.md
│   │   ├── index.md
│   │   ├── litellm.md
│   │   ├── llama-cpp-python.md
│   │   ├── mistral.md
│   │   ├── ollama.md
│   │   ├── openai-responses.md
│   │   ├── openai.md
│   │   ├── openrouter.md
│   │   ├── perplexity.md
│   │   ├── sambanova.md
│   │   ├── together.md
│   │   ├── vertex.md
│   │   └── writer.md
│   ├── javascripts
│   │   └── katex.js
│   ├── jobs.md
│   ├── learning
│   │   ├── getting_started
│   │   │   ├── client_setup.md
│   │   │   ├── first_extraction.md
│   │   │   ├── installation.md
│   │   │   ├── response_models.md
│   │   │   └── structured_outputs.md
│   │   ├── index.md
│   │   ├── patterns
│   │   │   ├── field_validation.md
│   │   │   ├── list_extraction.md
│   │   │   ├── nested_structure.md
│   │   │   ├── optional_fields.md
│   │   │   ├── prompt_templates.md
│   │   │   └── simple_object.md
│   │   ├── streaming
│   │   │   ├── basics.md
│   │   │   └── lists.md
│   │   └── validation
│   │       ├── basics.md
│   │       ├── custom_validators.md
│   │       ├── field_level_validation.md
│   │       └── retry_mechanisms.md
│   ├── llms.txt
│   ├── modes-comparison.md
│   ├── newsletter.md
│   ├── overrides
│   │   └── main.html
│   ├── prompting
│   │   ├── decomposition
│   │   │   ├── decomp.md
│   │   │   ├── faithful_cot.md
│   │   │   ├── least_to_most.md
│   │   │   ├── plan_and_solve.md
│   │   │   ├── program_of_thought.md
│   │   │   ├── recurs_of_thought.md
│   │   │   ├── skeleton_of_thought.md
│   │   │   └── tree-of-thought.md
│   │   ├── ensembling
│   │   │   ├── cosp.md
│   │   │   ├── dense.md
│   │   │   ├── diverse.md
│   │   │   ├── max_mutual_information.md
│   │   │   ├── meta_cot.md
│   │   │   ├── more.md
│   │   │   ├── prompt_paraphrasing.md
│   │   │   ├── self_consistency.md
│   │   │   ├── universal_self_consistency.md
│   │   │   └── usp.md
│   │   ├── few_shot
│   │   │   ├── cosp.md
│   │   │   ├── example_generation
│   │   │   │   └── sg_icl.md
│   │   │   ├── example_ordering.md
│   │   │   └── exemplar_selection
│   │   │       ├── knn.md
│   │   │       └── vote_k.md
│   │   ├── index.md
│   │   ├── self_criticism
│   │   │   ├── chain_of_verification.md
│   │   │   ├── cumulative_reason.md
│   │   │   ├── reversecot.md
│   │   │   ├── self_calibration.md
│   │   │   ├── self_refine.md
│   │   │   └── self_verification.md
│   │   ├── thought_generation
│   │   │   ├── chain_of_thought_few_shot
│   │   │   │   ├── active_prompt.md
│   │   │   │   ├── auto_cot.md
│   │   │   │   ├── complexity_based.md
│   │   │   │   ├── contrastive.md
│   │   │   │   ├── memory_of_thought.md
│   │   │   │   ├── prompt_mining.md
│   │   │   │   └── uncertainty_routed_cot.md
│   │   │   └── chain_of_thought_zero_shot
│   │   │       ├── analogical_prompting.md
│   │   │       ├── step_back_prompting.md
│   │   │       ├── tab_cot.md
│   │   │       └── thread_of_thought.md
│   │   └── zero_shot
│   │       ├── emotion_prompting.md
│   │       ├── rar.md
│   │       ├── re2.md
│   │       ├── role_prompting.md
│   │       ├── s2a.md
│   │       ├── self_ask.md
│   │       ├── simtom.md
│   │       └── style_prompting.md
│   ├── repository-overview.md
│   ├── start-here.md
│   ├── templates
│   │   ├── concept_template.md
│   │   ├── cookbook_template.md
│   │   └── provider_template.md
│   ├── tutorials
│   │   ├── 1-introduction.ipynb
│   │   ├── 2-tips.ipynb
│   │   ├── 3-0-applications-rag.ipynb
│   │   ├── 3-1-validation-rag.ipynb
│   │   ├── 4-validation.ipynb
│   │   ├── 5-knowledge-graphs.ipynb
│   │   ├── 6-chain-of-density.ipynb
│   │   ├── 7-synthetic-data-generation.ipynb
│   │   └── index.md
│   └── why.md
├── docs_todo.md
├── ellipsis.yaml
├── examples
│   ├── __init__.py
│   ├── anthropic-web-tool
│   │   └── run.py
│   ├── anthropic
│   │   └── run.py
│   ├── auto-ticketer
│   │   ├── run.py
│   │   └── tasks.png
│   ├── automodel
│   │   └── run.py
│   ├── avail
│   │   ├── run.py
│   │   └── run_mixtral.py
│   ├── batch-classification
│   │   ├── run-cache.py
│   │   ├── run.py
│   │   └── run_langsmith.py
│   ├── caching
│   │   ├── example_diskcache.py
│   │   ├── example_redis.py
│   │   └── lru.py
│   ├── chain-of-density
│   │   ├── Readme.md
│   │   ├── chain_of_density.py
│   │   ├── finetune.py
│   │   └── requirements.txt
│   ├── citation_with_extraction
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   ├── citation_fuzzy_match.py
│   │   ├── diagram.py
│   │   ├── main.py
│   │   ├── modal_main.py
│   │   ├── requirements.txt
│   │   └── schema.png
│   ├── citations
│   │   └── run.py
│   ├── classification
│   │   ├── classifiy_with_validation.py
│   │   ├── multi_prediction.py
│   │   └── simple_prediction.py
│   ├── codegen-from-schema
│   │   ├── create_fastapi_app.py
│   │   ├── input.json
│   │   ├── models.py
│   │   ├── readme.md
│   │   └── run.py
│   ├── cohere
│   │   └── cohere.py
│   ├── crm
│   │   └── run.py
│   ├── distilations
│   │   ├── math_finetunes.jsonl
│   │   ├── math_finetunes_val.jsonl
│   │   ├── readme.md
│   │   ├── three_digit_mul.py
│   │   └── three_digit_mul_dispatch.py
│   ├── evals
│   │   ├── eval.py
│   │   ├── models.py
│   │   ├── stats_dict.py
│   │   ├── streamlit.py
│   │   └── test.jsonl
│   ├── extract-table
│   │   ├── run_vision.py
│   │   ├── run_vision_langsmith.py
│   │   ├── run_vision_org.py
│   │   ├── run_vision_org_table.py
│   │   ├── run_vision_receipt.py
│   │   └── test.py
│   ├── extracting-pii
│   │   └── run.py
│   ├── fastapi_app
│   │   ├── __init__.py
│   │   ├── main.py
│   │   └── script.py
│   ├── fizzbuzz
│   │   └── run.py
│   ├── gpt-engineer
│   │   ├── changes.diff
│   │   ├── generate.py
│   │   ├── program.json
│   │   └── refactor.py
│   ├── groq
│   │   ├── groq_example.py
│   │   └── groq_example2.py
│   ├── hooks
│   │   ├── README.md
│   │   └── run.py
│   ├── iterables
│   │   └── run.py
│   ├── knowledge-graph
│   │   ├── final.png
│   │   ├── iteration_0.png
│   │   ├── iteration_1.png
│   │   ├── iteration_2.png
│   │   ├── iteration_3.png
│   │   ├── kg.png
│   │   ├── run.py
│   │   └── run_stream.py
│   ├── learn-async
│   │   └── run.py
│   ├── llm-judge-relevance
│   │   └── run.py
│   ├── logfire-fastapi
│   │   ├── Readme.md
│   │   ├── requirements.txt
│   │   ├── server.py
│   │   └── test.py
│   ├── logfire
│   │   ├── classify.py
│   │   ├── image.py
│   │   ├── requirements.txt
│   │   └── validate.py
│   ├── logging
│   │   └── run.py
│   ├── match_language
│   │   ├── run_v1.py
│   │   └── run_v2.py
│   ├── mistral
│   │   └── mistral.py
│   ├── multi-actions
│   │   └── run.py
│   ├── multiple_search_queries
│   │   ├── diagram.py
│   │   ├── schema.png
│   │   └── segment_search_queries.py
│   ├── open_source_examples
│   │   ├── README.md
│   │   ├── openrouter.py
│   │   ├── perplexity.py
│   │   └── runpod.py
│   ├── openai-audio
│   │   ├── output.wav
│   │   └── run.py
│   ├── parallel
│   │   └── run.py
│   ├── partial_streaming
│   │   ├── benchmark.py
│   │   └── run.py
│   ├── patching
│   │   ├── anyscale.py
│   │   ├── oai.py
│   │   ├── pcalls.py
│   │   └── together.py
│   ├── proscons
│   │   └── run.py
│   ├── query_planner_execution
│   │   ├── diagram.py
│   │   ├── query_planner_execution.py
│   │   └── schema.png
│   ├── recursive_filepaths
│   │   ├── diagram.py
│   │   ├── parse_recursive_paths.py
│   │   └── schema.png
│   ├── reranker
│   │   └── run.py
│   ├── resolving-complex-entities
│   │   ├── entity.png
│   │   └── run.py
│   ├── retry
│   │   └── run.py
│   ├── safer_sql_example
│   │   ├── diagram.py
│   │   ├── safe_sql.py
│   │   └── schema.png
│   ├── simple-extraction
│   │   ├── maybe_user.py
│   │   └── user.py
│   ├── situate_context
│   │   └── run.py
│   ├── sqlmodel
│   │   └── run.py
│   ├── stream_action_items
│   │   └── run.py
│   ├── synethic-data
│   │   └── run.py
│   ├── task_planner
│   │   ├── diagram.py
│   │   ├── schema.png
│   │   └── task_planner_topological_sort.py
│   ├── timestamps
│   │   └── run.py
│   ├── union
│   │   └── run.py
│   ├── validated-multiclass
│   │   ├── output.json
│   │   └── run.py
│   ├── validators
│   │   ├── allm_validator.py
│   │   ├── annotator.py
│   │   ├── chain_of_thought_validator.py
│   │   ├── citations.py
│   │   ├── competitors.py
│   │   ├── field_validator.py
│   │   ├── just_a_guy.py
│   │   ├── llm_validator.py
│   │   ├── moderation.py
│   │   └── readme.md
│   ├── vision
│   │   ├── image_to_ad_copy.py
│   │   ├── run.py
│   │   ├── run_raw.py
│   │   ├── run_table.py
│   │   └── slides.py
│   ├── watsonx
│   │   └── watsonx.py
│   ├── youtube-clips
│   │   └── run.py
│   ├── youtube-flashcards
│   │   └── run.py
│   └── youtube
│       └── run.py
├── instructor
│   ├── __init__.py
│   ├── _types
│   │   ├── __init__.py
│   │   └── _alias.py
│   ├── auto_client.py
│   ├── batch.py
│   ├── cli
│   │   ├── __init__.py
│   │   ├── batch.py
│   │   ├── cli.py
│   │   ├── deprecated_hub.py
│   │   ├── files.py
│   │   ├── jobs.py
│   │   └── usage.py
│   ├── client.py
│   ├── client_anthropic.py
│   ├── client_bedrock.py
│   ├── client_cerebras.py
│   ├── client_cohere.py
│   ├── client_fireworks.py
│   ├── client_gemini.py
│   ├── client_genai.py
│   ├── client_groq.py
│   ├── client_mistral.py
│   ├── client_perplexity.py
│   ├── client_vertexai.py
│   ├── client_writer.py
│   ├── distil.py
│   ├── dsl
│   │   ├── __init__.py
│   │   ├── citation.py
│   │   ├── iterable.py
│   │   ├── maybe.py
│   │   ├── parallel.py
│   │   ├── partial.py
│   │   ├── simple_type.py
│   │   └── validators.py
│   ├── exceptions.py
│   ├── function_calls.py
│   ├── hooks.py
│   ├── mode.py
│   ├── models.py
│   ├── multimodal.py
│   ├── patch.py
│   ├── process_response.py
│   ├── py.typed
│   ├── reask.py
│   ├── retry.py
│   ├── templating.py
│   ├── utils.py
│   └── validators.py
├── make_desc.py
├── make_sitemap.py
├── mkdocs.yml
├── poetry.lock
├── pyproject.toml
├── pyrightconfig.json
├── requirements-doc.txt
├── requirements-examples.txt
├── requirements.txt
├── sitemap.yaml
├── tests
│   ├── __init__.py
│   ├── assets
│   │   ├── gettysburg.wav
│   │   ├── image.jpg
│   │   └── invoice.pdf
│   ├── conftest.py
│   ├── dsl
│   │   ├── test_partial.py
│   │   ├── test_simple_type.py
│   │   └── test_simple_type_fix.py
│   ├── llm
│   │   ├── test_anthropic
│   │   │   ├── __init__.py
│   │   │   ├── conftest.py
│   │   │   ├── evals
│   │   │   │   └── test_simple.py
│   │   │   ├── test_multimodal.py
│   │   │   ├── test_reasoning.py
│   │   │   ├── test_stream.py
│   │   │   ├── test_system.py
│   │   │   └── util.py
│   │   ├── test_cerebras
│   │   │   ├── __init__.py
│   │   │   └── modes.py
│   │   ├── test_cohere
│   │   │   ├── conftest.py
│   │   │   ├── test_json_schema.py
│   │   │   ├── test_none_response.py
│   │   │   └── test_retries.py
│   │   ├── test_fireworks
│   │   │   ├── __init__.py
│   │   │   ├── test_format.py
│   │   │   ├── test_simple.py
│   │   │   ├── test_stream.py
│   │   │   └── util.py
│   │   ├── test_gemini
│   │   │   ├── __init__.py
│   │   │   ├── conftest.py
│   │   │   ├── evals
│   │   │   │   ├── __init__.py
│   │   │   │   ├── test_classification_enums.py
│   │   │   │   ├── test_classification_literals.py
│   │   │   │   ├── test_entities.py
│   │   │   │   ├── test_extract_users.py
│   │   │   │   └── test_sentiment_analysis.py
│   │   │   ├── test_files
│   │   │   │   └── sample.mp3
│   │   │   ├── test_format.py
│   │   │   ├── test_list_content.py
│   │   │   ├── test_modes.py
│   │   │   ├── test_multimodal_content.py
│   │   │   ├── test_patch.py
│   │   │   ├── test_retries.py
│   │   │   ├── test_roles.py
│   │   │   ├── test_simple_types.py
│   │   │   ├── test_stream.py
│   │   │   └── util.py
│   │   ├── test_genai
│   │   │   ├── __init__.py
│   │   │   ├── conftest.py
│   │   │   ├── test_format.py
│   │   │   ├── test_invalid_schema.py
│   │   │   ├── test_long_prompt.py
│   │   │   ├── test_multimodal.py
│   │   │   ├── test_retries.py
│   │   │   ├── test_simple.py
│   │   │   ├── test_stream.py
│   │   │   ├── test_utils.py
│   │   │   └── util.py
│   │   ├── test_litellm.py
│   │   ├── test_mistral
│   │   │   ├── __init__.py
│   │   │   ├── conftest.py
│   │   │   ├── test_modes.py
│   │   │   ├── test_multimodal.py
│   │   │   ├── test_retries.py
│   │   │   ├── test_stream.py
│   │   │   └── util.py
│   │   ├── test_new_client.py
│   │   ├── test_openai
│   │   │   ├── __init__.py
│   │   │   ├── conftest.py
│   │   │   ├── docs
│   │   │   │   ├── test_concepts.py
│   │   │   │   ├── test_docs.py
│   │   │   │   ├── test_examples.py
│   │   │   │   ├── test_hub.py
│   │   │   │   ├── test_mkdocs.py
│   │   │   │   ├── test_posts.py
│   │   │   │   └── test_prompt_tips.py
│   │   │   ├── evals
│   │   │   │   ├── __init__.py
│   │   │   │   ├── readme.md
│   │   │   │   ├── test_classification_enums.py
│   │   │   │   ├── test_classification_literals.py
│   │   │   │   ├── test_entities.py
│   │   │   │   ├── test_extract_users.py
│   │   │   │   └── test_sentiment_analysis.py
│   │   │   ├── test_attr.py
│   │   │   ├── test_hooks.py
│   │   │   ├── test_modes.py
│   │   │   ├── test_multimodal.py
│   │   │   ├── test_multitask.py
│   │   │   ├── test_parallel.py
│   │   │   ├── test_patch.py
│   │   │   ├── test_response.py
│   │   │   ├── test_responses_tools.py
│   │   │   ├── test_retries.py
│   │   │   ├── test_simple_types.py
│   │   │   ├── test_stream.py
│   │   │   ├── test_validation_context.py
│   │   │   ├── test_validators.py
│   │   │   └── util.py
│   │   ├── test_perplexity
│   │   │   ├── __init__.py
│   │   │   ├── conftest.py
│   │   │   ├── test_modes.py
│   │   │   └── util.py
│   │   ├── test_vertexai
│   │   │   ├── __init__.py
│   │   │   ├── test_deprecated_async.py
│   │   │   ├── test_format.py
│   │   │   ├── test_message_parser.py
│   │   │   ├── test_modes.py
│   │   │   ├── test_retries.py
│   │   │   ├── test_simple_types.py
│   │   │   ├── test_stream.py
│   │   │   └── util.py
│   │   └── test_writer
│   │       ├── __init__.py
│   │       ├── conftest.py
│   │       ├── evals
│   │       │   ├── __init__.py
│   │       │   ├── test_classification_enums.py
│   │       │   ├── test_classification_literals.py
│   │       │   ├── test_entities.py
│   │       │   ├── test_extract_users.py
│   │       │   └── test_sentiment_analysis.py
│   │       ├── test_format_common_models.py
│   │       ├── test_format_difficult_models.py
│   │       ├── test_retries.py
│   │       ├── test_streaming.py
│   │       └── util.py
│   ├── test_auto_client.py
│   ├── test_dict_operations.py
│   ├── test_dict_operations_validation.py
│   ├── test_distil.py
│   ├── test_dynamic_model_creation.py
│   ├── test_fizzbuzz_fix.py
│   ├── test_formatting.py
│   ├── test_function_calls.py
│   ├── test_json_extraction.py
│   ├── test_json_extraction_edge_cases.py
│   ├── test_message_processing.py
│   ├── test_multimodal.py
│   ├── test_multitask.py
│   ├── test_patch.py
│   ├── test_process_response.py
│   ├── test_response_model_conversion.py
│   ├── test_schema.py
│   ├── test_simple_types.py
│   └── test_utils.py
└── uv.lock

--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
[run]
source =
    instructor/
omit =
    instructor/cli/*
--------------------------------------------------------------------------------
/.cursor/rules/documentation-sync.mdc:
--------------------------------------------------------------------------------
---
description: when making code changes or adding documentation
globs: ["*.py", "*.md"]
alwaysApply: true
---

- When making code changes:
  - Update related documentation files to reflect the changes
  - Check that docstrings and type hints are up to date
  - Update any example code in markdown files
  - Review README.md if the changes affect installation or usage

- When creating new markdown files:
  - Add the file to mkdocs.yml under the appropriate section
  - Follow the existing hierarchy and indentation
  - Use descriptive nav titles
  - Example:

    ```yaml
    nav:
      - Home: index.md
      - Guides:
          - Getting Started: guides/getting-started.md
          - Your New File: guides/your-new-file.md
    ```

- For API documentation:
  - Ensure new functions/classes are documented
  - Include type hints and docstrings
  - Add usage examples
  - Update API reference docs if auto-generated

- Documentation Quality:
  - Write at a grade 10 reading level (see simple-language.mdc)
  - Include working code examples
  - Add links to related documentation
  - Use consistent formatting and style
--------------------------------------------------------------------------------
/.cursor/rules/followups.mdc:
--------------------------------------------------------------------------------
---
description: when AI agents are collaborating on code
globs: "*"
alwaysApply: true
---

Make sure to come up with follow-up hot keys, using [J], [K], and [L]. They should be thoughtful and actionable, and should result in small additional code changes based on the context that you have available.
--------------------------------------------------------------------------------
/.cursor/rules/simple-language.mdc:
--------------------------------------------------------------------------------
---
description: when writing documentation
globs: *.md
alwaysApply: false
---

- When writing documents and concepts, make sure that you write at a grade 10 reading level
- Make sure every code block has complete imports and makes no references to previous code blocks; each one needs to be self-contained
--------------------------------------------------------------------------------
/.cursorignore:
--------------------------------------------------------------------------------
# Add directories or file patterns to ignore during indexing (e.g. foo/ or *.csv)
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
github: jxnl
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
---
name: Bug report
about: Create a report to help us improve
---

- [ ] This is actually a bug report.
- [ ] I am not getting good LLM results
- [ ] I have tried asking for help in the community on Discord or in discussions and have not received a response.
- [ ] I have tried searching the documentation and have not found an answer.

**What model are you using?**

- [ ] gpt-3.5-turbo
- [ ] gpt-4-turbo
- [ ] gpt-4
- [ ] Other (please specify)

**Describe the bug**
A clear and concise description of what the bug is.

**To Reproduce**
Steps to reproduce the behavior, including code snippets of the model, the input data, and the OpenAI response.

**Expected behavior**
A clear and concise description of what you expected to happen.

**Screenshots**
If applicable, add screenshots to help explain your problem.
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
---
name: Feature request
about: Suggest an idea for this project
---

**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]

**Describe the solution you'd like**
A clear and concise description of what you want to happen.

**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.

**Additional context**
Add any other context or screenshots about the feature request here.
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md:
--------------------------------------------------------------------------------
> Please use conventional commits to describe your changes. For example, `feat: add new feature` or `fix: fix a bug`. If you are unsure, leave the title as `...` and AI will handle it.

## Describe your changes

...

## Issue ticket number and link

## Checklist before requesting a review

- [ ] I have performed a self-review of my code
- [ ] If it is a core feature, I have added thorough tests.
- [ ] If it is a core feature, I have added documentation.
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates

version: 2
updates:
  - package-ecosystem: "pip" # See documentation for possible values
    directory: "/" # Location of package manifests
    schedule:
      interval: "daily"
    groups:
      poetry:
        patterns: ["*"]
--------------------------------------------------------------------------------
/.github/workflows/ai-label.yml:
--------------------------------------------------------------------------------
name: AI Labeler

on:
  issues:
    types: [opened, reopened]
  pull_request:
    types: [opened, reopened]

jobs:
  ai-labeler:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      issues: write
      pull-requests: write
    steps:
      - uses: actions/checkout@v4
      - uses: jlowin/ai-labeler@v0.4.0
        with:
          include-repo-labels: true
          openai-api-key: ${{ secrets.OPENAI_API_KEY }}
--------------------------------------------------------------------------------
/.github/workflows/claude.yml:
--------------------------------------------------------------------------------
name: Claude Code

on:
  issue_comment:
    types: [created]
  pull_request_review_comment:
    types: [created]
  issues:
    types: [opened, assigned]
  pull_request_review:
    types: [submitted]

jobs:
  claude:
    if: |
      (contains(fromJSON('["jxnl","ivanleomk"]'), github.actor)) &&
      ((github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
      (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
      (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
      (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude'))))
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: read
      issues: read
      id-token: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: Run Claude Code
        id: claude
        uses: anthropics/claude-code-action@beta
        with:
          anthropic_api_key: ${{ secrets.CLAUDE_API_KEY }}
--------------------------------------------------------------------------------
/.github/workflows/evals.yml:
--------------------------------------------------------------------------------
name: Weekly Tests

on:
  workflow_dispatch:
  schedule:
    - cron: "0 0 * * 0" # Runs at 00:00 UTC every Sunday
  push:
    branches: [main]
    paths-ignore:
      - "**" # Ignore all paths to ensure it only triggers on schedule

jobs:
  weekly-tests:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v2

      - name: Install Poetry
        uses: snok/install-poetry@v1.3.1

      - name: Set up Python 3.11
        uses: actions/setup-python@v4
        with:
          python-version: 3.11
          cache: "poetry"

      - name: Install dependencies
        run: poetry install --with dev,anthropic

      - name: Run all tests
        run: poetry run pytest tests/
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
--------------------------------------------------------------------------------
/.github/workflows/pyright.yml:
--------------------------------------------------------------------------------
name: Pyright

on:
  pull_request:
    branches: [main]
  push:
    branches: [main]

env:
  WORKING_DIRECTORY: "."
  PYRIGHT_OUTPUT_FILENAME: "pyright.log"

jobs:
  Pyright:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest]
        python-version: ["3.9", "3.10", "3.11"]

    steps:
      - name: Checkout code
        uses: actions/checkout@v3
      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          enable-cache: true
      - name: Set up Python
        run: uv python install ${{ matrix.python-version }}
      - name: Install the project
        run: uv sync --all-extras
      - name: Run pyright
        run: uv run pyright
--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
# This workflow will upload a Python package when a release is created.
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries

# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

name: Upload Python Package

on:
  release:
    types: [published]

permissions:
  contents: read

jobs:
  release:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v2
      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          enable-cache: true
      - name: Set up Python
        run: uv python install 3.10
      - name: Install the project
        run: uv sync --all-extras
      - name: Build the project
        run: uv build
      - name: Build and publish Python package
        run: uv publish
        env:
          UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }}
--------------------------------------------------------------------------------
/.github/workflows/ruff.yml:
--------------------------------------------------------------------------------
name: Ruff

on:
  push:
  pull_request:
    branches: [main]

env:
  WORKING_DIRECTORY: "."
  RUFF_OUTPUT_FILENAME: "ruff.log"
  CUSTOM_FLAGS: ""
  CUSTOM_PACKAGES: "instructor examples tests"

jobs:
  Ruff:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest]
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          enable-cache: true
      - name: Set up Python
        run: uv python install 3.9
      - name: Install the project
        run: uv sync --all-extras
      - name: Run Continuous Integration Action
        uses: astral-sh/ruff-action@v3
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          name: ruff-log
          path: ${{ env.WORKING_DIRECTORY }}/${{ env.RUFF_OUTPUT_FILENAME }}
--------------------------------------------------------------------------------
/.github/workflows/test_docs.yml:
--------------------------------------------------------------------------------
name: Test Docs
on:
  pull_request:
  push:
    branches:
      - master
jobs:
  release:
    runs-on: ubuntu-latest

    strategy:
      matrix:
        python-version: ["3.11"]

    steps:
      - uses: actions/checkout@v2

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y graphviz libcairo2-dev xdg-utils

      - name: Install Poetry
        uses: snok/install-poetry@v1.3.1

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
          cache: "poetry"
      - name: Install uv
        uses: astral-sh/setup-uv@v4
      - name: Install the project
        run: uv sync --all-extras
      - name: Run tests
        run: uv run pytest tests/llm/test_openai/docs
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
--------------------------------------------------------------------------------
/.grit/.gitignore:
--------------------------------------------------------------------------------
.gritmodules
*.log
--------------------------------------------------------------------------------
/.grit/grit.yaml:
--------------------------------------------------------------------------------
version: 0.0.1
patterns:
  - name: github.com/getgrit/python#openai
    level: info
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.9.9 # Ruff version
    hooks:
      - id: ruff # Run the linter.
        name: Run Linter Check (Ruff)
        args: [--fix, --unsafe-fixes]
        files: ^(instructor|tests|examples)/
      - id: ruff-format # Run the formatter.
        name: Run Formatter (Ruff)

  - repo: local
    hooks:
      - id: uv-lock-check
        name: Check uv.lock is up-to-date
        entry: uv
        args: [lock, --check]
        language: system
        files: ^(pyproject\.toml|uv\.lock)$
        pass_filenames: false

      - id: uv-sync-check
        name: Verify dependencies can be installed
        entry: uv
        args: [sync, --check]
        language: system
        files: ^(pyproject\.toml|uv\.lock)$
        pass_filenames: false
--------------------------------------------------------------------------------
/.ruff.toml:
--------------------------------------------------------------------------------
# Exclude a variety of commonly ignored directories.
exclude = [
    ".bzr",
    ".direnv",
    ".eggs",
    ".git",
    ".git-rewrite",
    ".hg",
    ".mypy_cache",
    ".nox",
    ".pants.d",
    ".pytype",
    ".ruff_cache",
    ".svn",
    ".tox",
    ".venv",
    "__pypackages__",
    "_build",
    "buck-out",
    "build",
    "dist",
    "node_modules",
    "venv",
]

# Same as Black.
line-length = 88
output-format = "grouped"

target-version = "py39"

[lint]
select = [
    # bugbear rules
    "B",
    # remove unused imports
    "F401",
    # bare except statements
    "E722",
    # unused arguments
    "ARG",
    # pyupgrade
    "UP",
]
ignore = [
    # mutable defaults
    "B006",
    "B018",
]

unfixable = [
    # disable auto fix for print statements
    "T201",
    "T203",
]
ignore-init-module-imports = true

[lint.extend-per-file-ignores]
"instructor/distil.py" = ["ARG002"]
"tests/test_distil.py" = ["ARG001"]
"tests/test_patch.py" = ["ARG001"]
"examples/task_planner/task_planner_topological_sort.py" = ["ARG002"]
"examples/citation_with_extraction/main.py" = ["ARG001"]
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/.vscode/settings.json
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2023 Jason Liu

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/build_mkdocs.sh:
--------------------------------------------------------------------------------
pip install -r requirements.txt
pip install -r requirements-doc.txt
mkdocs build
--------------------------------------------------------------------------------
/docs/api.md:
--------------------------------------------------------------------------------
---
title: API Reference Guide
description: Explore the comprehensive API reference with details on instructors, validation, iteration, and function calls.
---

# API Reference

::: instructor.from_openai

::: instructor.dsl.validators

::: instructor.dsl.iterable

::: instructor.dsl.partial

::: instructor.dsl.parallel

::: instructor.dsl.maybe

::: instructor.function_calls
--------------------------------------------------------------------------------
/docs/blog/.authors.yml:
--------------------------------------------------------------------------------
authors:
  jxnl:
    name: Jason Liu
    description: Creator
    avatar: https://avatars.githubusercontent.com/u/4852235?v=4
    url: https://twitter.com/intent/follow?screen_name=jxnlco
  ivanleomk:
    name: Ivan Leo
    description: Contributor
    avatar: https://pbs.twimg.com/profile_images/1838778744468836353/utYfioiO_400x400.jpg
    url: https://twitter.com/intent/follow?screen_name=ivanleomk
  anmol:
    name: Anmol Jawandha
    description: Contributor
    avatar: https://pbs.twimg.com/profile_images/1248544843556466693/PgxUIeBs_400x400.jpg
  joschkabraun:
    name: Joschka Braun
    description: Contributor
    avatar: https://pbs.twimg.com/profile_images/1601251353531224065/PYpqKsjL_400x400.jpg
    url: https://twitter.com/joschkabraun
  sarahchieng:
    name: Sarah Chieng
    description: Contributor
    avatar: https://pbs.twimg.com/profile_images/1755455116595834880/Hxh5ceRZ_400x400.jpg
    url: https://twitter.com/sarahchieng
  zilto:
    name: Thierry Jean
    description: Contributor
    avatar: https://avatars.githubusercontent.com/u/68975210?v=4
    url: https://www.linkedin.com/in/thierry-jean/
  yanomaly:
    name: Yan
    description: Contributor
    avatar: https://avatars.githubusercontent.com/u/87994542?v=4
--------------------------------------------------------------------------------
/docs/blog/posts/aisummit-2023.md:
--------------------------------------------------------------------------------
---
authors:
  - jxnl
categories:
  - Pydantic
comments: true
date: 2023-11-02
description: Explore insights on utilizing Pydantic for effective prompt engineering
  in this AI Engineer Summit keynote.
draft: false
tags:
  - Pydantic
  - Prompt Engineering
  - AI Summit
  - Machine Learning
  - Data Validation
---

# AI Engineer Keynote: Pydantic is all you need

[![Pydantic is all you need](https://img.youtube.com/vi/yj-wSRJwrrc/0.jpg)](https://www.youtube.com/watch?v=yj-wSRJwrrc)

[Click here to watch the full talk](https://www.youtube.com/watch?v=yj-wSRJwrrc)

Last month, I ventured back onto the speaking circuit at the inaugural [AI Engineer Summit](https://www.ai.engineer/summit), sharing insights on leveraging [Pydantic](https://docs.pydantic.dev/latest/) for effective prompt engineering. I dove deep into what is covered in our documentation and standard blog posts.

I'd genuinely appreciate any feedback on the talk – every bit helps in refining the art. So, take a moment to check out the [full talk here](https://youtu.be/yj-wSRJwrrc?si=vGMIqtTapbIN8SLz), and let's continue pushing the boundaries of what's possible.
--------------------------------------------------------------------------------
/docs/blog/posts/course.md:
--------------------------------------------------------------------------------
---
authors:
  - jxnl
categories:
  - OpenAI
comments: true
date: 2024-02-14
description: Discover a free one-hour course on Weights and Biases covering essential
  techniques for language models.
draft: false
slug: weights-and-biases-course
tags:
  - Weights and Biases
  - AI course
  - machine learning
  - language models
  - free resources
---

# Free course on Weights and Biases

I just released a free course on Weights and Biases. It goes over the material from the [tutorial](../../tutorials/1-introduction.ipynb). Check it out at [wandb.courses](https://www.wandb.courses/courses/steering-language-models); it's free, open to everyone, and just under an hour long!

[![](img/course.png)](https://www.wandb.courses/courses/steering-language-models)

> Click the image to access the course
--------------------------------------------------------------------------------
/docs/blog/posts/img/ (binary assets):
--------------------------------------------------------------------------------
The blog's image assets (the .png, .gif, .webp, and .jpeg files under
docs/blog/posts/img/, including the parea/ and youtube-flashcards/ subfolders,
as listed in the tree above) are not inlined in this snapshot. Each file
resolves to
https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/blog/posts/img/<filename>
--------------------------------------------------------------------------------
/docs/concepts/alias.md:
--------------------------------------------------------------------------------
---
title: Pydantic Aliases Overview
description: Explore the concept of aliases in Pydantic. Discover the latest documentation and features for better data validation.
---

!!! warning "This page is a work in progress"

    This page is a work in progress. Check out [Pydantic's documentation](https://docs.pydantic.dev/latest/concepts/alias/)
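
In the meantime, here is a minimal sketch of the core idea, using plain Pydantic (nothing here is Instructor-specific, and the field names are illustrative):

```python
from pydantic import BaseModel, Field


class User(BaseModel):
    # The incoming payload uses "Name"; the Python attribute stays snake_case.
    name: str = Field(alias="Name")


print(User.model_validate({"Name": "Jason"}))  # name='Jason'
```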
Check out [Pydantic's documentation](https://docs.pydantic.dev/latest/concepts/alias/) 9 | -------------------------------------------------------------------------------- /docs/concepts/enums.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Using Enums and Literals in Pydantic for Role Management 3 | description: Learn how to implement Enums and Literals in Pydantic to manage standardized user roles with a fallback option. 4 | --- 5 | 6 | To prevent data misalignment, we can use Enums for standardized fields. Always include an "Other" option as a fallback so the model can signal uncertainty. 7 | 8 | ```python hl_lines="7 12" 9 | from pydantic import BaseModel, Field 10 | from enum import Enum 11 | 12 | 13 | class Role(Enum): 14 | PRINCIPAL = "PRINCIPAL" 15 | TEACHER = "TEACHER" 16 | STUDENT = "STUDENT" 17 | OTHER = "OTHER" 18 | 19 | 20 | class UserDetail(BaseModel): 21 | age: int 22 | name: str 23 | role: Role = Field( 24 | description="Correctly assign one of the predefined roles to the user." 25 | ) 26 | ``` 27 | 28 | If you're having a hard time with `Enum`, an alternative is to use `Literal` instead. 29 | 30 | ```python hl_lines="4" 31 | from typing import Literal 32 | from pydantic import BaseModel 33 | 34 | 35 | class UserDetail(BaseModel): 36 | age: int 37 | name: str 38 | role: Literal["PRINCIPAL", "TEACHER", "STUDENT", "OTHER"] 39 | ``` 40 | -------------------------------------------------------------------------------- /docs/concepts/response.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/concepts/response.png -------------------------------------------------------------------------------- /docs/concepts/typeadapter.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Pydantic Type Adapter Overview 3 | description: Explore the ongoing updates of Pydantic's Type Adapter concepts and access the official documentation. 4 | --- 5 | 6 | !!! warning "This page is a work in progress" 7 | 8 | This page is a work in progress. Check out [Pydantic's documentation](https://docs.pydantic.dev/latest/concepts/type_adapter/) 9 | -------------------------------------------------------------------------------- /docs/concepts/typeddicts.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Using TypedDicts with OpenAI API 3 | description: Learn how to utilize TypedDicts in Python with the OpenAI API for structured data responses. 4 | --- 5 | 6 | # TypedDicts 7 | 8 | We also support typed dicts.
9 | 10 | ```python 11 | from typing_extensions import TypedDict 12 | from openai import OpenAI 13 | import instructor 14 | 15 | 16 | class User(TypedDict): 17 | name: str 18 | age: int 19 | 20 | 21 | client = instructor.from_openai(OpenAI()) 22 | 23 | 24 | response = client.chat.completions.create( 25 | model="gpt-3.5-turbo", 26 | response_model=User, 27 | messages=[ 28 | { 29 | "role": "user", 30 | "content": "Timothy is a man from New York who is turning 32 this year", 31 | } 32 | ], 33 | ) 34 | ``` -------------------------------------------------------------------------------- /docs/concepts/union.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Using Union Types in Pydantic Models 3 | description: Learn how to implement Union types in Pydantic models to handle multiple action types in Python. 4 | --- 5 | 6 | !!! note "Redirect Notice" 7 | This page has been consolidated into the comprehensive [Union Types](./unions.md) guide. 8 | Please visit that page for complete information about working with union types in Instructor. 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /docs/examples/db.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/examples/db.png -------------------------------------------------------------------------------- /docs/examples/entity_resolution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/examples/entity_resolution.png -------------------------------------------------------------------------------- /docs/examples/groq.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'Using Groq for Inference: Setup and Example' 3 | description: Learn how to use Groq for inference with the mixtral-8x7b model, including API setup and a practical Python example. 4 | --- 5 | 6 | # Structured Outputs using Groq 7 | Instead of using openai or anthropic, you can now also use groq for inference by using `from_groq`. 8 | 9 | The examples use the mixtral-8x7b model. 10 | 11 | ## GroqCloud API 12 | To use groq, you need to obtain a groq API key. 13 | Go to [groqcloud](https://console.groq.com) and log in. Select API Keys from the left menu and then select Create API key to create a new key.
14 | 15 | ## Usage example 16 | Install the required pip packages to run the example: 17 | ``` 18 | pip install instructor groq pydantic 19 | ``` 20 | You need to export the groq API key: 21 | ``` 22 | export GROQ_API_KEY= 23 | ``` 24 | 25 | An example: 26 | ```python 27 | import os 28 | from pydantic import BaseModel, Field 29 | from typing import List 30 | from groq import Groq 31 | import instructor 32 | 33 | 34 | class Character(BaseModel): 35 | name: str 36 | fact: List[str] = Field(..., description="A list of facts about the subject") 37 | 38 | 39 | client = Groq( 40 | api_key=os.environ.get('GROQ_API_KEY'), 41 | ) 42 | 43 | client = instructor.from_groq(client, mode=instructor.Mode.TOOLS) 44 | 45 | resp = client.chat.completions.create( 46 | model="mixtral-8x7b-32768", 47 | messages=[ 48 | { 49 | "role": "user", 50 | "content": "Tell me about the company Tesla", 51 | } 52 | ], 53 | response_model=Character, 54 | ) 55 | print(resp.model_dump_json(indent=2)) 56 | """ 57 | { 58 | "name": "Tesla", 59 | "fact": [ 60 | "electric vehicle manufacturer", 61 | "solar panel producer", 62 | "based in Palo Alto, California", 63 | "founded in 2003 by Elon Musk" 64 | ] 65 | } 66 | """ 67 | ``` 68 | You can find another example, groq_example2.py, under examples/groq in this repository. 69 | -------------------------------------------------------------------------------- /docs/examples/knowledge_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/examples/knowledge_graph.png -------------------------------------------------------------------------------- /docs/examples/mistral.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Using MistralAI for Structured Outputs 3 | description: Learn how to use MistralAI models for inference, including setup, API key generation, and example code. 4 | --- 5 | 6 | # Structured Outputs using Mistral 7 | You can now also use mistralai models for inference by using `from_mistral`. 8 | 9 | The examples use mistral-large-latest. 10 | 11 | ## MistralAI API 12 | To use mistral, you need to obtain a mistral API key. 13 | Go to [mistralai](https://mistral.ai/), click on Build Now, and log in. Select API Keys from the left menu and then select 14 | Create API key to create a new key.
15 | 16 | ## Usage example 17 | Install the required pip packages to run the example: 18 | ``` 19 | pip install instructor mistralai pydantic 20 | ``` 21 | You need to export the mistral API key: 22 | ``` 23 | export MISTRAL_API_KEY= 24 | ``` 25 | 26 | An example: 27 | ```python 28 | import os 29 | from pydantic import BaseModel 30 | from mistralai import Mistral 31 | from instructor import from_mistral, Mode 32 | 33 | 34 | class UserDetails(BaseModel): 35 | name: str 36 | age: int 37 | 38 | 39 | # enables `response_model` in chat call 40 | client = Mistral(api_key=os.environ.get("MISTRAL_API_KEY")) 41 | 42 | instructor_client = from_mistral( 43 | client=client, 44 | model="mistral-large-latest", 45 | mode=Mode.MISTRAL_TOOLS, 46 | max_tokens=1000, 47 | ) 48 | 49 | resp = instructor_client.messages.create( 50 | response_model=UserDetails, 51 | messages=[{"role": "user", "content": "Jason is 10"}], 52 | temperature=0, 53 | ) 54 | 55 | print(resp) 56 | #> name='Jason' age=10 57 | 58 | # output: UserDetails(name='Jason', age=10) 59 | ``` 60 | -------------------------------------------------------------------------------- /docs/examples/open_source.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Open Source Model Providers for Chat API 3 | description: Explore tested open source models compatible with the OpenAI chat API, including OpenRouter, Perplexity, and RunPod LLMs. 4 | --- 5 | 6 | # Instructor with open source models 7 | Instructor works with open source model providers that support the [OpenAI API chat endpoint](https://platform.openai.com/docs/api-reference/chat). 8 | 9 | See the examples README [here](https://github.com/jxnl/instructor/tree/main/examples/open_source_examples) 10 | 11 | # Currently tested open source model providers 12 | - [OpenRouter](https://openrouter.ai/) 13 | - [Perplexity](https://www.perplexity.ai/) 14 | - [RunPod TheBloke LLMs](https://github.com/TheBlokeAI/dockerLLM/blob/main/README_Runpod_LocalLLMsUI.md) ** 15 | 16 | 17 | ** This utilizes text-generation-webui with the OpenAI plugin under the hood. -------------------------------------------------------------------------------- /docs/help.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Getting Started with Instructor: Help and Resources" 3 | description: Explore key resources for getting help with Instructor, including Discord, blog, concepts, cookbooks, and GitHub discussions. 4 | --- 5 | 6 | # Getting help with Instructor 7 | 8 | If you need help getting started with Instructor or with advanced usage, the following sources may be useful. 9 | 10 | ## :material-discord: Discord 11 | 12 | The [Discord](https://discord.gg/bD9YE9JArw) is a great place to ask questions and get help from the community. 13 | 14 | ## :material-creation: Concepts 15 | 16 | The [concepts](concepts/prompting.md) section explains the core concepts of Instructor and how to prompt with models. 17 | 18 | ## :material-chef-hat: Cookbooks 19 | 20 | The [cookbooks](examples/index.md) are a great place to start. They contain a variety of examples that demonstrate how to use Instructor in different scenarios. 21 | 22 | ## :material-book: Blog 23 | 24 | The [blog](blog/index.md) contains articles that explain how to use Instructor in different scenarios.
25 | 26 | ## :material-github: GitHub Discussions 27 | 28 | [GitHub discussions](https://github.com/jxnl/instructor/discussions) are useful for asking questions; both your question and the answer will help everyone. 29 | 30 | ## :material-github: GitHub Issues 31 | 32 | [GitHub issues](https://github.com/jxnl/instructor/issues) are useful for reporting bugs or requesting new features. 33 | 34 | ## :material-twitter: Twitter 35 | 36 | You can also reach out to me on [Twitter](https://twitter.com/jxnlco) if you have any questions or ideas. 37 | -------------------------------------------------------------------------------- /docs/hooks/hide_lines.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | import mkdocs.plugins 3 | from pymdownx import highlight # type: ignore 4 | 5 | 6 | @mkdocs.plugins.event_priority(0) 7 | # pylint: disable=unused-argument 8 | def on_startup(command: str, dirty: bool) -> None: # noqa: ARG001 9 | """Monkey patch Highlight extension to hide lines in code blocks.""" 10 | original = highlight.Highlight.highlight # type: ignore 11 | 12 | def patched(self: Any, src: str, *args: Any, **kwargs: Any) -> Any: 13 | lines = src.splitlines(keepends=True) 14 | 15 | final_lines = [] 16 | 17 | remove_lines = False 18 | for line in lines: 19 | if line.strip() == "# <%hide%>": 20 | remove_lines = not remove_lines 21 | elif not remove_lines: 22 | final_lines.append(line) 23 | 24 | return original(self, "".join(final_lines), *args, **kwargs) 25 | 26 | highlight.Highlight.highlight = patched 27 | -------------------------------------------------------------------------------- /docs/img/action_items.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/action_items.png -------------------------------------------------------------------------------- /docs/img/analogical_prompting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/analogical_prompting.png -------------------------------------------------------------------------------- /docs/img/cosp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/cosp.png -------------------------------------------------------------------------------- /docs/img/cosp_entropy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/cosp_entropy.png -------------------------------------------------------------------------------- /docs/img/cosp_redundancy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/cosp_redundancy.png -------------------------------------------------------------------------------- /docs/img/error2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/error2.png --------------------------------------------------------------------------------
/docs/img/faithful_cot_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/faithful_cot_example.png -------------------------------------------------------------------------------- /docs/img/ide_support.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/ide_support.png -------------------------------------------------------------------------------- /docs/img/more.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/more.png -------------------------------------------------------------------------------- /docs/img/mrr_eqn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/mrr_eqn.png -------------------------------------------------------------------------------- /docs/img/mutual_information.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/mutual_information.png -------------------------------------------------------------------------------- /docs/img/partial.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/partial.gif -------------------------------------------------------------------------------- /docs/img/partial_streaming.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/partial_streaming.gif -------------------------------------------------------------------------------- /docs/img/plan_and_solve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/plan_and_solve.png -------------------------------------------------------------------------------- /docs/img/pot.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/pot.jpeg -------------------------------------------------------------------------------- /docs/img/recall_eqn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/recall_eqn.png -------------------------------------------------------------------------------- /docs/img/retriever.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/retriever.png -------------------------------------------------------------------------------- /docs/img/universal_self_adaptive_prompting.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/universal_self_adaptive_prompting.png -------------------------------------------------------------------------------- /docs/img/universal_self_consistency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/universal_self_consistency.png -------------------------------------------------------------------------------- /docs/img/youtube.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/img/youtube.gif -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Installing Instructor with Pip 3 | description: Learn how to install Instructor and its dependencies using pip for Python 3.9+. Simple setup guide included. 4 | --- 5 | 6 | Installation is as simple as: 7 | 8 | ```bash 9 | pip install instructor 10 | ``` 11 | 12 | Instructor has a few dependencies: 13 | 14 | - [`openai`](https://pypi.org/project/openai/): OpenAI's Python client. 15 | - [`typer`](https://pypi.org/project/typer/): Build great CLIs. Easy to code. Based on Python type hints. 16 | - [`docstring-parser`](https://pypi.org/project/docstring-parser/): A parser for Python docstrings, to improve the experience of working with docstrings in jsonschema. 17 | - [`pydantic`](https://pypi.org/project/pydantic/): Data validation and settings management using python type annotations. 18 | 19 | If you've got Python 3.9+ and `pip` installed, you're good to go. 20 | -------------------------------------------------------------------------------- /docs/javascripts/katex.js: -------------------------------------------------------------------------------- 1 | document$.subscribe(({ body }) => { 2 | renderMathInElement(body, { 3 | delimiters: [ 4 | { left: "$$", right: "$$", display: true }, 5 | { left: "$", right: "$", display: false }, 6 | { left: "\\(", right: "\\)", display: false }, 7 | { left: "\\[", right: "\\]", display: true } 8 | ], 9 | }) 10 | }) -------------------------------------------------------------------------------- /docs/jobs.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/docs/jobs.md -------------------------------------------------------------------------------- /docs/learning/index.md: -------------------------------------------------------------------------------- 1 | ## Structured Outputs by Example 2 | 3 | This section contains straightforward examples for using Instructor to extract structured data from language models. Each example progresses from basic to more advanced concepts, with clear Python code and minimal complexity. 
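As a taste of what follows, here is a minimal sketch of a structured extraction (the `User` model and prompt are illustrative; it assumes an OpenAI API key is configured):

```python
import instructor
from openai import OpenAI
from pydantic import BaseModel


class User(BaseModel):
    name: str
    age: int


# Patch the OpenAI client so `response_model` is available
client = instructor.from_openai(OpenAI())

user = client.chat.completions.create(
    model="gpt-4o",
    response_model=User,
    messages=[{"role": "user", "content": "Jason is 25 years old"}],
)
print(user)
#> name='Jason' age=25
```

Each page linked below builds on this basic pattern.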
4 | 5 | ## [Getting Started](#getting-started) 6 | * [Installation](getting_started/installation.md) - Setting up Instructor 7 | * [Your First Extraction](getting_started/first_extraction.md) - Create your first structured output 8 | * [Response Models](getting_started/response_models.md) - Understanding model types 9 | * [Client Setup](getting_started/client_setup.md) - Configure for different providers 10 | ## [Basic Extraction Patterns](#basic-extraction-patterns) 11 | * [Simple Object Extraction](patterns/simple_object.md) - Extract basic objects 12 | * [List Extraction](patterns/list_extraction.md) - Extract lists of items 13 | * [Nested Structure](patterns/nested_structure.md) - Work with nested data 14 | * [Optional Fields](patterns/optional_fields.md) - Handle missing information 15 | * [Field Validation](patterns/field_validation.md) - Add basic validation 16 | * [Prompt Templates](patterns/prompt_templates.md) - Improve extraction with templates 17 | ## [Validation](#validation) 18 | * [Validation Basics](validation/basics.md) - Core validation concepts 19 | * [Field-level Validation](validation/field_level_validation.md) - Validate specific fields 20 | * [Custom Validators](validation/custom_validators.md) - Create your own validators 21 | * [Retry Mechanisms](validation/retry_mechanisms.md) - Handle validation failures 22 | ## [Streaming](#streaming) 23 | * [Streaming Basics](streaming/basics.md) - Get results as they generate 24 | * [Streaming Lists](streaming/lists.md) - Stream collections of data -------------------------------------------------------------------------------- /docs/newsletter.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Subscribe to Instructor Newsletter for AI Updates 3 | description: Get notified about AI tips, blog posts, and research. Stay informed with Instructor's latest features and community insights. 4 | --- 5 | 6 | # Instructor Newsletter 7 | 8 | If you want to be notified of tips, new blog posts, and research, subscribe to our newsletter. Here's what you can expect: 9 | 10 | - Updates on Instructor features and releases 11 | - Blog posts on AI and structured outputs 12 | - Tips and tricks from our community 13 | - Research in the field of LLMs and structured outputs 14 | - Information on AI development skills with Instructor 15 | 16 | Subscribe to our newsletter for updates on AI development. We provide content to keep you informed and help you use Instructor in projects. 
17 | 18 | 19 | -------------------------------------------------------------------------------- /docs/prompting/decomposition/recurs_of_thought.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "" 3 | description: "" 4 | keywords: "" 5 | --- 6 | 7 | [wip] 8 | -------------------------------------------------------------------------------- /docs/prompting/decomposition/tree-of-thought.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "" 3 | description: "" 4 | keywords: "" 5 | --- 6 | 7 | [wip] 8 | -------------------------------------------------------------------------------- /docs/prompting/few_shot/exemplar_selection/vote_k.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "" 3 | description: "" 4 | keywords: "" 5 | --- 6 | 7 | [wip] 8 | -------------------------------------------------------------------------------- /docs/prompting/thought_generation/chain_of_thought_few_shot/memory_of_thought.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "" 3 | description: "" 4 | keywords: "" 5 | --- 6 | 7 | [wip] 8 | -------------------------------------------------------------------------------- /docs/prompting/zero_shot/re2.md: -------------------------------------------------------------------------------- 1 | --- 2 | description: "Re2 (Re-Reading) is a technique that asks the model to read the question again." 3 | --- 4 | 5 | How can we enhance a model's understanding of a query? 6 | 7 | Re2 (**Re**-**R**eading) is a technique that asks the model to read the question again. 8 | 9 | !!! example "Re-Reading Prompting" 10 | **Prompt Template**: Read the question again: <*query*> <*critical thinking prompt*> <sup>1</sup> 11 | 12 | A common critical thinking prompt is: "Let's think step by step." 13 | 14 | ## Implementation 15 | 16 | ```python hl_lines="20" 17 | import instructor 18 | from openai import OpenAI 19 | from pydantic import BaseModel 20 | 21 | 22 | client = instructor.from_openai(OpenAI()) 23 | 24 | 25 | class Response(BaseModel): 26 | answer: int 27 | 28 | 29 | def re2(query, thinking_prompt): 30 | return client.chat.completions.create( 31 | model="gpt-4o", 32 | response_model=Response, 33 | messages=[ 34 | { 35 | "role": "system", 36 | "content": f"Read the question again: {query} {thinking_prompt}", 37 | }, 38 | ], 39 | ) 40 | 41 | 42 | if __name__ == "__main__": 43 | query = """Roger has 5 tennis balls. 44 | He buys 2 more cans of tennis balls. 45 | Each can has 3 tennis balls. 46 | How many tennis balls does he have now? 47 | """ 48 | thinking_prompt = "Let's think step by step." 49 | 50 | response = re2(query=query, thinking_prompt=thinking_prompt) 51 | print(response.answer) 52 | #> 11 53 | ``` 54 | 55 | ## References 56 | 57 | <sup>1</sup>: [Re-Reading Improves Reasoning in Large Language Models](https://arxiv.org/abs/2309.06275) 58 | -------------------------------------------------------------------------------- /docs/repository-overview.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Repository Overview 3 | description: Learn the structure of the Instructor repository and the purpose of each major directory. 4 | --- 5 | 6 | # Repository Overview 7 | 8 | This page explains the layout of the Instructor codebase and what each key directory contains.
9 | 10 | ## Directory Summary 11 | 12 | ### `instructor/` 13 | Core library with clients, adapters, and utilities for structured outputs. 14 | 15 | ### `cli/` 16 | Command-line interface code used for tasks like job management and usage tracking. 17 | 18 | ### `docs/` 19 | Documentation source files for the website built with MkDocs. 20 | 21 | ### `examples/` 22 | Practical examples and cookbooks demonstrating how to use Instructor. 23 | 24 | ### `tests/` 25 | Test suite and evaluation scripts ensuring the library functions correctly. 26 | 27 | -------------------------------------------------------------------------------- /ellipsis.yaml: -------------------------------------------------------------------------------- 1 | # Reference: https://docs.ellipsis.dev 2 | version: 1.1 3 | pr_review: 4 | auto_review_enabled: true 5 | auto_summarize_pr: true 6 | confidence_threshold: 0.85 7 | rules: 8 | # Control what gets flagged during PR review with custom rules. Here are some to get you started: 9 | - "Code should be DRY (Don't Repeat Yourself)" 10 | - "Extremely Complicated Code Needs Comments" 11 | - "Use Descriptive Variable and Constant Names" 12 | - "Function and Method Naming Should Follow Consistent Patterns" 13 | - "If library code changes, expect documentation to be updated" 14 | - "If library code changes, check if tests are updated" 15 | - "If a new `md` file is created in `docs` make sure it's added to mkdocs.yml" 16 | - "Assertions should always have a well-formatted error message." 17 | - "Make sure hub examples are added to mkdocs.yml" 18 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/__init__.py -------------------------------------------------------------------------------- /examples/anthropic-web-tool/run.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | from pydantic import BaseModel 3 | 4 | 5 | # Notice that we use JSON mode, not TOOLS mode 6 | client = instructor.from_provider( 7 | "anthropic/claude-3-7-sonnet-latest", 8 | mode=instructor.Mode.ANTHROPIC_JSON, 9 | async_client=False, 10 | ) 11 | 12 | 13 | class Citation(BaseModel): 14 | id: int 15 | url: str 16 | 17 | 18 | class Response(BaseModel): 19 | citations: list[Citation] 20 | response: str 21 | 22 | 23 | response_data, completion_details = client.messages.create_with_completion( 24 | messages=[ 25 | { 26 | "role": "system", 27 | "content": "You are a helpful assistant that summarizes news articles. Your final response should only contain a single JSON object returned in your final message to the user. Make sure to provide the exact ids for the citations that support the information you provide in the form of inline citations as [1] [2] [3] which correspond to a unique id you generate for a url that you find in the web search tool which is relevant to your final response.", 28 | }, 29 | { 30 | "role": "user", 31 | "content": "What are the latest results for the UFC and who won?
Answer this in a concise response that's under 3 sentences.", 32 | }, 33 | ], 34 | tools=[{"type": "web_search_20250305", "name": "web_search", "max_uses": 3}], 35 | response_model=Response, 36 | ) 37 | 38 | print("Response:") 39 | print(response_data.response) 40 | print("\nCitations:") 41 | for citation in response_data.citations: 42 | print(f"{citation.id}: {citation.url}") 43 | -------------------------------------------------------------------------------- /examples/anthropic/run.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | import anthropic 3 | import instructor 4 | 5 | # Patching the Anthropic client with instructor for enhanced capabilities 6 | client = instructor.from_anthropic(anthropic.Anthropic()) 7 | 8 | 9 | class Properties(BaseModel): 10 | key: str 11 | value: str 12 | 13 | 14 | class User(BaseModel): 15 | name: str 16 | age: int 17 | properties: list[Properties] 18 | 19 | 20 | user = client.messages.create( 21 | model="claude-3-haiku-20240307", 22 | max_tokens=1024, 23 | max_retries=0, 24 | messages=[ 25 | { 26 | "role": "user", 27 | "content": "Create a user for a model with a name, age, and properties.", 28 | } 29 | ], 30 | response_model=User, 31 | ) 32 | 33 | print(user.model_dump_json(indent=2)) 34 | -------------------------------------------------------------------------------- /examples/auto-ticketer/tasks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/auto-ticketer/tasks.png -------------------------------------------------------------------------------- /examples/caching/lru.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | from openai import OpenAI 3 | from pydantic import BaseModel 4 | import functools 5 | 6 | client = instructor.from_openai(OpenAI()) 7 | 8 | 9 | class UserDetail(BaseModel): 10 | name: str 11 | age: int 12 | 13 | 14 | @functools.lru_cache 15 | def extract(data): 16 | return client.chat.completions.create( 17 | model="gpt-3.5-turbo", 18 | response_model=UserDetail, 19 | messages=[ 20 | {"role": "user", "content": data}, 21 | ], 22 | ) 23 | 24 | 25 | def test_extract(): 26 | import time 27 | 28 | start = time.perf_counter() 29 | model = extract("Extract jason is 25 years old") 30 | assert model.name.lower() == "jason" 31 | assert model.age == 25 32 | print(f"Time taken: {time.perf_counter() - start}") 33 | 34 | start = time.perf_counter() 35 | model = extract("Extract jason is 25 years old") 36 | assert model.name.lower() == "jason" 37 | assert model.age == 25 38 | print(f"Time taken: {time.perf_counter() - start}") 39 | 40 | 41 | if __name__ == "__main__": 42 | test_extract() 43 | # Time taken: 0.9267581660533324 44 | # Time taken: 1.2080417945981026e-06 45 | -------------------------------------------------------------------------------- /examples/chain-of-density/finetune.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | from chain_of_density import summarize_article 3 | import csv 4 | import logging 5 | import instructor 6 | from pydantic import BaseModel, Field 7 | 8 | logging.basicConfig(level=logging.INFO) 9 | 10 | client = instructor.from_openai(OpenAI()) 11 | 12 | instructions = instructor.Instructions( 13 | name="Chain Of Density", 14 | finetune_format="messages", 15 | # log handler is
used to save the data to a file 16 | # you can imagine saving it to a database or other storage 17 | # based on your needs! 18 | log_handlers=[logging.FileHandler("generated.jsonl")], 19 | openai_client=client, 20 | ) 21 | 22 | 23 | class GeneratedSummary(BaseModel): 24 | """ 25 | This represents a highly concise summary that includes as many entities as possible from the original source article. 26 | 27 | An Entity is a real-world object that's assigned a name - for example, a person, a country, a product, or a book title. 28 | 29 | Guidelines 30 | - Make every word count 31 | - The new summary should be highly dense and concise yet self-contained, e.g., easily understood without the Article. 32 | - Make space with fusion, compression, and removal of uninformative phrases like "the article discusses" 33 | """ 34 | 35 | summary: str = Field( 36 | ..., 37 | description="This represents the final summary generated that captures the meaning of the original article while being as concise as possible.", 38 | ) 39 | 40 | 41 | @instructions.distil 42 | def distil_summarization(text: str) -> GeneratedSummary: 43 | summary_chain: list[str] = summarize_article(text) 44 | return GeneratedSummary(summary=summary_chain[-1]) 45 | 46 | 47 | with open("test.csv") as file: 48 | reader = csv.reader(file) 49 | next(reader) # Skip the header 50 | for article, _summary in reader: 51 | distil_summarization(article) 52 | -------------------------------------------------------------------------------- /examples/chain-of-density/requirements.txt: -------------------------------------------------------------------------------- 1 | openai 2 | pydantic 3 | instructor 4 | nltk 5 | rich -------------------------------------------------------------------------------- /examples/citation_with_extraction/Dockerfile: -------------------------------------------------------------------------------- 1 | # https://hub.docker.com/_/python 2 | FROM python:3.10-slim-bullseye 3 | 4 | ENV PYTHONUNBUFFERED True 5 | ENV APP_HOME /app 6 | WORKDIR $APP_HOME 7 | COPY requirements.txt ./ 8 | RUN pip install -r requirements.txt 9 | 10 | 11 | COPY . 
./ 12 | 13 | 14 | CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"] -------------------------------------------------------------------------------- /examples/citation_with_extraction/diagram.py: -------------------------------------------------------------------------------- 1 | import erdantic as erd 2 | 3 | from citation_fuzzy_match import QuestionAnswer 4 | 5 | diagram = erd.create(QuestionAnswer) 6 | diagram.draw("examples/citation_fuzzy_match/schema.png") 7 | -------------------------------------------------------------------------------- /examples/citation_with_extraction/modal_main.py: -------------------------------------------------------------------------------- 1 | from main import app 2 | import modal 3 | 4 | stub = modal.Stub("rag-citation") 5 | 6 | image = modal.Image.debian_slim().pip_install("fastapi", "instructor>=0.2.1", "regex") 7 | 8 | 9 | @stub.function(image=image) 10 | @modal.asgi_app() 11 | def fastapi_app(): 12 | return app 13 | -------------------------------------------------------------------------------- /examples/citation_with_extraction/requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi 2 | uvicorn 3 | openai>=1.0.0 4 | pydantic 5 | instructor 6 | regex -------------------------------------------------------------------------------- /examples/citation_with_extraction/schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/citation_with_extraction/schema.png -------------------------------------------------------------------------------- /examples/classification/multi_prediction.py: -------------------------------------------------------------------------------- 1 | import enum 2 | import instructor 3 | 4 | from openai import OpenAI 5 | from pydantic import BaseModel 6 | 7 | client = instructor.from_openai(OpenAI()) 8 | 9 | 10 | # Define new Enum class for multiple labels 11 | class MultiLabels(str, enum.Enum): 12 | BILLING = "billing" 13 | GENERAL_QUERY = "general_query" 14 | HARDWARE = "hardware" 15 | 16 | 17 | # Adjust the prediction model to accommodate a list of labels 18 | class MultiClassPrediction(BaseModel): 19 | predicted_labels: list[MultiLabels] 20 | 21 | 22 | # Modify the classify function 23 | def multi_classify(data: str) -> MultiClassPrediction: 24 | return client.chat.completions.create( 25 | model="gpt-3.5-turbo-0613", 26 | response_model=MultiClassPrediction, 27 | messages=[ 28 | { 29 | "role": "user", 30 | "content": f"Classify the following support ticket: {data}", 31 | }, 32 | ], 33 | ) # type: ignore 34 | 35 | 36 | # Example using a support ticket 37 | ticket = ( 38 | "My account is locked and I can't access my billing info. Phone is also broken." 
39 | ) 40 | prediction = multi_classify(ticket) 41 | print(prediction) 42 | -------------------------------------------------------------------------------- /examples/classification/simple_prediction.py: -------------------------------------------------------------------------------- 1 | import enum 2 | import instructor 3 | from openai import OpenAI 4 | 5 | from pydantic import BaseModel 6 | 7 | client = instructor.from_openai(OpenAI()) 8 | 9 | 10 | class Labels(str, enum.Enum): 11 | SPAM = "spam" 12 | NOT_SPAM = "not_spam" 13 | 14 | 15 | class SinglePrediction(BaseModel): 16 | """ 17 | Correct class label for the given text 18 | """ 19 | 20 | class_label: Labels 21 | 22 | 23 | def classify(data: str) -> SinglePrediction: 24 | return client.chat.completions.create( 25 | model="gpt-3.5-turbo-0613", 26 | response_model=SinglePrediction, 27 | messages=[ 28 | { 29 | "role": "user", 30 | "content": f"Classify the following text: {data}", 31 | }, 32 | ], 33 | ) # type: ignore 34 | 35 | 36 | prediction = classify("Hello there I'm a nigerian prince and I want to give you money") 37 | assert prediction.class_label == Labels.SPAM 38 | -------------------------------------------------------------------------------- /examples/codegen-from-schema/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "type": "object", 4 | "title": "ExtractPerson", 5 | "properties": { 6 | "name": { 7 | "type": "string" 8 | }, 9 | "age": { 10 | "type": "integer" 11 | }, 12 | "phoneNumbers": { 13 | "type": "array", 14 | "items": { 15 | "type": "object", 16 | "properties": { 17 | "type": { 18 | "type": "string", 19 | "enum": ["home", "work", "mobile"] 20 | }, 21 | "number": { 22 | "type": "string" 23 | } 24 | }, 25 | "required": ["type", "number"] 26 | } 27 | } 28 | }, 29 | "required": ["name", "age", "phoneNumbers"] 30 | } 31 | -------------------------------------------------------------------------------- /examples/codegen-from-schema/models.py: -------------------------------------------------------------------------------- 1 | # generated by datamodel-codegen: 2 | # filename: input.json 3 | # timestamp: 2023-09-10T00:33:42+00:00 4 | 5 | from __future__ import annotations 6 | 7 | from enum import Enum 8 | 9 | from pydantic import BaseModel 10 | 11 | 12 | class Type(Enum): 13 | home = "home" 14 | work = "work" 15 | mobile = "mobile" 16 | 17 | 18 | class PhoneNumber(BaseModel): 19 | type: Type 20 | number: str 21 | 22 | 23 | class ExtractPerson(BaseModel): 24 | name: str 25 | age: int 26 | phoneNumbers: list[PhoneNumber] 27 | -------------------------------------------------------------------------------- /examples/codegen-from-schema/readme.md: -------------------------------------------------------------------------------- 1 | # FastAPI Code Generator 2 | 3 | ## Overview 4 | 5 | Generates FastAPI application code from API path, task name, JSON schema path, and Jinja2 prompt template. Also creates a `models.py` file for Pydantic models. 6 | 7 | ## Dependencies 8 | 9 | - FastAPI 10 | - Pydantic 11 | - Jinja2 12 | - datamodel-code-generator 13 | 14 | ## Functions 15 | 16 | ### `create_app(api_path: str, task_name: str, json_schema_path: str, prompt_template: str) -> str` 17 | 18 | Main function to generate FastAPI application code. 19 | 20 | ## Usage 21 | 22 | Run the script with required parameters. 
23 | 24 | Example: 25 | 26 | ```python 27 | fastapi_code = create_app( 28 | api_path="/api/v1/extract_person", 29 | task_name="extract_person", 30 | json_schema_path="./input.json", 31 | prompt_template="Extract the person from the following: {{biography}}", 32 | ) 33 | ``` 34 | 35 | Outputs FastAPI application code to `./run.py` and a Pydantic model to `./models.py`. -------------------------------------------------------------------------------- /examples/codegen-from-schema/run.py: -------------------------------------------------------------------------------- 1 | # This file was generated by instructor 2 | # timestamp: 2023-09-09T20:33:42.572627 3 | # task_name: extract_person 4 | # api_path: /api/v1/extract_person 5 | # json_schema_path: ./input.json 6 | 7 | import instructor 8 | 9 | from fastapi import FastAPI 10 | from pydantic import BaseModel 11 | from jinja2 import Template 12 | from models import ExtractPerson 13 | from openai import AsyncOpenAI 14 | 15 | aclient = instructor.apatch(AsyncOpenAI()) 16 | 17 | app = FastAPI() 18 | 19 | 20 | class TemplateVariables(BaseModel): 21 | biography: str 22 | 23 | 24 | class RequestSchema(BaseModel): 25 | template_variables: TemplateVariables 26 | model: str 27 | temperature: int 28 | 29 | 30 | PROMPT_TEMPLATE = Template( 31 | """Extract the person from the following: {{biography}}""".strip() 32 | ) 33 | 34 | 35 | @app.post("/api/v1/extract_person", response_model=ExtractPerson) 36 | async def extract_person(input: RequestSchema) -> ExtractPerson: 37 | rendered_prompt = PROMPT_TEMPLATE.render(**input.template_variables.model_dump()) 38 | return await aclient.chat.completions.create( 39 | model=input.model, 40 | temperature=input.temperature, 41 | response_model=ExtractPerson, 42 | messages=[{"role": "user", "content": rendered_prompt}], 43 | ) # type: ignore 44 | -------------------------------------------------------------------------------- /examples/cohere/cohere.py: -------------------------------------------------------------------------------- 1 | import cohere 2 | import instructor 3 | from pydantic import BaseModel, Field 4 | 5 | 6 | # Patching the Cohere client with instructor for enhanced capabilities 7 | client = instructor.from_cohere( 8 | cohere.Client(), 9 | max_tokens=1000, 10 | model="command-r-plus", 11 | ) 12 | 13 | 14 | class Person(BaseModel): 15 | name: str = Field(description="name of the person") 16 | country_of_origin: str = Field(description="country of origin of the person") 17 | 18 | 19 | class Group(BaseModel): 20 | group_name: str = Field(description="name of the group") 21 | members: list[Person] = Field(description="list of members in the group") 22 | 23 | 24 | task = """\ 25 | Given the following text, create a Group object for 'The Beatles' band 26 | 27 | Text: 28 | The Beatles were an English rock band formed in Liverpool in 1960. With a line-up comprising John Lennon, Paul McCartney, George Harrison and Ringo Starr, they are regarded as the most influential band of all time. The group were integral to the development of 1960s counterculture and popular music's recognition as an art form.
29 | """ 30 | group = client.messages.create( 31 | response_model=Group, 32 | messages=[{"role": "user", "content": task}], 33 | temperature=0, 34 | ) 35 | 36 | print(group.model_dump_json(indent=2)) 37 | """ 38 | { 39 | "group_name": "The Beatles", 40 | "members": [ 41 | { 42 | "name": "John Lennon", 43 | "country_of_origin": "England" 44 | }, 45 | { 46 | "name": "Paul McCartney", 47 | "country_of_origin": "England" 48 | }, 49 | { 50 | "name": "George Harrison", 51 | "country_of_origin": "England" 52 | }, 53 | { 54 | "name": "Ringo Starr", 55 | "country_of_origin": "England" 56 | } 57 | ] 58 | } 59 | """ 60 | -------------------------------------------------------------------------------- /examples/distilations/readme.md: -------------------------------------------------------------------------------- 1 | # What to Expect 2 | This script demonstrates how to use the `Instructor` library for fine-tuning a Python function that performs three-digit multiplication. It uses Pydantic for type validation and logging features to generate a fine-tuning dataset. 3 | 4 | ## How to Run 5 | 6 | ### Prerequisites 7 | - Python 3.9 8 | - `Instructor` library 9 | 10 | ### Steps 11 | 1. **Install Dependencies** 12 | If you haven't already installed the required libraries, you can do so using pip: 13 | ``` 14 | pip install instructor pydantic 15 | ``` 16 | 17 | 2. **Set Up Logging** 18 | The script uses Python's built-in `logging` module to log the fine-tuning process. Ensure you have write permissions in the directory where the log file `math_finetunes.jsonl` will be saved. 19 | 20 | 3. **Run the Script** 21 | Navigate to the directory containing `script.py` and run it: 22 | ``` 23 | python three_digit_mul.py 24 | ``` 25 | 26 | This will execute the script, running the function ten times with random three-digit numbers for multiplication. The function outputs and logs are saved in `math_finetunes.jsonl`. 27 | 28 | 4. **Fine-Tuning** 29 | Once you have the log file, you can run a fine-tuning job using the following `Instructor` CLI command: 30 | ``` 31 | instructor jobs create-from-file math_finetunes.jsonl 32 | ``` 33 | Wait for the fine-tuning job to complete. 34 | 35 | If you have validation date you can run: 36 | 37 | ``` 38 | instructor jobs create-from-file math_finetunes.jsonl --n-epochs 4 --validation-file math_finetunes_val.jsonl 39 | ``` 40 | 41 | ### Output 42 | 43 | That's it! You've successfully run the script and can now proceed to fine-tune your model. 44 | 45 | ### Dispatch 46 | 47 | Once you have the model you can replace the model in `three_digit_mul_dispatch.py` with the model you just fine-tuned and run the script again. This time, the script will use the fine-tuned model to predict the output of the function. 
-------------------------------------------------------------------------------- /examples/distilations/three_digit_mul_dispatch.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from pydantic import BaseModel, Field 4 | from instructor import Instructions 5 | import instructor 6 | from openai import OpenAI 7 | 8 | client = instructor.from_openai(OpenAI()) 9 | 10 | logging.basicConfig(level=logging.INFO) 11 | 12 | # Usage 13 | instructions = Instructions( 14 | name="three_digit_multiply", 15 | finetune_format="messages", 16 | include_code_body=True, 17 | log_handlers=[ 18 | logging.FileHandler("math_finetunes.jsonl"), 19 | ], 20 | openai_client=client, 21 | ) 22 | 23 | 24 | class Multiply(BaseModel): 25 | a: int 26 | b: int 27 | result: int = Field(..., description="The result of the multiplication") 28 | 29 | 30 | @instructions.distil(mode="dispatch", model="ft:gpt-3.5-turbo-0125:personal::9i1JeuxJ") 31 | def fn(a: int, b: int) -> Multiply: 32 | """Return the result of the multiplication as an integer""" 33 | resp = a * b 34 | return Multiply(a=a, b=b, result=resp) 35 | 36 | 37 | if __name__ == "__main__": 38 | import random 39 | 40 | for _ in range(5): 41 | a = random.randint(100, 999) 42 | b = random.randint(100, 999) 43 | result = fn(a, b) 44 | print(f"{a} * {b} = {result.result}, expected {a * b}") 45 | """ 46 | 972 * 508 = 493056, expected 493776 47 | 145 * 369 = 53505, expected 53505 48 | 940 * 440 = 413600, expected 413600 49 | 114 * 213 = 24282, expected 24282 50 | 259 * 650 = 168350, expected 168350 51 | """ 52 | -------------------------------------------------------------------------------- /examples/evals/models.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from pydantic import BaseModel, Field 3 | from enum import Enum 4 | 5 | 6 | class SourceType(str, Enum): 7 | CRM = "CRM" 8 | WEB = "WEB" 9 | EMAIL = "EMAIL" 10 | SOCIAL_MEDIA = "SOCIAL_MEDIA" 11 | OTHER = "OTHER" 12 | 13 | 14 | class Search(BaseModel): 15 | query: str 16 | source_type: SourceType 17 | results_limit: Optional[int] = Field(10) 18 | is_priority: Optional[bool] = None 19 | tags: Optional[list[str]] = None 20 | 21 | 22 | class MultiSearch(BaseModel): 23 | queries: list[Search] 24 | user_id: Optional[str] 25 | -------------------------------------------------------------------------------- /examples/extract-table/run_vision_receipt.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, model_validator 2 | from openai import OpenAI 3 | import instructor 4 | 5 | 6 | client = instructor.from_openai( 7 | client=OpenAI(), 8 | mode=instructor.Mode.TOOLS, 9 | ) 10 | 11 | 12 | class Item(BaseModel): 13 | name: str 14 | price: float 15 | quantity: int 16 | 17 | 18 | class Receipt(BaseModel): 19 | items: list[Item] 20 | total: float 21 | 22 | @model_validator(mode="after") 23 | def check_total(cls, values: "Receipt"): 24 | items = values.items 25 | total = values.total 26 | calculated_total = sum(item.price * item.quantity for item in items) 27 | if calculated_total != total: 28 | raise ValueError( 29 | f"Total {total} does not match the sum of item prices {calculated_total}" 30 | ) 31 | return values 32 | 33 | 34 | def extract(url: str) -> Receipt: 35 | return client.chat.completions.create( 36 | model="gpt-4o", 37 | max_tokens=4000, 38 | response_model=Receipt, 39 | messages=[ 40 | { 41 | "role": "user", 42 | "content": [ 
43 | { 44 | "type": "image_url", 45 | "image_url": {"url": url}, 46 | }, 47 | { 48 | "type": "text", 49 | "text": "Analyze the image and return the items in the receipt and the total amount.", 50 | }, 51 | ], 52 | } 53 | ], 54 | ) 55 | 56 | 57 | # URLs of images containing receipts. Exhibits the use of the model validator to check the total amount. 58 | urls = [ 59 | "https://templates.mediamodifier.com/645124ff36ed2f5227cbf871/supermarket-receipt-template.jpg", 60 | "https://ocr.space/Content/Images/receipt-ocr-original.jpg", 61 | ] 62 | 63 | for url in urls: 64 | receipt = extract(url) 65 | print(receipt) 66 | -------------------------------------------------------------------------------- /examples/fastapi_app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/fastapi_app/__init__.py -------------------------------------------------------------------------------- /examples/fastapi_app/main.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | from instructor import OpenAISchema 3 | import instructor.dsl as dsl 4 | from pydantic import BaseModel, Field 5 | 6 | app = FastAPI(title="Example Application using instructor") 7 | 8 | 9 | class SearchRequest(BaseModel): 10 | body: str 11 | 12 | 13 | class SearchQuery(OpenAISchema): 14 | title: str = Field(..., description="Question that the query answers") 15 | query: str = Field( 16 | ..., 17 | description="Detailed, comprehensive, and specific query to be used for semantic search", 18 | ) 19 | 20 | 21 | SearchResponse = dsl.MultiTask( 22 | subtask_class=SearchQuery, 23 | description="Correctly segmented set of search queries", 24 | ) 25 | 26 | 27 | @app.post("/search", response_model=SearchResponse) 28 | async def search(request: SearchRequest): 29 | task = ( 30 | dsl.ChatCompletion(name="Segmenting Search requests example") 31 | | dsl.SystemTask(task="Segment search results") 32 | | dsl.TaggedMessage(content=request.body, tag="query") 33 | | dsl.TipsMessage( 34 | tips=[ 35 | "Expand query to contain multiple forms of the same word (SSO -> Single Sign On)", 36 | "Use the title to explain what the query should return, but use the query to complete the search", 37 | "The query should be detailed, specific, and cast a wide net when possible", 38 | ] 39 | ) 40 | | SearchResponse 41 | ) 42 | return await task.acreate() 43 | -------------------------------------------------------------------------------- /examples/fastapi_app/script.py: -------------------------------------------------------------------------------- 1 | from instructor import OpenAISchema, dsl 2 | from pydantic import Field 3 | import json 4 | 5 | 6 | class SearchQuery(OpenAISchema): 7 | query: str = Field( 8 | ..., 9 | description="Detailed, comprehensive, and specific query to be used for semantic search", 10 | ) 11 | 12 | 13 | SearchResponse = dsl.MultiTask( 14 | subtask_class=SearchQuery, 15 | description="Correctly segmented set of search queries", 16 | ) 17 | 18 | 19 | task = ( 20 | dsl.ChatCompletion(name="Segmenting Search requests example") 21 | | dsl.SystemTask(task="Segment search results") 22 | | dsl.TaggedMessage( 23 | content="can you send me the data about the video investment and the one about spot the dog?", 24 | tag="query", 25 | ) 26 | | dsl.TipsMessage( 27 | tips=[ 28 | "Expand query to contain multiple forms of the same word (SSO -> Single Sign On)", 29
| "Use the title to explain what the query should return, but use the query to complete the search", 30 | "The query should be detailed, specific, and cast a wide net when possible", 31 | ] 32 | ) 33 | | SearchResponse 34 | ) 35 | 36 | 37 | print(json.dumps(task.kwargs, indent=1)) 38 | """ 39 | { 40 | "tasks": [ 41 | { 42 | "query": "data about video investment" 43 | }, 44 | { 45 | "query": "data about spot the dog" 46 | } 47 | ] 48 | } 49 | """ 50 | -------------------------------------------------------------------------------- /examples/fizzbuzz/run.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from openai import OpenAI 4 | import instructor 5 | 6 | client = instructor.from_openai(OpenAI()) 7 | 8 | 9 | def fizzbuzz_gpt(n) -> list[int | str]: 10 | return client.chat.completions.create( 11 | model="gpt-3.5-turbo", 12 | response_model=list[int | str], 13 | messages=[ 14 | { 15 | "role": "user", 16 | "content": f"Return the first {n} numbers in fizzbuzz", 17 | }, 18 | ], 19 | ) # type: ignore 20 | 21 | 22 | if __name__ == "__main__": 23 | print(fizzbuzz_gpt(n=15)) 24 | # > [1, 2, 'Fizz', 4, 'Buzz', 'Fizz', 7, 8, 'Fizz', 'Buzz', 11, 'Fizz', 13, 14, 'FizzBuzz'] 25 | -------------------------------------------------------------------------------- /examples/gpt-engineer/program.json: -------------------------------------------------------------------------------- 1 | {"files": [{"file_name": "readme.md", "body": "# FastAPI App\n\nThis is a FastAPI app that provides some basic math functions.\n\n## Usage\n\nTo use this app, follow the instructions below:\n\n1. Install the required dependencies by running `pip install -r requirements.txt`.\n2. Start the app by running `uvicorn main:app --reload`.\n3. 
Open your browser and navigate to `http://localhost:8000/docs` to access the Swagger UI documentation.\n\n## Example\n\nTo perform a basic math operation, you can use the following curl command:\n\n```bash\ncurl -X POST -H \"Content-Type: application/json\" -d '{\"operation\": \"add\", \"operands\": [2, 3]}' http://localhost:8000/calculate\n```\n"}, {"file_name": "main.py", "body": "from fastapi import FastAPI\nfrom pydantic import BaseModel\n\napp = FastAPI()\n\n\nclass Operation(BaseModel):\n operation: str\n operands: list\n\n\n@app.post('/calculate')\nasync def calculate(operation: Operation):\n if operation.operation == 'add':\n result = sum(operation.operands)\n elif operation.operation == 'subtract':\n result = operation.operands[0] - sum(operation.operands[1:])\n elif operation.operation == 'multiply':\n result = 1\n for operand in operation.operands:\n result *= operand\n elif operation.operation == 'divide':\n result = operation.operands[0]\n for operand in operation.operands[1:]:\n result /= operand\n else:\n result = None\n return {'result': result}\n"}, {"file_name": "requirements.txt", "body": "fastapi\nuvicorn\npydantic"}]} -------------------------------------------------------------------------------- /examples/groq/groq_example.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pydantic import BaseModel, Field 3 | from groq import Groq 4 | import instructor 5 | 6 | 7 | class Character(BaseModel): 8 | name: str 9 | fact: list[str] = Field(..., description="A list of facts about the subject") 10 | 11 | 12 | client = Groq( 13 | api_key=os.environ.get("GROQ_API_KEY"), 14 | ) 15 | 16 | client = instructor.from_groq(client, mode=instructor.Mode.TOOLS) 17 | 18 | resp = client.chat.completions.create( 19 | model="mixtral-8x7b-32768", 20 | messages=[ 21 | { 22 | "role": "user", 23 | "content": "Tell me about the company Tesla", 24 | } 25 | ], 26 | response_model=Character, 27 | ) 28 | print(resp.model_dump_json(indent=2)) 29 | """ 30 | { 31 | "name": "Tesla", 32 | "fact": [ 33 | "An American electric vehicle and clean energy company.", 34 | "Co-founded by Elon Musk, JB Straubel, Martin Eberhard, Marc Tarpenning, and Ian Wright in 2003.", 35 | "Headquartered in Austin, Texas.", 36 | "Produces electric vehicles, energy storage solutions, and more recently, solar energy products.", 37 | "Known for its premium electric vehicles, such as the Model S, Model 3, Model X, and Model Y.", 38 | "One of the world's most valuable car manufacturers by market capitalization.", 39 | "Tesla's CEO, Elon Musk, is also the CEO of SpaceX, Neuralink, and The Boring Company.", 40 | "Tesla operates the world's largest global network of electric vehicle supercharging stations.", 41 | "The company aims to accelerate the world's transition to sustainable transport and energy through innovative technologies and products." 
42 | ] 43 | } 44 | """ 45 | -------------------------------------------------------------------------------- /examples/groq/groq_example2.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pydantic import BaseModel 3 | from groq import Groq 4 | import instructor 5 | 6 | client = Groq( 7 | api_key=os.environ.get("GROQ_API_KEY"), 8 | ) 9 | 10 | client = instructor.from_groq(client, mode=instructor.Mode.TOOLS) 11 | 12 | 13 | class UserExtract(BaseModel): 14 | name: str 15 | age: int 16 | 17 | 18 | user: UserExtract = client.chat.completions.create( 19 | model="mixtral-8x7b-32768", 20 | response_model=UserExtract, 21 | messages=[ 22 | {"role": "user", "content": "Extract jason is 25 years old"}, 23 | ], 24 | ) 25 | 26 | assert isinstance(user, UserExtract), "Should be instance of UserExtract" 27 | assert user.name.lower() == "jason" 28 | assert user.age == 25 29 | 30 | print(user.model_dump_json(indent=2)) 31 | """ 32 | { 33 | "name": "jason", 34 | "age": 25 35 | } 36 | """ 37 | -------------------------------------------------------------------------------- /examples/iterables/run.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from collections.abc import Iterable 4 | from openai import OpenAI 5 | from pydantic import BaseModel 6 | 7 | import instructor 8 | 9 | 10 | client = instructor.from_openai(OpenAI()) 11 | 12 | 13 | class User(BaseModel): 14 | name: str 15 | job: str 16 | age: int 17 | 18 | 19 | def stream_extract(input: str) -> Iterable[User]: 20 | return client.chat.completions.create_iterable( 21 | model="gpt-4o", 22 | temperature=0.1, 23 | stream=True, 24 | response_model=User, 25 | messages=[ 26 | { 27 | "role": "system", 28 | "content": "You are a perfect entity extraction system", 29 | }, 30 | { 31 | "role": "user", 32 | "content": ( 33 | f"Consider the data below:\n{input}\n" 34 | "Correctly segment it into entities. " 35 | "Make sure the JSON is correct" 36 | ), 37 | }, 38 | ], 39 | max_tokens=1000, 40 | ) 41 | 42 | 43 | start = time.time() 44 | for user in stream_extract( 45 | input="Create 5 characters from the book Three Body Problem" 46 | ): 47 | delay = round(time.time() - start, 1) 48 | print(f"{delay} s: User({user})") 49 | """ 50 | 0.8 s: User(name='Ye Wenjie' job='Astrophysicist' age=60) 51 | 1.1 s: User(name='Wang Miao' job='Nanomaterials Researcher' age=40) 52 | 1.7 s: User(name='Shi Qiang' job='Detective' age=50) 53 | 1.9 s: User(name='Ding Yi' job='Theoretical Physicist' age=45) 54 | 1.9 s: User(name='Chang Weisi' job='Military Strategist' age=55) 55 | """ 56 | # Notice that the first user arrived at 0.8s while the last only arrived at 1.9s: each entity is yielded as soon as its JSON object completes!
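# A minimal async sketch of the same idea (added for illustration; it assumes
# the same model and that AsyncOpenAI is configured with valid credentials).
# The async client's create_iterable yields each User as soon as its JSON
# object completes in the stream:
#
#   import asyncio
#   from openai import AsyncOpenAI
#
#   aclient = instructor.from_openai(AsyncOpenAI())
#
#   async def astream_extract(input: str) -> None:
#       users = aclient.chat.completions.create_iterable(
#           model="gpt-4o",
#           response_model=User,
#           messages=[{"role": "user", "content": f"Consider the data below:\n{input}\nCorrectly segment it into entities."}],
#       )
#       async for user in users:
#           print(user)
#
#   asyncio.run(astream_extract("Create 5 characters from the book Three Body Problem"))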
57 | -------------------------------------------------------------------------------- /examples/knowledge-graph/final.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/knowledge-graph/final.png -------------------------------------------------------------------------------- /examples/knowledge-graph/iteration_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/knowledge-graph/iteration_0.png -------------------------------------------------------------------------------- /examples/knowledge-graph/iteration_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/knowledge-graph/iteration_1.png -------------------------------------------------------------------------------- /examples/knowledge-graph/iteration_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/knowledge-graph/iteration_2.png -------------------------------------------------------------------------------- /examples/knowledge-graph/iteration_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/knowledge-graph/iteration_3.png -------------------------------------------------------------------------------- /examples/knowledge-graph/kg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/knowledge-graph/kg.png -------------------------------------------------------------------------------- /examples/knowledge-graph/run.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | 3 | from graphviz import Digraph 4 | from pydantic import BaseModel, Field 5 | from openai import OpenAI 6 | 7 | 8 | client = instructor.from_openai(OpenAI()) 9 | 10 | 11 | class Node(BaseModel): 12 | id: int 13 | label: str 14 | color: str 15 | 16 | 17 | class Edge(BaseModel): 18 | source: int 19 | target: int 20 | label: str 21 | color: str = "black" 22 | 23 | 24 | class KnowledgeGraph(BaseModel): 25 | nodes: list[Node] = Field(default_factory=list) 26 | edges: list[Edge] = Field(default_factory=list) 27 | 28 | 29 | def generate_graph(input) -> KnowledgeGraph: 30 | return client.chat.completions.create( 31 | model="gpt-3.5-turbo-16k", 32 | messages=[ 33 | { 34 | "role": "user", 35 | "content": f"Help me understand the following by describing it as a detailed knowledge graph: {input}", 36 | } 37 | ], 38 | response_model=KnowledgeGraph, 39 | ) # type: ignore 40 | 41 | 42 | def visualize_knowledge_graph(kg: KnowledgeGraph): 43 | dot = Digraph(comment="Knowledge Graph") 44 | 45 | # Add nodes 46 | for node in kg.nodes: 47 | dot.node(str(node.id), node.label, color=node.color) 48 | 49 | # Add edges 50 | for edge in kg.edges: 51 | dot.edge(str(edge.source), str(edge.target), label=edge.label, color=edge.color) 52 | 53 | # Render the graph 54 | dot.render("knowledge_graph.gv", view=True) 55 |
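# Note: dot.render shells out to the Graphviz `dot` executable, so the system
# Graphviz package must be installed alongside the `graphviz` Python bindings
# (for example via `apt-get install graphviz` or `brew install graphviz`).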
56 | 57 | graph: KnowledgeGraph = generate_graph("Teach me about quantum mechanics") 58 | visualize_knowledge_graph(graph) 59 | -------------------------------------------------------------------------------- /examples/logfire-fastapi/Readme.md: -------------------------------------------------------------------------------- 1 | # Instructions 2 | 3 | 1. Create a virtual environment and install all of the packages inside `requirements.txt` 4 | 5 | 2. Run the server using 6 | 7 | ``` 8 | uvicorn server:app --reload 9 | ``` 10 | 11 | 3. Open up the documentation at `http://127.0.0.1:8000/docs` to start experimenting with FastAPI! You can print out the streaming example using `test.py`. 12 | -------------------------------------------------------------------------------- /examples/logfire-fastapi/requirements.txt: -------------------------------------------------------------------------------- 1 | pydantic==2.7.1 2 | openai==1.24.1 3 | instructor==1.0.3 4 | logfire==0.28.0 5 | fastapi==0.110.3 6 | uvicorn[standard] 7 | logfire[fastapi] -------------------------------------------------------------------------------- /examples/logfire-fastapi/test.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | response = requests.post( 4 | "http://127.0.0.1:8000/extract",  # uvicorn's default port, matching the Readme 5 | json={ 6 | "query": "Alice and Bob are best friends. They are currently 32 and 43 respectively. " 7 | }, 8 | stream=True, 9 | ) 10 | 11 | for chunk in response.iter_content(chunk_size=1024): 12 | if chunk: 13 | print(str(chunk, encoding="utf-8"), end="\n") 14 | -------------------------------------------------------------------------------- /examples/logfire/classify.py: -------------------------------------------------------------------------------- 1 | import enum 2 | from pydantic import BaseModel 3 | from openai import OpenAI 4 | import instructor 5 | import logfire 6 | 7 | 8 | class Labels(str, enum.Enum): 9 | """Enumeration for single-label text classification.""" 10 | 11 | SPAM = "spam" 12 | NOT_SPAM = "not_spam" 13 | 14 | 15 | class SinglePrediction(BaseModel): 16 | """ 17 | Class for a single class label prediction.
18 | """ 19 | 20 | class_label: Labels 21 | 22 | 23 | openai_client = OpenAI() 24 | logfire.configure(pydantic_plugin=logfire.PydanticPlugin(record="all")) 25 | logfire.instrument_openai(openai_client) 26 | client = instructor.from_openai(openai_client) 27 | 28 | 29 | @logfire.instrument("classification", extract_args=True) 30 | def classify(data: str) -> SinglePrediction: 31 | """Perform single-label classification on the input text.""" 32 | return client.chat.completions.create( 33 | model="gpt-3.5-turbo-0613", 34 | response_model=SinglePrediction, 35 | messages=[ 36 | { 37 | "role": "user", 38 | "content": f"Classify the following text: {data}", 39 | }, 40 | ], 41 | ) 42 | 43 | 44 | if __name__ == "__main__": 45 | emails = [ 46 | "Hello there I'm a Nigerian prince and I want to give you money", 47 | "Meeting with Thomas has been set at Friday next week", 48 | "Here are some weekly product updates from our marketing team", 49 | ] 50 | 51 | for email in emails: 52 | classify(email) 53 | -------------------------------------------------------------------------------- /examples/logfire/requirements.txt: -------------------------------------------------------------------------------- 1 | pydantic==2.7.1 2 | openai==1.24.1 3 | instructor==1.0.3 4 | logfire==0.28.0 -------------------------------------------------------------------------------- /examples/logfire/validate.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | from pydantic import BaseModel, ValidationError 3 | from pydantic.functional_validators import AfterValidator 4 | from instructor import llm_validator 5 | import logfire 6 | import instructor 7 | from openai import OpenAI 8 | 9 | openai_client = OpenAI() 10 | logfire.configure(pydantic_plugin=logfire.PydanticPlugin(record="all")) 11 | logfire.instrument_openai(openai_client) 12 | client = instructor.from_openai(openai_client) 13 | 14 | 15 | class Statement(BaseModel): 16 | message: Annotated[ 17 | str, 18 | AfterValidator( 19 | llm_validator("Don't allow any objectionable content", client=client) 20 | ), 21 | ] 22 | 23 | 24 | messages = [ 25 | "I think we should always treat violence as the best solution", 26 | "There are some great pastries down the road at this bakery I know", 27 | ] 28 | 29 | for message in messages: 30 | try: 31 | Statement(message=message) 32 | except ValidationError as e: 33 | print(e) 34 | -------------------------------------------------------------------------------- /examples/mistral/mistral.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | from mistralai.client import MistralClient 3 | from instructor import from_mistral 4 | from instructor.function_calls import Mode 5 | import os 6 | 7 | 8 | class UserDetails(BaseModel): 9 | name: str 10 | age: int 11 | 12 | 13 | # enables `response_model` in chat call 14 | client = MistralClient(api_key=os.environ.get("MISTRAL_API_KEY")) 15 | instructor_client = from_mistral( 16 | client=client, 17 | model="mistral-large-latest", 18 | mode=Mode.MISTRAL_TOOLS, 19 | max_tokens=1000, 20 | ) 21 | 22 | resp = instructor_client.messages.create( 23 | response_model=UserDetails, 24 | messages=[{"role": "user", "content": "Jason is 10"}], 25 | temperature=0, 26 | ) 27 | 28 | print(resp) 29 | -------------------------------------------------------------------------------- /examples/multiple_search_queries/diagram.py: 
-------------------------------------------------------------------------------- 1 | import erdantic as erd 2 | 3 | from segment_search_queries import MultiSearch 4 | 5 | diagram = erd.create(MultiSearch) 6 | diagram.draw("examples/multiple_search_queries/schema.png") 7 | -------------------------------------------------------------------------------- /examples/multiple_search_queries/schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/multiple_search_queries/schema.png -------------------------------------------------------------------------------- /examples/openai-audio/output.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/openai-audio/output.wav -------------------------------------------------------------------------------- /examples/openai-audio/run.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | from pydantic import BaseModel 3 | import instructor 4 | from instructor.multimodal import Audio 5 | import base64 6 | 7 | client = instructor.from_openai(OpenAI()) 8 | 9 | 10 | class Person(BaseModel): 11 | name: str 12 | age: int 13 | 14 | 15 | with open("./output.wav", "rb") as f: 16 | encoded_string = base64.b64encode(f.read()).decode("utf-8") 17 | 18 | resp = client.chat.completions.create( 19 | model="gpt-4o-audio-preview", 20 | response_model=Person, 21 | modalities=["text"], 22 | audio={"voice": "alloy", "format": "wav"}, 23 | messages=[ 24 | { 25 | "role": "user", 26 | "content": [ 27 | "Extract the following information from the audio", 28 | Audio.from_path("./output.wav"), 29 | ], 30 | }, 31 | ], 32 | ) # type: ignore 33 | 34 | print(resp) 35 | # > Person(name='Jason', age=20) 36 | -------------------------------------------------------------------------------- /examples/parallel/run.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import openai 4 | import instructor 5 | 6 | from typing import Literal 7 | from collections.abc import Iterable 8 | from pydantic import BaseModel 9 | 10 | 11 | class Weather(BaseModel): 12 | location: str 13 | units: Literal["imperial", "metric"] 14 | 15 | 16 | class GoogleSearch(BaseModel): 17 | query: str 18 | 19 | 20 | client = openai.OpenAI() 21 | 22 | client = instructor.from_openai(client, mode=instructor.Mode.PARALLEL_TOOLS) 23 | 24 | resp = client.chat.completions.create( 25 | model="gpt-4-turbo-preview", 26 | messages=[ 27 | {"role": "system", "content": "You must always use tools"}, 28 | { 29 | "role": "user", 30 | "content": "What is the weather in toronto and dallas and who won the super bowl?", 31 | }, 32 | ], 33 | response_model=Iterable[Weather | GoogleSearch], 34 | ) 35 | 36 | for r in resp: 37 | print(r) 38 | -------------------------------------------------------------------------------- /examples/partial_streaming/run.py: -------------------------------------------------------------------------------- 1 | # Part of this code is adapted from the following examples from OpenAI Cookbook: 2 | # https://cookbook.openai.com/examples/how_to_stream_completions 3 | # https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb 4 | import instructor 5 | from openai import
OpenAI 6 | from pydantic import BaseModel 7 | 8 | client = instructor.from_openai(OpenAI(), mode=instructor.Mode.TOOLS) 9 | 10 | 11 | class User(BaseModel): 12 | name: str 13 | role: str 14 | 15 | 16 | extraction_stream = client.chat.completions.create_partial( 17 | model="gpt-4", 18 | response_model=User, 19 | messages=[ 20 | { 21 | "role": "user", 22 | "content": "give me a Harry Potter character in json, name, role, age", 23 | } 24 | ], 25 | ) 26 | 27 | for chunk in extraction_stream: 28 | print(chunk) 29 | -------------------------------------------------------------------------------- /examples/patching/anyscale.py: -------------------------------------------------------------------------------- 1 | import os 2 | import instructor 3 | 4 | from openai import OpenAI 5 | from pydantic import BaseModel 6 | 7 | 8 | # By default, the patch function will patch the ChatCompletion.create and ChatCompletion.acreate methods to support the response_model parameter. 9 | client = instructor.from_openai( 10 | OpenAI( 11 | base_url="https://api.endpoints.anyscale.com/v1", 12 | api_key=os.environ["ANYSCALE_API_KEY"], 13 | ), 14 | mode=instructor.Mode.JSON_SCHEMA, 15 | ) 16 | 17 | 18 | # Now, we can use the response_model parameter using only a base model 19 | # rather than having to use the OpenAISchema class 20 | class UserExtract(BaseModel): 21 | name: str 22 | age: int 23 | 24 | 25 | user: UserExtract = client.chat.completions.create( 26 | model="mistralai/Mixtral-8x7B-Instruct-v0.1", 27 | response_model=UserExtract, 28 | messages=[ 29 | {"role": "user", "content": "Extract jason is 25 years old"}, 30 | ], 31 | ) # type: ignore 32 | 33 | print(user) 34 | # > { 35 | # >     "name": "Jason", 36 | # >     "age": 25, 37 | # > } 38 | -------------------------------------------------------------------------------- /examples/patching/oai.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | 3 | from openai import OpenAI 4 | from pydantic import BaseModel 5 | 6 | 7 | # By default, the patch function will patch the ChatCompletion.create and ChatCompletion.acreate methods to support the response_model parameter. 8 | client = instructor.from_openai( 9 | OpenAI(), 10 | mode=instructor.Mode.TOOLS, 11 | ) 12 | 13 | 14 | # Now, we can use the response_model parameter using only a base model 15 | # rather than having to use the OpenAISchema class 16 | class UserExtract(BaseModel): 17 | name: str 18 | age: int 19 | 20 | 21 | user: UserExtract = client.chat.completions.create( 22 | model="gpt-3.5-turbo", 23 | response_model=UserExtract, 24 | messages=[ 25 | {"role": "user", "content": "Extract jason is 25 years old"}, 26 | ], 27 | ) # type: ignore 28 | 29 | print(user) 30 | # > { 31 | # >     "name": "Jason", 32 | # >     "age": 25, 33 | # > } 34 | -------------------------------------------------------------------------------- /examples/patching/together.py: -------------------------------------------------------------------------------- 1 | import os 2 | import openai 3 | from pydantic import BaseModel 4 | import instructor 5 | 6 | client = openai.OpenAI( 7 | base_url="https://api.together.xyz/v1", 8 | api_key=os.environ["TOGETHER_API_KEY"], 9 | ) 10 | 11 | 12 | # By default, the patch function will patch the ChatCompletion.create and ChatCompletion.acreate methods
to support the response_model parameter. 13 | client = instructor.from_openai(client, mode=instructor.Mode.TOOLS) 14 | 15 | 16 | # Now, we can use the response_model parameter using only a base model 17 | # rather than having to use the OpenAISchema class 18 | class UserExtract(BaseModel): 19 | name: str 20 | age: int 21 | 22 | 23 | user: UserExtract = client.chat.completions.create( 24 | model="mistralai/Mixtral-8x7B-Instruct-v0.1", 25 | response_model=UserExtract, 26 | messages=[ 27 | {"role": "user", "content": "Extract jason is 25 years old"}, 28 | ], 29 | ) # type: ignore 30 | 31 | print(user.model_dump_json(indent=2)) 32 | # > { 33 | # >     "name": "Jason", 34 | # >     "age": 25, 35 | # > } 36 | -------------------------------------------------------------------------------- /examples/proscons/run.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | from pydantic import BaseModel, Field 3 | 4 | import instructor 5 | 6 | 7 | class Character(BaseModel): 8 | name: str 9 | age: int 10 | fact: list[str] = Field(..., description="A list of facts about the character") 11 | 12 | 13 | # enables `response_model` in create call 14 | client = instructor.from_openai( 15 | OpenAI( 16 | base_url="http://localhost:11434/v1", 17 | api_key="ollama",  # required, but unused 18 | ), 19 | mode=instructor.Mode.JSON, 20 | ) 21 | 22 | resp = client.chat.completions.create( 23 | model="llama2", 24 | messages=[ 25 | { 26 | "role": "user", 27 | "content": "Tell me about Harry Potter", 28 | } 29 | ], 30 | response_model=Character, 31 | ) 32 | print(resp.model_dump_json(indent=2)) 33 | """ 34 | { 35 | "name": "Harry James Potter", 36 | "age": 37, 37 | "fact": [ 38 | "He is the chosen one.", 39 | "He has a lightning-shaped scar on his forehead.", 40 | "He is the son of James and Lily Potter.", 41 | "He attended Hogwarts School of Witchcraft and Wizardry.", 42 | "He is a skilled wizard and sorcerer.", 43 | "He fought against Lord Voldemort and his followers.", 44 | "He has a pet owl named Snowy."
45 | ] 46 | } 47 | """ 48 | -------------------------------------------------------------------------------- /examples/query_planner_execution/diagram.py: -------------------------------------------------------------------------------- 1 | import erdantic as erd 2 | 3 | from query_planner_execution import QueryPlan 4 | 5 | diagram = erd.create(QueryPlan) 6 | diagram.draw("examples/query_planner_execution/schema.png") 7 | -------------------------------------------------------------------------------- /examples/query_planner_execution/schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/query_planner_execution/schema.png -------------------------------------------------------------------------------- /examples/recursive_filepaths/diagram.py: -------------------------------------------------------------------------------- 1 | import erdantic as erd 2 | 3 | from parse_recursive_paths import DirectoryTree 4 | 5 | diagram = erd.create(DirectoryTree) 6 | diagram.draw("examples/recursive_filepaths/schema.png") 7 | -------------------------------------------------------------------------------- /examples/recursive_filepaths/schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/recursive_filepaths/schema.png -------------------------------------------------------------------------------- /examples/resolving-complex-entities/entity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/resolving-complex-entities/entity.png -------------------------------------------------------------------------------- /examples/retry/run.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, field_validator 2 | from openai import OpenAI 3 | import instructor 4 | import tenacity 5 | 6 | client = OpenAI() 7 | client = instructor.from_openai(client) 8 | 9 | 10 | class User(BaseModel): 11 | name: str 12 | age: int 13 | 14 | @field_validator("name") 15 | def name_is_uppercase(cls, v: str): 16 | assert v.isupper(), "Name must be uppercase" 17 | return v 18 | 19 | 20 | resp = client.messages.create( 21 | model="gpt-3.5-turbo", 22 | max_tokens=1024, 23 | max_retries=tenacity.Retrying( 24 | stop=tenacity.stop_after_attempt(3), 25 | before=lambda _: print("before:", _), 26 | after=lambda _: print("after:", _), 27 | ), 28 | messages=[ 29 | { 30 | "role": "user", 31 | "content": "Extract John is 18 years old.", 32 | } 33 | ], 34 | response_model=User, 35 | ) # type: ignore 36 | 37 | assert isinstance(resp, User) 38 | assert resp.name == "JOHN" # due to validation 39 | assert resp.age == 18 40 | print(resp) 41 | 42 | """ 43 | before: 44 | after: 48 | before: 49 | 50 | name='JOHN' age=18 51 | """ 52 | -------------------------------------------------------------------------------- /examples/safer_sql_example/diagram.py: -------------------------------------------------------------------------------- 1 | import erdantic as erd 2 | 3 | from safe_sql import SQL 4 | 5 | diagram = erd.create(SQL) 6 | diagram.draw("examples/safer_sql_example/schema.png") 7 | --------------------------------------------------------------------------------
/examples/safer_sql_example/schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/safer_sql_example/schema.png -------------------------------------------------------------------------------- /examples/simple-extraction/maybe_user.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | 3 | from openai import OpenAI 4 | from pydantic import BaseModel, Field 5 | from typing import Optional 6 | 7 | client = instructor.from_openai(OpenAI()) 8 | 9 | 10 | class UserDetail(BaseModel): 11 | age: int 12 | name: str 13 | role: Optional[str] = Field(default=None) 14 | 15 | 16 | MaybeUser = instructor.Maybe(UserDetail) 17 | 18 | 19 | def get_user_detail(string) -> MaybeUser:  # type: ignore 20 | return client.chat.completions.create( 21 | model="gpt-3.5-turbo-0613", 22 | response_model=MaybeUser, 23 | messages=[ 24 | { 25 | "role": "user", 26 | "content": f"Get user details for {string}", 27 | }, 28 | ], 29 | ) # type: ignore 30 | 31 | 32 | user = get_user_detail("Jason is 25 years old") 33 | print(user.model_dump_json(indent=2)) 34 | """ 35 | { 36 | "user": { 37 | "age": 25, 38 | "name": "Jason", 39 | "role": null 40 | }, 41 | "error": false, 42 | "message": null 43 | } 44 | """ 45 | 46 | user = get_user_detail("Jason is a 25 years old scientist") 47 | print(user.model_dump_json(indent=2)) 48 | """ 49 | { 50 | "user": { 51 | "age": 25, 52 | "name": "Jason", 53 | "role": "scientist" 54 | }, 55 | "error": false, 56 | "message": null 57 | } 58 | """ 59 | 60 | # ! notice that the string contains no user details 61 | # ! and Maybe correctly reports an error instead of inventing a user 62 | user = get_user_detail("User not found") 63 | print(user.model_dump_json(indent=2)) 64 | """ 65 | { 66 | "user": null, 67 | "error": true, 68 | "message": "User not found" 69 | } 70 | """ 71 | 72 | # ! due to the __bool__ method, you can use the MaybeUser object as a boolean 73 | 74 | if not user: 75 | print("Detected error") 76 | """ 77 | Detected error 78 | """ 79 | -------------------------------------------------------------------------------- /examples/simple-extraction/user.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | 3 | from openai import OpenAI 4 | from pydantic import BaseModel, Field 5 | from typing import Optional 6 | 7 | client = instructor.from_openai(OpenAI()) 8 | 9 | 10 | class UserDetail(BaseModel): 11 | age: int 12 | name: str 13 | role: Optional[str] = Field(default=None) 14 | 15 | 16 | def get_user_detail(string) -> UserDetail: 17 | return client.chat.completions.create( 18 | model="gpt-3.5-turbo-0613", 19 | response_model=UserDetail, 20 | messages=[ 21 | { 22 | "role": "user", 23 | "content": f"Get user details for {string}", 24 | }, 25 | ], 26 | ) # type: ignore 27 | 28 | 29 | user = get_user_detail("Jason is 25 years old") 30 | print(user.model_dump_json(indent=2)) 31 | """ 32 | { 33 | "age": 25, 34 | "name": "Jason", 35 | "role": null 36 | } 37 | """ 38 | 39 | user = get_user_detail("Jason is a 25 years old scientist") 40 | print(user.model_dump_json(indent=2)) 41 | """ 42 | { 43 | "age": 25, 44 | "name": "Jason", 45 | "role": "scientist" 46 | } 47 | """ 48 | 49 | # ! notice that the string does not contain any user details, 50 | # ! but a name and age were still hallucinated ?!
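# (A mitigation for this hallucination is shown in maybe_user.py above:
# wrapping UserDetail in instructor.Maybe lets the model set error=true and
# return user=null instead of inventing a person.)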
51 | user = get_user_detail("User not found") 52 | print(user.model_dump_json(indent=2)) 53 | """ 54 | { 55 | "age": 25, 56 | "name": "John Doe", 57 | "role": "null" 58 | } 59 | """ 60 | -------------------------------------------------------------------------------- /examples/sqlmodel/run.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | from openai import OpenAI 3 | from typing import Optional 4 | from sqlmodel import Field, SQLModel, create_engine, Session 5 | 6 | 7 | # Define the model that will serve as a Table for the database 8 | class Hero(SQLModel, instructor.OpenAISchema, table=True): 9 | id: Optional[int] = Field(default=None, primary_key=True) 10 | name: str 11 | secret_name: str 12 | age: Optional[int] = None 13 | 14 | 15 | # Function to query OpenAI for a Hero record 16 | client = instructor.from_openai(OpenAI()) 17 | 18 | 19 | def create_hero() -> Hero: 20 | return client.chat.completions.create( 21 | model="gpt-3.5-turbo", 22 | response_model=Hero, 23 | messages=[ 24 | {"role": "user", "content": "Make a new superhero"}, 25 | ], 26 | ) 27 | 28 | 29 | # Insert the response into the database 30 | engine = create_engine("sqlite:///database.db") 31 | SQLModel.metadata.create_all(engine) 32 | 33 | hero = create_hero() 34 | print(hero.model_dump()) 35 | 36 | 37 | with Session(engine) as session: 38 | session.add(hero) 39 | session.commit() 40 | -------------------------------------------------------------------------------- /examples/synethic-data/run.py: -------------------------------------------------------------------------------- 1 | import openai 2 | import instructor 3 | from collections.abc import Iterable 4 | from pydantic import BaseModel, ConfigDict 5 | 6 | client = instructor.from_openai(openai.OpenAI()) 7 | 8 | 9 | class SyntheticQA(BaseModel): 10 | question: str 11 | answer: str 12 | 13 | model_config = ConfigDict( 14 | json_schema_extra={ 15 | "examples": [ 16 | {"question": "What is the capital of France?", "answer": "Paris"}, 17 | { 18 | "question": "What is the largest planet in our solar system?", 19 | "answer": "Jupiter", 20 | }, 21 | { 22 | "question": "Who wrote 'To Kill a Mockingbird'?", 23 | "answer": "Harper Lee", 24 | }, 25 | { 26 | "question": "What element does 'O' represent on the periodic table?", 27 | "answer": "Oxygen", 28 | }, 29 | ] 30 | } 31 | ) 32 | 33 | 34 | def get_synthetic_data() -> Iterable[SyntheticQA]: 35 | return client.chat.completions.create( 36 | model="gpt-3.5-turbo", 37 | messages=[ 38 | {"role": "system", "content": "Generate synthetic examples"}, 39 | { 40 | "role": "user", 41 | "content": "Generate the exact examples you see in the examples of this prompt. ", 42 | }, 43 | ], 44 | response_model=Iterable[SyntheticQA], 45 | ) # type: ignore 46 | 47 | 48 | if __name__ == "__main__": 49 | for example in get_synthetic_data(): 50 | print(example) 51 | """ 52 | question='What is the capital of France?' answer='Paris' 53 | question='What is the largest planet in our solar system?' answer='Jupiter' 54 | question="Who wrote 'To Kill a Mockingbird'?" answer='Harper Lee' 55 | question="What element does 'O' represent on the periodic table?" 
answer='Oxygen' 56 | """ 57 | -------------------------------------------------------------------------------- /examples/task_planner/diagram.py: -------------------------------------------------------------------------------- 1 | import erdantic as erd 2 | 3 | from task_planner_topological_sort import TaskPlan 4 | 5 | diagram = erd.create(TaskPlan) 6 | diagram.draw("examples/task_planner/schema.png") 7 | -------------------------------------------------------------------------------- /examples/task_planner/schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/examples/task_planner/schema.png -------------------------------------------------------------------------------- /examples/union/run.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, Field 2 | from typing import Union 3 | import instructor 4 | from openai import OpenAI 5 | 6 | 7 | class Search(BaseModel): 8 | """Search action class with a 'query' field and a process method.""" 9 | 10 | query: str = Field(description="The search query") 11 | 12 | def process(self): 13 | """Process the search action.""" 14 | return f"Search method called for query: {self.query}" 15 | 16 | 17 | class Lookup(BaseModel): 18 | """Lookup action class with a 'keyword' field and a process method.""" 19 | 20 | keyword: str = Field(description="The lookup keyword") 21 | 22 | def process(self): 23 | """Process the lookup action.""" 24 | return f"Lookup method called for keyword: {self.keyword}" 25 | 26 | 27 | class Finish(BaseModel): 28 | """Finish action class with an 'answer' field and a process method.""" 29 | 30 | answer: str = Field(description="The answer for finishing the process") 31 | 32 | def process(self): 33 | """Process the finish action.""" 34 | return f"Finish method called with answer: {self.answer}" 35 | 36 | 37 | # Union of Search, Lookup, and Finish 38 | class TakeAction(BaseModel): 39 | action: Union[Search, Lookup, Finish] 40 | 41 | def process(self): 42 | """Process the action.""" 43 | return self.action.process() 44 | 45 | 46 | try: 47 | # Enables `response_model` 48 | client = instructor.from_openai(OpenAI()) 49 | action = client.chat.completions.create( 50 | model="gpt-3.5-turbo", 51 | response_model=TakeAction, 52 | messages=[ 53 | {"role": "user", "content": "Please choose one action"}, 54 | ], 55 | ) 56 | assert isinstance(action, TakeAction), "The action is not TakeAction" 57 | print(action.process()) 58 | except Exception as e: 59 | print(f"An error occurred: {e}") 60 | -------------------------------------------------------------------------------- /examples/validated-multiclass/output.json: -------------------------------------------------------------------------------- 1 | { 2 | "texts": [ 3 | "What is your phone number?", 4 | "What is your email address?", 5 | "What is your address?", 6 | "What is your privacy policy?"
7 | ], 8 | "predictions": [ 9 | { 10 | "id": 1, 11 | "name": "phone" 12 | }, 13 | { 14 | "id": 2, 15 | "name": "email" 16 | }, 17 | { 18 | "id": 3, 19 | "name": "address" 20 | }, 21 | { 22 | "id": 4, 23 | "name": "Other" 24 | } 25 | ] 26 | } -------------------------------------------------------------------------------- /examples/validators/allm_validator.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from typing import Annotated 3 | from pydantic import BaseModel, BeforeValidator 4 | from instructor import llm_validator, patch 5 | from openai import AsyncOpenAI 6 | 7 | aclient = AsyncOpenAI() 8 | 9 | patch() 10 | 11 | 12 | class QuestionAnswerNoEvil(BaseModel): 13 | question: str 14 | answer: Annotated[ 15 | str, 16 | BeforeValidator( 17 | llm_validator("don't say objectionable things", allow_override=True) 18 | ), 19 | ] 20 | 21 | 22 | async def main(): 23 | context = "The meaning of life according to the devil is to live a life of sin and debauchery." 24 | question = "What is the meaning of life?" 25 | 26 | try: 27 | qa: QuestionAnswerNoEvil = await aclient.chat.completions.create( 28 | model="gpt-3.5-turbo", 29 | response_model=QuestionAnswerNoEvil, 30 | max_retries=2, 31 | messages=[ 32 | { 33 | "role": "system", 34 | "content": "You are a system that answers questions based on the context. Answer exactly what the question asks using the context.", 35 | }, 36 | { 37 | "role": "user", 38 | "content": f"using the context: {context}\n\nAnswer the following question: {question}", 39 | }, 40 | ], 41 | ) # type: ignore 42 | print(qa) 43 | except Exception as e: 44 | print(e) 45 | 46 | 47 | if __name__ == "__main__": 48 | asyncio.run(main()) 49 | -------------------------------------------------------------------------------- /examples/validators/annotator.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | from pydantic import BaseModel, ValidationError 3 | from pydantic.functional_validators import AfterValidator 4 | 5 | 6 | def name_must_contain_space(v: str) -> str: 7 | if " " not in v: 8 | raise ValueError("name must be a first and last name separated by a space") 9 | return v.lower() 10 | 11 | 12 | class UserDetail(BaseModel): 13 | age: int 14 | name: Annotated[str, AfterValidator(name_must_contain_space)] 15 | 16 | 17 | # Example 1) Valid input, notice that the name is lowercased 18 | person: UserDetail = UserDetail(age=29, name="Jason Liu") 19 | print(person.model_dump_json(indent=2)) 20 | """ 21 | { 22 | "age": 29, 23 | "name": "jason liu" 24 | } 25 | """ 26 | 27 | # Example 2) Invalid input, we'll get a validation error 28 | # In the future this validation error will be raised by the API and 29 | # used by the LLM to generate a better response 30 | try: 31 | person: UserDetail = UserDetail(age=29, name="Jason") 32 | except ValidationError as e: 33 | print(e) 34 | """ 35 | 1 validation error for UserDetail 36 | name 37 | Value error, name must be a first and last name separated by a space [type=value_error, input_value='Jason', input_type=str] 38 | For further information visit https://errors.pydantic.dev/2.3/v/value_error 39 | """ 40 | -------------------------------------------------------------------------------- /examples/validators/citations.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | from pydantic import BaseModel, ValidationError, ValidationInfo, AfterValidator 3 | from openai import
OpenAI 4 | import instructor 5 | 6 | client = instructor.from_openai(OpenAI()) 7 | 8 | 9 | def citation_exists(v: str, info: ValidationInfo): 10 | context = info.context 11 | if context: 12 | context = context.get("text_chunk") 13 | if v not in context: 14 | raise ValueError(f"Citation `{v}` not found in text") 15 | return v 16 | 17 | 18 | Citation = Annotated[str, AfterValidator(citation_exists)] 19 | 20 | 21 | class AnswerWithCitation(BaseModel): 22 | answer: str 23 | citation: Citation 24 | 25 | 26 | try: 27 | q = "Are blue berries high in protein?" 28 | text_chunk = """ 29 | Blueberries are a good source of vitamin K. 30 | They also contain vitamin C, fibre, manganese and other antioxidants (notably anthocyanins). 31 | """ 32 | 33 | resp = client.chat.completions.create( 34 | model="gpt-3.5-turbo", 35 | response_model=AnswerWithCitation, 36 | messages=[ 37 | { 38 | "role": "user", 39 | "content": f"Answer the question `{q}` using the text chunk\n`{text_chunk}`", 40 | }, 41 | ], 42 | validation_context={"text_chunk": text_chunk}, 43 | ) # type: ignore 44 | print(resp.model_dump_json(indent=2)) 45 | except ValidationError as e: 46 | print(e) 47 | -------------------------------------------------------------------------------- /examples/validators/competitors.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | from pydantic import BaseModel, ValidationError, AfterValidator 3 | from openai import OpenAI 4 | 5 | import instructor 6 | 7 | client = instructor.from_openai(OpenAI()) 8 | 9 | 10 | def no_competitors(v: str) -> str: 11 | # does not allow mentions of McDonald's competitors 12 | competitors = ["burger king", "wendy's", "carl's jr", "jack in the box"] 13 | for competitor in competitors: 14 | if competitor in v.lower(): 15 | raise ValueError( 16 | f"""Let them know that you work for McDonalds and are only allowed to talk about McDonalds. 17 | Do not apologize. Do not even mention `{competitor}` since they are a competitor of McDonalds""" 18 | ) 19 | return v 20 | 21 | 22 | class Response(BaseModel): 23 | message: Annotated[str, AfterValidator(no_competitors)] 24 | 25 | 26 | try: 27 | resp = client.chat.completions.create( 28 | model="gpt-3.5-turbo", 29 | response_model=Response, 30 | max_retries=2, 31 | messages=[ 32 | { 33 | "role": "user", 34 | "content": "What is your favourite order at burger king?", 35 | }, 36 | ], 37 | ) # type: ignore 38 | print(resp.model_dump_json(indent=2)) 39 | except ValidationError as e: 40 | print(e) 41 | -------------------------------------------------------------------------------- /examples/validators/field_validator.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, ValidationError, field_validator 2 | 3 | 4 | class UserDetail(BaseModel): 5 | age: int 6 | name: str 7 | 8 | @field_validator("name", mode="before") 9 | def name_must_contain_space(cls, v): 10 | """ 11 | This validator runs before Pydantic's own field validation, 12 | and will raise a validation error if the name does not contain a space;
13 | then it will lowercase the name. 14 | """ 15 | if " " not in v: 16 | raise ValueError("name must be a first and last name separated by a space") 17 | return v.lower() 18 | 19 | 20 | # Example 1) Valid input, notice that the name is lowercased 21 | person = UserDetail(age=29, name="Jason Liu") 22 | print(person.model_dump_json(indent=2)) 23 | """ 24 | { 25 | "age": 29, 26 | "name": "jason liu" 27 | } 28 | """ 29 | 30 | # Example 2) Invalid input, we'll get a validation error 31 | # In the future this validation error will be raised by the API and 32 | # used by the LLM to generate a better response 33 | try: 34 | person = UserDetail(age=29, name="Jason") 35 | except ValidationError as e: 36 | print(e) 37 | """ 38 | 1 validation error for UserDetail 39 | name 40 | Value error, name must be a first and last name separated by a space [type=value_error, input_value='Jason', input_type=str] 41 | For further information visit https://errors.pydantic.dev/2.3/v/value_error 42 | """ 43 | -------------------------------------------------------------------------------- /examples/validators/just_a_guy.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, ValidationError, field_validator, ValidationInfo 2 | 3 | 4 | class AnswerWithCitation(BaseModel): 5 | answer: str 6 | citation: str 7 | 8 | @field_validator("citation") 9 | @classmethod 10 | def citation_must_exist(cls, v: str, info: ValidationInfo): 11 | context = info.context 12 | if context: 13 | text_chunks = context.get("text_chunk") 14 | if v not in text_chunks: 15 | raise ValueError(f"Citation `{v}` not found in text chunks") 16 | return v 17 | 18 | 19 | try: 20 | AnswerWithCitation.model_validate( 21 | {"answer": "Jason is a cool guy", "citation": "Jason is cool"}, 22 | context={"text_chunk": "Jason is just a guy"}, 23 | ) 24 | except ValidationError as e: 25 | print(e) 26 | """ 27 | 1 validation error for AnswerWithCitation 28 | citation 29 | Value error, Citation `Jason is cool` not found in text chunks [type=value_error, input_value='Jason is cool', input_type=str] 30 | For further information visit https://errors.pydantic.dev/2.4/v/value_error 31 | """ 32 | -------------------------------------------------------------------------------- /examples/validators/moderation.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | 3 | from instructor import openai_moderation 4 | 5 | from typing import Annotated 6 | from pydantic import BaseModel, AfterValidator 7 | from openai import OpenAI 8 | 9 | client = instructor.from_openai(OpenAI()) 10 | 11 | 12 | class Response(BaseModel): 13 | message: Annotated[str, AfterValidator(openai_moderation(client=client))] 14 | 15 | 16 | response = Response(message="I want to make them suffer the consequences") 17 | -------------------------------------------------------------------------------- /examples/watsonx/watsonx.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import litellm 4 | from litellm import completion 5 | from pydantic import BaseModel, Field 6 | 7 | import instructor 8 | from instructor import Mode 9 | 10 | litellm.drop_params = True # watsonx.ai doesn't support `json_mode` 11 | 12 | os.environ["WATSONX_URL"] = "https://us-south.ml.cloud.ibm.com" 13 | os.environ["WATSONX_API_KEY"] = "" 14 | os.environ["WATSONX_PROJECT_ID"] = "" 15 | # Additional options: https://docs.litellm.ai/docs/providers/watsonx 16 | 17 | 18 | class Company(BaseModel): 19 |
name: str = Field(description="name of the company") 20 | year_founded: int = Field(description="year the company was founded") 21 | 22 | 23 | client = instructor.from_litellm(completion, mode=Mode.JSON) 24 | 25 | resp = client.chat.completions.create( 26 | model="watsonx/meta-llama/llama-3-8b-instruct", 27 | max_tokens=1024, 28 | messages=[ 29 | { 30 | "role": "user", 31 | "content": """\ 32 | Given the following text, create a Company object: 33 | 34 | IBM was founded in 1911 as the Computing-Tabulating-Recording Company (CTR), a holding company of manufacturers of record-keeping and measuring systems. 35 | """, 36 | } 37 | ], 38 | project_id=os.environ["WATSONX_PROJECT_ID"], 39 | response_model=Company, 40 | ) 41 | 42 | print(resp.model_dump_json(indent=2)) 43 | """ 44 | { 45 | "name": "IBM", 46 | "year_founded": 1911 47 | } 48 | """ 49 | -------------------------------------------------------------------------------- /instructor/_types/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/instructor/_types/__init__.py -------------------------------------------------------------------------------- /instructor/_types/_alias.py: -------------------------------------------------------------------------------- 1 | from typing import Literal 2 | 3 | from typing_extensions import TypeAlias 4 | 5 | ModelNames: TypeAlias = Literal[ 6 | "gpt-4o", 7 | "gpt-4-0125-preview", 8 | "gpt-4-turbo-preview", 9 | "gpt-4-1106-preview", 10 | "gpt-4-vision-preview", 11 | "gpt-4", 12 | "gpt-4-0314", 13 | "gpt-4-0613", 14 | "gpt-4-32k", 15 | "gpt-4-32k-0314", 16 | "gpt-4-32k-0613", 17 | "gpt-3.5-turbo", 18 | "gpt-3.5-turbo-16k", 19 | "gpt-3.5-turbo-0301", 20 | "gpt-3.5-turbo-0613", 21 | "gpt-3.5-turbo-1106", 22 | "gpt-3.5-turbo-0125", 23 | "gpt-3.5-turbo-16k-0613", 24 | "gpt-3.5-turbo-instruct", 25 | "text-embedding-ada-002", 26 | "text-embedding-ada-002-v2", 27 | "text-embedding-3-small", 28 | "text-embedding-3-large", 29 | ] 30 | -------------------------------------------------------------------------------- /instructor/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/instructor/cli/__init__.py -------------------------------------------------------------------------------- /instructor/cli/cli.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | import typer 3 | from typer import Typer, launch 4 | import instructor.cli.jobs as jobs 5 | import instructor.cli.files as files 6 | import instructor.cli.usage as usage 7 | import instructor.cli.deprecated_hub as hub 8 | import instructor.cli.batch as batch 9 | 10 | app: Typer = typer.Typer() 11 | 12 | app.add_typer(jobs.app, name="jobs", help="Monitor and create fine tuning jobs") 13 | app.add_typer(files.app, name="files", help="Manage files on OpenAI's servers") 14 | app.add_typer(usage.app, name="usage", help="Check OpenAI API usage data") 15 | app.add_typer( 16 | hub.app, name="hub", help="[DEPRECATED] The instructor hub is no longer available" 17 | ) 18 | app.add_typer(batch.app, name="batch", help="Manage OpenAI Batch jobs") 19 | 20 | 21 | @app.command() 22 | def docs( 23 | query: Optional[str] = typer.Argument(None, help="Search the documentation"), 24 | ) -> None: 25 | """ 26 | Open the instructor documentation 
website. 27 | """ 28 | if query: 29 | launch(f"https://python.useinstructor.com/?q={query}") 30 | else: 31 | launch("https://python.useinstructor.com/") 32 | 33 | 34 | if __name__ == "__main__": 35 | app() 36 | -------------------------------------------------------------------------------- /instructor/cli/deprecated_hub.py: -------------------------------------------------------------------------------- 1 | from typer import Exit, echo, Typer 2 | 3 | app: Typer = Typer(help="Instructor Hub CLI (Deprecated)") 4 | 5 | 6 | @app.command(name="hub") 7 | def hub() -> None: 8 | """ 9 | This command has been deprecated. The instructor hub is no longer available. 10 | Please refer to our cookbook examples at https://python.useinstructor.com/examples/ 11 | """ 12 | echo( 13 | "The instructor hub has been deprecated. Please refer to our cookbook examples at https://python.useinstructor.com/examples/" 14 | ) 15 | raise Exit(1) 16 | 17 | 18 | if __name__ == "__main__": 19 | app() 20 | -------------------------------------------------------------------------------- /instructor/client_groq.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import overload, Any 4 | 5 | import groq 6 | import instructor 7 | 8 | 9 | @overload 10 | def from_groq( 11 | client: groq.Groq, 12 | mode: instructor.Mode = instructor.Mode.TOOLS, 13 | **kwargs: Any, 14 | ) -> instructor.Instructor: ... 15 | 16 | 17 | @overload 18 | def from_groq( 19 | client: groq.AsyncGroq, 20 | mode: instructor.Mode = instructor.Mode.TOOLS, 21 | **kwargs: Any, 22 | ) -> instructor.AsyncInstructor: ... 23 | 24 | 25 | def from_groq( 26 | client: groq.Groq | groq.AsyncGroq, 27 | mode: instructor.Mode = instructor.Mode.TOOLS, 28 | **kwargs: Any, 29 | ) -> instructor.Instructor | instructor.AsyncInstructor: 30 | valid_modes = { 31 | instructor.Mode.JSON, 32 | instructor.Mode.TOOLS, 33 | } 34 | 35 | if mode not in valid_modes: 36 | from instructor.exceptions import ModeError 37 | 38 | raise ModeError( 39 | mode=str(mode), provider="Groq", valid_modes=[str(m) for m in valid_modes] 40 | ) 41 | 42 | if not isinstance(client, (groq.Groq, groq.AsyncGroq)): 43 | from instructor.exceptions import ClientError 44 | 45 | raise ClientError( 46 | f"Client must be an instance of groq.Groq or groq.AsyncGroq. " 47 | f"Got: {type(client).__name__}" 48 | ) 49 | 50 | if isinstance(client, groq.Groq): 51 | return instructor.Instructor( 52 | client=client, 53 | create=instructor.patch(create=client.chat.completions.create, mode=mode), 54 | provider=instructor.Provider.GROQ, 55 | mode=mode, 56 | **kwargs, 57 | ) 58 | 59 | else: 60 | return instructor.AsyncInstructor( 61 | client=client, 62 | create=instructor.patch(create=client.chat.completions.create, mode=mode), 63 | provider=instructor.Provider.GROQ, 64 | mode=mode, 65 | **kwargs, 66 | ) 67 | -------------------------------------------------------------------------------- /instructor/client_writer.py: -------------------------------------------------------------------------------- 1 | # Future imports to ensure compatibility with Python 3.9 2 | from __future__ import annotations 3 | 4 | 5 | import instructor 6 | from writerai import AsyncWriter, Writer 7 | from typing import overload, Any 8 | 9 | 10 | @overload 11 | def from_writer( 12 | client: Writer, 13 | mode: instructor.Mode = instructor.Mode.WRITER_TOOLS, 14 | **kwargs: Any, 15 | ) -> instructor.Instructor: ... 
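# The paired @overload stubs give type checkers precise return types for
# from_writer: a sync Writer client yields an Instructor, while an AsyncWriter
# yields an AsyncInstructor; the single runtime implementation follows them.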
16 | 17 | 18 | @overload 19 | def from_writer( 20 | client: AsyncWriter, 21 | mode: instructor.Mode = instructor.Mode.WRITER_TOOLS, 22 | **kwargs: Any, 23 | ) -> instructor.AsyncInstructor: ... 24 | 25 | 26 | def from_writer( 27 | client: Writer | AsyncWriter, 28 | mode: instructor.Mode = instructor.Mode.WRITER_TOOLS, 29 | **kwargs: Any, 30 | ) -> instructor.client.Instructor | instructor.client.AsyncInstructor: 31 | valid_modes = {instructor.Mode.WRITER_TOOLS, instructor.Mode.WRITER_JSON} 32 | 33 | if mode not in valid_modes: 34 | from instructor.exceptions import ModeError 35 | 36 | raise ModeError( 37 | mode=str(mode), provider="Writer", valid_modes=[str(m) for m in valid_modes] 38 | ) 39 | 40 | if not isinstance(client, (Writer, AsyncWriter)): 41 | from instructor.exceptions import ClientError 42 | 43 | raise ClientError( 44 | f"Client must be an instance of Writer or AsyncWriter. " 45 | f"Got: {type(client).__name__}" 46 | ) 47 | 48 | if isinstance(client, Writer): 49 | return instructor.Instructor( 50 | client=client, 51 | create=instructor.patch(create=client.chat.chat, mode=mode), 52 | provider=instructor.Provider.WRITER, 53 | mode=mode, 54 | **kwargs, 55 | ) 56 | 57 | return instructor.AsyncInstructor( 58 | client=client, 59 | create=instructor.patch(create=client.chat.chat, mode=mode), 60 | provider=instructor.Provider.WRITER, 61 | mode=mode, 62 | **kwargs, 63 | ) 64 | -------------------------------------------------------------------------------- /instructor/dsl/__init__.py: -------------------------------------------------------------------------------- 1 | from .iterable import IterableModel 2 | from .maybe import Maybe 3 | from .partial import Partial 4 | from .validators import llm_validator, openai_moderation 5 | from .citation import CitationMixin 6 | from .simple_type import is_simple_type, ModelAdapter 7 | 8 | __all__ = [ # noqa: F405 9 | "CitationMixin", 10 | "IterableModel", 11 | "Maybe", 12 | "Partial", 13 | "llm_validator", 14 | "openai_moderation", 15 | "is_simple_type", 16 | "ModelAdapter", 17 | ] 18 | -------------------------------------------------------------------------------- /instructor/py.typed: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /pyrightconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [ 3 | "instructor" 4 | ], 5 | "exclude": [ 6 | "instructor/client_bedrock.py", 7 | "instructor/client_cerebras.py" 8 | ], 9 | "typeCheckingMode": "basic" 10 | } -------------------------------------------------------------------------------- /requirements-doc.txt: -------------------------------------------------------------------------------- 1 | mkdocs 2 | cairosvg 3 | pillow 4 | mkdocs-minify-plugin 5 | mkdocstrings 6 | mkdocstrings-python 7 | mkdocs-jupyter 8 | mkdocs-redirects -------------------------------------------------------------------------------- /requirements-examples.txt: -------------------------------------------------------------------------------- 1 | openai>=1.1.0 2 | pydantic 3 | docstring-parser 4 | rich 5 | aiohttp 6 | ruff==0.11.13 7 | pre-commit==4.2.0 8 | pyright==1.1.401 9 | typer 10 | cohere 11 | datasets 12 | trafilatura -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/__init__.py -------------------------------------------------------------------------------- /tests/assets/gettysburg.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/assets/gettysburg.wav -------------------------------------------------------------------------------- /tests/assets/image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/assets/image.jpg -------------------------------------------------------------------------------- /tests/assets/invoice.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/assets/invoice.pdf -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | 3 | # Support .env for local development 4 | load_dotenv() 5 | -------------------------------------------------------------------------------- /tests/dsl/test_simple_type.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from instructor.dsl.simple_type import is_simple_type 3 | from pydantic import BaseModel 4 | from enum import Enum 5 | import typing 6 | 7 | 8 | class SimpleTypeTests(unittest.TestCase): 9 | def test_is_simple_type_with_base_model(self): 10 | class MyModel(BaseModel): 11 | label: str 12 | 13 | self.assertFalse(is_simple_type(MyModel)) 14 | 15 | def test_is_simple_type_with_str(self): 16 | self.assertTrue(is_simple_type(str)) 17 | 18 | def test_is_simple_type_with_int(self): 19 | self.assertTrue(is_simple_type(int)) 20 | 21 | def test_is_simple_type_with_float(self): 22 | self.assertTrue(is_simple_type(float)) 23 | 24 | def test_is_simple_type_with_bool(self): 25 | self.assertTrue(is_simple_type(bool)) 26 | 27 | def test_is_simple_type_with_enum(self): 28 | class MyEnum(Enum): 29 | VALUE = 1 30 | 31 | self.assertTrue(is_simple_type(MyEnum)) 32 | 33 | def test_is_simple_type_with_annotated(self): 34 | AnnotatedType = typing.Annotated[int, "example"] 35 | self.assertTrue(is_simple_type(AnnotatedType)) 36 | 37 | def test_is_simple_type_with_literal(self): 38 | LiteralType = typing.Literal[1, 2, 3] 39 | self.assertTrue(is_simple_type(LiteralType)) 40 | 41 | def test_is_simple_type_with_union(self): 42 | UnionType = typing.Union[int, str] 43 | self.assertTrue(is_simple_type(UnionType)) 44 | 45 | def test_is_simple_type_with_iterable(self): 46 | IterableType = typing.Iterable[int] 47 | self.assertFalse(is_simple_type(IterableType)) 48 | 49 | 50 | if __name__ == "__main__": 51 | unittest.main() 52 | -------------------------------------------------------------------------------- /tests/dsl/test_simple_type_fix.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import unittest 3 | from typing import Union, List # noqa: UP035 4 | from typing import get_origin, get_args 5 | from instructor.dsl.simple_type import is_simple_type 6 | 7 | 8 | class TestSimpleTypeFix(unittest.TestCase): 9 | def test_list_with_union_type(self): 10 | """Test 
that list[int | str] is correctly identified as a simple type.""" 11 | # This is the type that was failing in Python 3.10 12 | if sys.version_info < (3, 10): 13 | self.skipTest("Union pipe syntax is only available in Python 3.10+") 14 | response_model = list[int | str] 15 | self.assertTrue( 16 | is_simple_type(response_model), 17 | f"list[int | str] should be a simple type in Python {sys.version_info.major}.{sys.version_info.minor}. Instead it was identified as {type(response_model)} with origin {get_origin(response_model)} and args {get_args(response_model)}", 18 | ) 19 | 20 | def test_list_with_union_type_alternative_syntax(self): 21 | """Test that List[Union[int, str]] is correctly identified as a simple type.""" 22 | # Alternative syntax 23 | response_model = List[Union[int, str]] # noqa: UP006 24 | self.assertTrue( 25 | is_simple_type(response_model), 26 | f"List[Union[int, str]] should be a simple type in Python {sys.version_info.major}.{sys.version_info.minor}", 27 | ) 28 | 29 | -------------------------------------------------------------------------------- /tests/llm/test_anthropic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_anthropic/__init__.py -------------------------------------------------------------------------------- /tests/llm/test_anthropic/conftest.py: -------------------------------------------------------------------------------- 1 | # conftest.py 2 | from anthropic import AsyncAnthropic, Anthropic 3 | import pytest 4 | import os 5 | 6 | try: 7 | import braintrust 8 | 9 | wrap_anthropic = braintrust.wrap_anthropic 10 | except ImportError: 11 | 12 | def wrap_anthropic(x): 13 | return x 14 | 15 | 16 | @pytest.fixture(scope="session") 17 | def client(): 18 | if os.environ.get("BRAINTRUST_API_KEY"): 19 | yield wrap_anthropic( 20 | Anthropic( 21 | api_key=os.environ["BRAINTRUST_API_KEY"], 22 | base_url="https://braintrustproxy.com/v1", 23 | ) 24 | ) 25 | else: 26 | yield Anthropic() 27 | 28 | 29 | @pytest.fixture(scope="session") 30 | def aclient(): 31 | if os.environ.get("BRAINTRUST_API_KEY"): 32 | yield wrap_anthropic( 33 | AsyncAnthropic( 34 | api_key=os.environ["BRAINTRUST_API_KEY"], 35 | base_url="https://braintrustproxy.com/v1", 36 | ) 37 | ) 38 | else: 39 | yield AsyncAnthropic() 40 | -------------------------------------------------------------------------------- /tests/llm/test_anthropic/test_reasoning.py: -------------------------------------------------------------------------------- 1 | import anthropic 2 | import pytest 3 | import instructor 4 | from pydantic import BaseModel 5 | 6 | 7 | class Answer(BaseModel): 8 | answer: float 9 | 10 | 11 | modes = [ 12 | instructor.Mode.ANTHROPIC_REASONING_TOOLS, 13 | instructor.Mode.ANTHROPIC_JSON, 14 | ] 15 | 16 | 17 | @pytest.mark.parametrize("mode", modes) 18 | def test_reasoning(mode): 19 | anthropic_client = anthropic.Anthropic() 20 | client = instructor.from_anthropic(anthropic_client, mode=mode) 21 | response = client.chat.completions.create( 22 | model="claude-3-7-sonnet-latest", 23 | response_model=Answer, 24 | messages=[ 25 | { 26 | "role": "user", 27 | "content": "Which is larger, 9.11 or 9.8", 28 | }, 29 | ], 30 | temperature=1, 31 | max_tokens=2000, 32 | thinking={"type": "enabled", "budget_tokens": 1024}, 33 | ) 34 | 35 | # Assertions to validate the response 36 | assert isinstance(response, Answer) 37 | assert response.answer == 9.8 38 | 
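39 | 
40 | # Note: with extended thinking enabled, Anthropic requires `budget_tokens` to be
41 | # lower than `max_tokens` (hence 1024 < 2000 above). An async variant would
42 | # follow the same shape (sketch, assuming the same model and modes):
43 | #
44 | #     aclient = instructor.from_anthropic(anthropic.AsyncAnthropic(), mode=mode)
45 | #     response = await aclient.chat.completions.create(...)  # same kwargs as above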
-------------------------------------------------------------------------------- /tests/llm/test_anthropic/util.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | 3 | models = ["claude-3-5-haiku-20241022"] 4 | modes = [ 5 | instructor.Mode.ANTHROPIC_TOOLS, 6 | ] 7 | -------------------------------------------------------------------------------- /tests/llm/test_cerebras/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_cerebras/__init__.py -------------------------------------------------------------------------------- /tests/llm/test_cohere/conftest.py: -------------------------------------------------------------------------------- 1 | # conftest.py 2 | from cohere import Client, AsyncClient 3 | import pytest 4 | 5 | 6 | @pytest.fixture(scope="session") 7 | def client(): 8 | yield Client() 9 | 10 | 11 | @pytest.fixture(scope="session") 12 | def aclient(): 13 | yield AsyncClient() 14 | -------------------------------------------------------------------------------- /tests/llm/test_cohere/test_none_response.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from instructor import from_cohere 3 | 4 | 5 | def test_none_response_model(client): 6 | client = from_cohere(client, model_name="command-r", max_tokens=1000) 7 | 8 | response = client.messages.create( 9 | messages=[{"role": "user", "content": "Tell me about your day"}], 10 | response_model=None, 11 | temperature=0, 12 | ) 13 | 14 | assert response.text 15 | 16 | 17 | @pytest.mark.asyncio() 18 | async def test_none_response_model_async(aclient): 19 | async_client = from_cohere(aclient, model_name="command-r", max_tokens=1000) 20 | 21 | response = await async_client.messages.create( 22 | messages=[{"role": "user", "content": "Tell me about your day"}], 23 | response_model=None, 24 | temperature=0, 25 | ) 26 | 27 | assert response.text 28 | -------------------------------------------------------------------------------- /tests/llm/test_fireworks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_fireworks/__init__.py -------------------------------------------------------------------------------- /tests/llm/test_fireworks/test_format.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | from fireworks.client import Fireworks, AsyncFireworks 3 | from pydantic import BaseModel 4 | import pytest 5 | from .util import modes 6 | 7 | 8 | @pytest.mark.parametrize("mode, model", modes) 9 | def test_fireworks_sync(mode: instructor.Mode, model: str): 10 | class User(BaseModel): 11 | name: str 12 | age: int 13 | 14 | client = instructor.from_fireworks(Fireworks(), mode=mode) 15 | 16 | resp = client.chat.completions.create( 17 | model=model, 18 | messages=[ 19 | { 20 | "role": "user", 21 | "content": "Extract a user from this sentence : {{ name }} is {{ age }} and lives in Singapore", 22 | }, 23 | ], 24 | context={ 25 | "name": "Ivan", 26 | "age": 27, 27 | }, 28 | response_model=User, 29 | ) 30 | 31 | assert resp.name.lower() == "ivan" 32 | assert resp.age == 27 33 | 34 | 35 | @pytest.mark.parametrize("mode, model", modes) 36 | @pytest.mark.asyncio 37 | async def 
test_fireworks_async(mode: instructor.Mode, model: str): 38 | class User(BaseModel): 39 | name: str 40 | age: int 41 | 42 | client = instructor.from_fireworks(AsyncFireworks(), mode=mode) 43 | 44 | resp = await client.chat.completions.create( 45 | model=model, 46 | messages=[ 47 | { 48 | "role": "user", 49 | "content": "Extract a user from this sentence : {{ name }} is {{ age }} and lives in Singapore", 50 | }, 51 | ], 52 | context={ 53 | "name": "Ivan", 54 | "age": 27, 55 | }, 56 | response_model=User, 57 | ) 58 | 59 | assert resp.name.lower() == "ivan" 60 | assert resp.age == 27 61 | -------------------------------------------------------------------------------- /tests/llm/test_fireworks/util.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | 3 | modes = [ 4 | (instructor.Mode.FIREWORKS_JSON, "accounts/fireworks/models/llama-v3-70b-instruct"), 5 | (instructor.Mode.FIREWORKS_TOOLS, "accounts/fireworks/models/firefunction-v1"), 6 | ] 7 | -------------------------------------------------------------------------------- /tests/llm/test_gemini/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_gemini/__init__.py -------------------------------------------------------------------------------- /tests/llm/test_gemini/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from google import generativeai as genai 4 | 5 | 6 | @pytest.fixture(scope="session", autouse=True) 7 | def configure_genai(): 8 | api_key = os.getenv("GOOGLE_API_KEY") 9 | if not api_key: 10 | pytest.skip("GOOGLE_API_KEY environment variable not set") 11 | genai.configure(api_key=api_key) 12 | -------------------------------------------------------------------------------- /tests/llm/test_gemini/evals/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_gemini/evals/__init__.py -------------------------------------------------------------------------------- /tests/llm/test_gemini/evals/test_extract_users.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from itertools import product 3 | from pydantic import BaseModel 4 | import instructor 5 | import google.generativeai as genai 6 | from ..util import models, modes 7 | 8 | 9 | class UserDetails(BaseModel): 10 | name: str 11 | age: int 12 | 13 | 14 | # Lists for models, test data, and modes 15 | test_data = [ 16 | ("Jason is 10", "Jason", 10), 17 | ("Alice is 25", "Alice", 25), 18 | ("Bob is 35", "Bob", 35), 19 | ] 20 | 21 | 22 | @pytest.mark.parametrize("model, data, mode", product(models, test_data, modes)) 23 | def test_extract(model, data, mode): 24 | sample_data, expected_name, expected_age = data 25 | 26 | client = instructor.from_gemini(genai.GenerativeModel(model), mode=mode) 27 | 28 | # Calling the extract function with the provided model, sample data, and mode 29 | response = client.chat.completions.create( 30 | response_model=UserDetails, 31 | messages=[ 32 | {"role": "user", "content": sample_data}, 33 | ], 34 | ) 35 | 36 | # Assertions 37 | assert ( 38 | response.name == expected_name 39 | ), f"Expected name {expected_name}, got {response.name}" 40 | assert ( 41 | response.age == 
expected_age 42 | ), f"Expected age {expected_age}, got {response.age}" 43 | -------------------------------------------------------------------------------- /tests/llm/test_gemini/evals/test_sentiment_analysis.py: -------------------------------------------------------------------------------- 1 | import enum 2 | from itertools import product 3 | from pydantic import BaseModel 4 | import pytest 5 | import instructor 6 | import google.generativeai as genai 7 | from ..util import models, modes 8 | 9 | 10 | class Sentiment(str, enum.Enum): 11 | POSITIVE = "positive" 12 | NEGATIVE = "negative" 13 | NEUTRAL = "neutral" 14 | 15 | 16 | class SentimentAnalysis(BaseModel): 17 | sentiment: Sentiment 18 | 19 | 20 | test_data = [ 21 | ( 22 | "I absolutely love this product! It has exceeded all my expectations.", 23 | Sentiment.POSITIVE, 24 | ), 25 | ( 26 | "The service was terrible. I will never use this company again.", 27 | Sentiment.NEGATIVE, 28 | ), 29 | ( 30 | "The movie was okay. It had some good moments but overall it was average.", 31 | Sentiment.NEUTRAL, 32 | ), 33 | ] 34 | 35 | 36 | @pytest.mark.parametrize("model, data, mode", product(models, test_data, modes)) 37 | def test_sentiment_analysis(model, data, mode): 38 | sample_data, expected_sentiment = data 39 | 40 | client = instructor.from_gemini(genai.GenerativeModel(model), mode=mode) 41 | 42 | response = client.chat.completions.create( 43 | response_model=SentimentAnalysis, 44 | messages=[ 45 | { 46 | "role": "system", 47 | "content": "You are a sentiment analysis model. Analyze the sentiment of the given text and provide the sentiment (positive, negative, or neutral).", 48 | }, 49 | {"role": "user", "content": sample_data}, 50 | ], 51 | ) 52 | 53 | assert response.sentiment == expected_sentiment 54 | -------------------------------------------------------------------------------- /tests/llm/test_gemini/test_files/sample.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_gemini/test_files/sample.mp3 -------------------------------------------------------------------------------- /tests/llm/test_gemini/test_format.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | import google.generativeai as genai 3 | from pydantic import BaseModel 4 | from .util import models, modes 5 | 6 | 7 | class User(BaseModel): 8 | first_name: str 9 | age: int 10 | 11 | 12 | import pytest 13 | from itertools import product 14 | 15 | 16 | @pytest.mark.parametrize("model, mode, is_list", product(models, modes, [True, False])) 17 | def test_format_string(model: str, mode: instructor.Mode, is_list: bool): 18 | client = instructor.from_gemini( 19 | client=genai.GenerativeModel( 20 | model_name=model, 21 | system_instruction="You are a helpful assistant that excels at extracting user information.", 22 | ), 23 | mode=mode, 24 | ) 25 | 26 | content = ( 27 | ["Extract {{name}} is {{age}} years old."] 28 | if is_list 29 | else "Extract {{name}} is {{age}} years old." 
30 | ) 31 | 32 | # note that client.chat.completions.create will also work 33 | resp = client.messages.create( 34 | messages=[ 35 | { 36 | "role": "user", 37 | "content": content, 38 | } 39 | ], 40 | response_model=User, 41 | context={"name": "Jason", "age": 25}, 42 | ) 43 | 44 | assert isinstance(resp, User) 45 | assert resp.first_name == "Jason" 46 | assert resp.age == 25 47 | -------------------------------------------------------------------------------- /tests/llm/test_gemini/test_list_content.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | import google.generativeai as genai 3 | from pydantic import BaseModel 4 | 5 | 6 | class User(BaseModel): 7 | name: str 8 | age: int 9 | 10 | 11 | class UserList(BaseModel): 12 | items: list[User] 13 | 14 | 15 | def test_list_of_strings(): 16 | client = instructor.from_gemini( 17 | genai.GenerativeModel("gemini-1.5-flash-latest"), 18 | mode=instructor.Mode.GEMINI_JSON, 19 | ) 20 | 21 | users = [ 22 | { 23 | "name": "Jason", 24 | "age": 25, 25 | }, 26 | { 27 | "name": "Elizabeth", 28 | "age": 12, 29 | }, 30 | { 31 | "name": "Chris", 32 | "age": 27, 33 | }, 34 | ] 35 | 36 | prompt = """ 37 | Extract a list of users from the following text: 38 | 39 | {% for user in users %} 40 | - Name: {{ user.name }}, Age: {{ user.age }} 41 | {% endfor %} 42 | """ 43 | 44 | result = client.chat.completions.create( 45 | response_model=UserList, 46 | messages=[ 47 | {"role": "user", "content": prompt}, 48 | ], 49 | context={"users": users}, 50 | ) 51 | 52 | assert isinstance(result, UserList), "Result should be an instance of UserList" 53 | assert isinstance(result.items, list), "items should be a list" 54 | assert len(result.items) == 3, "List should contain 3 items" 55 | 56 | names = [item.name.upper() for item in result.items] 57 | assert "JASON" in names, "'JASON' should be in the list" 58 | assert "ELIZABETH" in names, "'ELIZABETH' should be in the list" 59 | assert "CHRIS" in names, "'CHRIS' should be in the list" 60 | -------------------------------------------------------------------------------- /tests/llm/test_gemini/test_retries.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | from pydantic import AfterValidator, BaseModel, Field 3 | import pytest 4 | import instructor 5 | from itertools import product 6 | import google.generativeai as genai 7 | 8 | from .util import models, modes 9 | 10 | 11 | def uppercase_validator(v): 12 | if v.islower(): 13 | raise ValueError("Name must be ALL CAPS") 14 | return v 15 | 16 | 17 | class UserDetail(BaseModel): 18 | name: Annotated[str, AfterValidator(uppercase_validator)] = Field( 19 | ..., description="The name of the user" 20 | ) 21 | age: int 22 | 23 | 24 | @pytest.mark.parametrize("model, mode", product(models, modes)) 25 | def test_upper_case(model, mode): 26 | client = instructor.from_gemini(genai.GenerativeModel(model), mode=mode) 27 | response = client.chat.completions.create( 28 | response_model=UserDetail, 29 | messages=[ 30 | {"role": "user", "content": "Extract `jason is 12`"}, 31 | ], 32 | max_retries=3, 33 | ) 34 | assert response.name == "JASON" 35 | 36 | 37 | @pytest.mark.parametrize("model, mode", product(models, modes)) 38 | def test_upper_case_tenacity(model, mode): 39 | client = instructor.from_gemini(genai.GenerativeModel(model), mode=mode) 40 | from tenacity import Retrying, stop_after_attempt, wait_fixed 41 | 42 | retries = Retrying( 43 | stop=stop_after_attempt(2), 
44 | wait=wait_fixed(1), 45 | ) 46 | 47 | response = client.chat.completions.create( 48 | response_model=UserDetail, 49 | messages=[ 50 | {"role": "user", "content": "Extract `jason is 12`"}, 51 | ], 52 | max_retries=retries, 53 | ) 54 | assert response.name == "JASON" 55 | -------------------------------------------------------------------------------- /tests/llm/test_gemini/test_roles.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | import google.generativeai as genai 3 | from pydantic import BaseModel 4 | 5 | roles = [ 6 | "system", 7 | "user", 8 | "assistant", 9 | ] 10 | 11 | 12 | def test_roles(): 13 | client = instructor.from_gemini( 14 | client=genai.GenerativeModel( 15 | model_name="models/gemini-1.5-flash-latest", 16 | ), 17 | mode=instructor.Mode.GEMINI_JSON, 18 | ) 19 | 20 | class Description(BaseModel): 21 | description: str 22 | 23 | for role in roles: 24 | resp = client.create( 25 | response_model=Description, 26 | messages=[ 27 | { 28 | "role": role, 29 | "content": "Describe what a sunset in the desert looks like.", 30 | }, 31 | { 32 | "role": "user", 33 | "content": "Please adhere to the instructions", 34 | }, 35 | ], 36 | ) 37 | 38 | assert isinstance(resp, Description) 39 | -------------------------------------------------------------------------------- /tests/llm/test_gemini/test_simple_types.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | import enum 3 | 4 | import google.generativeai as genai 5 | from typing import Literal 6 | 7 | 8 | def test_literal(): 9 | client = instructor.from_gemini( 10 | genai.GenerativeModel("models/gemini-1.5-flash-latest") 11 | ) 12 | 13 | response = client.chat.completions.create( 14 | response_model=Literal["1231", "212", "331"], 15 | messages=[ 16 | { 17 | "role": "user", 18 | "content": "Produce a Random but correct response given the desired output", 19 | }, 20 | ], 21 | ) 22 | assert response in ["1231", "212", "331"] 23 | 24 | 25 | def test_enum(): 26 | class Options(enum.Enum): 27 | A = "A" 28 | B = "B" 29 | C = "C" 30 | 31 | client = instructor.from_gemini( 32 | genai.GenerativeModel("models/gemini-1.5-flash-latest") 33 | ) 34 | 35 | response = client.chat.completions.create( 36 | response_model=Options, 37 | messages=[ 38 | { 39 | "role": "user", 40 | "content": "Produce a Random but correct response given the desired output", 41 | }, 42 | ], 43 | ) 44 | assert response in [Options.A, Options.B, Options.C] 45 | 46 | 47 | def test_bool(): 48 | client = instructor.from_gemini( 49 | genai.GenerativeModel("models/gemini-1.5-flash-latest") 50 | ) 51 | 52 | response = client.chat.completions.create( 53 | response_model=bool, 54 | messages=[ 55 | { 56 | "role": "user", 57 | "content": "Produce a Random but correct response given the desired output", 58 | }, 59 | ], 60 | ) 61 | assert type(response) == bool 62 | -------------------------------------------------------------------------------- /tests/llm/test_gemini/test_stream.py: -------------------------------------------------------------------------------- 1 | from itertools import product 2 | from collections.abc import Iterable 3 | from pydantic import BaseModel 4 | import pytest 5 | import instructor 6 | import google.generativeai as genai 7 | from instructor.dsl.partial import Partial 8 | 9 | from .util import models, modes 10 | 11 | 12 | class UserExtract(BaseModel): 13 | name: str 14 | age: int 15 | 16 | 17 | @pytest.mark.parametrize("model, mode, stream", 
product(models, modes, [True, False])) 18 | def test_iterable_model(model, mode, stream): 19 | client = instructor.from_gemini(genai.GenerativeModel(model), mode=mode) 20 | model = client.chat.completions.create( 21 | response_model=Iterable[UserExtract], 22 | max_retries=2, 23 | stream=stream, 24 | messages=[ 25 | {"role": "user", "content": "Make two up people"}, 26 | ], 27 | ) 28 | for m in model: 29 | assert isinstance(m, UserExtract) 30 | 31 | 32 | @pytest.mark.parametrize("model,mode", product(models, modes)) 33 | def test_partial_model(model, mode): 34 | client = instructor.from_gemini(genai.GenerativeModel(model), mode=mode) 35 | model = client.chat.completions.create( 36 | response_model=Partial[UserExtract], 37 | max_retries=2, 38 | stream=True, 39 | messages=[ 40 | {"role": "user", "content": "{{ name }} is {{ age }} years old"}, 41 | ], 42 | context={"name": "Jason", "age": 12}, 43 | ) 44 | final_model = None 45 | for m in model: 46 | assert isinstance(m, UserExtract) 47 | final_model = m 48 | 49 | assert final_model.age == 12 50 | assert final_model.name == "Jason" 51 | -------------------------------------------------------------------------------- /tests/llm/test_gemini/util.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | 3 | models: list[str] = ["models/gemini-1.5-flash-8b"] 4 | modes = [instructor.Mode.GEMINI_TOOLS, instructor.Mode.GEMINI_JSON] 5 | -------------------------------------------------------------------------------- /tests/llm/test_genai/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_genai/__init__.py -------------------------------------------------------------------------------- /tests/llm/test_genai/conftest.py: -------------------------------------------------------------------------------- 1 | # conftest.py 2 | from google.genai import Client 3 | import pytest 4 | 5 | 6 | @pytest.fixture(scope="function") 7 | def client(): 8 | yield Client() 9 | 10 | 11 | @pytest.fixture(scope="function") 12 | def aclient(): 13 | yield Client() 14 | -------------------------------------------------------------------------------- /tests/llm/test_genai/test_retries.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | from pydantic import AfterValidator, BaseModel, Field 3 | import pytest 4 | import instructor 5 | from itertools import product 6 | from .util import models, modes 7 | 8 | 9 | def uppercase_validator(v): 10 | if v.islower(): 11 | raise ValueError("Name must be ALL CAPS") 12 | return v 13 | 14 | 15 | class UserDetail(BaseModel): 16 | name: Annotated[str, AfterValidator(uppercase_validator)] = Field( 17 | ..., description="The name of the user" 18 | ) 19 | age: int 20 | 21 | 22 | @pytest.mark.parametrize("model, mode", product(models, modes)) 23 | def test_upper_case(model, mode, client): 24 | client = instructor.from_genai(client, mode=mode) 25 | response = client.chat.completions.create( 26 | model=model, 27 | response_model=UserDetail, 28 | messages=[ 29 | {"role": "user", "content": "Extract `jason is 12`"}, 30 | ], 31 | max_retries=3, 32 | ) 33 | assert response.name == "JASON" 34 | 35 | 36 | @pytest.mark.parametrize("model, mode", product(models, modes)) 37 | def test_upper_case_tenacity(model, mode, client): 38 | client = instructor.from_genai(client, mode=mode) 39 | from 
tenacity import Retrying, stop_after_attempt, wait_fixed 40 | 41 | retries = Retrying( 42 | stop=stop_after_attempt(2), 43 | wait=wait_fixed(1), 44 | ) 45 | 46 | response = client.chat.completions.create( 47 | model=model, 48 | response_model=UserDetail, 49 | messages=[ 50 | {"role": "user", "content": "Extract `jason is 12`"}, 51 | ], 52 | max_retries=retries, 53 | ) 54 | assert response.name == "JASON" 55 | -------------------------------------------------------------------------------- /tests/llm/test_genai/test_simple.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pydantic import BaseModel 3 | import instructor 4 | from .util import models, modes 5 | 6 | 7 | class User(BaseModel): 8 | name: str 9 | age: int 10 | 11 | 12 | class Users(BaseModel): 13 | users: list[User] 14 | 15 | 16 | @pytest.mark.parametrize("model", models) 17 | @pytest.mark.parametrize("mode", modes) 18 | def test_simple_extraction(client, model, mode): 19 | client = instructor.from_genai(client, mode=mode) 20 | response = client.chat.completions.create( 21 | model=model, 22 | messages=[ 23 | { 24 | "role": "user", 25 | "content": "Ivan is 28 years old", 26 | }, 27 | ], 28 | response_model=Users, 29 | ) 30 | assert isinstance(response, Users) 31 | assert len(response.users) > 0 32 | assert response.users[0].name == "Ivan" 33 | assert response.users[0].age == 28 34 | 35 | 36 | @pytest.mark.asyncio 37 | @pytest.mark.parametrize("model", models) 38 | @pytest.mark.parametrize("mode", modes) 39 | async def test_simple_extraction_async(aclient, model, mode): 40 | aclient = instructor.from_genai(aclient, mode=mode, use_async=True) 41 | response = await aclient.chat.completions.create( 42 | model=model, 43 | messages=[ 44 | { 45 | "role": "user", 46 | "content": "Ivan is 28 years old", 47 | }, 48 | ], 49 | response_model=Users, 50 | ) 51 | assert isinstance(response, Users) 52 | assert len(response.users) > 0 53 | assert response.users[0].name == "Ivan" 54 | assert response.users[0].age == 28 55 | -------------------------------------------------------------------------------- /tests/llm/test_genai/util.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | 3 | models = ["gemini-2.0-flash"] 4 | modes = [instructor.Mode.GENAI_STRUCTURED_OUTPUTS, instructor.Mode.GENAI_TOOLS] 5 | -------------------------------------------------------------------------------- /tests/llm/test_litellm.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | from litellm import acompletion, completion 3 | 4 | 5 | def test_litellm_create(): 6 | client = instructor.from_litellm(completion) 7 | 8 | assert isinstance(client, instructor.Instructor) 9 | 10 | 11 | def test_async_litellm_create(): 12 | client = instructor.from_litellm(acompletion) 13 | 14 | assert isinstance(client, instructor.AsyncInstructor) 15 | -------------------------------------------------------------------------------- /tests/llm/test_mistral/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_mistral/__init__.py -------------------------------------------------------------------------------- /tests/llm/test_mistral/conftest.py: -------------------------------------------------------------------------------- 1 | # conftest.py 2 | import pytest 3 | import os 4 
| from mistralai import Mistral
 5 | 
 6 | 
 7 | @pytest.fixture(scope="function")
 8 | def client():
 9 |     yield Mistral(api_key=os.environ["MISTRAL_API_KEY"])
10 | 
11 | 
12 | @pytest.fixture(scope="function")
13 | def aclient():
14 |     yield Mistral(api_key=os.environ["MISTRAL_API_KEY"])
15 | 
--------------------------------------------------------------------------------
/tests/llm/test_mistral/test_multimodal.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from pydantic import BaseModel
 3 | import instructor
 4 | from .util import modes, models
 5 | 
 6 | pdf_url = "https://raw.githubusercontent.com/instructor-ai/instructor/main/tests/assets/invoice.pdf"
 7 | 
 8 | 
 9 | class Invoice(BaseModel):
10 |     total: float
11 |     items: list[str]
12 | 
13 | 
14 | @pytest.mark.parametrize("mode", modes)
15 | @pytest.mark.parametrize("model", models)
16 | def test_mistral_multimodal_pdf(client, model, mode):
17 |     client = instructor.from_mistral(client, mode=mode)
18 |     response = client.chat.completions.create(
19 |         model=model,
20 |         messages=[
21 |             {
22 |                 "role": "user",
23 |                 "content": [
24 |                     "Extract information from the invoice.",
25 |                     instructor.multimodal.PDF.from_url(pdf_url),
26 |                 ],
27 |             }
28 |         ],
29 |         response_model=Invoice,
30 |     )
31 |     assert response.total == 220
32 |     assert len(response.items) == 2
33 | 
34 | 
35 | @pytest.mark.parametrize("mode", modes)
36 | @pytest.mark.parametrize("model", models)
37 | @pytest.mark.asyncio
38 | async def test_mistral_multimodal_pdf_async(aclient, model, mode):
39 |     client = instructor.from_mistral(aclient, mode=mode, use_async=True)
40 |     response = await client.chat.completions.create(
41 |         model=model,
42 |         messages=[
43 |             {
44 |                 "role": "user",
45 |                 "content": [
46 |                     "Extract information from the invoice.",
47 |                     instructor.multimodal.PDF.from_url(pdf_url),
48 |                 ],
49 |             }
50 |         ],
51 |         response_model=Invoice,
52 |     )
53 |     assert response.total == 220
54 |     assert len(response.items) == 2
55 | 
--------------------------------------------------------------------------------
/tests/llm/test_mistral/test_retries.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from pydantic import BaseModel, field_validator
 3 | import instructor
 4 | from .util import modes, models
 5 | 
 6 | 
 7 | class User(BaseModel):
 8 |     name: str
 9 |     age: int
10 | 
11 |     @field_validator("age")
12 |     def validate_age(cls, v):
13 |         if v > 0:
14 |             raise ValueError(
15 |                 "Age must be expressed as a negative number (Eg. 
25 is -25 )" 16 | ) 17 | return v 18 | 19 | 20 | @pytest.mark.parametrize("mode", modes) 21 | @pytest.mark.parametrize("model", models) 22 | def test_mistral_retry_validation(client, model, mode): 23 | patched_client = instructor.from_mistral(client, mode=mode) 24 | 25 | # Test extracting structured data with validation that should trigger retry 26 | response = patched_client.chat.completions.create( 27 | model=model, 28 | messages=[{"role": "user", "content": "Ivan is 25 years old"}], 29 | response_model=User, 30 | ) 31 | 32 | # Validate response has correct negative age after retry 33 | assert isinstance(response, User) 34 | assert response.name == "Ivan" 35 | assert response.age == -25 36 | 37 | 38 | @pytest.mark.asyncio 39 | @pytest.mark.parametrize("mode", modes) 40 | @pytest.mark.parametrize("model", models) 41 | async def test_mistral_retry_validation_async(aclient, model, mode): 42 | patched_client = instructor.from_mistral(aclient, mode=mode, use_async=True) 43 | 44 | # Test extracting structured data with validation that should trigger retry 45 | response = await patched_client.chat.completions.create( 46 | model=model, 47 | messages=[{"role": "user", "content": "Jack is 28 years old"}], 48 | response_model=User, 49 | ) 50 | 51 | # Validate response has correct negative age after retry 52 | assert isinstance(response, User) 53 | assert response.name == "Jack" 54 | assert response.age == -28 55 | -------------------------------------------------------------------------------- /tests/llm/test_mistral/util.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | 3 | models: list[str] = ["ministral-8b-latest"] 4 | modes = [instructor.Mode.MISTRAL_STRUCTURED_OUTPUTS, instructor.Mode.MISTRAL_TOOLS] 5 | -------------------------------------------------------------------------------- /tests/llm/test_openai/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_openai/__init__.py -------------------------------------------------------------------------------- /tests/llm/test_openai/conftest.py: -------------------------------------------------------------------------------- 1 | # conftest.py 2 | from openai import AsyncOpenAI, OpenAI 3 | import pytest 4 | import os 5 | 6 | try: 7 | import braintrust 8 | 9 | wrap_openai = braintrust.wrap_openai 10 | except ImportError: 11 | 12 | def wrap_openai(x): 13 | return x 14 | 15 | 16 | @pytest.fixture(scope="function") 17 | def client(): 18 | if os.environ.get("BRAINTRUST_API_KEY"): 19 | yield wrap_openai( 20 | OpenAI( 21 | api_key=os.environ["BRAINTRUST_API_KEY"], 22 | base_url="https://braintrustproxy.com/v1", 23 | ) 24 | ) 25 | else: 26 | yield OpenAI() 27 | 28 | 29 | @pytest.fixture(scope="function") 30 | def aclient(): 31 | if os.environ.get("BRAINTRUST_API_KEY"): 32 | yield wrap_openai( 33 | AsyncOpenAI( 34 | api_key=os.environ["BRAINTRUST_API_KEY"], 35 | base_url="https://braintrustproxy.com/v1", 36 | ) 37 | ) 38 | else: 39 | yield AsyncOpenAI() 40 | -------------------------------------------------------------------------------- /tests/llm/test_openai/docs/test_concepts.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pytest_examples import find_examples, CodeExample, EvalExample 3 | 4 | 5 | @pytest.mark.parametrize("example", find_examples("docs/concepts"), ids=str) 6 | def 
test_format_concepts(example: CodeExample, eval_example: EvalExample): 7 | if eval_example.update_examples: 8 | eval_example.format(example) 9 | eval_example.run_print_update(example) 10 | else: 11 | eval_example.lint(example) 12 | eval_example.run(example) 13 | -------------------------------------------------------------------------------- /tests/llm/test_openai/docs/test_docs.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pytest_examples import find_examples, CodeExample, EvalExample 3 | 4 | 5 | @pytest.mark.parametrize("example", find_examples("README.md"), ids=str) 6 | def test_readme(example: CodeExample, eval_example: EvalExample): 7 | if eval_example.update_examples: 8 | eval_example.format(example) 9 | else: 10 | eval_example.lint(example) 11 | 12 | 13 | @pytest.mark.parametrize("example", find_examples("docs/index.md"), ids=str) 14 | def test_index(example: CodeExample, eval_example: EvalExample): 15 | if eval_example.update_examples: 16 | eval_example.format(example) 17 | else: 18 | eval_example.lint(example) 19 | -------------------------------------------------------------------------------- /tests/llm/test_openai/docs/test_examples.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pytest_examples import find_examples, CodeExample, EvalExample 3 | import glob 4 | import os 5 | 6 | exclusions = ["ollama.md", "watsonx.md", "local_classification.md"] 7 | 8 | markdown_files = [ 9 | file 10 | for file in glob.glob("docs/examples/*.md") 11 | if os.path.basename(file) not in exclusions 12 | ] 13 | 14 | code_examples = [] 15 | 16 | for markdown_file in markdown_files: 17 | code_examples.extend(find_examples(markdown_file)) 18 | 19 | 20 | @pytest.mark.parametrize("example", code_examples, ids=str) 21 | def test_index(example: CodeExample, eval_example: EvalExample): 22 | if eval_example.update_examples: 23 | eval_example.format(example) 24 | eval_example.run_print_update(example) 25 | else: 26 | eval_example.lint(example) 27 | -------------------------------------------------------------------------------- /tests/llm/test_openai/docs/test_hub.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pytest_examples import CodeExample, EvalExample 3 | 4 | 5 | @pytest.mark.skip(reason="Hub functionality is being removed") 6 | def test_format_blog(example: CodeExample, eval_example: EvalExample) -> None: 7 | """This test is being skipped as the hub functionality is being removed.""" 8 | excluded_sources: list[str] = [ 9 | "mistral", 10 | "ollama", 11 | "llama_cpp", 12 | "groq", 13 | "youtube", 14 | "contact", 15 | "langsmith", 16 | ] # sources that are not supported in testing 17 | if any(source in example.source for source in excluded_sources): 18 | return 19 | 20 | if eval_example.update_examples: 21 | eval_example.format(example) 22 | eval_example.run_print_update(example) 23 | else: 24 | eval_example.lint(example) 25 | eval_example.run(example) 26 | -------------------------------------------------------------------------------- /tests/llm/test_openai/docs/test_mkdocs.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import pytest 3 | 4 | 5 | # Note the use of `str`, makes for pretty output 6 | @pytest.mark.parametrize( 7 | "fpath", pathlib.Path("docs/examples").glob("**/*.md"), ids=str 8 | ) 9 | @pytest.mark.skip(reason="This test is not yet 
implemented")
10 | def test_files_good(fpath):
11 |     from mktestdocs import check_md_file
12 | 
13 |     check_md_file(fpath=fpath, memory=True)
14 | 
--------------------------------------------------------------------------------
/tests/llm/test_openai/docs/test_posts.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from pytest_examples import find_examples, CodeExample, EvalExample
 3 | 
 4 | 
 5 | @pytest.mark.parametrize("example", find_examples("docs/blog/posts"), ids=str)
 6 | def test_index(example: CodeExample, eval_example: EvalExample):
 7 |     if eval_example.update_examples:
 8 |         eval_example.format(example)
 9 |         eval_example.run_print_update(example)
10 |     else:
11 |         eval_example.lint(example)
12 | 
--------------------------------------------------------------------------------
/tests/llm/test_openai/docs/test_prompt_tips.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from pytest_examples import find_examples, CodeExample, EvalExample
 3 | 
 4 | 
 5 | @pytest.mark.parametrize("example", find_examples("docs/prompting"), ids=str)
 6 | @pytest.mark.skip(reason="Skipping this for now")
 7 | def test_format_concepts(example: CodeExample, eval_example: EvalExample):
 8 |     if eval_example.update_examples:
 9 |         eval_example.format(example)
10 |         # eval_example.run_print_update(example)
11 |     else:
12 |         eval_example.lint(example)
13 |         # eval_example.run(example)
14 | 
--------------------------------------------------------------------------------
/tests/llm/test_openai/evals/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_openai/evals/__init__.py
--------------------------------------------------------------------------------
/tests/llm/test_openai/evals/readme.md:
--------------------------------------------------------------------------------
 1 | # How to Contribute: Writing and Running Evaluation Tests
 2 | 
 3 | We welcome contributions that expand our suite of evaluation tests for data extraction. This guide explains how to write such tests with `pytest`, `pydantic`, and related tools, with a focus on broad coverage and on understanding failure modes.
 4 | 
 5 | ## Define Test Scenarios
 6 | 
 7 | Identify the data extraction scenarios that matter to you, then create test cases with inputs and expected outputs.
 8 | 
 9 | See `test_extract_users.py` for a reference: it extracts users across all models and modes, with each test parameterized over the model, the mode, and an (input, expected output) pair. A minimal skeleton you can adapt is sketched below.
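10 | 
11 | The following sketch is illustrative only: the `Fruit` model and sample text are placeholders, while `models`, `modes`, and the `client` fixture are the shared ones used throughout this suite.
12 | 
13 | ```python
14 | import pytest
15 | from itertools import product
16 | from pydantic import BaseModel
17 | import instructor
18 | from ..util import models, modes
19 | 
20 | 
21 | class Fruit(BaseModel):  # placeholder response model
22 |     name: str
23 | 
24 | 
25 | test_data = [("I ate an apple", "apple")]
26 | 
27 | 
28 | @pytest.mark.parametrize("model, data, mode", product(models, test_data, modes))
29 | def test_extract_fruit(model, data, mode, client):
30 |     sample, expected = data
31 |     patched = instructor.from_openai(client, mode=mode)
32 |     response = patched.chat.completions.create(
33 |         model=model,
34 |         response_model=Fruit,
35 |         messages=[{"role": "user", "content": sample}],
36 |     )
37 |     assert response.name.lower() == expected
38 | ```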
10 | -------------------------------------------------------------------------------- /tests/llm/test_openai/evals/test_extract_users.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from itertools import product 3 | from pydantic import BaseModel 4 | import instructor 5 | from instructor.function_calls import Mode 6 | from ..util import models, modes 7 | 8 | 9 | class UserDetails(BaseModel): 10 | name: str 11 | age: int 12 | 13 | 14 | # Lists for models, test data, and modes 15 | test_data = [ 16 | ("Jason is 10", "Jason", 10), 17 | ("Alice is 25", "Alice", 25), 18 | ("Bob is 35", "Bob", 35), 19 | ] 20 | 21 | 22 | @pytest.mark.parametrize("model, data, mode", product(models, test_data, modes)) 23 | def test_extract(model, data, mode, client): 24 | sample_data, expected_name, expected_age = data 25 | 26 | if (mode, model) in { 27 | (Mode.JSON, "gpt-3.5-turbo"), 28 | (Mode.JSON, "gpt-4"), 29 | }: 30 | pytest.skip(f"{mode} mode is not supported for {model}, skipping test") 31 | 32 | # Setting up the client with the instructor patch 33 | client = instructor.from_openai(client, mode=mode) 34 | 35 | # Calling the extract function with the provided model, sample data, and mode 36 | response = client.chat.completions.create( 37 | model=model, 38 | response_model=UserDetails, 39 | messages=[ 40 | {"role": "user", "content": sample_data}, 41 | ], 42 | ) 43 | 44 | # Assertions 45 | assert ( 46 | response.name == expected_name 47 | ), f"Expected name {expected_name}, got {response.name}" 48 | assert ( 49 | response.age == expected_age 50 | ), f"Expected age {expected_age}, got {response.age}" 51 | -------------------------------------------------------------------------------- /tests/llm/test_openai/evals/test_sentiment_analysis.py: -------------------------------------------------------------------------------- 1 | import enum 2 | from itertools import product 3 | from pydantic import BaseModel 4 | import pytest 5 | import instructor 6 | from instructor.function_calls import Mode 7 | from ..util import models, modes 8 | 9 | 10 | class Sentiment(str, enum.Enum): 11 | POSITIVE = "positive" 12 | NEGATIVE = "negative" 13 | NEUTRAL = "neutral" 14 | 15 | 16 | class SentimentAnalysis(BaseModel): 17 | sentiment: Sentiment 18 | 19 | 20 | test_data = [ 21 | ( 22 | "I absolutely love this product! It has exceeded all my expectations.", 23 | Sentiment.POSITIVE, 24 | ), 25 | ( 26 | "The service was terrible. I will never use this company again.", 27 | Sentiment.NEGATIVE, 28 | ), 29 | ( 30 | "The movie was okay. It had some good moments but overall it was average.", 31 | Sentiment.NEUTRAL, 32 | ), 33 | ] 34 | 35 | 36 | @pytest.mark.parametrize("model, data, mode", product(models, test_data, modes)) 37 | def test_sentiment_analysis(model, data, mode, client): 38 | sample_data, expected_sentiment = data 39 | 40 | if (mode, model) in { 41 | (Mode.JSON, "gpt-3.5-turbo"), 42 | (Mode.JSON, "gpt-4"), 43 | }: 44 | pytest.skip(f"{mode} mode is not supported for {model}, skipping test") 45 | 46 | client = instructor.from_openai(client, mode=mode) 47 | 48 | response = client.chat.completions.create( 49 | model=model, 50 | response_model=SentimentAnalysis, 51 | messages=[ 52 | { 53 | "role": "system", 54 | "content": "You are a sentiment analysis model. 
Analyze the sentiment of the given text and provide the sentiment (positive, negative, or neutral).", 55 | }, 56 | {"role": "user", "content": sample_data}, 57 | ], 58 | ) 59 | 60 | assert response.sentiment == expected_sentiment 61 | -------------------------------------------------------------------------------- /tests/llm/test_openai/test_attr.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | import openai 3 | import pytest 4 | 5 | 6 | def test_has_embedding(): 7 | oai = openai.OpenAI() 8 | client = instructor.from_openai(oai) 9 | 10 | embedding = client.embeddings.create( 11 | input="Hello world", model="text-embedding-3-small" 12 | ) 13 | assert embedding is not None, "The 'embeddings' attribute is None." 14 | 15 | 16 | @pytest.mark.asyncio 17 | async def test_has_embedding_async(): 18 | oai = openai.AsyncOpenAI() 19 | client = instructor.from_openai(oai) 20 | 21 | # Check if the 'embeddings' attribute can be accessed through the client 22 | embedding = await client.embeddings.create( 23 | input="Hello world", model="text-embedding-3-small" 24 | ) 25 | assert embedding is not None, "The 'embeddings' attribute is None." 26 | -------------------------------------------------------------------------------- /tests/llm/test_openai/util.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | 3 | models = ["gpt-4o-mini"] 4 | modes = [ 5 | instructor.Mode.TOOLS, 6 | instructor.Mode.TOOLS_STRICT, 7 | instructor.Mode.RESPONSES_TOOLS, 8 | instructor.Mode.RESPONSES_TOOLS_WITH_INBUILT_TOOLS, 9 | ] 10 | -------------------------------------------------------------------------------- /tests/llm/test_perplexity/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_perplexity/__init__.py -------------------------------------------------------------------------------- /tests/llm/test_perplexity/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from openai import OpenAI 5 | 6 | 7 | @pytest.fixture(scope="session") 8 | def client(): 9 | if os.environ.get("PERPLEXITY_API_KEY"): 10 | yield OpenAI( 11 | api_key=os.environ["PERPLEXITY_API_KEY"], 12 | base_url="https://api.perplexity.ai", 13 | ) 14 | -------------------------------------------------------------------------------- /tests/llm/test_perplexity/util.py: -------------------------------------------------------------------------------- 1 | from instructor import Mode 2 | 3 | models = ["sonar", "sonar-pro"] 4 | modes = [Mode.PERPLEXITY_JSON] 5 | -------------------------------------------------------------------------------- /tests/llm/test_vertexai/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_vertexai/__init__.py -------------------------------------------------------------------------------- /tests/llm/test_vertexai/test_deprecated_async.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from unittest.mock import patch, MagicMock 3 | from pydantic import BaseModel 4 | from instructor.client_vertexai import from_vertexai 5 | from instructor.exceptions import ConfigurationError 6 | 7 | class 
User(BaseModel): 8 | name: str 9 | age: int 10 | 11 | @patch('instructor.client_vertexai.isinstance', return_value=True) 12 | def test_deprecated_async_warning(_): 13 | """Test that using _async parameter raises a deprecation warning.""" 14 | mock_model = MagicMock() 15 | mock_model.generate_content = MagicMock() 16 | mock_model.generate_content_async = MagicMock() 17 | 18 | with pytest.warns(DeprecationWarning, match="'_async' is deprecated. Use 'use_async' instead."): 19 | client = from_vertexai( 20 | mock_model, 21 | _async=True 22 | ) 23 | 24 | @patch('instructor.client_vertexai.isinstance', return_value=True) 25 | def test_both_async_params_error(_): 26 | """Test that providing both _async and use_async raises an error.""" 27 | mock_model = MagicMock() 28 | mock_model.generate_content = MagicMock() 29 | mock_model.generate_content_async = MagicMock() 30 | 31 | with pytest.raises(ConfigurationError, match="Cannot provide both '_async' and 'use_async'. Use 'use_async' instead."): 32 | client = from_vertexai( 33 | mock_model, 34 | _async=True, 35 | use_async=True 36 | ) 37 | -------------------------------------------------------------------------------- /tests/llm/test_vertexai/test_format.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | from pydantic import BaseModel 3 | from .util import models, modes 4 | import pytest 5 | from itertools import product 6 | import vertexai.generative_models as gm 7 | 8 | 9 | class User(BaseModel): 10 | name: str 11 | age: int 12 | 13 | 14 | @pytest.mark.parametrize("model, mode, is_list", product(models, modes, [True, False])) 15 | def test_format_string(model, mode, is_list): 16 | client = instructor.from_vertexai( 17 | gm.GenerativeModel(model), 18 | mode=mode, 19 | ) 20 | 21 | content = ( 22 | [gm.Part.from_text("Extract {{name}} is {{age}} years old.")] 23 | if is_list 24 | else "Extract {{name}} is {{age}} years old." 25 | ) 26 | 27 | # note that client.chat.completions.create will also work 28 | resp = client.messages.create( 29 | messages=[ 30 | { 31 | "role": "user", 32 | "content": content, 33 | } 34 | ], 35 | response_model=User, 36 | context={"name": "Jason", "age": 25}, 37 | ) 38 | 39 | assert isinstance(resp, User) 40 | assert resp.name == "Jason" 41 | assert resp.age == 25 42 | -------------------------------------------------------------------------------- /tests/llm/test_vertexai/test_message_parser.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import vertexai.generative_models as gm 3 | from instructor.client_vertexai import vertexai_message_parser 4 | 5 | 6 | def test_vertexai_message_parser_string_content(): 7 | message = {"role": "user", "content": "Hello, world!"} 8 | result = vertexai_message_parser(message) 9 | 10 | assert isinstance(result, gm.Content) 11 | assert result.role == "user" 12 | assert len(result.parts) == 1 13 | assert isinstance(result.parts[0], gm.Part) 14 | assert result.parts[0].text == "Hello, world!" 
15 | 16 | 17 | def test_vertexai_message_parser_list_content(): 18 | message = { 19 | "role": "user", 20 | "content": [ 21 | "Hello, ", 22 | gm.Part.from_text("world!"), 23 | gm.Part.from_text(" How are you?"), 24 | ], 25 | } 26 | result = vertexai_message_parser(message) 27 | 28 | assert isinstance(result, gm.Content) 29 | assert result.role == "user" 30 | assert len(result.parts) == 3 31 | assert isinstance(result.parts[0], gm.Part) 32 | assert isinstance(result.parts[1], gm.Part) 33 | assert isinstance(result.parts[2], gm.Part) 34 | assert result.parts[0].text == "Hello, " 35 | assert result.parts[1].text == "world!" 36 | assert result.parts[2].text == " How are you?" 37 | 38 | 39 | def test_vertexai_message_parser_invalid_content(): 40 | message = {"role": "user", "content": 123} # Invalid content type 41 | 42 | with pytest.raises(ValueError, match="Unsupported message content type"): 43 | vertexai_message_parser(message) 44 | 45 | 46 | def test_vertexai_message_parser_invalid_list_item(): 47 | message = {"role": "user", "content": ["Hello", 123, gm.Part.from_text("world!")]} 48 | 49 | with pytest.raises(ValueError, match="Unsupported content type in list"): 50 | vertexai_message_parser(message) 51 | -------------------------------------------------------------------------------- /tests/llm/test_vertexai/test_retries.py: -------------------------------------------------------------------------------- 1 | from itertools import product 2 | from typing import Annotated, cast 3 | from pydantic import AfterValidator, BaseModel, Field 4 | import pytest 5 | import instructor 6 | import vertexai.generative_models as gm # type: ignore 7 | 8 | from .util import models, modes 9 | 10 | 11 | def uppercase_validator(v: str): 12 | if v.islower(): 13 | raise ValueError("Name must be ALL CAPS") 14 | return v 15 | 16 | 17 | class UserDetail(BaseModel): 18 | name: Annotated[str, AfterValidator(uppercase_validator)] = Field( 19 | ..., description="The name of the user" 20 | ) 21 | age: int 22 | 23 | 24 | @pytest.mark.parametrize("model, mode", product(models, modes)) 25 | def test_upper_case(model, mode): 26 | client = instructor.from_vertexai(gm.GenerativeModel(model), mode) 27 | response = client.create( 28 | response_model=UserDetail, 29 | messages=[ 30 | {"role": "user", "content": "Extract `jason is 12`"}, 31 | ], 32 | max_retries=3, 33 | ) 34 | assert response.name == "JASON" 35 | 36 | 37 | @pytest.mark.parametrize("model, mode", product(models, modes)) 38 | def test_upper_case_tenacity(model, mode): 39 | client = instructor.from_vertexai(gm.GenerativeModel(model), mode) 40 | from tenacity import Retrying, stop_after_attempt, wait_fixed 41 | 42 | retries = Retrying( 43 | stop=stop_after_attempt(2), 44 | wait=wait_fixed(1), 45 | ) 46 | 47 | retries = cast(int, retries) 48 | 49 | response = client.create( 50 | response_model=UserDetail, 51 | messages=[ 52 | {"role": "user", "content": "Extract `jason is 12`"}, 53 | ], 54 | max_retries=retries, 55 | ) 56 | assert response.name == "JASON" 57 | -------------------------------------------------------------------------------- /tests/llm/test_vertexai/test_simple_types.py: -------------------------------------------------------------------------------- 1 | import instructor 2 | import pytest 3 | import enum 4 | import vertexai.generative_models as gm # type: ignore 5 | from itertools import product 6 | from typing import Literal 7 | 8 | from .util import models, modes 9 | 10 | 11 | @pytest.mark.parametrize("model, mode", product(models, modes)) 12 | def 
def test_literal(model, mode):
    client = instructor.from_vertexai(gm.GenerativeModel(model), mode)

    response = client.create(
        response_model=Literal["1231", "212", "331"],
        messages=[
            {
                "role": "user",
                "content": "Produce a Random but correct response given the desired output",
            },
        ],
    )
    assert response in ["1231", "212", "331"]


@pytest.mark.parametrize("model, mode", product(models, modes))
def test_enum(model, mode):
    class Options(enum.Enum):
        A = "A"
        B = "B"
        C = "C"

    client = instructor.from_vertexai(gm.GenerativeModel(model), mode)

    response = client.create(
        response_model=Options,
        messages=[
            {
                "role": "user",
                "content": "Produce a Random but correct response given the desired output",
            },
        ],
    )
    assert response in [Options.A, Options.B, Options.C]


@pytest.mark.parametrize("model, mode", product(models, modes))
def test_bool(model, mode):
    client = instructor.from_vertexai(gm.GenerativeModel(model), mode)

    response = client.create(
        response_model=bool,
        messages=[
            {
                "role": "user",
                "content": "Produce a Random but correct response given the desired output",
            },
        ],
    )
    assert isinstance(response, bool)
--------------------------------------------------------------------------------
/tests/llm/test_vertexai/util.py:
--------------------------------------------------------------------------------
import instructor

models = ["gemini-2.0-flash"]
modes = [instructor.Mode.VERTEXAI_TOOLS, instructor.Mode.VERTEXAI_JSON]
--------------------------------------------------------------------------------
/tests/llm/test_writer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_writer/__init__.py
--------------------------------------------------------------------------------
/tests/llm/test_writer/conftest.py:
--------------------------------------------------------------------------------
import os
import pytest


@pytest.fixture(scope="session", autouse=True)
def configure_writer():
    api_key = os.getenv("WRITER_API_KEY")
    if not api_key:
        pytest.skip("WRITER_API_KEY environment variable not set")
--------------------------------------------------------------------------------
/tests/llm/test_writer/evals/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/567-labs/instructor/ca2952270527e2c987f6eee7425fb2f0abcdabda/tests/llm/test_writer/evals/__init__.py
--------------------------------------------------------------------------------
/tests/llm/test_writer/evals/test_extract_users.py:
--------------------------------------------------------------------------------
import pytest
from itertools import product
from pydantic import BaseModel
from writerai import Writer
import instructor
from ..util import models, modes


class UserDetails(BaseModel):
    first_name: str
    age: int


test_data = [
    ("Jason is 10", "Jason", 10),
    ("Alice is 25", "Alice", 25),
    ("Bob is 35", "Bob", 35),
]


@pytest.mark.parametrize("model, data, mode", product(models, test_data, modes))
def test_writer_extract(
    model: str, data: tuple[str, str, int], mode: instructor.Mode
):
    client = instructor.from_writer(client=Writer(), mode=mode)

    sample_data, expected_name, expected_age = data

    response = client.chat.completions.create(
        model=model,
        response_model=UserDetails,
        messages=[
            {"role": "user", "content": sample_data},
        ],
    )

    assert (
        response.first_name == expected_name
    ), f"Expected name {expected_name}, got {response.first_name}"
    assert (
        response.age == expected_age
    ), f"Expected age {expected_age}, got {response.age}"
--------------------------------------------------------------------------------
/tests/llm/test_writer/evals/test_sentiment_analysis.py:
--------------------------------------------------------------------------------
import enum
from itertools import product

from pydantic import BaseModel
from writerai import Writer
import pytest
import instructor
from ..util import models, modes


class Sentiment(str, enum.Enum):
    POSITIVE = "positive"
    NEGATIVE = "negative"
    NEUTRAL = "neutral"


class SentimentAnalysis(BaseModel):
    sentiment: Sentiment


test_data = [
    (
        "I absolutely love this product! It has exceeded all my expectations.",
        Sentiment.POSITIVE,
    ),
    (
        "The service was terrible. I will never use this company again.",
        Sentiment.NEGATIVE,
    ),
    (
        "The movie was okay. It had some good moments but overall it was average.",
        Sentiment.NEUTRAL,
    ),
]


@pytest.mark.parametrize("model, data, mode", product(models, test_data, modes))
def test_writer_sentiment_analysis(
    model: str, data: tuple[str, Sentiment], mode: instructor.Mode
):
    client = instructor.from_writer(client=Writer(), mode=mode)

    sample_data, expected_sentiment = data

    response = client.chat.completions.create(
        model=model,
        response_model=SentimentAnalysis,
        messages=[
            {
                "role": "system",
                "content": "You are a sentiment analysis model. Analyze the sentiment of the given text and provide the sentiment (positive, negative, or neutral).",
            },
            {"role": "user", "content": sample_data},
        ],
    )

    assert response.sentiment == expected_sentiment
--------------------------------------------------------------------------------
/tests/llm/test_writer/util.py:
--------------------------------------------------------------------------------
import instructor

models: list[str] = ["palmyra-x4", "palmyra-x5"]
modes = [instructor.Mode.WRITER_TOOLS, instructor.Mode.WRITER_JSON]
--------------------------------------------------------------------------------
/tests/test_dynamic_model_creation.py:
--------------------------------------------------------------------------------
from pydantic import BaseModel, create_model, Field
from instructor import openai_schema


def test_dynamic_model_creation_with_field_description():
    """
    Test that dynamic model creation with Field(description) works correctly.
    This verifies the example in the documentation at docs/concepts/models.md.
    """
    types = {
        'string': str,
        'integer': int,
        'email': str,
    }

    mock_cursor = [
        ('name', 'string', 'The name of the user.'),
        ('age', 'integer', 'The age of the user.'),
        ('email', 'email', 'The email of the user.'),
    ]

    DynamicModel = create_model(
        'User',
        **{
            property_name: (types[property_type], Field(description=description))
            for property_name, property_type, description in mock_cursor
        },
        __base__=BaseModel,
    )

    schema = DynamicModel.model_json_schema()

    assert schema['properties']['name']['description'] == 'The name of the user.'
    assert schema['properties']['age']['description'] == 'The age of the user.'
    assert schema['properties']['email']['description'] == 'The email of the user.'

    assert 'default' not in schema['properties']['name']
    assert 'default' not in schema['properties']['age']
    assert 'default' not in schema['properties']['email']

    OpenAISchemaModel = openai_schema(DynamicModel)
    openai_schema_json = OpenAISchemaModel.model_json_schema()

    assert openai_schema_json['properties']['name']['description'] == 'The name of the user.'
    assert openai_schema_json['properties']['age']['description'] == 'The age of the user.'
    assert openai_schema_json['properties']['email']['description'] == 'The email of the user.'
--------------------------------------------------------------------------------
/tests/test_fizzbuzz_fix.py:
--------------------------------------------------------------------------------
import unittest
import sys
from instructor.dsl.simple_type import is_simple_type
from instructor.process_response import prepare_response_model


class TestFizzbuzzFix(unittest.TestCase):
    def test_fizzbuzz_response_model(self):
        """Test that list[int | str] works correctly as a response model."""
        if sys.version_info < (3, 10):
            self.skipTest("Union pipe syntax is only available in Python 3.10+")
        # This is the type used in the fizzbuzz example
        response_model = list[int | str]

        # First check that it's correctly identified as a simple type
        self.assertTrue(
            is_simple_type(response_model),
            f"list[int | str] should be a simple type in Python {sys.version_info.major}.{sys.version_info.minor}",
        )

        # Then check that prepare_response_model handles it correctly
        prepared_model = prepare_response_model(response_model)
        self.assertIsNotNone(
            prepared_model,
            "prepare_response_model should not return None for list[int | str]",
        )
--------------------------------------------------------------------------------
/tests/test_multitask.py:
--------------------------------------------------------------------------------
from instructor import OpenAISchema
from instructor.dsl import IterableModel


def test_multi_task():
    class Search(OpenAISchema):
        """This is the search docstring"""

        id: int
        query: str

    IterableSearch = IterableModel(Search)
    assert IterableSearch.openai_schema["name"] == "IterableSearch"
    assert (
        IterableSearch.openai_schema["description"]
        == "Correct segmentation of `Search` tasks"
    )
--------------------------------------------------------------------------------
/tests/test_patch.py:
--------------------------------------------------------------------------------
import functools

from openai import AsyncOpenAI, OpenAI

import instructor
from instructor.utils import is_async


def test_patch_completes_successfully():
    instructor.patch(OpenAI())


def test_apatch_completes_successfully():
    instructor.apatch(AsyncOpenAI())


def test_is_async_returns_true_if_function_is_async():
    async def async_function():
        pass

    assert is_async(async_function) is True


def test_is_async_returns_false_if_function_is_not_async():
    def sync_function():
        pass

    assert is_async(sync_function) is False


def test_is_async_returns_true_if_wrapped_function_is_async():
    async def async_function():
        pass

    @functools.wraps(async_function)
    def wrapped_function():
        pass

    assert is_async(wrapped_function) is True


def test_is_async_returns_true_if_double_wrapped_function_is_async():
    async def async_function():
        pass

    @functools.wraps(async_function)
    def wrapped_function():
        pass

    @functools.wraps(wrapped_function)
    def double_wrapped_function():
        pass

    assert is_async(double_wrapped_function) is True


def test_is_async_returns_true_if_triple_wrapped_function_is_async():
    async def async_function():
        pass

    @functools.wraps(async_function)
    def wrapped_function():
        pass

    @functools.wraps(wrapped_function)
    def double_wrapped_function():
        pass

    @functools.wraps(double_wrapped_function)
    def triple_wrapped_function():
        pass

    assert is_async(triple_wrapped_function) is True
--------------------------------------------------------------------------------
/tests/test_process_response.py:
--------------------------------------------------------------------------------
from typing_extensions import TypedDict
from pydantic import BaseModel
from instructor.process_response import handle_response_model


def test_typed_dict_conversion() -> None:
    class User(TypedDict):  # type: ignore
        name: str
        age: int

    _, user_tool_definition = handle_response_model(User)

    class User(BaseModel):
        name: str
        age: int

    _, pydantic_user_tool_definition = handle_response_model(User)
    assert user_tool_definition == pydantic_user_tool_definition
--------------------------------------------------------------------------------
/tests/test_response_model_conversion.py:
--------------------------------------------------------------------------------
from instructor.process_response import handle_response_model
from pydantic import BaseModel, Field
import instructor
import pytest

modes = [
    instructor.Mode.ANTHROPIC_JSON,
    instructor.Mode.JSON,
    instructor.Mode.MD_JSON,
    instructor.Mode.GEMINI_JSON,
    instructor.Mode.VERTEXAI_JSON,
]


def get_system_prompt(user_tool_definition, mode):
    if mode == instructor.Mode.ANTHROPIC_JSON:
        return user_tool_definition["system"]
    elif mode == instructor.Mode.GEMINI_JSON:
        return "\n".join(user_tool_definition["contents"][0]["parts"])
    elif mode == instructor.Mode.VERTEXAI_JSON:
        return str(user_tool_definition["generation_config"])
    return user_tool_definition["messages"][0]["content"]


@pytest.mark.parametrize("mode", modes)
def test_json_preserves_description_of_non_english_characters_in_json_mode(
    mode,
) -> None:
    messages = [
        {
            "role": "user",
            "content": "Extract the user from the text : 张三 20岁",
        }
    ]

    class User(BaseModel):
        name: str = Field(description="用户的名字")
        age: int = Field(description="用户的年龄")

    _, user_tool_definition = handle_response_model(User, mode=mode, messages=messages)

    system_prompt = get_system_prompt(user_tool_definition, mode)
    assert "用户的名字" in system_prompt
    assert "用户的年龄" in system_prompt

    _, user_tool_definition = handle_response_model(
        User,
        mode=mode,
        system="你是一个AI助手",
        messages=messages,
    )
    system_prompt = get_system_prompt(user_tool_definition, mode)
    assert "用户的名字" in system_prompt
    assert "用户的年龄" in system_prompt
--------------------------------------------------------------------------------
/tests/test_simple_types.py:
--------------------------------------------------------------------------------
from instructor.dsl import is_simple_type, Partial
from pydantic import BaseModel


def test_enum_simple():
    from enum import Enum

    class Color(Enum):
        RED = 1
        GREEN = 2
        BLUE = 3

    assert is_simple_type(Color), "Failed for type: " + str(Color)


def test_standard_types():
    for t in [str, int, float, bool]:
        assert is_simple_type(t), "Failed for type: " + str(t)


def test_partial_not_simple():
    class SampleModel(BaseModel):
        data: int

    assert not is_simple_type(Partial[SampleModel]), "Failed for type: " + str(
        Partial[SampleModel]
    )


def test_annotated_simple():
    from pydantic import Field
    from typing import Annotated

    new_type = Annotated[int, Field(description="test")]

    assert is_simple_type(new_type), "Failed for type: " + str(new_type)


def test_literal_simple():
    from typing import Literal

    new_type = Literal[1, 2, 3]

    assert is_simple_type(new_type), "Failed for type: " + str(new_type)


def test_union_simple():
    from typing import Union

    new_type = Union[int, str]

    assert is_simple_type(new_type), "Failed for type: " + str(new_type)


def test_iterable_not_simple():
    from collections.abc import Iterable

    new_type = Iterable[int]

    assert not is_simple_type(new_type), "Failed for type: " + str(new_type)
--------------------------------------------------------------------------------