├── .clang-format
├── .gitattributes
├── .github
├── CONTRIBUTING.md
├── actions
│ ├── build_app
│ │ └── action.yml
│ ├── install_openvino
│ │ └── action.yml
│ ├── install_python_deps
│ │ └── action.yml
│ └── install_wheel
│ │ ├── .node-version
│ │ ├── .prettierignore
│ │ ├── .prettierrc.json
│ │ ├── action.yml
│ │ ├── dist
│ │ └── index.js
│ │ ├── package-lock.json
│ │ ├── package.json
│ │ └── src
│ │ └── install_packages.js
├── components.yml
├── dependabot.yml
├── dependency_review.yml
├── labeler.yml
├── pull_request_template.md
├── scripts
│ └── generate_reference_llava.py
└── workflows
│ ├── assign_issue.yml
│ ├── cleanup_caches.yml
│ ├── coverity.yml
│ ├── deploy_gh_pages.yml
│ ├── labeler.yml
│ ├── linux.yml
│ ├── mac.yml
│ ├── manylinux_2_28.yml
│ ├── sdl.yml
│ └── windows.yml
├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── Jenkinsfile
├── LICENSE
├── README.md
├── SECURITY.md
├── bandit.yml
├── cmake
├── features.cmake
├── templates
│ ├── OpenVINOGenAIConfig.cmake.in
│ ├── version.cpp.in
│ ├── version.hpp.in
│ └── vs_version.rc.in
├── version.cmake
└── vs_version.cmake
├── pyproject.toml
├── requirements-build.txt
├── samples
├── CMakeLists.txt
├── c
│ ├── text_generation
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── benchmark_genai_c.c
│ │ ├── chat_sample_c.c
│ │ └── greedy_causal_lm_c.c
│ ├── visual_language_chat
│ │ ├── CMakeLists.txt
│ │ ├── load_image.c
│ │ ├── load_image.h
│ │ └── vlm_pipeline.c
│ └── whisper_speech_recognition
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── whisper_speech_recognition.c
│ │ ├── whisper_utils.c
│ │ └── whisper_utils.h
├── cpp
│ ├── README.md
│ ├── image_generation
│ │ ├── 512x512.bmp
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── baseline.bmp
│ │ ├── benchmark_image_gen.cpp
│ │ ├── heterogeneous_stable_diffusion.cpp
│ │ ├── image2image.cpp
│ │ ├── image2image_concurrency.cpp
│ │ ├── imageimage.bmp
│ │ ├── imwrite.cpp
│ │ ├── imwrite.hpp
│ │ ├── inpainting.bmp
│ │ ├── inpainting.cpp
│ │ ├── load_image.cpp
│ │ ├── load_image.hpp
│ │ ├── lora.bmp
│ │ ├── lora_text2image.cpp
│ │ ├── progress_bar.hpp
│ │ ├── stable_diffusion_export_import.cpp
│ │ ├── text2image.cpp
│ │ └── text2image_concurrency.cpp
│ ├── rag
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── text_embeddings.cpp
│ │ └── text_rerank.cpp
│ ├── speech_generation
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── audio_utils.cpp
│ │ ├── audio_utils.hpp
│ │ └── text2speech.cpp
│ ├── text_generation
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── beam_search_causal_lm.cpp
│ │ ├── benchmark_genai.cpp
│ │ ├── chat_sample.cpp
│ │ ├── encrypted_model_causal_lm.cpp
│ │ ├── greedy_causal_lm.cpp
│ │ ├── lora_greedy_causal_lm.cpp
│ │ ├── multinomial_causal_lm.cpp
│ │ ├── prompt_lookup_decoding_lm.cpp
│ │ ├── read_prompt_from_file.cpp
│ │ ├── read_prompt_from_file.h
│ │ ├── speculative_decoding_lm.cpp
│ │ └── structured_output_generation.cpp
│ ├── visual_language_chat
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── benchmark_vlm.cpp
│ │ ├── encrypted_model_vlm.cpp
│ │ ├── load_image.cpp
│ │ ├── load_image.hpp
│ │ └── visual_language_chat.cpp
│ └── whisper_speech_recognition
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── audio_utils.cpp
│ │ ├── audio_utils.hpp
│ │ └── whisper_speech_recognition.cpp
├── deployment-requirements.txt
├── export-requirements.txt
├── generation.gif
├── js
│ ├── .gitignore
│ ├── package-lock.json
│ ├── package.json
│ ├── rag
│ │ ├── README.md
│ │ └── text_embeddings.js
│ └── text_generation
│ │ ├── README.md
│ │ ├── beam_search_causal_lm.js
│ │ ├── benchmark_genai.js
│ │ ├── chat_sample.js
│ │ ├── compound_grammar_generation.js
│ │ ├── greedy_causal_lm.js
│ │ ├── helper.js
│ │ ├── multinomial_causal_lm.js
│ │ ├── react_sample.js
│ │ ├── structural_tags_generation.js
│ │ ├── structured_output_generation.js
│ │ └── tests
│ │ └── usage.test.js
├── python
│ ├── image_generation
│ │ ├── README.md
│ │ ├── benchmark_image_gen.py
│ │ ├── heterogeneous_stable_diffusion.py
│ │ ├── image2image.py
│ │ ├── inpainting.py
│ │ ├── lora_text2image.py
│ │ ├── stable_diffusion_export_import.py
│ │ └── text2image.py
│ ├── rag
│ │ ├── README.md
│ │ ├── text_embeddings.py
│ │ └── text_rerank.py
│ ├── speech_generation
│ │ ├── README.md
│ │ ├── create_speaker_embedding.py
│ │ └── text2speech.py
│ ├── text_generation
│ │ ├── README.md
│ │ ├── beam_search_causal_lm.py
│ │ ├── benchmark_genai.py
│ │ ├── chat_sample.py
│ │ ├── compound_grammar_generation.py
│ │ ├── encrypted_model_causal_lm.py
│ │ ├── greedy_causal_lm.py
│ │ ├── limit_checker.py
│ │ ├── lora_greedy_causal_lm.py
│ │ ├── multinomial_causal_lm.py
│ │ ├── prompt_lookup_decoding_lm.py
│ │ ├── react_sample.py
│ │ ├── speculative_decoding_lm.py
│ │ ├── structural_tags_generation.py
│ │ └── structured_output_generation.py
│ ├── visual_language_chat
│ │ ├── README.md
│ │ ├── benchmark_vlm.py
│ │ ├── encrypted_model_vlm.py
│ │ └── visual_language_chat.py
│ └── whisper_speech_recognition
│ │ ├── README.md
│ │ ├── recorder.py
│ │ └── whisper_speech_recognition.py
└── requirements.txt
├── site
├── .editorconfig
├── .gitignore
├── .prettierignore
├── .prettierrc
├── README.md
├── docs
│ ├── concepts
│ │ ├── _category_.json
│ │ ├── beam-search.md
│ │ ├── how-it-works.md
│ │ ├── lora.md
│ │ └── optimization-techniques
│ │ │ ├── _category_.json
│ │ │ ├── continuous-batching.md
│ │ │ ├── kvcache-eviction-algorithm.md
│ │ │ ├── prefix-caching.md
│ │ │ ├── sparse-attention-prefill.md
│ │ │ └── speculative-decoding.md
│ ├── getting-started
│ │ ├── _category_.json
│ │ ├── installation.mdx
│ │ └── introduction.mdx
│ ├── guides
│ │ ├── _category_.json
│ │ ├── chat-scenario.mdx
│ │ ├── debug-logging.mdx
│ │ ├── lora-adapters.mdx
│ │ ├── model-preparation
│ │ │ ├── _category_.json
│ │ │ ├── _use_cases_note.mdx
│ │ │ ├── convert-to-openvino.mdx
│ │ │ └── download-openvino-models.mdx
│ │ ├── performance-metrics.mdx
│ │ ├── streaming.mdx
│ │ ├── structured-output.mdx
│ │ └── tokenization.mdx
│ ├── samples
│ │ ├── _category_.json
│ │ ├── _components
│ │ │ └── samples-list
│ │ │ │ └── index.tsx
│ │ └── index.mdx
│ ├── supported-models
│ │ ├── _category_.json
│ │ ├── _components
│ │ │ ├── base-models-table
│ │ │ │ └── index.tsx
│ │ │ ├── image-generation-models-table
│ │ │ │ ├── index.tsx
│ │ │ │ └── models.ts
│ │ │ ├── llm-models-table
│ │ │ │ ├── index.tsx
│ │ │ │ └── models.ts
│ │ │ ├── speech-generation-models-table
│ │ │ │ ├── index.tsx
│ │ │ │ └── models.ts
│ │ │ ├── text-embeddings-models-table
│ │ │ │ ├── index.tsx
│ │ │ │ └── models.ts
│ │ │ ├── text-rerank-models-table
│ │ │ │ ├── index.tsx
│ │ │ │ └── models.ts
│ │ │ ├── vlm-models-table
│ │ │ │ ├── index.tsx
│ │ │ │ └── models.ts
│ │ │ └── whisper-models-table
│ │ │ │ ├── index.tsx
│ │ │ │ └── models.ts
│ │ └── index.mdx
│ └── use-cases
│ │ ├── _category_.json
│ │ ├── _shared
│ │ ├── _basic_generation_configuration.mdx
│ │ ├── _beam_search_generation.mdx
│ │ ├── _chat_scenario.mdx
│ │ ├── _convert_model.mdx
│ │ ├── _generation_configuration_workflow.mdx
│ │ └── _streaming.mdx
│ │ ├── image-generation
│ │ ├── _sections
│ │ │ ├── _run_model
│ │ │ │ ├── _image2image_cpp.mdx
│ │ │ │ ├── _image2image_python.mdx
│ │ │ │ ├── _inpainting_cpp.mdx
│ │ │ │ ├── _inpainting_python.mdx
│ │ │ │ ├── _text2image_cpp.mdx
│ │ │ │ ├── _text2image_python.mdx
│ │ │ │ └── index.mdx
│ │ │ └── _usage_options
│ │ │ │ └── index.mdx
│ │ └── index.mdx
│ │ ├── image-processing
│ │ ├── _sections
│ │ │ ├── _run_model
│ │ │ │ ├── _code_example_cpp.mdx
│ │ │ │ ├── _code_example_python.mdx
│ │ │ │ └── index.mdx
│ │ │ └── _usage_options
│ │ │ │ └── index.mdx
│ │ └── index.mdx
│ │ ├── speech-recognition
│ │ ├── _sections
│ │ │ ├── _run_model
│ │ │ │ ├── _code_example_cpp.mdx
│ │ │ │ ├── _code_example_python.mdx
│ │ │ │ └── index.mdx
│ │ │ └── _usage_options
│ │ │ │ └── index.mdx
│ │ └── index.mdx
│ │ ├── text-embedding
│ │ ├── _sections
│ │ │ ├── _run_model
│ │ │ │ ├── _code_example_cpp.mdx
│ │ │ │ ├── _code_example_python.mdx
│ │ │ │ └── index.mdx
│ │ │ └── _usage_options
│ │ │ │ └── index.mdx
│ │ └── index.mdx
│ │ ├── text-generation
│ │ ├── _sections
│ │ │ ├── _run_model
│ │ │ │ ├── _code_example_cpp.mdx
│ │ │ │ ├── _code_example_python.mdx
│ │ │ │ └── index.mdx
│ │ │ └── _usage_options
│ │ │ │ ├── _generation_parameters.mdx
│ │ │ │ ├── _lora_adapters.mdx
│ │ │ │ ├── _speculative_decoding.mdx
│ │ │ │ └── index.mdx
│ │ └── index.mdx
│ │ └── text-rerank
│ │ ├── _sections
│ │ └── _run_model
│ │ │ ├── _code_example_cpp.mdx
│ │ │ ├── _code_example_python.mdx
│ │ │ └── index.mdx
│ │ └── index.mdx
├── docusaurus.config.ts
├── eslint.config.mjs
├── package-lock.json
├── package.json
├── sidebars.ts
├── src
│ ├── components
│ │ ├── Button
│ │ │ ├── index.tsx
│ │ │ └── styles.module.css
│ │ ├── Carousel
│ │ │ ├── index.tsx
│ │ │ └── styles.module.css
│ │ ├── LanguageTabs
│ │ │ └── index.tsx
│ │ └── OptimumCLI
│ │ │ └── index.tsx
│ ├── css
│ │ ├── breadcrumbs.css
│ │ ├── custom.css
│ │ ├── footer.css
│ │ ├── menu.css
│ │ ├── navbar.css
│ │ ├── toc.css
│ │ └── typography.css
│ ├── hooks
│ │ └── use-screen-size.ts
│ ├── pages
│ │ ├── _sections
│ │ │ ├── FeaturesSection
│ │ │ │ ├── FeatureItem
│ │ │ │ │ ├── index.tsx
│ │ │ │ │ └── styles.module.css
│ │ │ │ ├── index.tsx
│ │ │ │ └── styles.module.css
│ │ │ ├── HeroSection
│ │ │ │ ├── PipelinesCarousel
│ │ │ │ │ ├── index.tsx
│ │ │ │ │ └── styles.module.css
│ │ │ │ ├── index.tsx
│ │ │ │ └── styles.module.css
│ │ │ ├── InstallSection
│ │ │ │ ├── index.tsx
│ │ │ │ └── styles.module.css
│ │ │ ├── UseCasesSection
│ │ │ │ ├── components
│ │ │ │ │ ├── UseCaseCard
│ │ │ │ │ │ ├── index.tsx
│ │ │ │ │ │ └── styles.module.css
│ │ │ │ │ ├── image-generation.tsx
│ │ │ │ │ ├── image-processing.tsx
│ │ │ │ │ ├── speech-recognition.tsx
│ │ │ │ │ ├── text-embedding.tsx
│ │ │ │ │ ├── text-generation.tsx
│ │ │ │ │ └── text-rerank.tsx
│ │ │ │ ├── index.tsx
│ │ │ │ └── styles.module.css
│ │ │ └── section-styles.module.css
│ │ ├── index.module.css
│ │ └── index.tsx
│ ├── plugins
│ │ └── genai-samples-docs-plugin.ts
│ ├── theme
│ │ └── MDXComponents.tsx
│ └── types
│ │ └── images.d.ts
├── static
│ ├── .nojekyll
│ └── img
│ │ ├── background.webp
│ │ ├── beam_idx-drop.gif
│ │ ├── beam_idx-fork.gif
│ │ ├── chevron-right.svg
│ │ ├── chevron-up.svg
│ │ ├── favicon.png
│ │ ├── image.svg
│ │ ├── intel-logo.svg
│ │ ├── kv-cache-areas-diagram.svg
│ │ ├── linux-logo.svg
│ │ ├── lora.png
│ │ ├── mac-os-logo.svg
│ │ ├── magnifying-glass.svg
│ │ ├── openvino-genai-workflow.svg
│ │ ├── openvino.svg
│ │ ├── sound-on.svg
│ │ ├── stateful.jpg
│ │ ├── stateless.jpg
│ │ ├── structured_output_work_example.png
│ │ ├── text.svg
│ │ ├── trishape.svg
│ │ └── windows-logo.svg
└── tsconfig.json
├── src
├── CMakeLists.txt
├── README.md
├── bindings_utils.hpp
├── c
│ ├── CMakeLists.txt
│ ├── include
│ │ └── openvino
│ │ │ └── genai
│ │ │ └── c
│ │ │ ├── generation_config.h
│ │ │ ├── llm_pipeline.h
│ │ │ ├── perf_metrics.h
│ │ │ ├── visibility.h
│ │ │ ├── vlm_pipeline.h
│ │ │ ├── whisper_generation_config.h
│ │ │ └── whisper_pipeline.h
│ └── src
│ │ ├── generation_config.cpp
│ │ ├── llm_pipeline.cpp
│ │ ├── perf_metrics.cpp
│ │ ├── types_c.h
│ │ ├── vlm_pipeline.cpp
│ │ ├── whisper_generation_config.cpp
│ │ └── whisper_pipeline.cpp
├── cpp
│ ├── CMakeLists.txt
│ ├── include
│ │ └── openvino
│ │ │ └── genai
│ │ │ ├── cache_eviction.hpp
│ │ │ ├── chat_history.hpp
│ │ │ ├── common_types.hpp
│ │ │ ├── continuous_batching_pipeline.hpp
│ │ │ ├── generation_config.hpp
│ │ │ ├── generation_handle.hpp
│ │ │ ├── image_generation
│ │ │ ├── autoencoder_kl.hpp
│ │ │ ├── clip_text_model.hpp
│ │ │ ├── clip_text_model_with_projection.hpp
│ │ │ ├── flux_transformer_2d_model.hpp
│ │ │ ├── generation_config.hpp
│ │ │ ├── image2image_pipeline.hpp
│ │ │ ├── image_generation_perf_metrics.hpp
│ │ │ ├── inpainting_pipeline.hpp
│ │ │ ├── scheduler.hpp
│ │ │ ├── sd3_transformer_2d_model.hpp
│ │ │ ├── t5_encoder_model.hpp
│ │ │ ├── text2image_pipeline.hpp
│ │ │ └── unet2d_condition_model.hpp
│ │ │ ├── json_container.hpp
│ │ │ ├── llm_pipeline.hpp
│ │ │ ├── lora_adapter.hpp
│ │ │ ├── parsers.hpp
│ │ │ ├── perf_metrics.hpp
│ │ │ ├── rag
│ │ │ ├── text_embedding_pipeline.hpp
│ │ │ └── text_rerank_pipeline.hpp
│ │ │ ├── scheduler_config.hpp
│ │ │ ├── sparse_attention.hpp
│ │ │ ├── speculative_decoding
│ │ │ └── perf_metrics.hpp
│ │ │ ├── speech_generation
│ │ │ ├── speech_generation_config.hpp
│ │ │ ├── speech_generation_perf_metrics.hpp
│ │ │ └── text2speech_pipeline.hpp
│ │ │ ├── streamer_base.hpp
│ │ │ ├── text_streamer.hpp
│ │ │ ├── tokenizer.hpp
│ │ │ ├── visibility.hpp
│ │ │ ├── visual_language
│ │ │ ├── perf_metrics.hpp
│ │ │ └── pipeline.hpp
│ │ │ ├── whisper_generation_config.hpp
│ │ │ └── whisper_pipeline.hpp
│ └── src
│ │ ├── chat_history.cpp
│ │ ├── circular_buffer_queue.hpp
│ │ ├── continuous_batching
│ │ ├── attention_output.hpp
│ │ ├── block_manager.hpp
│ │ ├── cache_eviction.cpp
│ │ ├── cache_eviction.hpp
│ │ ├── cache_manager.hpp
│ │ ├── cache_state_dumper.hpp
│ │ ├── kvcrush.cpp
│ │ ├── kvcrush.hpp
│ │ ├── model_runner.hpp
│ │ ├── paged_attention_transformations.cpp
│ │ ├── paged_attention_transformations.hpp
│ │ ├── pipeline.cpp
│ │ ├── pipeline_base.cpp
│ │ ├── pipeline_base.hpp
│ │ ├── pipeline_impl.cpp
│ │ ├── pipeline_impl.hpp
│ │ ├── scheduler.hpp
│ │ ├── sparse_attention.cpp
│ │ ├── sparse_attention.hpp
│ │ ├── threaded_streamer.hpp
│ │ └── timer.hpp
│ │ ├── debug_utils.hpp
│ │ ├── generation_config.cpp
│ │ ├── generation_handle.cpp
│ │ ├── generation_stream.hpp
│ │ ├── gguf_utils
│ │ ├── building_blocks.cpp
│ │ ├── building_blocks.hpp
│ │ ├── gguf.cpp
│ │ ├── gguf.hpp
│ │ ├── gguf_modeling.cpp
│ │ ├── gguf_modeling.hpp
│ │ ├── gguf_quants.cpp
│ │ ├── gguf_tokenizer.cpp
│ │ └── gguf_tokenizer.hpp
│ │ ├── image_generation
│ │ ├── diffusion_pipeline.hpp
│ │ ├── flux_fill_pipeline.hpp
│ │ ├── flux_pipeline.hpp
│ │ ├── generation_config.cpp
│ │ ├── image2image_pipeline.cpp
│ │ ├── image_generation_perf_metrics.cpp
│ │ ├── image_processor.cpp
│ │ ├── image_processor.hpp
│ │ ├── inpainting_pipeline.cpp
│ │ ├── models
│ │ │ ├── autoencoder_kl.cpp
│ │ │ ├── clip_text_model.cpp
│ │ │ ├── flux_transformer_2d_model.cpp
│ │ │ ├── sd3_transformer_2d_model.cpp
│ │ │ ├── sd3transformer_2d_inference.hpp
│ │ │ ├── sd3transformer_2d_inference_dynamic.hpp
│ │ │ ├── sd3transformer_2d_inference_static_bs1.hpp
│ │ │ ├── t5_encoder_model.cpp
│ │ │ ├── unet2d_condition_model.cpp
│ │ │ ├── unet_inference.hpp
│ │ │ ├── unet_inference_dynamic.hpp
│ │ │ └── unet_inference_static_bs1.hpp
│ │ ├── numpy_utils.cpp
│ │ ├── numpy_utils.hpp
│ │ ├── schedulers
│ │ │ ├── ddim.cpp
│ │ │ ├── ddim.hpp
│ │ │ ├── euler_ancestral_discrete.cpp
│ │ │ ├── euler_ancestral_discrete.hpp
│ │ │ ├── euler_discrete.cpp
│ │ │ ├── euler_discrete.hpp
│ │ │ ├── flow_match_euler_discrete.cpp
│ │ │ ├── flow_match_euler_discrete.hpp
│ │ │ ├── ischeduler.hpp
│ │ │ ├── lcm.cpp
│ │ │ ├── lcm.hpp
│ │ │ ├── pndm.cpp
│ │ │ ├── pndm.hpp
│ │ │ ├── scheduler.cpp
│ │ │ ├── types.cpp
│ │ │ └── types.hpp
│ │ ├── stable_diffusion_3_pipeline.hpp
│ │ ├── stable_diffusion_pipeline.hpp
│ │ ├── stable_diffusion_xl_pipeline.hpp
│ │ ├── text2image_pipeline.cpp
│ │ └── threaded_callback.hpp
│ │ ├── json_container.cpp
│ │ ├── json_utils.hpp
│ │ ├── llm
│ │ ├── pipeline.cpp
│ │ ├── pipeline_base.hpp
│ │ ├── pipeline_continuous_batching_adapter.hpp
│ │ ├── pipeline_stateful.cpp
│ │ ├── pipeline_stateful.hpp
│ │ ├── pipeline_static.cpp
│ │ └── pipeline_static.hpp
│ │ ├── lm_encoding.cpp
│ │ ├── lm_encoding.hpp
│ │ ├── logger.hpp
│ │ ├── lora
│ │ ├── adapter.cpp
│ │ ├── common.hpp
│ │ ├── helper.cpp
│ │ ├── helper.hpp
│ │ ├── names_mapping.cpp
│ │ ├── names_mapping.hpp
│ │ └── safetensors.c
│ │ ├── parsers.cpp
│ │ ├── perf_metrics.cpp
│ │ ├── prompt_lookup
│ │ ├── continuous_batching_for_prompt_lookup.cpp
│ │ ├── continuous_batching_for_prompt_lookup.hpp
│ │ ├── prompt_lookup_impl.cpp
│ │ └── prompt_lookup_impl.hpp
│ │ ├── rag
│ │ ├── text_embedding_pipeline.cpp
│ │ └── text_rerank_pipeline.cpp
│ │ ├── sampling
│ │ ├── logit_processor.hpp
│ │ ├── logit_transformers.hpp
│ │ ├── sampler.cpp
│ │ ├── sampler.hpp
│ │ ├── structured_output
│ │ │ ├── structured_output_controller.cpp
│ │ │ ├── structured_output_controller.hpp
│ │ │ ├── xgrammar_backend.cpp
│ │ │ └── xgrammar_backend.hpp
│ │ └── threadpool.hpp
│ │ ├── sequence_group.cpp
│ │ ├── sequence_group.hpp
│ │ ├── speculative_decoding
│ │ ├── continuous_batching_for_speculative_decoding_impl.cpp
│ │ ├── continuous_batching_for_speculative_decoding_impl.hpp
│ │ ├── speculative_decoding_impl.cpp
│ │ ├── speculative_decoding_impl.hpp
│ │ ├── speculative_decoding_metrics.cpp
│ │ ├── speculative_decoding_metrics.hpp
│ │ ├── speculative_decoding_perf_metrics.cpp
│ │ ├── speculative_decoding_stateful.cpp
│ │ ├── speculative_decoding_stateful.hpp
│ │ └── update_request_structs.hpp
│ │ ├── speech_generation
│ │ ├── default_speaker_embedding.hpp
│ │ ├── speech_generation_config.cpp
│ │ ├── speech_generation_perf_metrics.cpp
│ │ ├── speecht5_tts_decoder.cpp
│ │ ├── speecht5_tts_decoder.hpp
│ │ ├── speecht5_tts_model.cpp
│ │ ├── speecht5_tts_model.hpp
│ │ ├── text2speech_pipeline.cpp
│ │ ├── text2speech_pipeline_impl.cpp
│ │ └── text2speech_pipeline_impl.hpp
│ │ ├── synchronized_queue.hpp
│ │ ├── text_streamer.cpp
│ │ ├── tokenizer
│ │ ├── add_second_input_pass.cpp
│ │ ├── add_second_input_pass.hpp
│ │ ├── chat_template_fallback_map.hpp
│ │ ├── make_tokenizer_stateful.cpp
│ │ ├── make_tokenizer_stateful.hpp
│ │ ├── tokenizer.cpp
│ │ ├── tokenizer_impl.cpp
│ │ ├── tokenizer_impl.hpp
│ │ ├── tokenizers_path.cpp
│ │ └── tokenizers_path.hpp
│ │ ├── utils.cpp
│ │ ├── utils.hpp
│ │ ├── visual_language
│ │ ├── clip.cpp
│ │ ├── clip.hpp
│ │ ├── continuous_batching_adapter.hpp
│ │ ├── embedding_model.cpp
│ │ ├── embedding_model.hpp
│ │ ├── gemma3
│ │ │ ├── classes.cpp
│ │ │ └── classes.hpp
│ │ ├── inputs_embedder.cpp
│ │ ├── inputs_embedder.hpp
│ │ ├── internvl_chat
│ │ │ ├── classes.cpp
│ │ │ └── classes.hpp
│ │ ├── llava
│ │ │ ├── classes.cpp
│ │ │ └── classes.hpp
│ │ ├── llava_next
│ │ │ ├── classes.cpp
│ │ │ └── classes.hpp
│ │ ├── llava_next_video
│ │ │ ├── classes.cpp
│ │ │ └── classes.hpp
│ │ ├── minicpm
│ │ │ ├── classes.cpp
│ │ │ └── classes.hpp
│ │ ├── nanollava
│ │ │ ├── classes.cpp
│ │ │ └── classes.hpp
│ │ ├── perf_metrics.cpp
│ │ ├── phi3_vision
│ │ │ ├── classes.cpp
│ │ │ └── classes.hpp
│ │ ├── phi4mm
│ │ │ ├── classes.cpp
│ │ │ └── classes.hpp
│ │ ├── pipeline.cpp
│ │ ├── pipeline_base.hpp
│ │ ├── processor_config.cpp
│ │ ├── processor_config.hpp
│ │ ├── qwen2_5_vl
│ │ │ ├── classes.cpp
│ │ │ └── classes.hpp
│ │ ├── qwen2vl
│ │ │ ├── classes.cpp
│ │ │ └── classes.hpp
│ │ ├── vision_encoder.cpp
│ │ ├── vision_encoder.hpp
│ │ ├── vl_sdpa_transformations.cpp
│ │ ├── vl_sdpa_transformations.hpp
│ │ ├── vlm_config.cpp
│ │ └── vlm_config.hpp
│ │ └── whisper
│ │ ├── config.cpp
│ │ ├── config.hpp
│ │ ├── context_tokens.cpp
│ │ ├── context_tokens.hpp
│ │ ├── feature_extractor.cpp
│ │ ├── feature_extractor.hpp
│ │ ├── generation_config.cpp
│ │ ├── logit_processor.cpp
│ │ ├── logit_processor.hpp
│ │ ├── models.hpp
│ │ ├── models
│ │ ├── decoder.cpp
│ │ ├── decoder.hpp
│ │ ├── statefull_decoder.cpp
│ │ ├── statefull_decoder.hpp
│ │ ├── with_past_decoder.cpp
│ │ └── with_past_decoder.hpp
│ │ ├── perf_metrics.cpp
│ │ ├── pipeline.cpp
│ │ ├── pipeline_base.hpp
│ │ ├── pipeline_static.cpp
│ │ ├── pipeline_static.hpp
│ │ ├── timestamps.cpp
│ │ ├── timestamps.hpp
│ │ ├── whisper.cpp
│ │ ├── whisper.hpp
│ │ ├── whisper_utils.cpp
│ │ └── whisper_utils.hpp
├── docs
│ ├── BUILD.md
│ ├── DEBUG_LOG.md
│ ├── DOCKER.md
│ ├── HOW_IT_WORKS.md
│ ├── beam_idx-drop.gif
│ ├── beam_idx-fork.gif
│ ├── openvino_genai.svg
│ ├── stateful.jpg
│ └── stateless.jpg
├── js
│ ├── .gitignore
│ ├── .npmignore
│ ├── .prettierrc
│ ├── BUILD.md
│ ├── CMakeLists.txt
│ ├── README.md
│ ├── eslint.config.cjs
│ ├── include
│ │ ├── addon.hpp
│ │ ├── chat_history.hpp
│ │ ├── helper.hpp
│ │ ├── llm_pipeline
│ │ │ ├── finish_chat_worker.hpp
│ │ │ ├── init_worker.hpp
│ │ │ ├── llm_pipeline_wrapper.hpp
│ │ │ └── start_chat_worker.hpp
│ │ ├── perf_metrics.hpp
│ │ ├── text_embedding_pipeline
│ │ │ ├── embed_documents_worker.hpp
│ │ │ ├── embed_query_worker.hpp
│ │ │ ├── init_worker.hpp
│ │ │ └── pipeline_wrapper.hpp
│ │ └── tokenizer.hpp
│ ├── lib
│ │ ├── addon.ts
│ │ ├── chatHistory.ts
│ │ ├── index.ts
│ │ ├── pipelines
│ │ │ ├── llmPipeline.ts
│ │ │ └── textEmbeddingPipeline.ts
│ │ └── utils.ts
│ ├── package-lock.json
│ ├── package.json
│ ├── scripts
│ │ └── download-runtime.cjs
│ ├── src
│ │ ├── addon.cpp
│ │ ├── chat_history.cpp
│ │ ├── helper.cpp
│ │ ├── llm_pipeline
│ │ │ ├── finish_chat_worker.cpp
│ │ │ ├── init_worker.cpp
│ │ │ ├── llm_pipeline_wrapper.cpp
│ │ │ └── start_chat_worker.cpp
│ │ ├── perf_metrics.cpp
│ │ ├── text_embedding_pipeline
│ │ │ ├── embed_documents_worker.cpp
│ │ │ ├── embed_query_worker.cpp
│ │ │ ├── init_worker.cpp
│ │ │ └── pipeline_wrapper.cpp
│ │ └── tokenizer.cpp
│ ├── tests
│ │ ├── bindings.test.js
│ │ ├── chatHistory.test.js
│ │ ├── models.js
│ │ ├── module.test.js
│ │ ├── setup.js
│ │ ├── structuredOutput.test.js
│ │ ├── textEmbeddingsPipeline.test.js
│ │ ├── tokenizer.test.js
│ │ └── utils.js
│ ├── thirdparty
│ │ ├── node-lib.def
│ │ └── win_delay_load_hook.cc
│ └── tsconfig.json
└── python
│ ├── CMakeLists.txt
│ ├── clean_version.cmake
│ ├── compare_pyi.cmake
│ ├── openvino_genai
│ ├── __init__.py
│ ├── __init__.pyi
│ └── py_openvino_genai.pyi
│ ├── py_chat_history.cpp
│ ├── py_continuous_batching_pipeline.cpp
│ ├── py_generation_config.cpp
│ ├── py_image_generation_models.cpp
│ ├── py_image_generation_pipelines.cpp
│ ├── py_llm_pipeline.cpp
│ ├── py_lora_adapter.cpp
│ ├── py_openvino_genai.cpp
│ ├── py_parsers.cpp
│ ├── py_perf_metrics.cpp
│ ├── py_rag.cpp
│ ├── py_speech_generation.cpp
│ ├── py_streamers.cpp
│ ├── py_tokenizer.cpp
│ ├── py_utils.cpp
│ ├── py_utils.hpp
│ ├── py_vlm_pipeline.cpp
│ ├── py_whisper_pipeline.cpp
│ └── remove_abi_specific_info.cmake
├── tests
├── cpp
│ ├── CMakeLists.txt
│ ├── block_allocator.cpp
│ ├── block_hash_store.cpp
│ ├── block_manager.cpp
│ ├── cache_eviction.cpp
│ ├── cache_manager.cpp
│ ├── data
│ │ ├── cache_rotation_poc_ref_coefficients_per_block_0.txt
│ │ ├── cache_rotation_poc_ref_coefficients_per_block_1.txt
│ │ ├── cache_rotation_poc_ref_coefficients_per_block_2.txt
│ │ └── cache_rotation_poc_ref_coefficients_per_block_3.txt
│ ├── helper.cpp
│ ├── helper.hpp
│ ├── kvcrush.cpp
│ ├── logit_filtering.cpp
│ ├── parser.cpp
│ ├── sampler.cpp
│ ├── scheduler.cpp
│ ├── sparse_attention.cpp
│ ├── speculative_decoding.cpp
│ ├── test_add_second_input_pass.cpp
│ ├── test_json_container.cpp
│ └── utils.cpp
└── python_tests
│ ├── README.md
│ ├── conftest.py
│ ├── data
│ ├── __init__.py
│ ├── long_prompts.txt
│ ├── models.py
│ ├── short_prompts.txt
│ ├── test_dataset.py
│ └── tokenizer_configs.py
│ ├── models
│ ├── nightly
│ ├── precommit
│ └── real_models
│ ├── pytest.ini
│ ├── requirements.txt
│ ├── samples
│ ├── conftest.py
│ ├── test_beam_search_causal_lm.py
│ ├── test_benchmark_genai.py
│ ├── test_benchmark_image_gen.py
│ ├── test_benchmark_vlm.py
│ ├── test_chat_sample.py
│ ├── test_compound_grammar_sample.py
│ ├── test_continuous_batching_tools.py
│ ├── test_encrypted_model_causal_lm.py
│ ├── test_encrypted_model_vlm.py
│ ├── test_greedy_causal_lm.py
│ ├── test_heterogeneous_stable_diffusion.py
│ ├── test_image2image.py
│ ├── test_inpainting.py
│ ├── test_lora.py
│ ├── test_lora_text2image.py
│ ├── test_multinomial_causal_lm.py
│ ├── test_prompt_lookup_decoding_lm.py
│ ├── test_rag_sample.py
│ ├── test_react_sample.py
│ ├── test_scheduler_config.py
│ ├── test_speculative_decoding_lm.py
│ ├── test_structural_tag_generation.py
│ ├── test_structured_output_sample.py
│ ├── test_text2image.py
│ ├── test_text2speech.py
│ ├── test_tools_llm_benchmark.py
│ ├── test_utils.py
│ ├── test_visual_language_chat.py
│ └── test_whisper_speech_recognition.py
│ ├── test_continuous_batching.py
│ ├── test_generation_config.py
│ ├── test_gguf_reader.py
│ ├── test_kv_cache_eviction
│ ├── kv_cache_eviction_utils.py
│ ├── test_kv_cache_eviction_1.py
│ └── test_kv_cache_eviction_2.py
│ ├── test_llm_pipeline.py
│ ├── test_llm_pipeline_static.py
│ ├── test_parsers.py
│ ├── test_rag.py
│ ├── test_sampling.py
│ ├── test_stateful_speculative_decoding.py
│ ├── test_structured_output.py
│ ├── test_text_streamer.py
│ ├── test_tokenizer.py
│ ├── test_vlm_pipeline.py
│ ├── test_whisper_pipeline.py
│ ├── test_whisper_pipeline_static.py
│ └── utils
│ ├── __init__.py
│ ├── comparation.py
│ ├── constants.py
│ ├── generation_config.py
│ ├── hugging_face.py
│ ├── longbench.py
│ ├── network.py
│ ├── ov_genai_pipelines.py
│ ├── qwen3_reranker_utils.py
│ └── tokenizers.py
├── third-party-programs.txt
├── thirdparty
└── CMakeLists.txt
└── tools
├── __init__.py
├── cacheviz
├── __init__.py
├── cacheviz.py
└── requirements.txt
├── continuous_batching
├── CMakeLists.txt
├── accuracy
│ ├── CMakeLists.txt
│ ├── continuous_batching_accuracy.cpp
│ └── continuous_batching_speculative_decoding.cpp
└── benchmark
│ ├── CMakeLists.txt
│ └── continuous_batching_benchmark.cpp
├── llm_bench
├── README.md
├── benchmark.py
├── doc
│ ├── NOTES.md
│ └── PROMPT.md
├── llm_bench_utils
│ ├── config_class.py
│ ├── gen_output_data.py
│ ├── get_use_case.py
│ ├── hook_beam_search.py
│ ├── hook_common.py
│ ├── hook_forward.py
│ ├── hook_forward_whisper.py
│ ├── hook_greedy_search.py
│ ├── llm_hook_beam_search
│ │ ├── __init__.py
│ │ ├── hook_beam_search_v40.py
│ │ ├── hook_beam_search_v51.py
│ │ ├── hook_beam_search_v52.py
│ │ └── hook_beam_search_v55.py
│ ├── llm_hook_sample
│ │ ├── __init__.py
│ │ ├── hook_sample.py
│ │ ├── hook_sample_v43.py
│ │ ├── hook_sample_v45.py
│ │ ├── hook_sample_v51.py
│ │ ├── hook_sample_v52.py
│ │ └── hook_sample_v55.py
│ ├── memory_monitor.py
│ ├── metrics_print.py
│ ├── model_utils.py
│ ├── output_csv.py
│ ├── output_file.py
│ ├── output_json.py
│ ├── ov_model_classes.py
│ ├── ov_utils.py
│ ├── parse_json_data.py
│ ├── prompt_utils.py
│ └── pt_utils.py
├── prompts
│ ├── llama-2-7b-chat_l.jsonl
│ ├── llava-1.5-7b.jsonl
│ ├── scheduler_config.json
│ ├── stable-diffusion-i2i.jsonl
│ ├── stable-diffusion-inpainting.jsonl
│ ├── stable-diffusion.jsonl
│ └── texts_for_rerank.jsonl
├── requirements.txt
├── requirements
│ ├── requirements_conversion.txt
│ └── requirements_gptq.txt
├── setup.cfg
└── task
│ ├── image_generation.py
│ ├── pipeline_utils.py
│ ├── speech_to_text_generation.py
│ ├── super_resolution_generation.py
│ ├── text_embeddings.py
│ ├── text_generation.py
│ ├── text_reranker.py
│ ├── text_to_speech_generation.py
│ └── visual_language_generation.py
└── who_what_benchmark
├── README.md
├── examples
├── gptq_eval.py
├── huggingface_eval.py
├── openvino_batched_eval.py
└── openvino_eval.py
├── requirements.txt
├── setup.cfg
├── setup.py
├── tests
├── test_cli_embeddings.py
├── test_cli_image.py
├── test_cli_reranking.py
├── test_cli_text.py
└── test_cli_vlm.py
└── whowhatbench
├── __init__.py
├── embeddings_evaluator.py
├── im2im_evaluator.py
├── inpaint_evaluator.py
├── model_loaders.py
├── prompts
├── text_long_prompts.yaml
└── text_prompts.yaml
├── registry.py
├── reranking_evaluator.py
├── text2image_evaluator.py
├── text_evaluator.py
├── utils.py
├── visualtext_evaluator.py
├── whowhat_metrics.py
└── wwb.py
/.clang-format:
--------------------------------------------------------------------------------
1 | BasedOnStyle: Google
2 | IndentWidth: 4
3 | UseTab: Never
4 | ColumnLimit: 120
5 |
6 | Language: Cpp
7 | Standard: Cpp11
8 |
9 | AccessModifierOffset: -4
10 | AlignConsecutiveMacros: true
11 | AllowAllArgumentsOnNextLine: false
12 | AllowAllConstructorInitializersOnNextLine: false
13 | AllowAllParametersOfDeclarationOnNextLine: false
14 | AllowShortFunctionsOnASingleLine: Empty
15 | AllowShortIfStatementsOnASingleLine: Never
16 | AllowShortLambdasOnASingleLine: Empty
17 | AllowShortLoopsOnASingleLine: false
18 | AlwaysBreakBeforeMultilineStrings: false
19 | BinPackArguments: false
20 | BinPackParameters: false
21 | CommentPragmas: '^#'
22 | DerivePointerAlignment: false
23 | FixNamespaceComments: true
24 | IndentCaseLabels: false
25 | IndentPPDirectives: AfterHash
26 | ForEachMacros:
27 | - foreach
28 | - FOREACH_CHILD
29 |
--------------------------------------------------------------------------------
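Note: as a quick illustration of the style these settings produce, here is a minimal, hypothetical C++ snippet (not a repository file) formatted according to the options above:

    #include <cstddef>
    #include <string>

    // Illustrative only: 4-space indents, spaces instead of tabs, 120-column limit.
    class Sampler {
    public:  // AccessModifierOffset: -4 keeps access specifiers at the class scope indent
        Sampler() {}  // AllowShortFunctionsOnASingleLine: Empty allows empty bodies on one line

        void set_seed(size_t seed) {  // non-empty bodies stay on multiple lines
            m_seed = seed;
        }

    private:
        size_t m_seed = 0;
    };
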
/.github/actions/build_app/action.yml:
--------------------------------------------------------------------------------
1 | name: 'Build App'
2 | inputs:
3 | ov_dir:
4 | description: 'Directory where OpenVINO is installed'
5 | default: './ov'
6 | required: false
7 | build_dir:
8 | description: 'Directory where the app is built'
9 | default: './build'
10 | required: false
11 | build_target:
12 | description: 'Target to build'
13 | default: ''
14 | required: false
15 | runs:
16 | using: "composite"
17 | steps:
18 | - name: Build app
19 | shell: bash
20 | run: |
21 | source ${{ inputs.ov_dir }}/setupvars.sh
22 | cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ${{ inputs.build_dir }}
23 | cmake --build ${{ inputs.build_dir }} --config Release ${{ inputs.build_target && format('--target {0}', inputs.build_target) || '' }} -j
24 |
--------------------------------------------------------------------------------
/.github/actions/install_openvino/action.yml:
--------------------------------------------------------------------------------
1 | name: 'Install OpenVINO'
2 | inputs:
3 | ov_link:
4 | description: 'URL to download OpenVINO'
5 | required: true
6 | ov_dir:
7 | description: 'Directory to install OpenVINO'
8 | default: './ov'
9 | required: false
10 | runs:
11 | using: "composite"
12 | steps:
13 | - name: 'Install OpenVINO'
14 | shell: bash
15 | run: |
16 | mkdir ${{ inputs.ov_dir }}
17 | curl ${{ inputs.ov_link }} | tar --directory ${{ inputs.ov_dir }} --strip-components 1 -xz
18 | sudo ${{ inputs.ov_dir }}/install_dependencies/install_openvino_dependencies.sh
19 |
--------------------------------------------------------------------------------
/.github/actions/install_python_deps/action.yml:
--------------------------------------------------------------------------------
1 | name: 'Install Python Dependencies'
2 | inputs:
3 | ov_dir:
4 | description: 'Directory where OpenVINO is installed'
5 | default: './ov'
6 | required: false
7 | runs:
8 | using: "composite"
9 | steps:
10 | - name: Install Python dependencies
11 | shell: bash
12 | run: |
13 | source ${{ inputs.ov_dir }}/setupvars.sh
14 | python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
15 | python -m pip install -r ./samples/requirements.txt
16 |
--------------------------------------------------------------------------------
/.github/actions/install_wheel/.node-version:
--------------------------------------------------------------------------------
1 | 20.6.0
2 |
--------------------------------------------------------------------------------
/.github/actions/install_wheel/.prettierignore:
--------------------------------------------------------------------------------
1 | dist/
2 | node_modules/
3 | coverage/
4 |
--------------------------------------------------------------------------------
/.github/actions/install_wheel/.prettierrc.json:
--------------------------------------------------------------------------------
1 | {
2 | "printWidth": 80,
3 | "tabWidth": 2,
4 | "useTabs": false,
5 | "semi": true,
6 | "singleQuote": true,
7 | "quoteProps": "as-needed",
8 | "jsxSingleQuote": false,
9 | "trailingComma": "none",
10 | "bracketSpacing": true,
11 | "bracketSameLine": true,
12 | "arrowParens": "avoid",
13 | "proseWrap": "always",
14 | "htmlWhitespaceSensitivity": "css",
15 | "endOfLine": "lf"
16 | }
17 |
--------------------------------------------------------------------------------
/.github/actions/install_wheel/action.yml:
--------------------------------------------------------------------------------
1 | name: 'Install Python Packages with Local Wheels and Extras'
2 | description:
3 | 'Installs specified Python packages with support for local wheels and optional
4 | extras.'
5 | author: 'OpenVINO Developers'
6 | inputs:
7 | packages:
8 | description:
9 | "Semicolon-separated list of packages to install, e.g.,
10 | 'openvino;openvino_tokenizers[extra1,extra2]'"
11 | required: true
12 | requirements_files:
13 | description:
14 | "Semicolon-separated list of requirements.txt to install, e.g.,
15 | 'requirements.txt;requirements-dev.txt'"
16 | required: false
17 | local_wheel_dir:
18 | description: 'Path to the directory containing local wheel files'
19 | required: true
20 | runs:
21 | using: 'node20'
22 | main: 'dist/index.js'
23 |
--------------------------------------------------------------------------------
/.github/actions/install_wheel/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "install-wheel-action",
3 | "description": "Action to install local python wheels together with their dependencies",
4 | "version": "0.0.1",
5 | "author": "OpenVINO Developers",
6 | "private": true,
7 | "keywords": [
8 | "GitHub",
9 | "Actions",
10 | "JavaScript"
11 | ],
12 | "engines": {
13 | "node": ">=20"
14 | },
15 | "main": "dist/index.js",
16 | "scripts": {
17 | "bundle": "npm run format:write && npm run package",
18 | "format:write": "npx prettier --write .",
19 | "format:check": "npx prettier --check .",
20 | "package": "npx ncc build src/install_packages.js -o dist",
21 | "package:watch": "npm run package -- --watch",
22 | "all": "npm run format:write && npm run package"
23 | },
24 | "dependencies": {
25 | "@actions/core": "^1.11.1",
26 | "glob": "^11.0.1"
27 | },
28 | "devDependencies": {
29 | "@vercel/ncc": "^0.38.3",
30 | "prettier": "^3.5.0"
31 | },
32 | "license": "Apache-2.0"
33 | }
34 |
--------------------------------------------------------------------------------
/.github/dependency_review.yml:
--------------------------------------------------------------------------------
1 | fail-on-severity: "low"
2 | allow-licenses:
3 | - "BSD-2-Clause"
4 | - "BSD-3-Clause"
5 | - "MIT"
6 | - "Apache-2.0"
7 | - "ISC"
8 | - "BlueOak-1.0.0"
9 | - "0BSD"
10 | - "Python-2.0"
11 | - "MIT-CMU" # Pillow's license
12 | - "CC-BY-NC-4.0"
13 | - "GPL-1.0-or-later"
14 | - "LGPL-2.0-or-later"
15 | - "LicenseRef-scancode-proprietary-license"
16 | fail-on-scopes:
17 | - "runtime"
18 | - "development"
19 | - "unknown"
20 | license-check: true
21 | vulnerability-check: true
22 | allow-dependencies-licenses:
23 | - "pkg:npm/thingies" # Docs site (dependency of dependency)
24 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | <!-- Keep your pull requests (PRs) as atomic as possible. That increases the likelihood that an individual PR won't be stuck because of adjacent problems, merge conflicts, or code review.
2 | Your merged PR is going to appear in the automatically generated release notes on GitHub. So the clearer the title the better. -->
3 | ## Description
4 | <!-- Please include a summary of the change. Also include relevant motivation and context. -->
5 |
6 | <!-- Jira ticket number (e.g., 123). Delete if there's no ticket. -->
7 | CVS-###
8 |
9 | <!-- Remove if not applicable -->
10 | Fixes #(issue)
11 |
12 | ## Checklist:
13 | - [ ] Tests have been updated or added to cover the new code. <!-- If the change isn't maintenance related, update the tests at https://github.com/openvinotoolkit/openvino.genai/tree/master/tests or explain in the description why the tests don't need an update. -->
14 | - [ ] This patch fully addresses the ticket. <!--- If follow-up pull requests are needed, specify in description. -->
15 | - [ ] I have made corresponding changes to the documentation. <!-- Run github.com/\<username>/openvino.genai/actions/workflows/deploy_gh_pages.yml on your fork with your branch as a parameter to deploy a test version with the updated content. Replace this comment with the link to the built docs. -->
16 |
--------------------------------------------------------------------------------
/.github/workflows/assign_issue.yml:
--------------------------------------------------------------------------------
1 | name: Take Issue
2 |
3 | on:
4 | issue_comment:
5 | types:
6 | - created
7 | - edited
8 |
9 | permissions: read-all # Required by https://github.com/ossf/scorecard/blob/e23b8ad91fd6a64a0a971ca4fc0a4d1650725615/docs/checks.md#token-permissions
10 |
11 | jobs:
12 | take-issue:
13 | name: Take issue
14 | runs-on: ubuntu-latest
15 | permissions:
16 | issues: write
17 | timeout-minutes: 10
18 | steps:
19 | - name: take an issue
20 | uses: bdougie/take-action@v1.6.1
21 | with:
22 | message: Thank you for looking into this issue! Please let us know if you have any questions or require any help.
23 | issueCurrentlyAssignedMessage: Thanks for being interested in this issue. It looks like this ticket is already assigned to a contributor. Please communicate with the assigned contributor to confirm the status of the issue.
24 | trigger: .take
25 | token: ${{ secrets.GITHUB_TOKEN }}
26 |
--------------------------------------------------------------------------------
/.github/workflows/cleanup_caches.yml:
--------------------------------------------------------------------------------
1 | name: Cleanup caches
2 | on:
3 | workflow_dispatch:
4 | schedule:
5 | # at 00:00 on workdays
6 | - cron: '0 0 * * 1,2,3,4,5'
7 |
8 | permissions: read-all
9 |
10 | jobs:
11 | Cleanup_OV_CACHE:
12 | name: Cleanup OV_CACHE
13 | runs-on: aks-linux-4-cores-16gb
14 | if: ${{ github.repository_owner == 'openvinotoolkit' }}
15 | container:
16 | image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04
17 | volumes:
18 | - /mount:/mount
19 | env:
20 | OV_CACHE: /mount/caches/huggingface/.ov_cache
21 |
22 | steps:
23 | - name: Pre-Collecting Cache Info
24 | run: |
25 | echo "Cache info: "
26 | du -h -d2 ${{ env.OV_CACHE }}
27 | - name: Cleanup cache
28 | run: |
29 | echo "Delete cache files if they have not been used in over 3 days"
30 | [ ! -z "${{ env.OV_CACHE }}" ] && find ${{ env.OV_CACHE }} ! -type d -atime +3 -delete
31 |
32 | - name: Post-Collecting Cache Info
33 | run: |
34 | echo "Cache info: "
35 | du -h -d2 ${{ env.OV_CACHE }}
--------------------------------------------------------------------------------
/.github/workflows/labeler.yml:
--------------------------------------------------------------------------------
1 | name: "Pull Request Labeler"
2 | on:
3 | - pull_request_target
4 |
5 | permissions: read-all # Required by https://github.com/ossf/scorecard/blob/e23b8ad91fd6a64a0a971ca4fc0a4d1650725615/docs/checks.md#token-permissions
6 |
7 | jobs:
8 | triage:
9 | permissions:
10 | contents: read
11 | pull-requests: write
12 | issues: write
13 | runs-on: ubuntu-latest
14 | steps:
15 | - uses: akladiev/labeler@eeac5941e7fb6f980d47e038ac0665168851c874 # v4.3.1
16 | with:
17 | repo-token: "${{ secrets.GITHUB_TOKEN }}"
18 | configuration-path: '.github/labeler.yml'
19 | sync-labels: 'true'
20 | dot: 'true'
21 | non-matching-label: 'no-match-files'
22 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # build/artifact dirs
2 | [Bb]uild*/
3 |
4 | # but ensure we don't skip __init__.py and __main__.py
5 | !__init__.py
6 | !__main__.py
7 |
8 | # don't skip GitHub Actions files and directories
9 | !.github/**
10 |
11 | # developer tools
12 | *.idea
13 | .vscode
14 | .vs/
15 | .vsconan/
16 | .DS_Store
17 | **/tags
18 | compile_commands.json
19 | .local_vimrc
20 | .gdb_history
21 | .vimspector.json
22 | doc/
23 | temp/
24 | .repo/
25 | CMakeLists.txt.user
26 | CMakeUserPresets.json
27 | .env
28 |
29 | *.project
30 | *.cproject
31 | *.pydevproject
32 | *.settings
33 | */gen/
34 | *.swp
35 | /config.xml
36 |
37 | # Python-specific
38 | *.?env*
39 | *.pyc
40 | __pycache__
41 | .py-build-cmake_cache
42 | *.egg-info
43 |
44 | # CodeQL artifacts
45 | _codeql_detected_source_root
46 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "thirdparty/openvino_tokenizers"]
2 | path = thirdparty/openvino_tokenizers
3 | url = https://github.com/openvinotoolkit/openvino_tokenizers.git
4 |
--------------------------------------------------------------------------------
/Jenkinsfile:
--------------------------------------------------------------------------------
1 | #!groovy
2 |
3 | properties([
4 | parameters([
5 | booleanParam(defaultValue: false,
6 | description: 'Cancel the rest of parallel stages if one of them fails and return status immediately',
7 | name: 'failFast'),
8 | booleanParam(defaultValue: true,
9 | description: 'Whether to propagate commit status to GitHub',
10 | name: 'propagateStatus'),
11 | booleanParam(defaultValue: false,
12 | description: 'If true, forces running pre-commit scope',
13 | name: 'forceRunPrecommitScope'),
14 | string(defaultValue: '',
15 | description: 'Pipeline shared library version (branch/tag/commit). Determined automatically if empty',
16 | name: 'library_version')
17 | ])
18 | ])
19 |
20 | loadOpenVinoLibrary {
21 | entrypoint(this)
22 | }
23 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 |
3 | ## Report a Vulnerability
4 |
5 | Please report security issues or vulnerabilities to the [Intel® Security Center].
6 |
7 | For more information on how Intel® works to resolve security issues, see
8 | [Vulnerability Handling Guidelines].
9 |
10 | [Intel® Security Center]:https://www.intel.com/security
11 |
12 | [Vulnerability Handling Guidelines]:https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html
13 |
--------------------------------------------------------------------------------
/cmake/features.cmake:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2018-2025 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0
3 | #
4 |
5 | option(ENABLE_PYTHON "Enable Python API build" ON)
6 | option(ENABLE_GIL_PYTHON_API "Build Python API with Global Interpreter Lock" ON)
7 | option(ENABLE_JS "Enable JS API build" OFF)
8 | option(ENABLE_SAMPLES "Enable samples build" ON)
9 | option(ENABLE_TESTS "Enable tests build" ON)
10 | option(ENABLE_TOOLS "Enable tools build" ON)
11 | option(ENABLE_GGUF "Enable support for GGUF format" ON)
12 | option(ENABLE_XGRAMMAR "Enable support for structured output generation with xgrammar backend" ON)
13 |
14 | # Disable building samples for NPM package
15 | if(CPACK_GENERATOR STREQUAL "NPM")
16 | set(ENABLE_SAMPLES OFF)
17 | set(ENABLE_PYTHON OFF)
18 | set(ENABLE_JS ON)
19 | else()
20 | set(ENABLE_JS OFF)
21 | endif()
22 |
--------------------------------------------------------------------------------
/cmake/templates/OpenVINOGenAIConfig.cmake.in:
--------------------------------------------------------------------------------
1 | @PACKAGE_INIT@
2 |
3 | include(CMakeFindDependencyMacro)
4 | find_dependency(OpenVINO COMPONENTS Runtime)
5 |
6 | if(NOT TARGET openvino_genai)
7 | include("${CMAKE_CURRENT_LIST_DIR}/OpenVINOGenAITargets.cmake")
8 | endif()
9 |
10 | check_required_components(OpenVINOGenAI)
11 |
--------------------------------------------------------------------------------
/cmake/templates/version.cpp.in:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #include "openvino/genai/version.hpp"
5 |
6 | namespace ov {
7 | namespace genai {
8 |
9 | const Version get_version() {
10 | const static Version version = {
11 | "@OpenVINOGenAI_FULL_VERSION@",
12 | "OpenVINO GenAI version",
13 | };
14 |
15 | return version;
16 | }
17 |
18 | } // namespace genai
19 | } // namespace ov
20 |
--------------------------------------------------------------------------------
/cmake/templates/version.hpp.in:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include "openvino/core/version.hpp"
7 | #include "openvino/genai/visibility.hpp"
8 |
9 | /**
10 | * OpenVINO GenAI major version
11 | */
12 | #define OPENVINO_GENAI_VERSION_MAJOR @OpenVINOGenAI_VERSION_MAJOR@
13 |
14 | /**
15 | * OpenVINO GenAI minor version
16 | */
17 | #define OPENVINO_GENAI_VERSION_MINOR @OpenVINOGenAI_VERSION_MINOR@
18 |
19 | /**
20 | * OpenVINO GenAI patch version
21 | */
22 | #define OPENVINO_GENAI_VERSION_PATCH @OpenVINOGenAI_VERSION_PATCH@
23 |
24 | namespace ov {
25 | namespace genai {
26 |
27 | /**
28 |  * Returns the OpenVINO GenAI full version, including git commit information, in the form:
29 | * <MAJOR>.<MINOR>.<PATCH>.<REVISION>-<COMMIT NUMBER>-<COMMIT HASH>[-<BRANCH SUFFIX>]
30 | */
31 | OPENVINO_EXTERN_C OPENVINO_GENAI_EXPORTS const ov::Version OPENVINO_CDECL get_version();
32 |
33 | } // namespace genai
34 | } // namespace ov
35 |
--------------------------------------------------------------------------------
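For reference, a minimal usage sketch of the version API declared above (illustrative only, not a repository file); it assumes the `buildNumber`/`description` field names of `ov::Version` from the OpenVINO runtime headers:

    #include <iostream>

    #include "openvino/genai/version.hpp"

    int main() {
        // get_version() is populated from version.cpp.in with the full OpenVINOGenAI_FULL_VERSION string.
        const ov::Version version = ov::genai::get_version();
        std::cout << version.description << ": " << version.buildNumber << std::endl;
        std::cout << "Major.minor.patch: " << OPENVINO_GENAI_VERSION_MAJOR << "."
                  << OPENVINO_GENAI_VERSION_MINOR << "." << OPENVINO_GENAI_VERSION_PATCH << std::endl;
        return 0;
    }
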
/cmake/templates/vs_version.rc.in:
--------------------------------------------------------------------------------
1 | #include <winver.h>
2 |
3 | VS_VERSION_INFO VERSIONINFO
4 | FILEVERSION @PROJECT_VERSION_MAJOR@,@PROJECT_VERSION_MINOR@,@PROJECT_VERSION_PATCH@,@PROJECT_VERSION_TWEAK@
5 | PRODUCTVERSION @PROJECT_VERSION_MAJOR@,@PROJECT_VERSION_MINOR@,@PROJECT_VERSION_PATCH@,@PROJECT_VERSION_TWEAK@
6 | FILEFLAGSMASK VS_FFI_FILEFLAGSMASK
7 | #ifdef _DEBUG
8 | FILEFLAGS 1
9 | #else
10 | FILEFLAGS 0
11 | #endif
12 | FILEOS VOS__WINDOWS32
13 | FILETYPE VFT_DLL
14 | FILESUBTYPE 0
15 | BEGIN
16 | BLOCK "StringFileInfo"
17 | BEGIN
18 | BLOCK "040904E4"
19 | BEGIN
20 | VALUE "CompanyName", "@PROJECT_COMPANY_NAME@\0"
21 | VALUE "FileDescription", "@PROJECT_DESCRIPTION@\0"
22 | VALUE "FileVersion", "@PROJECT_VERSION@\0"
23 | VALUE "LegalCopyright", "@PROJECT_COPYRIGHT@\0"
24 | VALUE "ProductName", "@PROJECT_PRODUCT_NAME@\0"
25 | VALUE "ProductVersion", "@OpenVINOGenAI_FULL_VERSION@\0"
26 | VALUE "Comments", "@PROJECT_COMMENTS@\0"
27 | END
28 | END
29 | BLOCK "VarFileInfo"
30 | BEGIN
31 | VALUE "Translation", 0x0409, 1252
32 | END
33 | END
--------------------------------------------------------------------------------
/cmake/vs_version.cmake:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2025 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0
3 | #
4 |
5 | set(PROJECT_COMPANY_NAME "Intel Corporation")
6 | set(PROJECT_PRODUCT_NAME "OpenVINO GenAI")
7 | set(PROJECT_COPYRIGHT "Copyright (C) 2018-2025, Intel Corporation")
8 | set(PROJECT_COMMENTS "https://docs.openvino.ai/")
9 |
10 | # This function generates a version resource (.rc) file from a template and adds it to the given target.
11 | function(add_vs_version_resource TARGET_NAME)
12 | set(VS_VERSION_TEMPLATE "${PROJECT_SOURCE_DIR}/cmake/templates/vs_version.rc.in")
13 | set(VS_VERSION_OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/vs_version.rc")
14 |
15 | configure_file("${VS_VERSION_TEMPLATE}" "${VS_VERSION_OUTPUT}" @ONLY)
16 |
17 | target_sources(${TARGET_NAME} PRIVATE "${VS_VERSION_OUTPUT}")
18 | endfunction()
19 |
--------------------------------------------------------------------------------
/requirements-build.txt:
--------------------------------------------------------------------------------
1 | cmake~=3.23.0; platform_system != 'Darwin' or platform_machine == 'x86_64'
2 | cmake~=3.24.0; platform_system == 'Darwin' and platform_machine == 'arm64'
3 | pybind11-stubgen==2.5.5
--------------------------------------------------------------------------------
/samples/c/text_generation/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023-2025 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0
3 |
4 | find_package(OpenVINOGenAI REQUIRED
5 | PATHS
6 | "${CMAKE_BINARY_DIR}" # Reuse the package from the build.
 7 |     ${OpenVINO_DIR} # GenAI may be installed alongside OpenVINO.
8 | NO_CMAKE_FIND_ROOT_PATH
9 | )
10 |
11 | function(add_sample_executable target_name)
12 | add_executable(${target_name} ${target_name}.c)
13 | # Specifies that the source file should be compiled as a C source file
14 | set_source_files_properties(${target_name}.c PROPERTIES LANGUAGE C)
15 | target_link_libraries(${target_name} PRIVATE openvino::genai::c)
16 | set_target_properties(${target_name} PROPERTIES
17 | # Ensure out-of-box LC_RPATH on macOS with SIP
18 | INSTALL_RPATH_USE_LINK_PATH ON)
19 | install(TARGETS ${target_name}
20 | RUNTIME DESTINATION samples_bin/
21 | COMPONENT samples_bin
22 | EXCLUDE_FROM_ALL)
23 | endfunction()
24 |
25 | set (SAMPLE_LIST
26 | greedy_causal_lm_c
27 | chat_sample_c
28 | benchmark_genai_c)
29 |
30 | foreach(sample IN LISTS SAMPLE_LIST)
31 | add_sample_executable(${sample})
32 | endforeach()
33 |
34 |
--------------------------------------------------------------------------------
/samples/c/visual_language_chat/load_image.h:
--------------------------------------------------------------------------------
1 | #ifndef LOAD_IMAGE_H
2 | #define LOAD_IMAGE_H
3 |
4 | #include <stddef.h>
5 | #include <stdint.h>
6 |
7 | #ifdef __cplusplus
8 | extern "C" {
9 | #endif
10 |
11 | typedef struct ov_tensor ov_tensor_t;
12 |
13 | ov_tensor_t* load_image(const char* image_path);
14 |
15 | const ov_tensor_t** load_images(const char* image_path, size_t* tensor_count);
16 |
17 | void free_tensor(ov_tensor_t* tensor);
18 |
19 | void free_tensor_array(ov_tensor_t** tensors, size_t count);
20 |
21 | int file_exists(const char* path);
22 |
23 | #ifdef __cplusplus
24 | }
25 | #endif
26 |
27 | #endif // LOAD_IMAGE_H
28 |
--------------------------------------------------------------------------------
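A hedged usage sketch for the helpers declared in this header (illustrative only, not a repository file; the assumption that `load_image` returns NULL on failure is not stated by the header itself):

    #include <stdio.h>

    #include "load_image.h"

    int main(int argc, char* argv[]) {
        if (argc != 2 || !file_exists(argv[1])) {
            fprintf(stderr, "Usage: %s <IMAGE_FILE>\n", argv[0]);
            return 1;
        }
        ov_tensor_t* image = load_image(argv[1]); /* assumption: NULL signals a load error */
        if (!image) {
            fprintf(stderr, "Failed to load %s\n", argv[1]);
            return 1;
        }
        /* ... hand the tensor to the VLM pipeline here (see vlm_pipeline.c) ... */
        free_tensor(image);
        return 0;
    }
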
/samples/c/whisper_speech_recognition/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2025 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0
3 |
4 | find_package(OpenVINOGenAI REQUIRED
5 | PATHS
6 | "${CMAKE_BINARY_DIR}" # Reuse the package from the build.
 7 |     ${OpenVINO_DIR} # GenAI may be installed alongside OpenVINO.
8 | NO_CMAKE_FIND_ROOT_PATH
9 | )
10 |
11 | # Whisper Speech Recognition Sample
12 | add_executable(whisper_speech_recognition_c whisper_speech_recognition.c whisper_utils.c)
13 |
14 | # Specifies that the source file should be compiled as a C source file
15 | set_source_files_properties(whisper_speech_recognition.c whisper_utils.c PROPERTIES LANGUAGE C)
16 | target_link_libraries(whisper_speech_recognition_c PRIVATE openvino::genai::c)
17 |
18 | set_target_properties(whisper_speech_recognition_c PROPERTIES
19 | # Ensure out-of-box LC_RPATH on macOS with SIP
20 | INSTALL_RPATH_USE_LINK_PATH ON)
21 |
22 | # Install
23 | install(TARGETS whisper_speech_recognition_c
24 | RUNTIME DESTINATION samples_bin/
25 | COMPONENT samples_bin
26 | EXCLUDE_FROM_ALL)
27 |
--------------------------------------------------------------------------------
/samples/cpp/README.md:
--------------------------------------------------------------------------------
1 | Please refer to the following blog post for setup instructions.
2 |
3 | https://medium.com/openvino-toolkit/how-to-build-openvino-genai-app-in-c-32dcbe42fa67
4 |
--------------------------------------------------------------------------------
/samples/cpp/image_generation/512x512.bmp:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:73fc1a2b80048752350d108852f3598395666b9208d5e0ab34c0613cea9cfd04
3 | size 786486
4 |
--------------------------------------------------------------------------------
/samples/cpp/image_generation/baseline.bmp:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:bb8491607e8c2cce4394ac0b796350745dde04dba7d754c3fad24d86e1c4d2e1
3 | size 1376310
4 |
--------------------------------------------------------------------------------
/samples/cpp/image_generation/imageimage.bmp:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:2ecb4783a8f3a0962659ebf80eeaf0c0e48c44995c1e60001f215e0697ab9397
3 | size 2162742
4 |
--------------------------------------------------------------------------------
/samples/cpp/image_generation/imwrite.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include <string>
7 |
8 | #include "openvino/runtime/tensor.hpp"
9 |
10 | /**
11 |  * @brief Writes multiple images (depending on the `images` tensor batch size) to BMP file(s)
12 | * @param name File name or pattern to use to write images
13 |  * @param images Image(s) tensor
14 | * @param convert_bgr2rgb Convert BGR to RGB
15 | */
16 | void imwrite(const std::string& name, ov::Tensor images, bool convert_bgr2rgb);
17 |
--------------------------------------------------------------------------------
/samples/cpp/image_generation/inpainting.bmp:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:527cee8f7d451c7e5004bc58c079d4c853443644eaeb2d84a343016cd25214c1
3 | size 786486
4 |
--------------------------------------------------------------------------------
/samples/cpp/image_generation/load_image.hpp:
--------------------------------------------------------------------------------
1 |
2 | // Copyright (C) 2023-2025 Intel Corporation
3 | // SPDX-License-Identifier: Apache-2.0
4 |
5 | #pragma once
6 |
7 | #include <openvino/runtime/tensor.hpp>
8 | #include <filesystem>
9 |
10 | namespace utils {
11 | ov::Tensor load_image(const std::filesystem::path& image_path);
12 | }
13 |
--------------------------------------------------------------------------------
/samples/cpp/image_generation/lora.bmp:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:72760b8ae70a02cf318cfb9a08d520bd4800abb22b5eafe57eafb3cfbed7303d
3 | size 1376310
4 |
--------------------------------------------------------------------------------
/samples/cpp/image_generation/progress_bar.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #include <optional>
5 |
6 | #include "indicators/progress_bar.hpp"
7 |
8 | bool progress_bar(size_t step, size_t num_steps, ov::Tensor& /* latent */) {
9 | using namespace indicators;
10 |
11 | static std::optional<ProgressBar> bar;
12 |
13 | if (!bar) {
14 | bar.emplace(
15 | option::BarWidth{50},
16 | option::ForegroundColor{Color::green},
17 | option::FontStyles{std::vector<FontStyle>{FontStyle::bold}},
18 | option::ShowElapsedTime{true},
19 | option::ShowRemainingTime{true}
20 | );
21 | }
22 |
23 | std::stringstream stream;
24 | stream << "Image generation step " << (step + 1) << " / " << num_steps;
25 |
26 | bar->set_option(option::PostfixText{stream.str()});
27 | bar->set_progress((100 * (step + 1)) / num_steps);
28 |
29 | if (step + 1 == num_steps) {
30 |         bar.reset(); // Required when multiple progress bars are used; without recreating the object, the second progress bar won't be displayed correctly
31 | }
32 |
33 | return false;
34 | }
35 |
--------------------------------------------------------------------------------
/samples/cpp/image_generation/text2image.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #include "openvino/genai/image_generation/text2image_pipeline.hpp"
5 |
6 | #include "imwrite.hpp"
7 | #include "progress_bar.hpp"
8 |
9 | int32_t main(int32_t argc, char* argv[]) try {
10 | OPENVINO_ASSERT(argc == 3, "Usage: ", argv[0], " <MODEL_DIR> '<PROMPT>'");
11 |
12 | const std::string models_path = argv[1], prompt = argv[2];
13 | const std::string device = "CPU"; // GPU can be used as well
14 |
15 | ov::genai::Text2ImagePipeline pipe(models_path, device);
16 | ov::Tensor image = pipe.generate(prompt,
17 | ov::genai::width(512),
18 | ov::genai::height(512),
19 | ov::genai::num_inference_steps(20),
20 | ov::genai::num_images_per_prompt(1),
21 | ov::genai::callback(progress_bar));
22 |
23 |     // Writes `num_images_per_prompt` images using the file name pattern
24 | imwrite("image_%d.bmp", image, true);
25 |
26 | return EXIT_SUCCESS;
27 | } catch (const std::exception& error) {
28 | try {
29 | std::cerr << error.what() << '\n';
30 | } catch (const std::ios_base::failure&) {}
31 | return EXIT_FAILURE;
32 | } catch (...) {
33 | try {
34 | std::cerr << "Non-exception object thrown\n";
35 | } catch (const std::ios_base::failure&) {}
36 | return EXIT_FAILURE;
37 | }
38 |
--------------------------------------------------------------------------------
/samples/cpp/rag/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2025 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0
3 |
4 | find_package(OpenVINOGenAI REQUIRED
5 | PATHS
6 | "${CMAKE_BINARY_DIR}" # Reuse the package from the build.
7 |         ${OpenVINO_DIR}  # GenAI may be installed alongside OpenVINO.
8 | NO_CMAKE_FIND_ROOT_PATH
9 | )
10 |
11 | function(add_sample_executable target_name)
12 | add_executable(${target_name} ${target_name}.cpp)
13 | target_link_libraries(${target_name} PRIVATE openvino::genai)
14 | set_target_properties(${target_name} PROPERTIES
15 | COMPILE_PDB_NAME ${target_name}
16 | # Ensure out-of-box LC_RPATH on macOS with SIP
17 | INSTALL_RPATH_USE_LINK_PATH ON)
18 | install(TARGETS ${target_name}
19 | RUNTIME DESTINATION samples_bin/
20 | COMPONENT samples_bin
21 | EXCLUDE_FROM_ALL)
22 | endfunction()
23 |
24 | set(SAMPLE_LIST text_embeddings text_rerank)
25 |
26 | foreach(sample ${SAMPLE_LIST})
27 | add_sample_executable(${sample})
28 | endforeach()
29 |
30 |
31 | # benchmark_genai
32 | include(FetchContent)
33 |
34 | if(POLICY CMP0135)
35 | cmake_policy(SET CMP0135 NEW)
36 | endif()
37 |
--------------------------------------------------------------------------------
/samples/cpp/rag/text_embeddings.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #include "openvino/genai/rag/text_embedding_pipeline.hpp"
5 |
6 | int main(int argc, char* argv[]) try {
7 | if (argc < 3) {
8 | throw std::runtime_error(std::string{"Usage: "} + argv[0] + " <MODEL_DIR> '<TEXT 1>' ['<TEXT 2>' ...]");
9 | }
10 | auto documents = std::vector<std::string>(argv + 2, argv + argc);
11 | std::string models_path = argv[1];
12 |
13 | std::string device = "CPU"; // GPU can be used as well
14 |
15 | ov::genai::TextEmbeddingPipeline::Config config;
16 | config.pooling_type = ov::genai::TextEmbeddingPipeline::PoolingType::MEAN;
17 |
18 | ov::genai::TextEmbeddingPipeline pipeline(models_path, device, config);
19 |
20 | ov::genai::EmbeddingResults documents_embeddings = pipeline.embed_documents(documents);
21 | ov::genai::EmbeddingResult query_embedding = pipeline.embed_query("What is the capital of France?");
22 | } catch (const std::exception& error) {
23 | try {
24 | std::cerr << error.what() << '\n';
25 | } catch (const std::ios_base::failure&) {
26 | }
27 | return EXIT_FAILURE;
28 | } catch (...) {
29 | try {
30 | std::cerr << "Non-exception object thrown\n";
31 | } catch (const std::ios_base::failure&) {
32 | }
33 | return EXIT_FAILURE;
34 | }
35 |
--------------------------------------------------------------------------------
/samples/cpp/speech_generation/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023-2025 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0
3 |
4 | find_package(OpenVINOGenAI REQUIRED
5 | PATHS
6 | "${CMAKE_BINARY_DIR}" # Reuse the package from the build.
7 |         ${OpenVINO_DIR}  # GenAI may be installed alongside OpenVINO.
8 | NO_CMAKE_FIND_ROOT_PATH
9 | )
10 |
11 | include(FetchContent)
12 | 
13 | if(POLICY CMP0135)
14 |     cmake_policy(SET CMP0135 NEW)
15 | endif()
16 | 
17 | if(NOT TARGET dr_libs)
18 |     FetchContent_Declare(dr_libs
19 |         URL https://github.com/mackron/dr_libs/archive/da35f9d6c7374a95353fd1df1d394d44ab66cf01.tar.gz
20 |         URL_HASH SHA256=2704d347f480ca1bc92233fb01747e4550cc8031735b6ea62ca9990ebb8851ae)
21 |     FetchContent_MakeAvailable(dr_libs)
22 | endif()
23 |
24 | # create main sample executable
25 |
26 | add_executable(text2speech text2speech.cpp audio_utils.cpp)
27 |
28 | target_include_directories(text2speech PRIVATE "$<BUILD_INTERFACE:${dr_libs_SOURCE_DIR}>")
29 | target_link_libraries(text2speech PRIVATE openvino::genai)
30 |
31 | set_target_properties(text2speech PROPERTIES
32 | # Ensure out of box LC_RPATH on macOS with SIP
33 | INSTALL_RPATH_USE_LINK_PATH ON)
34 | target_compile_features(text2speech PRIVATE cxx_std_11)
35 |
36 | install(TARGETS text2speech
37 | RUNTIME DESTINATION samples_bin/
38 | COMPONENT samples_bin
39 | EXCLUDE_FROM_ALL)
40 |
--------------------------------------------------------------------------------
/samples/cpp/speech_generation/audio_utils.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include <filesystem>
7 | #include <string>
8 | #include <vector>
9 |
10 | #include "openvino/runtime/tensor.hpp"
11 |
12 | namespace utils {
13 | namespace audio {
14 | /**
15 | * This function saves an audio waveform, provided as an array of floating-point samples, to a WAV file.
16 | *
17 | * @param waveform_ptr Pointer to the array of float samples representing the audio waveform
18 | * @param waveform_size The number of samples in the waveform array
19 | * @param file_path The name (and path) of the WAV file to be created
20 | * @param bits_per_sample The bit depth used to store each sample in the WAV file
21 | */
22 | void save_to_wav(const float* waveform_ptr,
23 | size_t waveform_size,
24 | const std::filesystem::path& file_path,
25 | uint32_t bits_per_sample);
26 |
27 | /**
28 |  * This function reads a binary file containing a speaker embedding as 32-bit floating-point values and returns an
29 |  * ov::Tensor
30 |  *
31 |  * @param file_path The path to the binary file to be read
32 |  * @returns an ov::Tensor containing all float values read from the binary file
33 | */
34 | ov::Tensor read_speaker_embedding(const std::filesystem::path& file_path);
35 | } // namespace audio
36 | } // namespace utils
37 |
--------------------------------------------------------------------------------
/samples/cpp/text_generation/beam_search_causal_lm.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #include <openvino/genai/llm_pipeline.hpp>
5 |
6 | int main(int argc, char* argv[]) try {
7 | if (argc < 3) {
8 | throw std::runtime_error(std::string{"Usage: "} + argv[0] + " <MODEL_DIR> '<PROMPT 1>' ['<PROMPT 2>' ...]");
9 | }
10 | auto prompts = std::vector<std::string>(argv + 2, argv + argc);
11 | std::string models_path = argv[1];
12 |
13 | std::string device = "CPU"; // GPU can be used as well
14 | ov::genai::LLMPipeline pipe(models_path, device);
15 |
16 | ov::genai::GenerationConfig config;
17 | config.max_new_tokens = 20;
18 | config.num_beam_groups = 3;
19 | config.num_beams = 15;
20 | config.diversity_penalty = 1.0f;
21 | config.num_return_sequences = config.num_beams;
22 |
23 | auto beams = pipe.generate(prompts, config);
24 | std::cout << beams << '\n';
25 | } catch (const std::exception& error) {
26 | try {
27 | std::cerr << error.what() << '\n';
28 | } catch (const std::ios_base::failure&) {}
29 | return EXIT_FAILURE;
30 | } catch (...) {
31 | try {
32 | std::cerr << "Non-exception object thrown\n";
33 | } catch (const std::ios_base::failure&) {}
34 | return EXIT_FAILURE;
35 | }
36 |
--------------------------------------------------------------------------------
/samples/cpp/text_generation/greedy_causal_lm.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #include "openvino/genai/llm_pipeline.hpp"
5 |
6 | int main(int argc, char* argv[]) try {
7 | if (3 > argc)
8 | throw std::runtime_error(std::string{"Usage: "} + argv[0] + " <MODEL_DIR> \"<PROMPT>\"");
9 |
10 | std::string models_path = argv[1];
11 | std::string prompt = argv[2];
12 | std::string device = "CPU"; // GPU can be used as well
13 |
14 | ov::genai::LLMPipeline pipe(models_path, device);
15 | ov::genai::GenerationConfig config;
16 | config.max_new_tokens = 100;
17 | std::string result = pipe.generate(prompt, config);
18 | std::cout << result << std::endl;
19 | } catch (const std::exception& error) {
20 | try {
21 | std::cerr << error.what() << '\n';
22 | } catch (const std::ios_base::failure&) {}
23 | return EXIT_FAILURE;
24 | } catch (...) {
25 | try {
26 | std::cerr << "Non-exception object thrown\n";
27 | } catch (const std::ios_base::failure&) {}
28 | return EXIT_FAILURE;
29 | }
30 |
--------------------------------------------------------------------------------
/samples/cpp/text_generation/read_prompt_from_file.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #include <iostream>
5 | #include <fstream>
6 | #include "read_prompt_from_file.h"
7 |
8 | std::string utils::read_prompt(const std::string& file_path) {
9 | std::ifstream file(file_path);
10 | if (file.is_open()) {
11 | std::stringstream buffer;
12 | buffer << file.rdbuf();
13 | return buffer.str();
14 | } else {
15 | std::stringstream error_message;
16 | error_message << "Error opening prompt file: '" << file_path << "'";
17 | throw std::runtime_error{error_message.str()};
18 | }
19 | }
--------------------------------------------------------------------------------
/samples/cpp/text_generation/read_prompt_from_file.h:
--------------------------------------------------------------------------------
1 |
2 | // Copyright (C) 2023-2025 Intel Corporation
3 | // SPDX-License-Identifier: Apache-2.0
4 |
5 | #pragma once
6 |
7 | #include <sstream>
8 |
9 | namespace utils {
10 | std::string read_prompt(const std::string& file_path);
11 | }
--------------------------------------------------------------------------------
/samples/cpp/visual_language_chat/load_image.hpp:
--------------------------------------------------------------------------------
1 |
2 | // Copyright (C) 2023-2025 Intel Corporation
3 | // SPDX-License-Identifier: Apache-2.0
4 |
5 | #pragma once
6 |
7 | #include <openvino/runtime/tensor.hpp>
8 | #include <filesystem>
9 |
10 | namespace utils {
11 | ov::Tensor load_image(const std::filesystem::path& image_path);
12 | std::vector<ov::Tensor> load_images(const std::filesystem::path& image_path);
13 | }
14 |
--------------------------------------------------------------------------------
/samples/cpp/whisper_speech_recognition/audio_utils.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include "openvino/genai/whisper_pipeline.hpp"
7 |
8 | namespace utils {
9 | namespace audio {
10 | ov::genai::RawSpeechInput read_wav(const std::string& filename);
11 | } // namespace audio
12 | } // namespace utils
13 |
--------------------------------------------------------------------------------
/samples/deployment-requirements.txt:
--------------------------------------------------------------------------------
1 | --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
2 | openvino_genai~=2026.0.0.0.dev
3 | librosa==0.11.0 # For Whisper
4 | pillow==12.0.0 # Image processing for VLMs
5 | json5==0.12.1 # For ReAct
6 | pydantic==2.12.4 # For structured output JSON schema
7 |
--------------------------------------------------------------------------------
/samples/export-requirements.txt:
--------------------------------------------------------------------------------
1 | --extra-index-url https://download.pytorch.org/whl/cpu
2 | --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
3 | openvino-tokenizers[transformers]~=2026.0.0.0.dev
4 | optimum-intel[nncf]==1.26.0
5 | numpy==1.26.4; platform_system == "Darwin" and platform_machine == "x86_64"
6 | safetensors==0.6.2; platform_system == "Darwin" and platform_machine == "x86_64"
7 | einops==0.8.1 # For Qwen
8 | transformers_stream_generator==0.0.5 # For Qwen
9 | diffusers==0.35.2 # For image generation pipelines
10 | timm==1.0.22 # For exporting InternVL2
11 | # torchvision for visual language models
12 | torchvision==0.17.2; platform_system == "Darwin" and platform_machine == "x86_64"
13 | torchvision==0.23.0; platform_system != "Darwin" or platform_machine != "x86_64"
14 | transformers==4.55.4 # For Whisper
15 | hf_transfer==0.1.9 # for faster model downloads; should be used with env var HF_HUB_ENABLE_HF_TRANSFER=1
16 | backoff==2.2.1 # for microsoft/Phi-3.5-vision-instruct
17 | peft==0.17.1 # For microsoft/Phi-4-multimodal-instruct
18 |
--------------------------------------------------------------------------------
/samples/generation.gif:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:8b3ea717def68df6493c629551b80e74f58d03be02d837e6a16541b3d95787df
3 | size 5550657
4 |
--------------------------------------------------------------------------------
/samples/js/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 |
--------------------------------------------------------------------------------
/samples/js/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "openvino-genai-node-demo",
3 | "version": "1.0.0",
4 | "license": "Apache-2.0",
5 | "type": "module",
6 | "devDependencies": {
7 | "openvino-genai-node": "^2025.4.0",
8 | "yargs": "^18.0.0",
9 | "zod": "^4.1.12"
10 | },
11 | "engines": {
12 | "node": ">=21.0.0"
13 | },
14 | "scripts": {
15 | "test": "node tests/usage.test.js"
16 | }
17 | }
18 |
--------------------------------------------------------------------------------
/samples/js/rag/text_embeddings.js:
--------------------------------------------------------------------------------
1 | import { TextEmbeddingPipeline, PoolingType } from 'openvino-genai-node';
2 | import { basename } from 'node:path';
3 |
4 | main();
5 |
6 | async function main() {
7 | const modelPath = process.argv[2];
8 | const texts = process.argv.slice(3);
9 |
10 | const usageCommand = `Usage: node ${basename(process.argv[1])} <MODEL_DIR> '<TEXT 1>' ['<TEXT 2>' ...]`;
11 | if (!modelPath) {
12 | console.error('Please specify path to model directory');
13 | console.error(usageCommand);
14 | process.exit(1);
15 | }
16 | if (!texts.length) {
17 | console.error('Please specify prompt');
18 | console.error(usageCommand);
19 | process.exit(1);
20 | }
21 |
22 | const device = 'CPU'; // GPU can be used as well
23 | const config = {
24 | 'pooling_type': PoolingType.MEAN
25 | };
26 |
27 | const pipeline = await TextEmbeddingPipeline(modelPath, device, config);
28 |
29 | await pipeline.embedDocuments(texts);
30 | }
31 |
--------------------------------------------------------------------------------
/samples/js/text_generation/beam_search_causal_lm.js:
--------------------------------------------------------------------------------
1 | import { LLMPipeline } from 'openvino-genai-node';
2 | import { basename } from 'node:path';
3 |
4 | main();
5 |
6 | async function main() {
7 | const modelPath = process.argv[2];
8 | const prompts = process.argv.slice(3);
9 |
10 | if (!modelPath) {
11 | console.error('Please specify path to model directory\n'
12 | + `Run command must be: 'node ${basename(process.argv[1])} *path_to_model_dir* *prompts*'`);
13 | process.exit(1);
14 | }
15 |     if (!prompts.length) {
16 | console.error('Please specify prompts\n'
17 | + `Run command must be: 'node ${basename(process.argv[1])} *path_to_model_dir* *prompts*'`);
18 | process.exit(1);
19 | }
20 |
21 | const device = 'CPU'; // GPU can be used as well
22 | const pipe = await LLMPipeline(modelPath, device);
23 |
24 | const numBeams = 15;
25 | const config = {
26 | 'max_new_tokens': 20,
27 | 'num_beam_groups': 3,
28 | 'num_beams': numBeams,
29 | 'diversity_penalty': 1,
30 | 'num_return_sequences': numBeams,
31 | 'return_decoded_results': true,
32 |
33 | };
34 | const beams = await pipe.generate(prompts, config);
35 | console.log(beams.toString());
36 | }
37 |
--------------------------------------------------------------------------------
/samples/js/text_generation/greedy_causal_lm.js:
--------------------------------------------------------------------------------
1 | import { LLMPipeline } from 'openvino-genai-node';
2 | import { basename } from 'node:path';
3 |
4 | main();
5 |
6 | async function main() {
7 | const modelPath = process.argv[2];
8 | const prompt = process.argv[3];
9 |
10 | if (process.argv.length > 4) {
11 | console.error(`Run command must be: 'node ${basename(process.argv[1])} *path_to_model_dir* *prompt*'`);
12 | process.exit(1);
13 | }
14 | if (!modelPath) {
15 | console.error('Please specify path to model directory\n'
16 | + `Run command must be: 'node ${basename(process.argv[1])} *path_to_model_dir* *prompt*'`);
17 | process.exit(1);
18 | }
19 | if (!prompt) {
20 | console.error('Please specify prompt\n'
21 | + `Run command must be: 'node ${basename(process.argv[1])} *path_to_model_dir* *prompt*'`);
22 | process.exit(1);
23 | }
24 |
25 | const device = 'CPU'; // GPU can be used as well
26 | const pipe = await LLMPipeline(modelPath, device);
27 |
28 | const config = {
29 | 'max_new_tokens': 100,
30 | 'return_decoded_results': true,
31 | };
32 | const result = await pipe.generate(prompt, config);
33 |
34 | console.log(result.toString());
35 | }
--------------------------------------------------------------------------------
/samples/js/text_generation/helper.js:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | import { z } from 'zod';
5 |
6 | /** Serialize a JavaScript object to a JSON string
7 | * with specific formatting to align with Python. */
8 | export function serialize_json(object) {
9 | return JSON.stringify(object)
10 | // Add a space after every colon or comma not already followed by a space
11 | .replace(/(:|,)(?! )/g, '$1 ');
12 | }
13 |
14 | /** Convert a Zod schema to a JSON Schema
15 | * with specific formatting to align with Python */
16 | export function toJSONSchema(zodSchema, params) {
17 | const jsonSchema = z.toJSONSchema(
18 | zodSchema,
19 | {
20 | override: (ctx) => {
21 | if (params && params.override) {
22 | params.override(ctx);
23 | }
24 | const keys = Object.keys(ctx.jsonSchema).sort();
25 | for (const key of keys) {
26 | const value = ctx.jsonSchema[key];
27 | delete ctx.jsonSchema[key];
28 | ctx.jsonSchema[key] = value;
29 | }
30 | }
31 | });
32 | delete jsonSchema.$schema;
33 | delete jsonSchema.additionalProperties;
34 | return jsonSchema;
35 | }
--------------------------------------------------------------------------------
/samples/python/image_generation/image2image.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (C) 2024 Intel Corporation
3 | # SPDX-License-Identifier: Apache-2.0
4 |
5 | import argparse
6 | import openvino
7 | import openvino_genai
8 | import numpy as np
9 |
10 | from PIL import Image
11 |
12 | def read_image(path: str) -> openvino.Tensor:
13 | pic = Image.open(path).convert("RGB")
14 | image_data = np.array(pic)[None]
15 | return openvino.Tensor(image_data)
16 |
17 | def main():
18 | parser = argparse.ArgumentParser()
19 | parser.add_argument('model_dir')
20 | parser.add_argument('prompt')
21 | parser.add_argument('image')
22 | args = parser.parse_args()
23 |
24 | device = 'CPU' # GPU can be used as well
25 | pipe = openvino_genai.Image2ImagePipeline(args.model_dir, device)
26 |
27 | image = read_image(args.image)
28 |
29 | image_tensor = pipe.generate(args.prompt, image,
30 |         strength=0.8  # controls how much the initial image is noised after being converted to latent space; `1` means the initial image is fully noised
31 | )
32 |
33 | image = Image.fromarray(image_tensor.data[0])
34 | image.save("image.bmp")
35 |
36 |
37 | if '__main__' == __name__:
38 | main()
39 |
--------------------------------------------------------------------------------
/samples/python/image_generation/inpainting.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (C) 2024 Intel Corporation
3 | # SPDX-License-Identifier: Apache-2.0
4 |
5 | import argparse
6 | import openvino
7 | import openvino_genai
8 | import numpy as np
9 |
10 | from PIL import Image
11 |
12 | def read_image(path: str) -> openvino.Tensor:
13 | pic = Image.open(path).convert("RGB")
14 | image_data = np.array(pic)[None]
15 | return openvino.Tensor(image_data)
16 |
17 | def main():
18 | parser = argparse.ArgumentParser()
19 | parser.add_argument('model_dir')
20 | parser.add_argument('prompt')
21 | parser.add_argument('image')
22 | parser.add_argument('mask')
23 | args = parser.parse_args()
24 |
25 | device = 'CPU' # GPU can be used as well
26 | pipe = openvino_genai.InpaintingPipeline(args.model_dir, device)
27 |
28 | image = read_image(args.image)
29 | mask_image = read_image(args.mask)
30 |
31 | image_tensor = pipe.generate(args.prompt, image, mask_image)
32 |
33 | image = Image.fromarray(image_tensor.data[0])
34 | image.save("image.bmp")
35 |
36 |
37 | if '__main__' == __name__:
38 | main()
39 |
--------------------------------------------------------------------------------
/samples/python/image_generation/text2image.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (C) 2024 Intel Corporation
3 | # SPDX-License-Identifier: Apache-2.0
4 |
5 | import argparse
6 |
7 | import openvino_genai
8 | from PIL import Image
9 |
10 |
11 | def main():
12 | parser = argparse.ArgumentParser()
13 | parser.add_argument('model_dir')
14 | parser.add_argument('prompt')
15 | args = parser.parse_args()
16 |
17 | device = 'CPU' # GPU can be used as well
18 | pipe = openvino_genai.Text2ImagePipeline(args.model_dir, device)
19 |
20 | image_tensor = pipe.generate(
21 | args.prompt,
22 | width=512,
23 | height=512,
24 | num_inference_steps=20,
25 | num_images_per_prompt=1)
26 |
27 | image = Image.fromarray(image_tensor.data[0])
28 | image.save("image.bmp")
29 |
30 |
31 | if '__main__' == __name__:
32 | main()
--------------------------------------------------------------------------------
/samples/python/rag/text_embeddings.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (C) 2025 Intel Corporation
3 | # SPDX-License-Identifier: Apache-2.0
4 |
5 | import argparse
6 | import openvino_genai
7 |
8 |
9 | def main():
10 | parser = argparse.ArgumentParser()
11 | parser.add_argument("model_dir")
12 | parser.add_argument("texts", nargs="+")
13 | args = parser.parse_args()
14 |
15 | device = "CPU" # GPU can be used as well
16 |
17 | config = openvino_genai.TextEmbeddingPipeline.Config()
18 | config.pooling_type = openvino_genai.TextEmbeddingPipeline.PoolingType.MEAN
19 |
20 | pipeline = openvino_genai.TextEmbeddingPipeline(args.model_dir, device, config)
21 |
22 | text_embeddings = pipeline.embed_documents(args.texts)
23 | query_embeddings = pipeline.embed_query("What is the capital of France?")
24 |
25 |
26 | if "__main__" == __name__:
27 | main()
28 |
--------------------------------------------------------------------------------
/samples/python/rag/text_rerank.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (C) 2025 Intel Corporation
3 | # SPDX-License-Identifier: Apache-2.0
4 |
5 | import argparse
6 | import openvino_genai
7 |
8 |
9 | def main():
10 | parser = argparse.ArgumentParser()
11 | parser.add_argument("model_dir")
12 | parser.add_argument("query")
13 | parser.add_argument("texts", nargs="+")
14 | args = parser.parse_args()
15 |
16 | device = "CPU" # GPU can be used as well
17 |
18 | config = openvino_genai.TextRerankPipeline.Config()
19 | config.top_n = 3
20 |
21 | pipeline = openvino_genai.TextRerankPipeline(args.model_dir, device, config)
22 |
23 | rerank_result = pipeline.rerank(args.query, args.texts)
24 |
25 | print("Reranked documents:")
26 | for index, score in rerank_result:
27 | print(f"Document {index} (score: {score:.4f}): {args.texts[index]}")
28 |
29 |
30 | if __name__ == "__main__":
31 | main()
32 |
--------------------------------------------------------------------------------
/samples/python/text_generation/beam_search_causal_lm.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (C) 2024 Intel Corporation
3 | # SPDX-License-Identifier: Apache-2.0
4 |
5 | import argparse
6 | import openvino_genai
7 |
8 |
9 | def main():
10 | parser = argparse.ArgumentParser()
11 | parser.add_argument('model_dir')
12 | parser.add_argument('prompts', nargs='+')
13 | args = parser.parse_args()
14 |
15 | device = 'CPU' # GPU can be used as well
16 | pipe = openvino_genai.LLMPipeline(args.model_dir, device)
17 |
18 | config = openvino_genai.GenerationConfig()
19 | config.max_new_tokens = 20
20 | config.num_beam_groups = 3
21 | config.num_beams = 15
22 | config.diversity_penalty = 1
23 | config.num_return_sequences = config.num_beams
24 |
25 | beams = pipe.generate(args.prompts, config)
26 | print(beams)
27 |
28 |
29 | if '__main__' == __name__:
30 | main()
31 |
--------------------------------------------------------------------------------
/samples/python/text_generation/greedy_causal_lm.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (C) 2024 Intel Corporation
3 | # SPDX-License-Identifier: Apache-2.0
4 |
5 | import argparse
6 | import openvino_genai
7 |
8 |
9 | def main():
10 | parser = argparse.ArgumentParser()
11 | parser.add_argument('model_dir')
12 | parser.add_argument('prompt')
13 | args = parser.parse_args()
14 |
15 | device = 'CPU' # GPU can be used as well
16 | pipe = openvino_genai.LLMPipeline(args.model_dir, device)
17 |
18 | config = openvino_genai.GenerationConfig()
19 | config.max_new_tokens = 100
20 |
21 | print(pipe.generate(args.prompt, config))
22 |
23 |
24 | if '__main__' == __name__:
25 | main()
26 |
--------------------------------------------------------------------------------
/samples/python/text_generation/lora_greedy_causal_lm.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (C) 2024 Intel Corporation
3 | # SPDX-License-Identifier: Apache-2.0
4 |
5 | import argparse
6 | import openvino_genai
7 |
8 |
9 | def main():
10 | parser = argparse.ArgumentParser()
11 | parser.add_argument('models_path')
12 | parser.add_argument('adapter_path')
13 | parser.add_argument('prompt')
14 | args = parser.parse_args()
15 |
16 | device = 'CPU' # GPU can be used as well
17 | adapter = openvino_genai.Adapter(args.adapter_path)
18 | adapter_config = openvino_genai.AdapterConfig(adapter)
19 | pipe = openvino_genai.LLMPipeline(args.models_path, device, adapters=adapter_config) # register all required adapters here
20 |
21 | print("Generate with LoRA adapter and alpha set to 0.75:")
22 | print(pipe.generate(args.prompt, max_new_tokens=100, adapters=openvino_genai.AdapterConfig(adapter, 0.75)))
23 |
24 | print("\n-----------------------------")
25 | print("Generate without LoRA adapter:")
26 | print(pipe.generate(args.prompt, max_new_tokens=100, adapters=openvino_genai.AdapterConfig()))
27 |
28 | if '__main__' == __name__:
29 | main()
30 |
--------------------------------------------------------------------------------
/samples/python/text_generation/prompt_lookup_decoding_lm.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (C) 2024 Intel Corporation
3 | # SPDX-License-Identifier: Apache-2.0
4 |
5 | import argparse
6 | import openvino_genai
7 |
8 | def streamer(subword):
9 | print(subword, end='', flush=True)
10 |     # The returned flag indicates whether generation should be stopped.
11 | return openvino_genai.StreamingStatus.RUNNING
12 |
13 | def main():
14 | parser = argparse.ArgumentParser()
15 | parser.add_argument('model_dir')
16 | parser.add_argument('prompt')
17 | args = parser.parse_args()
18 |
19 | device = 'CPU'
20 |
21 | pipe = openvino_genai.LLMPipeline(args.model_dir, device, prompt_lookup=True)
22 |
23 | config = openvino_genai.GenerationConfig()
24 | config.max_new_tokens = 100
25 |     # Set `num_assistant_tokens` to enable prompt lookup decoding and generate that many candidates per iteration
26 |     config.num_assistant_tokens = 5
27 |     # `max_ngram_size` limits the length of the prompt n-grams used to look up candidates
28 |     config.max_ngram_size = 3
29 |
30 | # Since the streamer is set, the results will be printed
31 | # every time a new token is generated and put into the streamer queue.
32 | pipe.generate(args.prompt, config, streamer)
33 | print()
34 |
35 | if '__main__' == __name__:
36 | main()
37 |
--------------------------------------------------------------------------------
/samples/requirements.txt:
--------------------------------------------------------------------------------
1 | -r ./deployment-requirements.txt
2 | -r ./export-requirements.txt
3 | pydantic
--------------------------------------------------------------------------------
/site/.editorconfig:
--------------------------------------------------------------------------------
1 | # http://editorconfig.org
2 |
3 | root = false
4 |
5 | [*]
6 | charset = utf-8
7 | end_of_line = lf
8 | insert_final_newline = true
9 | indent_style = space
10 | indent_size = 2
11 | max_line_length = 100
12 | trim_trailing_whitespace = true
13 |
14 | [*.md]
15 | insert_final_newline = true
16 | trim_trailing_whitespace = false
17 |
--------------------------------------------------------------------------------
/site/.gitignore:
--------------------------------------------------------------------------------
1 | # Dependencies
2 | /node_modules
3 |
4 | # Production
5 | /build
6 |
7 | # Generated files
8 | .docusaurus
9 | .cache-loader
10 |
11 | # Misc
12 | .DS_Store
13 | .env.local
14 | .env.development.local
15 | .env.test.local
16 | .env.production.local
17 |
18 | npm-debug.log*
19 | yarn-debug.log*
20 | yarn-error.log*
21 |
22 | # Generated docs files for samples
23 | /docs/samples/*/
24 | !/docs/samples/_*/
25 | # !/docs/samples/index.mdx
26 | # !/docs/samples/_category_.json
27 |
--------------------------------------------------------------------------------
/site/.prettierignore:
--------------------------------------------------------------------------------
1 | dist
2 | node_modules
3 | .yarn
4 | build
5 | coverage
6 | .docusaurus
7 | .idea
8 |
9 | .svg
10 | *.svg
11 |
12 | *.mdx
13 |
--------------------------------------------------------------------------------
/site/.prettierrc:
--------------------------------------------------------------------------------
1 | {
2 | "printWidth": 100,
3 | "trailingComma": "es5",
4 | "useTabs": false,
5 | "tabWidth": 2,
6 | "semi": true,
7 | "bracketSpacing": true,
8 | "singleQuote": true,
9 | "arrowParens": "always"
10 | }
11 |
--------------------------------------------------------------------------------
/site/README.md:
--------------------------------------------------------------------------------
1 | # Website
2 |
3 | This website is built using [Docusaurus](https://docusaurus.io/), a modern static website generator.
4 |
5 | ### Installation
6 |
7 | ```
8 | $ npm i
9 | ```
10 |
11 | ### Local Development
12 |
13 | ```
14 | $ npm run start
15 | ```
16 |
17 | This command starts a local development server and opens up a browser window. Most changes are reflected live without having to restart the server.
18 |
19 | ### Build
20 |
21 | ```
22 | $ npm run build
23 | ```
24 |
25 | This command generates static content into the `build` directory and can be served using any static content hosting service.
26 |
27 | ### Deployment
28 |
29 | Using SSH:
30 |
31 | ```
32 | $ USE_SSH=true npm run deploy
33 | ```
34 |
35 | Not using SSH:
36 |
37 | ```
38 | $ GIT_USER=<Your GitHub username> npm run deploy
39 | ```
40 |
41 | If you are using GitHub Pages for hosting, this command is a convenient way to build the website and push to the `gh-pages` branch.
42 |
--------------------------------------------------------------------------------
/site/docs/concepts/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 | "label": "Concepts",
3 | "position": 6,
4 | "link": {
5 | "type": "generated-index",
6 |     "description": "Concepts of OpenVINO GenAI."
7 | }
8 | }
--------------------------------------------------------------------------------
/site/docs/concepts/beam-search.md:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 3
3 | ---
4 |
5 | # Beam Search
6 |
7 | > **Note:** This page is a work in progress.
8 |
--------------------------------------------------------------------------------
/site/docs/concepts/optimization-techniques/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 | "label": "Optimization techniques",
3 | "position": 4,
4 | "link": {
5 | "type": "generated-index",
6 |     "description": "Optimization techniques for OpenVINO GenAI."
7 | }
8 | }
--------------------------------------------------------------------------------
/site/docs/concepts/optimization-techniques/continuous-batching.md:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 3
3 | ---
4 |
5 | # Continuous Batching
6 |
7 | > **Note:** This page is a work in progress.
8 |
--------------------------------------------------------------------------------
/site/docs/concepts/optimization-techniques/prefix-caching.md:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 4
3 | ---
4 |
5 | # Prefix Caching
6 |
7 | > **Note:** This page is a work in progress.
8 |
--------------------------------------------------------------------------------
/site/docs/concepts/optimization-techniques/speculative-decoding.md:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 1
3 | ---
4 |
5 | # Speculative Decoding
6 |
7 | > **Note:** This page is a work in progress.
8 |
--------------------------------------------------------------------------------
/site/docs/getting-started/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 | "label": "Getting Started",
3 | "position": 1,
4 | "link": {
5 | "type": "generated-index",
6 | "description": "Getting started guide for OpenVINO GenAI"
7 | }
8 | }
9 |
--------------------------------------------------------------------------------
/site/docs/guides/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 | "label": "Guides",
3 | "position": 3,
4 | "link": null
5 | }
6 |
--------------------------------------------------------------------------------
/site/docs/guides/model-preparation/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 | "label": "Model Preparation",
3 | "position": 1,
4 | "link": {
5 | "type": "generated-index",
6 | "description": "Prepare generative models for inference with OpenVINO GenAI."
7 | }
8 | }
9 |
--------------------------------------------------------------------------------
/site/docs/guides/model-preparation/_use_cases_note.mdx:
--------------------------------------------------------------------------------
1 | :::info
2 |
3 | Refer to the [Use Cases](/docs/category/use-cases) for detailed instructions on using models with OpenVINO GenAI.
4 |
5 | :::
6 |
--------------------------------------------------------------------------------
/site/docs/samples/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 | "label": "Samples",
3 | "position": 5,
4 | "link": {
5 | "type": "doc",
6 | "id": "samples/index"
7 | }
8 | }
9 |
--------------------------------------------------------------------------------
/site/docs/samples/_components/samples-list/index.tsx:
--------------------------------------------------------------------------------
1 | import Link from '@docusaurus/Link';
2 | import { usePluginData } from '@docusaurus/useGlobalData';
3 | import { type GenAISamples } from '@site/src/plugins/genai-samples-docs-plugin';
4 | import Heading from '@theme/Heading';
5 | import React from 'react';
6 |
7 | function SamplesListItem({
8 | item: { language, name, githubLink },
9 | }: {
10 | item: GenAISamples[string][number];
11 | }): React.JSX.Element {
12 | return (
13 | <li>
14 | <Link href={`./${language}/${name}`}>{name}</Link> (<Link href={githubLink}>GitHub</Link>)
15 | </li>
16 | );
17 | }
18 |
19 | export default function SamplesList(): React.JSX.Element {
20 | const samplesMap = usePluginData('genai-samples-docs-plugin') as GenAISamples;
21 |
22 | return (
23 | <>
24 | {Object.entries(samplesMap)
25 | .sort(([a], [b]) => a.localeCompare(b))
26 | .map(([language, samples]) => (
27 | <div key={language}>
28 | <Heading as="h2">{samples[0]?.languageTitle}</Heading>
29 | <ul>
30 | {samples
31 | .sort((a, b) => a.name.localeCompare(b.name))
32 | .map((sample) => (
33 | <SamplesListItem key={`${language}-${sample.name}`} item={sample} />
34 | ))}
35 | </ul>
36 | </div>
37 | ))}
38 | </>
39 | );
40 | }
41 |
--------------------------------------------------------------------------------
/site/docs/samples/index.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 1
3 | ---
4 |
5 | import SamplesList from './_components/samples-list';
6 |
7 | # OpenVINO GenAI Samples
8 |
9 | <SamplesList />
10 |
--------------------------------------------------------------------------------
/site/docs/supported-models/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 | "label": "Supported Models",
3 | "position": 4,
4 | "link": {
5 | "type": "doc",
6 | "id": "supported-models/index"
7 | }
8 | }
9 |
--------------------------------------------------------------------------------
/site/docs/supported-models/_components/base-models-table/index.tsx:
--------------------------------------------------------------------------------
1 | import Link from '@docusaurus/Link';
2 | import { Children } from 'react';
3 |
4 | type BaseModelsTableProps = {
5 | headers: string[];
6 | rows: React.JSX.Element[];
7 | };
8 |
9 | export function BaseModelsTable({ headers, rows }: BaseModelsTableProps): React.JSX.Element {
10 | return (
11 | <table>
12 | <thead>
13 | <tr>
14 | {headers.map((v) => (
15 | <th key={v}>{v}</th>
16 | ))}
17 | </tr>
18 | </thead>
19 | <tbody style={{ verticalAlign: 'baseline' }}>{Children.map(rows, (row) => row)}</tbody>
20 | </table>
21 | );
22 | }
23 |
24 | export const LinksCell = ({ links }: { links: string[] }) => (
25 | <td>
26 | <ul>
27 | {links.map((link) => (
28 | <li key={link}>
29 | <Link href={link}>{new URL(link).pathname.slice(1)}</Link>
30 | </li>
31 | ))}
32 | </ul>
33 | </td>
34 | );
35 |
36 | export const StatusCell = ({ value }: { value: boolean }) => (
37 | <td style={{ textAlign: 'center' }}>{value ? '✅' : '❌'}</td>
38 | );
39 |
--------------------------------------------------------------------------------
/site/docs/supported-models/_components/image-generation-models-table/index.tsx:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import { BaseModelsTable, LinksCell, StatusCell } from '../base-models-table';
3 | import { IMAGE_GENERATION_MODELS } from './models';
4 |
5 | export default function ImageGenerationModelsTable(): React.JSX.Element {
6 | const headers = [
7 | 'Architecture',
8 | 'Text to Image',
9 | 'Image to Image',
10 | 'Inpainting',
11 | 'LoRA Support',
12 | 'Example HuggingFace Models',
13 | ];
14 |
15 | const rows = IMAGE_GENERATION_MODELS.map(
16 | ({ architecture, textToImage, imageToImage, inpainting, loraSupport, links }) => (
17 | <tr key={architecture}>
18 | <td>
19 | <code style={{ whiteSpace: 'pre' }}>{architecture}</code>
20 | </td>
21 | <StatusCell value={textToImage} />
22 | <StatusCell value={imageToImage} />
23 | <StatusCell value={inpainting} />
24 | <StatusCell value={loraSupport} />
25 | <LinksCell links={links} />
26 | </tr>
27 | )
28 | );
29 |
30 | return <BaseModelsTable headers={headers} rows={rows} />;
31 | }
32 |
--------------------------------------------------------------------------------
/site/docs/supported-models/_components/llm-models-table/index.tsx:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import { BaseModelsTable, LinksCell } from '../base-models-table';
3 | import { LLM_MODELS } from './models';
4 |
5 | export default function LLMModelsTable(): React.JSX.Element {
6 | const headers = ['Architecture', 'Models', 'Example HuggingFace Models'];
7 |
8 | const rows = LLM_MODELS.map(({ architecture, models }) => (
9 | <>
10 | <tr key={architecture}>
11 | <td rowSpan={models.length}>
12 | <code>{architecture}</code>
13 | </td>
14 | <td>{models[0].name}</td>
15 | <LinksCell links={models[0].links} />
16 | </tr>
17 | {models.slice(1).map(({ name, links }) => (
18 | <tr key={name}>
19 | <td>{name}</td>
20 | <LinksCell links={links} />
21 | </tr>
22 | ))}
23 | </>
24 | ));
25 |
26 | return <BaseModelsTable headers={headers} rows={rows} />;
27 | }
28 |
--------------------------------------------------------------------------------
/site/docs/supported-models/_components/speech-generation-models-table/index.tsx:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import { BaseModelsTable, LinksCell, StatusCell } from '../base-models-table';
3 | import { SPEECH_GENERATION_MODELS } from './models';
4 |
5 | export default function SpeechGenerationModelsTable(): React.JSX.Element {
6 | const headers = ['Architecture', 'Models', 'LoRA Support', 'Example HuggingFace Models'];
7 |
8 | const rows = SPEECH_GENERATION_MODELS.map(({ architecture, models }) => (
9 | <>
10 | <tr key={architecture}>
11 | <td rowSpan={models.length}>
12 | <code>{architecture}</code>
13 | </td>
14 | <td>{models[0].name}</td>
15 | <StatusCell value={models[0].loraSupport} />
16 | <LinksCell links={models[0].links} />
17 | </tr>
18 | {models.slice(1).map(({ name, loraSupport, links }) => (
19 | <tr key={name}>
20 | <td>{name}</td>
21 | <StatusCell value={loraSupport} />
22 | <LinksCell links={links} />
23 | </tr>
24 | ))}
25 | </>
26 | ));
27 |
28 | return <BaseModelsTable headers={headers} rows={rows} />;
29 | }
30 |
--------------------------------------------------------------------------------
/site/docs/supported-models/_components/speech-generation-models-table/models.ts:
--------------------------------------------------------------------------------
1 | type SpeechGenerationModelType = {
2 | architecture: string;
3 | models: Array<{
4 | name: string;
5 | loraSupport: boolean;
6 | links: string[];
7 | }>;
8 | };
9 |
10 | export const SPEECH_GENERATION_MODELS: SpeechGenerationModelType[] = [
11 | {
12 | architecture: 'SpeechT5ForTextToSpeech',
13 | models: [
14 | {
15 | name: 'SpeechT5 TTS',
16 | loraSupport: false,
17 | links: ['https://huggingface.co/microsoft/speecht5_tts'],
18 | },
19 | ],
20 | },
21 | ];
22 |
--------------------------------------------------------------------------------
/site/docs/supported-models/_components/text-embeddings-models-table/index.tsx:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import { BaseModelsTable, LinksCell } from '../base-models-table';
3 | import { TEXT_EMBEDDINGS_MODELS } from './models';
4 |
5 | export default function TextEmbeddingsModelsTable(): React.JSX.Element {
6 | const headers = ['Architecture', 'Example HuggingFace Models'];
7 |
8 | const rows = TEXT_EMBEDDINGS_MODELS.map(({ architecture, models }) => (
9 | <tr key={architecture}>
10 | <td>
11 | <code>{architecture}</code>
12 | </td>
13 | <LinksCell links={models[0].links} />
14 | </tr>
15 | ));
16 |
17 | return <BaseModelsTable headers={headers} rows={rows} />;
18 | }
19 |
--------------------------------------------------------------------------------
/site/docs/supported-models/_components/text-rerank-models-table/index.tsx:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import { TEXT_RERANK_MODELS } from './models';
3 | import { BaseModelsTable, LinksCell } from '../base-models-table';
4 |
5 | export default function TextRerankModelsTable(): React.JSX.Element {
6 | const headers = ['Architecture', '`optimum-cli` task', 'Example HuggingFace Models'];
7 |
8 | const rows = TEXT_RERANK_MODELS.map(({ architecture, optimumIntelTask, models }) => (
9 | <>
10 | <tr key={architecture}>
11 | <td rowSpan={models.length}>
12 | <code>{architecture}</code>
13 | </td>
14 | <td rowSpan={models.length}>
15 | <code>{optimumIntelTask}</code>
16 | </td>
17 | <LinksCell links={models[0].links} />
18 | </tr>
19 | </>
20 | ));
21 |
22 | return <BaseModelsTable headers={headers} rows={rows} />;
23 | }
24 |
--------------------------------------------------------------------------------
/site/docs/supported-models/_components/vlm-models-table/index.tsx:
--------------------------------------------------------------------------------
1 | import Link from '@docusaurus/Link';
2 | import React from 'react';
3 | import { BaseModelsTable, LinksCell, StatusCell } from '../base-models-table';
4 | import { VLM_MODELS } from './models';
5 |
6 | export default function VLMModelsTable(): React.JSX.Element {
7 | const headers = ['Architecture', 'Models', 'LoRA Support', 'Example HuggingFace Models'];
8 |
9 | const rows = VLM_MODELS.map(({ architecture, models }) => (
10 | <>
11 | <tr key={architecture}>
12 | <td rowSpan={models.length}>
13 | <code>{architecture}</code>
14 | </td>
15 | <td>
16 | {models[0].name}
17 | {models[0].notesLink && (
18 | <>
19 | (<Link href={models[0].notesLink}>Notes</Link>)
20 | </>
21 | )}
22 | </td>
23 | <StatusCell value={models[0].loraSupport} />
24 | <LinksCell links={models[0].links} />
25 | </tr>
26 | {models.slice(1).map(({ name, loraSupport, links }) => (
27 | <tr key={name}>
28 | <td>{name}</td>
29 | <StatusCell value={loraSupport} />
30 | <LinksCell links={links} />
31 | </tr>
32 | ))}
33 | </>
34 | ));
35 |
36 | return <BaseModelsTable headers={headers} rows={rows} />;
37 | }
38 |
--------------------------------------------------------------------------------
/site/docs/supported-models/_components/whisper-models-table/index.tsx:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import { BaseModelsTable, LinksCell, StatusCell } from '../base-models-table';
3 | import { WHISPER_MODELS } from './models';
4 |
5 | export default function WhisperModelsTable(): React.JSX.Element {
6 | const headers = ['Architecture', 'Models', 'LoRA Support', 'Example HuggingFace Models'];
7 |
8 | const rows = WHISPER_MODELS.map(({ architecture, models }) => (
9 | <>
10 | <tr key={architecture}>
11 | <td rowSpan={models.length}>
12 | <code>{architecture}</code>
13 | </td>
14 | <td>{models[0].name}</td>
15 | <StatusCell value={models[0].loraSupport} />
16 | <LinksCell links={models[0].links} />
17 | </tr>
18 | {models.slice(1).map(({ name, loraSupport, links }) => (
19 | <tr key={name}>
20 | <td>{name}</td>
21 | <StatusCell value={loraSupport} />
22 | <LinksCell links={links} />
23 | </tr>
24 | ))}
25 | </>
26 | ));
27 |
28 | return <BaseModelsTable headers={headers} rows={rows} />;
29 | }
30 |
--------------------------------------------------------------------------------
/site/docs/supported-models/_components/whisper-models-table/models.ts:
--------------------------------------------------------------------------------
1 | type WhisperModelType = {
2 | architecture: string;
3 | models: Array<{
4 | name: string;
5 | loraSupport: boolean;
6 | links: string[];
7 | }>;
8 | };
9 |
10 | export const WHISPER_MODELS: WhisperModelType[] = [
11 | {
12 | architecture: 'WhisperForConditionalGeneration',
13 | models: [
14 | {
15 | name: 'Whisper',
16 | loraSupport: false,
17 | links: [
18 | 'https://huggingface.co/openai/whisper-tiny',
19 | 'https://huggingface.co/openai/whisper-tiny.en',
20 | 'https://huggingface.co/openai/whisper-base',
21 | 'https://huggingface.co/openai/whisper-base.en',
22 | 'https://huggingface.co/openai/whisper-small',
23 | 'https://huggingface.co/openai/whisper-small.en',
24 | 'https://huggingface.co/openai/whisper-medium',
25 | 'https://huggingface.co/openai/whisper-medium.en',
26 | 'https://huggingface.co/openai/whisper-large-v3',
27 | ],
28 | },
29 | {
30 | name: 'Distil-Whisper',
31 | loraSupport: false,
32 | links: [
33 | 'https://huggingface.co/distil-whisper/distil-small.en',
34 | 'https://huggingface.co/distil-whisper/distil-medium.en',
35 | 'https://huggingface.co/distil-whisper/distil-large-v3',
36 | ],
37 | },
38 | ],
39 | },
40 | ];
41 |
--------------------------------------------------------------------------------
/site/docs/use-cases/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 | "label": "Use Cases",
3 | "position": 2,
4 | "link": {
5 | "type": "generated-index",
6 |     "description": "OpenVINO GenAI provides support for the following use cases"
7 | }
8 | }
9 |
--------------------------------------------------------------------------------
/site/docs/use-cases/_shared/_beam_search_generation.mdx:
--------------------------------------------------------------------------------
1 | #### Optimizing Generation with Grouped Beam Search
2 |
3 | Beam search helps explore multiple possible text completions simultaneously, often leading to higher quality outputs.
4 |
5 | {/* Python and C++ code examples */}
6 | {props.children}
7 |
8 | :::info Understanding Beam Search Generation Parameters
9 |
10 | - `max_new_tokens`: The maximum number of tokens to generate, excluding the number of tokens in the prompt. `max_new_tokens` has priority over `max_length`.
11 | - `num_beams`: The number of beams for beam search. 1 disables beam search.
12 | - `num_beam_groups`: The number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams.
13 | - `diversity_penalty`: This value is subtracted from a beam's score if it generates the same token as any beam from another group at a particular time step.
14 |
15 | For the full list of generation parameters, refer to the [Generation Config API](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.GenerationConfig.html#openvino-genai-generationconfig).
16 |
17 | :::
18 |
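19 | For reference, a minimal Python sketch of a grouped beam search configuration (assuming `openvino_genai` is imported and an `LLMPipeline` named `pipe` plus a `prompt` are already defined) could look like:
20 | 
21 | ```python
22 | config = openvino_genai.GenerationConfig()
23 | config.max_new_tokens = 20                       # cap on generated tokens
24 | config.num_beams = 15                            # total number of beams
25 | config.num_beam_groups = 3                       # split the beams into 3 diverse groups
26 | config.diversity_penalty = 1.0                   # penalize tokens repeated across groups
27 | config.num_return_sequences = config.num_beams   # return every beam
28 | 
29 | print(pipe.generate(prompt, config))
30 | ```
31 | 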
--------------------------------------------------------------------------------
/site/docs/use-cases/_shared/_chat_scenario.mdx:
--------------------------------------------------------------------------------
1 | ### Use OpenVINO GenAI in Chat Scenario
2 |
3 | Refer to the [Chat Scenario](/docs/guides/chat-scenario) guide for more information on using OpenVINO GenAI in chat applications.
4 |
--------------------------------------------------------------------------------
/site/docs/use-cases/_shared/_convert_model.mdx:
--------------------------------------------------------------------------------
1 | ## Convert and Optimize Model
2 |
3 | {/* optimum-cli export code examples */}
4 | {props.children}
5 |
6 | :::info
7 | Refer to the [Model Preparation](/docs/category/model-preparation) guide for detailed instructions on how to download, convert and optimize models for OpenVINO GenAI.
8 | :::
9 |
--------------------------------------------------------------------------------
/site/docs/use-cases/_shared/_generation_configuration_workflow.mdx:
--------------------------------------------------------------------------------
1 | #### Generation Configuration Workflow
2 |
3 | 1. Get the model default config with `get_generation_config()`
4 | 2. Modify parameters
5 | 3. Apply the updated config using one of the following methods:
6 | - Use `set_generation_config(config)`
7 | - Pass config directly to `generate()` (e.g. `generate(prompt, config)`)
8 | - Specify options as inputs in the `generate()` method (e.g. `generate(prompt, max_new_tokens=100)`)
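9 | 
10 | For example, a minimal Python sketch of this workflow (assuming a pipeline `pipe` and a `prompt` are already defined) could look like:
11 | 
12 | ```python
13 | config = pipe.get_generation_config()   # 1. get the model's default config
14 | config.max_new_tokens = 100              # 2. modify parameters
15 | 
16 | # 3. apply the updated config using any of the following:
17 | pipe.set_generation_config(config)                    # set it on the pipeline
18 | result = pipe.generate(prompt, config)                 # or pass the config to generate()
19 | result = pipe.generate(prompt, max_new_tokens=100)     # or specify options directly in generate()
20 | ```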
9 |
--------------------------------------------------------------------------------
/site/docs/use-cases/_shared/_streaming.mdx:
--------------------------------------------------------------------------------
1 | ### Streaming the Output
2 |
3 | Refer to the [Streaming](/docs/guides/streaming) guide for more information on streaming the output with OpenVINO GenAI.
4 |
--------------------------------------------------------------------------------
/site/docs/use-cases/image-generation/_sections/_run_model/_image2image_cpp.mdx:
--------------------------------------------------------------------------------
1 | import CodeBlock from '@theme/CodeBlock';
2 |
3 | <CodeBlock language="cpp" showLineNumbers>
4 | {`#include "openvino/genai/image_generation/image2image_pipeline.hpp"
5 | #include "load_image.hpp"
6 | #include "imwrite.hpp"
7 |
8 | int main(int argc, char* argv[]) {
9 | const std::string models_path = argv[1], prompt = argv[2], image_path = argv[3];
10 |
11 | ov::Tensor input_image = utils::load_image(image_path);
12 |
13 | ov::genai::Image2ImagePipeline pipe(models_path, "${props.device || 'CPU'}");
14 | ov::Tensor generated_image = pipe.generate(prompt, input_image, ov::genai::strength(0.8f));
15 |
16 | imwrite("image.bmp", generated_image, true);
17 | }
18 | `}
19 | </CodeBlock>
20 |
--------------------------------------------------------------------------------
/site/docs/use-cases/image-generation/_sections/_run_model/_image2image_python.mdx:
--------------------------------------------------------------------------------
1 | import CodeBlock from '@theme/CodeBlock';
2 |
3 | <CodeBlock language="python" showLineNumbers>
4 | {`import openvino_genai as ov_genai
5 | import openvino as ov
6 | from PIL import Image
7 | import numpy as np
8 |
9 | def read_image(path: str) -> ov.Tensor:
10 | pic = Image.open(path).convert("RGB")
11 | image_data = np.array(pic)[None]
12 | return ov.Tensor(image_data)
13 |
14 | input_image_data = read_image("input_image.jpg")
15 |
16 | pipe = ov_genai.Image2ImagePipeline(model_path, "${props.device || 'CPU'}")
17 | image_tensor = pipe.generate(prompt, image=input_image_data, strength=0.8)
18 |
19 | image = Image.fromarray(image_tensor.data[0])
20 | image.save("image.bmp")
21 | `}
22 | </CodeBlock>
23 |
--------------------------------------------------------------------------------
/site/docs/use-cases/image-generation/_sections/_run_model/_inpainting_cpp.mdx:
--------------------------------------------------------------------------------
1 | import CodeBlock from '@theme/CodeBlock';
2 |
3 | <CodeBlock language="cpp" showLineNumbers>
4 | {`#include "openvino/genai/image_generation/inpainting_pipeline.hpp"
5 | #include "load_image.hpp"
6 | #include "imwrite.hpp"
7 |
8 | int main(int argc, char* argv[]) {
9 | const std::string models_path = argv[1], prompt = argv[2];
10 |
11 | ov::Tensor input_image = utils::load_image(argv[3]);
12 | ov::Tensor mask_image = utils::load_image(argv[4]);
13 |
14 | ov::genai::InpaintingPipeline pipe(models_path, "${props.device || 'CPU'}");
15 | ov::Tensor generated_image = pipe.generate(prompt, input_image, mask_image);
16 |
17 | imwrite("image.bmp", generated_image, true);
18 | }
19 | `}
20 | </CodeBlock>
21 |
--------------------------------------------------------------------------------
/site/docs/use-cases/image-generation/_sections/_run_model/_inpainting_python.mdx:
--------------------------------------------------------------------------------
1 | import CodeBlock from '@theme/CodeBlock';
2 |
3 | <CodeBlock language="python" showLineNumbers>
4 | {`import openvino_genai as ov_genai
5 | import openvino as ov
6 | from PIL import Image
7 | import numpy as np
8 |
9 | def read_image(path: str) -> ov.Tensor:
10 | pic = Image.open(path).convert("RGB")
11 | image_data = np.array(pic)[None]
12 | return ov.Tensor(image_data)
13 |
14 | input_image_data = read_image("input_image.jpg")
15 | mask_image = read_image("mask.jpg")
16 |
17 | pipe = ov_genai.InpaintingPipeline(model_path, "${props.device || 'CPU'}")
18 | image_tensor = pipe.generate(prompt, image=input_image_data, mask_image=mask_image)
19 |
20 | image = Image.fromarray(image_tensor.data[0])
21 | image.save("image.bmp")
22 | `}
23 | </CodeBlock>
24 |
--------------------------------------------------------------------------------
/site/docs/use-cases/image-generation/_sections/_run_model/_text2image_cpp.mdx:
--------------------------------------------------------------------------------
1 | import CodeBlock from '@theme/CodeBlock';
2 |
3 | <CodeBlock language="cpp" showLineNumbers>
4 | {`#include "openvino/genai/image_generation/text2image_pipeline.hpp"
5 | #include "imwrite.hpp"
6 |
7 | int main(int argc, char* argv[]) {
8 | const std::string models_path = argv[1], prompt = argv[2];
9 |
10 | ov::genai::Text2ImagePipeline pipe(models_path, "${props.device || 'CPU'}");
11 | ov::Tensor image = pipe.generate(prompt);
12 |
13 | imwrite("image.bmp", image, true);
14 | }
15 | `}
16 | </CodeBlock>
17 |
--------------------------------------------------------------------------------
/site/docs/use-cases/image-generation/_sections/_run_model/_text2image_python.mdx:
--------------------------------------------------------------------------------
1 | import CodeBlock from '@theme/CodeBlock';
2 |
3 | <CodeBlock language="python" showLineNumbers>
4 | {`import openvino_genai as ov_genai
5 | from PIL import Image
6 |
7 | pipe = ov_genai.Text2ImagePipeline(model_path, "${props.device || 'CPU'}")
8 | image_tensor = pipe.generate(prompt)
9 |
10 | image = Image.fromarray(image_tensor.data[0])
11 | image.save("image.bmp")
12 | `}
13 | </CodeBlock>
14 |
--------------------------------------------------------------------------------
/site/docs/use-cases/image-generation/index.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 2
3 | ---
4 | import OptimumCLI from '@site/src/components/OptimumCLI';
5 | import ConvertModelSection from '../_shared/_convert_model.mdx';
6 | import RunModelSection from './_sections/_run_model/index.mdx';
7 | import UsageOptionsSection from './_sections/_usage_options/index.mdx';
8 |
9 | # Image Generation Using Diffusers
10 |
11 | <ConvertModelSection>
12 | Download and convert model (e.g. [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)) to OpenVINO format from Hugging Face:
13 |
14 | <OptimumCLI model='stabilityai/stable-diffusion-xl-base-1.0' outputDir='stable_diffusion_xl_base_1_0_ov' weightFormat='int4' trustRemoteCode />
15 |
16 | See all supported [Image Generation Models](/docs/supported-models/#image-generation-models).
17 | </ConvertModelSection>
18 |
19 | <RunModelSection />
20 |
21 | <UsageOptionsSection />
22 |
--------------------------------------------------------------------------------
/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_cpp.mdx:
--------------------------------------------------------------------------------
1 | import CodeBlock from '@theme/CodeBlock';
2 |
3 | <CodeBlock language="cpp" showLineNumbers>
4 | {`#include "openvino/genai/visual_language/pipeline.hpp"
5 | #include "load_image.hpp"
6 | #include <iostream>
7 |
8 | int main(int argc, char* argv[]) {
9 |     std::string models_path = argv[1], images_path = argv[2], prompt = argv[3];
10 | std::vector<ov::Tensor> images = utils::load_images(images_path);
11 |
12 | ov::genai::VLMPipeline pipe(models_path, "${props.device || 'CPU'}");
13 | ov::genai::VLMDecodedResults result = pipe.generate(
14 | prompt,
15 | ov::genai::images(images),
16 | ov::genai::max_new_tokens(100)
17 | );
18 | std::cout << result.texts[0] << std::endl;
19 | }
20 | `}
21 | </CodeBlock>
22 |
--------------------------------------------------------------------------------
/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_python.mdx:
--------------------------------------------------------------------------------
1 | import CodeBlock from '@theme/CodeBlock';
2 |
3 | <CodeBlock language="python" showLineNumbers>
4 | {`import openvino_genai as ov_genai
5 | import openvino as ov
6 | from PIL import Image
7 | import numpy as np
8 | from pathlib import Path
9 |
10 | def read_image(path: str) -> ov.Tensor:
11 | pic = Image.open(path).convert("RGB")
12 | image_data = np.array(pic)[None]
13 | return ov.Tensor(image_data)
14 |
15 | def read_images(path: str) -> list[ov.Tensor]:
16 | entry = Path(path)
17 | if entry.is_dir():
18 | return [read_image(str(file)) for file in sorted(entry.iterdir())]
19 | return [read_image(path)]
20 |
21 | images = read_images("./images")
22 |
23 | pipe = ov_genai.VLMPipeline(model_path, "${props.device || 'CPU'}")
24 | result = pipe.generate(prompt, images=images, max_new_tokens=100)
25 | print(result.texts[0])
26 | `}
27 | </CodeBlock>
28 |
--------------------------------------------------------------------------------
/site/docs/use-cases/image-processing/_sections/_run_model/index.mdx:
--------------------------------------------------------------------------------
1 | import CodeExampleCPP from './_code_example_cpp.mdx';
2 | import CodeExamplePython from './_code_example_python.mdx';
3 |
4 | ## Run Model Using OpenVINO GenAI
5 |
6 | OpenVINO GenAI provides the [`VLMPipeline`](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.VLMPipeline.html) API for inference with multimodal, text-generating Vision Language Models (VLMs).
7 | It generates text from a text prompt combined with one or more input images.
8 |
9 | <LanguageTabs>
10 | <TabItemPython>
11 | <Tabs groupId="device">
12 | <TabItem label="CPU" value="cpu">
13 | <CodeExamplePython device="CPU" />
14 | </TabItem>
15 | <TabItem label="GPU" value="gpu">
16 | <CodeExamplePython device="GPU" />
17 | </TabItem>
18 | </Tabs>
19 | </TabItemPython>
20 | <TabItemCpp>
21 | <Tabs groupId="device">
22 | <TabItem label="CPU" value="cpu">
23 | <CodeExampleCPP device="CPU" />
24 | </TabItem>
25 | <TabItem label="GPU" value="gpu">
26 | <CodeExampleCPP device="GPU" />
27 | </TabItem>
28 | </Tabs>
29 | </TabItemCpp>
30 | </LanguageTabs>
31 |
32 | :::tip
33 |
34 | Use CPU or GPU as devices without any other code change.
35 |
36 | :::
37 |
--------------------------------------------------------------------------------
/site/docs/use-cases/image-processing/index.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 4
3 | ---
4 | import OptimumCLI from '@site/src/components/OptimumCLI';
5 | import ConvertModelSection from '../_shared/_convert_model.mdx';
6 | import RunModelSection from './_sections/_run_model/index.mdx';
7 | import UsageOptionsSection from './_sections/_usage_options/index.mdx';
8 |
9 | # Image Processing Using VLMs
10 |
11 | <ConvertModelSection>
12 | Download and convert model (e.g. [openbmb/MiniCPM-V-2_6](https://huggingface.co/openbmb/MiniCPM-V-2_6)) to OpenVINO format from Hugging Face:
13 |
14 | <OptimumCLI model='openbmb/MiniCPM-V-2_6' outputDir='MiniCPM_V_2_6_ov' weightFormat='int4' trustRemoteCode />
15 |
16 | See all supported [Visual Language Models](/docs/supported-models/#visual-language-models-vlms).
17 | </ConvertModelSection>
18 |
19 | <RunModelSection />
20 |
21 | <UsageOptionsSection />
22 |
--------------------------------------------------------------------------------
/site/docs/use-cases/speech-recognition/_sections/_run_model/_code_example_cpp.mdx:
--------------------------------------------------------------------------------
1 | import CodeBlock from '@theme/CodeBlock';
2 |
3 | <CodeBlock language="cpp" showLineNumbers>
4 | {`#include "openvino/genai/whisper_pipeline.hpp"
5 | #include "audio_utils.hpp"
6 | #include <iostream>
7 |
8 | int main(int argc, char* argv[]) {
9 | std::filesystem::path models_path = argv[1];
10 | std::string wav_file_path = argv[2];
11 |
12 | ov::genai::RawSpeechInput raw_speech = utils::audio::read_wav(wav_file_path);
13 |
14 | ov::genai::WhisperPipeline pipe(models_path, "${props.device || 'CPU'}");
15 | auto result = pipe.generate(raw_speech, ov::genai::max_new_tokens(100));
16 | std::cout << result << std::endl;
17 | }
18 | `}
19 | </CodeBlock>
20 |
--------------------------------------------------------------------------------
/site/docs/use-cases/speech-recognition/_sections/_run_model/_code_example_python.mdx:
--------------------------------------------------------------------------------
1 | import CodeBlock from '@theme/CodeBlock';
2 |
3 | <CodeBlock language="python" showLineNumbers>
4 | {`import openvino_genai as ov_genai
5 | import librosa
6 |
7 | def read_wav(filepath):
8 | raw_speech, samplerate = librosa.load(filepath, sr=16000)
9 | return raw_speech.tolist()
10 |
11 | raw_speech = read_wav('sample.wav')
12 |
13 | pipe = ov_genai.WhisperPipeline(model_path, "${props.device || 'CPU'}")
14 | result = pipe.generate(raw_speech, max_new_tokens=100)
15 | print(result)
16 | `}
17 | </CodeBlock>
18 |
--------------------------------------------------------------------------------
/site/docs/use-cases/speech-recognition/index.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 3
3 | ---
4 | import OptimumCLI from '@site/src/components/OptimumCLI';
5 | import ConvertModelSection from '../_shared/_convert_model.mdx';
6 | import RunModelSection from './_sections/_run_model/index.mdx';
7 | import UsageOptionsSection from './_sections/_usage_options/index.mdx';
8 |
9 | # Speech Recognition Using Whisper
10 |
11 | <ConvertModelSection>
12 | Download and convert model (e.g. [openai/whisper-base](https://huggingface.co/openai/whisper-base)) to OpenVINO format from Hugging Face:
13 |
14 | <OptimumCLI model='openai/whisper-base' outputDir='whisper_ov' trustRemoteCode />
15 |
16 | See all supported [Speech Recognition Models](/docs/supported-models/#speech-recognition-models-whisper-based).
17 | </ConvertModelSection>
18 |
19 | <RunModelSection />
20 |
21 | <UsageOptionsSection />
22 |
--------------------------------------------------------------------------------
/site/docs/use-cases/text-embedding/_sections/_run_model/_code_example_cpp.mdx:
--------------------------------------------------------------------------------
1 | import CodeBlock from '@theme/CodeBlock';
2 |
3 | <CodeBlock language="cpp" showLineNumbers>
4 | {`#include "openvino/genai/rag/text_embedding_pipeline.hpp"
5 |
6 | int main(int argc, char* argv[]) {
7 | auto documents = std::vector<std::string>(argv + 2, argv + argc);
8 | std::string models_path = argv[1];
9 |
10 | ov::genai::TextEmbeddingPipeline pipeline(
11 | models_path,
12 | "${props.device || 'CPU'}",
13 | ov::genai::pooling_type(ov::genai::TextEmbeddingPipeline::PoolingType::MEAN),
14 | ov::genai::normalize(true)
15 | );
16 |
17 | ov::genai::EmbeddingResults documents_embeddings = pipeline.embed_documents(documents);
18 | ov::genai::EmbeddingResult query_embedding = pipeline.embed_query("What is the capital of France?");
19 | }
20 | `}
21 | </CodeBlock>
22 |
--------------------------------------------------------------------------------
/site/docs/use-cases/text-embedding/_sections/_run_model/_code_example_python.mdx:
--------------------------------------------------------------------------------
1 | import CodeBlock from '@theme/CodeBlock';
2 |
3 | <CodeBlock language="python" showLineNumbers>
4 | {`import openvino_genai as ov_genai
5 |
6 | pipeline = ov_genai.TextEmbeddingPipeline(
7 | models_path,
8 | "${props.device || 'CPU'}",
9 | pooling_type = ov_genai.TextEmbeddingPipeline.PoolingType.MEAN,
10 | normalize = True
11 | )
12 |
13 | documents_embeddings = pipeline.embed_documents(documents)
14 | query_embeddings = pipeline.embed_query("What is the capital of France?")
15 | `}
16 | </CodeBlock>
17 |
--------------------------------------------------------------------------------
/site/docs/use-cases/text-embedding/_sections/_run_model/index.mdx:
--------------------------------------------------------------------------------
1 | import CodeExampleCPP from './_code_example_cpp.mdx';
2 | import CodeExamplePython from './_code_example_python.mdx';
3 |
4 | ## Run Model Using OpenVINO GenAI
5 |
6 | [`TextEmbeddingPipeline`](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.TextEmbeddingPipeline.html) generates vector representations for text using embedding models.
7 |
8 | <LanguageTabs>
9 | <TabItemPython>
10 | <Tabs groupId="device">
11 | <TabItem label="CPU" value="cpu">
12 | <CodeExamplePython device="CPU" />
13 | </TabItem>
14 | <TabItem label="GPU" value="gpu">
15 | <CodeExamplePython device="GPU" />
16 | </TabItem>
17 | </Tabs>
18 | </TabItemPython>
19 | <TabItemCpp>
20 | <Tabs groupId="device">
21 | <TabItem label="CPU" value="cpu">
22 | <CodeExampleCPP device="CPU" />
23 | </TabItem>
24 | <TabItem label="GPU" value="gpu">
25 | <CodeExampleCPP device="GPU" />
26 | </TabItem>
27 | </Tabs>
28 | </TabItemCpp>
29 | </LanguageTabs>
30 |
31 | :::tip
32 | Use CPU or GPU as devices without any other code change.
33 | :::
34 |
--------------------------------------------------------------------------------
/site/docs/use-cases/text-embedding/index.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 4
3 | ---
4 | import OptimumCLI from '@site/src/components/OptimumCLI';
5 | import ConvertModelSection from '../_shared/_convert_model.mdx';
6 | import RunModelSection from './_sections/_run_model/index.mdx';
7 | import UsageOptionsSection from './_sections/_usage_options/index.mdx';
8 |
9 | # Semantic Search using Text Embedding
10 |
11 | <ConvertModelSection>
12 | Download and convert a text embedding model (e.g. [BAAI/bge-small-en-v1.5](https://huggingface.co/BAAI/bge-small-en-v1.5)) to OpenVINO format from Hugging Face:
13 |
14 | <OptimumCLI model='BAAI/bge-small-en-v1.5' outputDir='bge-small-en-v1_5_ov' trustRemoteCode />
15 |
16 | See all supported [Text Embedding Models](/docs/supported-models/#text-embeddings-models).
17 | </ConvertModelSection>
18 |
19 | <RunModelSection />
20 |
21 | <UsageOptionsSection />
22 |
--------------------------------------------------------------------------------
/site/docs/use-cases/text-generation/_sections/_run_model/_code_example_cpp.mdx:
--------------------------------------------------------------------------------
1 | import CodeBlock from '@theme/CodeBlock';
2 |
3 | <CodeBlock language="cpp" showLineNumbers>
4 | {`#include "openvino/genai/llm_pipeline.hpp"
5 | #include <iostream>
6 |
7 | int main(int argc, char* argv[]) {
8 | std::string models_path = argv[1];
9 |     ov::genai::LLMPipeline pipe(models_path, "${props.device || 'CPU'}");
10 | std::cout << pipe.generate("What is OpenVINO?", ov::genai::max_new_tokens(100)) << '\\n';
11 | }
12 | `}
13 | </CodeBlock>
14 |
--------------------------------------------------------------------------------
/site/docs/use-cases/text-generation/_sections/_run_model/_code_example_python.mdx:
--------------------------------------------------------------------------------
1 | import CodeBlock from '@theme/CodeBlock';
2 |
3 | <CodeBlock language="python" showLineNumbers>
4 | {`import openvino_genai as ov_genai
5 |
6 | pipe = ov_genai.LLMPipeline(model_path, "${props.device || 'CPU'}")
7 | print(pipe.generate("What is OpenVINO?", max_new_tokens=100))
8 | `}
9 | </CodeBlock>
10 |
--------------------------------------------------------------------------------
/site/docs/use-cases/text-generation/_sections/_run_model/index.mdx:
--------------------------------------------------------------------------------
1 | import CodeExampleCPP from './_code_example_cpp.mdx';
2 | import CodeExamplePython from './_code_example_python.mdx';
3 |
4 | ## Run Model Using OpenVINO GenAI
5 |
6 | [`LLMPipeline`](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.LLMPipeline.html) is the main object used for decoding. You can construct it directly from the folder containing the converted model.
7 | It automatically loads the main model, tokenizer, detokenizer, and the default generation configuration.
8 |
9 | <LanguageTabs>
10 | <TabItemPython>
11 | <Tabs groupId="device">
12 | <TabItem label="CPU" value="cpu">
13 | <CodeExamplePython device="CPU" />
14 | </TabItem>
15 | <TabItem label="GPU" value="gpu">
16 | <CodeExamplePython device="GPU" />
17 | </TabItem>
18 | </Tabs>
19 | </TabItemPython>
20 | <TabItemCpp>
21 | <Tabs groupId="device">
22 | <TabItem label="CPU" value="cpu">
23 | <CodeExampleCPP device="CPU" />
24 | </TabItem>
25 | <TabItem label="GPU" value="gpu">
26 | <CodeExampleCPP device="GPU" />
27 | </TabItem>
28 | </Tabs>
29 | </TabItemCpp>
30 | </LanguageTabs>
31 |
32 | :::tip
33 |
34 | Use CPU or GPU as devices without any other code change.
35 |
36 | :::
37 |
--------------------------------------------------------------------------------
/site/docs/use-cases/text-generation/_sections/_usage_options/_lora_adapters.mdx:
--------------------------------------------------------------------------------
1 | ### Working with LoRA Adapters
2 |
3 | LoRA adapters can be used to customize LLM outputs for specific tasks or styles.
4 | In text generation, adapters can help models perform better at particular activities like coding, creative writing, or domain-specific knowledge.
5 |
6 | Refer to the [LoRA Adapters](/docs/guides/lora-adapters.mdx) guide for more details on working with LoRA adapters.
7 |
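8 | As a minimal sketch (assuming a LoRA adapter file `adapter_model.safetensors` and placeholder `model_path`/`prompt` names), an adapter can be registered when the pipeline is created and then enabled or disabled per `generate()` call:
9 |
10 | ```python
11 | import openvino_genai as ov_genai
12 |
13 | # Register the adapter with the pipeline at construction time.
14 | adapter = ov_genai.Adapter("adapter_model.safetensors")
15 | adapter_config = ov_genai.AdapterConfig(adapter)
16 | pipe = ov_genai.LLMPipeline(model_path, "CPU", adapters=adapter_config)
17 |
18 | # Generate with the adapter enabled.
19 | print(pipe.generate(prompt, max_new_tokens=100, adapters=adapter_config))
20 |
21 | # Generate with adapters disabled by passing an empty config.
22 | print(pipe.generate(prompt, max_new_tokens=100, adapters=ov_genai.AdapterConfig()))
23 | ```
24 |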
--------------------------------------------------------------------------------
/site/docs/use-cases/text-generation/_sections/_usage_options/index.mdx:
--------------------------------------------------------------------------------
1 | import ChatScenario from '@site/docs/use-cases/_shared/_chat_scenario.mdx';
2 | import Streaming from '@site/docs/use-cases/_shared/_streaming.mdx';
3 | import GenerationParameters from './_generation_parameters.mdx';
4 | import LoraAdapters from './_lora_adapters.mdx';
5 | import SpeculativeDecoding from './_speculative_decoding.mdx';
6 |
7 | ## Additional Usage Options
8 |
9 | :::tip
10 | Check out [Python](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/text_generation) and [C++](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/cpp/text_generation) text generation samples.
11 | :::
12 |
13 | <GenerationParameters />
14 |
15 | <ChatScenario />
16 |
17 | <Streaming />
18 |
19 | <LoraAdapters />
20 |
21 | <SpeculativeDecoding />
22 |
--------------------------------------------------------------------------------
/site/docs/use-cases/text-generation/index.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 1
3 | ---
4 | import OptimumCLI from '@site/src/components/OptimumCLI';
5 | import ConvertModelSection from '../_shared/_convert_model.mdx';
6 | import RunModelSection from './_sections/_run_model/index.mdx';
7 | import UsageOptionsSection from './_sections/_usage_options/index.mdx';
8 |
9 | # Text Generation Using LLMs
10 |
11 | <ConvertModelSection>
12 | Download and convert model (e.g. [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0)) to OpenVINO format from Hugging Face:
13 |
14 | <OptimumCLI model='TinyLlama/TinyLlama-1.1B-Chat-v1.0' outputDir='TinyLlama_1_1b_v1_ov' weightFormat='int4' trustRemoteCode />
15 |
16 | See all supported [Large Language Models](/docs/supported-models/#large-language-models-llms).
17 | </ConvertModelSection>
18 |
19 | <RunModelSection />
20 |
21 | <UsageOptionsSection />
22 |
--------------------------------------------------------------------------------
/site/docs/use-cases/text-rerank/_sections/_run_model/_code_example_cpp.mdx:
--------------------------------------------------------------------------------
1 | import CodeBlock from '@theme/CodeBlock';
2 |
3 | <CodeBlock language="cpp" showLineNumbers>
4 | {`#include "openvino/genai/rag/text_rerank_pipeline.hpp"
5 | #include <iostream>
6 |
7 | int main(int argc, char* argv[]) {
8 |     std::vector<std::string> documents(argv + 3, argv + argc);
9 |     std::string models_path = argv[1], query = argv[2];
10 |
11 |     ov::genai::TextRerankPipeline pipeline(models_path, "${props.device || 'CPU'}", ov::genai::top_n(3));
12 |
13 |     auto rerank_result = pipeline.rerank(query, documents);
14 |
15 |     std::cout << "Reranked documents:\\n";
16 |     for (const auto& [index, score] : rerank_result) {
17 |         std::cout << "Document " << index << " (score: " << score << "): " << documents[index] << '\\n';
18 |     }
19 | }
20 | `}
21 | </CodeBlock>
21 |
--------------------------------------------------------------------------------
/site/docs/use-cases/text-rerank/_sections/_run_model/_code_example_python.mdx:
--------------------------------------------------------------------------------
1 | import CodeBlock from '@theme/CodeBlock';
2 |
3 | <CodeBlock language="python" showLineNumbers>
4 | {`import openvino_genai
5 |
6 | pipeline = openvino_genai.TextRerankPipeline(model_path, "${props.device || 'CPU'}", top_n=3)
7 |
8 | rerank_result = pipeline.rerank(query, documents)
9 |
10 | print("Reranked documents:")
11 | for index, score in rerank_result:
12 | print(f"Document {index} (score: {score:.4f}): {documents[index]}")
13 | `}
14 | </CodeBlock>
15 |
--------------------------------------------------------------------------------
/site/docs/use-cases/text-rerank/_sections/_run_model/index.mdx:
--------------------------------------------------------------------------------
1 | import CodeExampleCPP from './_code_example_cpp.mdx';
2 | import CodeExamplePython from './_code_example_python.mdx';
3 |
4 | ## Run Model Using OpenVINO GenAI
5 |
6 | The `TextRerankPipeline` enables you to reorder candidate documents or passages by semantic relevance to a query using a cross-encoder or reranker model. You can control how many top results are returned using the `top_n` parameter.
7 |
8 | <LanguageTabs>
9 | <TabItemPython>
10 | <Tabs groupId="device">
11 | <TabItem label="CPU" value="cpu">
12 | <CodeExamplePython device="CPU" />
13 | </TabItem>
14 | <TabItem label="GPU" value="gpu">
15 | <CodeExamplePython device="GPU" />
16 | </TabItem>
17 | </Tabs>
18 | </TabItemPython>
19 | <TabItemCpp>
20 | <Tabs groupId="device">
21 | <TabItem label="CPU" value="cpu">
22 | <CodeExampleCPP device="CPU" />
23 | </TabItem>
24 | <TabItem label="GPU" value="gpu">
25 | <CodeExampleCPP device="GPU" />
26 | </TabItem>
27 | </Tabs>
28 | </TabItemCpp>
29 | </LanguageTabs>
--------------------------------------------------------------------------------
/site/docs/use-cases/text-rerank/index.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 2
3 | ---
4 | import OptimumCLI from '@site/src/components/OptimumCLI';
5 | import ConvertModelSection from '../_shared/_convert_model.mdx';
6 | import RunModelSection from './_sections/_run_model/index.mdx';
7 |
8 | # Text Reranking
9 |
10 | <ConvertModelSection>
11 | Download and convert a reranker model (e.g. [cross-encoder/ms-marco-MiniLM-L6-v2](https://huggingface.co/cross-encoder/ms-marco-MiniLM-L6-v2)) to OpenVINO format from Hugging Face:
12 |
13 | <OptimumCLI model='cross-encoder/ms-marco-MiniLM-L6-v2' outputDir='cross-encoder/ms-marco-MiniLM-L6-v2' trustRemoteCode />
14 |
15 | See all supported [Reranker Models](/docs/supported-models/#text-rerank-models).
16 | </ConvertModelSection>
17 |
18 | <RunModelSection />
19 |
--------------------------------------------------------------------------------
/site/eslint.config.mjs:
--------------------------------------------------------------------------------
1 | import pluginJs from '@eslint/js';
2 | import pluginReact from 'eslint-plugin-react';
3 | import globals from 'globals';
4 | import tsEslint from 'typescript-eslint';
5 |
6 | import { FlatCompat } from '@eslint/eslintrc';
7 | import path from 'path';
8 | import { fileURLToPath } from 'url';
9 |
10 | // mimic CommonJS variables -- not needed if using CommonJS
11 | const __filename = fileURLToPath(import.meta.url);
12 | const __dirname = path.dirname(__filename);
13 |
14 | const compat = new FlatCompat({
15 | baseDirectory: __dirname,
16 | });
17 |
18 | /** @type {import('eslint').Linter.Config[]} */
19 | export default [
20 | { files: ['**/*.{js,mjs,cjs,ts,jsx,tsx}'] },
21 | { ignores: ['node_modules/', '.docusaurus/'] },
22 | {
23 | languageOptions: {
24 | ...pluginReact.configs.flat.recommended.languageOptions,
25 | globals: { ...globals.browser, ...globals.node },
26 | },
27 | },
28 | pluginJs.configs.recommended,
29 | ...tsEslint.configs.recommended,
30 | pluginReact.configs.flat.recommended,
31 | pluginReact.configs.flat['jsx-runtime'],
32 | ...compat.extends('plugin:@docusaurus/recommended'),
33 | { settings: { react: { version: 'detect' } } },
34 | ];
35 |
--------------------------------------------------------------------------------
/site/sidebars.ts:
--------------------------------------------------------------------------------
1 | import type { SidebarsConfig } from '@docusaurus/plugin-content-docs';
2 |
3 | // This runs in Node.js - Don't use client-side code here (browser APIs, JSX...)
4 |
5 | /**
6 | * Creating a sidebar enables you to:
7 | - create an ordered group of docs
8 | - render a sidebar for each doc of that group
9 | - provide next/previous navigation
10 |
11 | The sidebars can be generated from the filesystem, or explicitly defined here.
12 |
13 | Create as many sidebars as you want.
14 | */
15 | const sidebars: SidebarsConfig = {
16 | // By default, Docusaurus generates a sidebar from the docs folder structure
17 | genaiDocsSidebar: [
18 | {
19 | type: 'autogenerated',
20 | dirName: '.',
21 | },
22 | ],
23 | };
24 |
25 | export default sidebars;
26 |
--------------------------------------------------------------------------------
/site/src/components/Button/styles.module.css:
--------------------------------------------------------------------------------
1 | :global(.button) {
2 | display: inline-flex;
3 | align-items: center;
4 | justify-content: center;
5 |
6 | --button-icon-size: calc(1.5rem * var(--ifm-button-size-multiplier));
7 |
8 | .buttonIcon {
9 | display: inline-flex;
10 | height: var(--button-icon-size);
11 | width: var(--button-icon-size);
12 | margin-right: 1rem;
13 |
14 | svg {
15 | height: 100%;
16 | width: 100%;
17 | }
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/site/src/components/Carousel/styles.module.css:
--------------------------------------------------------------------------------
1 | .carousel {
2 | position: relative;
3 | padding: 0 2rem;
4 |
5 | .slidesWrapper {
6 | overflow: hidden;
7 | }
8 |
9 | .slidesContainer {
10 | display: flex;
11 | transition: transform 0.5s ease-in-out;
12 |
13 | .slide {
14 | display: flex;
15 | align-items: center;
16 | justify-content: center;
17 | padding: 0 1rem;
18 | }
19 | }
20 | }
21 |
22 | .chevron {
23 | position: absolute;
24 | top: 50%;
25 | transform: translateY(-50%);
26 | background-color: transparent;
27 | border: none;
28 | width: 30px;
29 | height: 30px;
30 | display: flex;
31 | align-items: center;
32 | justify-content: center;
33 | cursor: pointer;
34 | transition: all 0.3s ease;
35 | z-index: 2;
36 | color: white;
37 | padding: 0;
38 | }
39 |
40 | .chevronLeft {
41 | left: 0px;
42 | }
43 |
44 | .chevronRight {
45 | right: 0px;
46 | }
47 |
48 | .pagination {
49 | display: flex;
50 | justify-content: center;
51 | gap: 8px;
52 | padding-bottom: 5px;
53 |
54 | .dot {
55 | padding: 0;
56 | width: 8px;
57 | height: 8px;
58 | border-radius: 50%;
59 | border: none;
60 | cursor: pointer;
61 | transition: all 0.3s ease;
62 | opacity: 0.5;
63 | }
64 |
65 | .dotActive {
66 | opacity: 1;
67 | transform: scale(1.3);
68 | }
69 | }
70 |
--------------------------------------------------------------------------------
/site/src/components/OptimumCLI/index.tsx:
--------------------------------------------------------------------------------
1 | import CodeBlock from '@theme/CodeBlock';
2 |
3 | type OptimumCLIProps = {
4 | model?: string;
5 | outputDir?: string;
6 | weightFormat?: 'fp32' | 'fp16' | 'int8' | 'int4';
7 | task?: string;
8 | trustRemoteCode?: boolean;
9 | };
10 |
11 | export default function OptimumCLI({
12 | model = '<model_id_or_path>',
13 | outputDir = '<output_dir>',
14 | weightFormat,
15 | task,
16 | trustRemoteCode,
17 | }: OptimumCLIProps): React.JSX.Element {
18 | const args = [`--model ${model}`];
19 | if (weightFormat) {
20 | args.push(`--weight-format ${weightFormat}`);
21 | }
22 | if (task) {
23 | args.push(`--task ${task}`);
24 | }
25 | if (trustRemoteCode) {
26 | args.push('--trust-remote-code');
27 | }
28 | return (
29 | <CodeBlock language="bash">{`optimum-cli export openvino ${args.join(
30 | ' '
31 | )} ${outputDir}`}</CodeBlock>
32 | );
33 | }
34 |
--------------------------------------------------------------------------------
/site/src/css/breadcrumbs.css:
--------------------------------------------------------------------------------
1 | .breadcrumbs > .breadcrumbs__item:first-child {
2 | display: none;
3 | }
4 |
5 | .breadcrumbs__link {
6 | font-size: 14px;
7 | }
8 |
9 | .breadcrumbs__item:not(:last-child)::after {
10 | background: none;
11 | content: '/';
12 | margin: 0;
13 | }
14 |
--------------------------------------------------------------------------------
/site/src/css/footer.css:
--------------------------------------------------------------------------------
1 | .footer {
2 | --ifm-footer-background-color: #f9f9f9;
3 | --ifm-footer-title-color: var(--genai-color-text-black);
4 | --ifm-footer-link-color: var(--genai-color-classic-blue);
5 | --ifm-footer-color: var(--genai-color-text-black);
6 | --ifm-footer-link-hover-color: var(--genai-color-darker-classic-blue);
7 | }
8 |
9 | .footer__links {
10 | padding: 0 80px;
11 | margin-bottom: 2rem;
12 | }
13 |
14 | .footer__col {
15 | display: grid;
16 | justify-content: center;
17 | }
18 |
19 | .footer__copyright {
20 | white-space: pre-wrap;
21 | font-size: 12px;
22 | }
23 |
--------------------------------------------------------------------------------
/site/src/css/menu.css:
--------------------------------------------------------------------------------
1 | .menu__link {
2 | font-size: 0.875rem;
3 | position: relative;
4 | }
5 |
6 | .menu__link--active:not(.menu__link--sublist) {
7 | --ifm-menu-color-active: var(--genai-color-classic-blue);
8 | }
9 |
10 | .menu__link--sublist-caret:after,
11 | .menu__caret:before {
12 | --ifm-menu-link-sublist-icon: url('../../static/img/chevron-up.svg');
13 | background: var(--ifm-menu-link-sublist-icon);
14 | background-position: center;
15 | background-repeat: no-repeat;
16 | }
17 |
--------------------------------------------------------------------------------
/site/src/css/navbar.css:
--------------------------------------------------------------------------------
1 | :root {
2 | --search-local-input-active-border-color: var(--genai-color-white);
3 |
4 | --ifm-navbar-link-color: var(--genai-color-text-white);
5 | --ifm-navbar-link-hover-color: var(--genai-text-color-white-hover);
6 | --ifm-navbar-link-active-color: var(--genai-color-blue-energy);
7 | }
8 |
9 | .navbar {
10 | background: linear-gradient(90deg, #27317f 6.98%, #1a3483 46.6%, #02227c 79.11%);
11 | width: 100%;
12 | }
13 |
14 | .navbar__items {
15 | margin-left: var(--ifm-navbar-height);
16 | }
17 |
18 | .navbar__inner {
19 | align-items: center;
20 | }
21 |
22 | .navbar__logo {
23 | top: 0;
24 | left: 0;
25 | height: 100%;
26 | position: absolute;
27 | }
28 |
29 | .navbar__link--active:not(:has(~ .navbar__link--active)) {
30 | --ifm-navbar-link-hover-color: var(--genai-color-blue-energy);
31 | }
32 |
33 | .navbar__search-input {
34 | border-radius: 3px;
35 | font-size: 14px;
36 | font-family: var(--genai-font-family);
37 | color: var(--genai-color-text-white);
38 | width: 238px;
39 | background: #122f84 url('../../static/img/magnifying-glass.svg') no-repeat 0.75rem center / 1rem
40 | 1rem;
41 |
42 | &::placeholder {
43 | color: var(--genai-color-text-white);
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/site/src/css/toc.css:
--------------------------------------------------------------------------------
1 | .table-of-contents__link {
2 | position: relative;
3 | padding: var(--ifm-menu-link-padding-vertical) var(--ifm-menu-link-padding-horizontal);
4 | }
5 |
6 | .table-of-contents__link--active {
7 | background-color: var(--ifm-menu-color-background-active);
8 | color: var(--genai-color-classic-blue);
9 | }
10 |
--------------------------------------------------------------------------------
/site/src/css/typography.css:
--------------------------------------------------------------------------------
1 | :root {
2 | --global-font-apple-system: -apple-system;
3 | --global-font-blink-mac-system-font: BlinkMacSystemFont;
4 | --global-font-segoe-ui: 'Segoe UI';
5 | --global-font-roboto: Roboto;
6 | --global-font-helvetica: Helvetica;
7 | --global-font-arial: Arial;
8 | --global-font-sans-serif: sans-serif;
9 | --global-font-apple-color-emoji: 'Apple Color Emoji';
10 | --global-font-segoe-ui-emoji: 'Segoe UI Emoji';
11 | --global-font-segoe-ui-symbol: 'Segoe UI Symbol';
12 |
13 | --genai-font-family: var(--global-font-apple-system), var(--global-font-blink-mac-system-font),
14 | var(--global-font-segoe-ui), var(--global-font-roboto), var(--global-font-helvetica),
15 | var(--global-font-arial), var(--global-font-sans-serif), var(--global-font-apple-color-emoji),
16 | var(--global-font-segoe-ui-emoji), var(--global-font-segoe-ui-symbol);
17 | }
18 |
--------------------------------------------------------------------------------
/site/src/hooks/use-screen-size.ts:
--------------------------------------------------------------------------------
1 | import ExecutionEnvironment from '@docusaurus/ExecutionEnvironment';
2 | import { useEffect, useState } from 'react';
3 |
4 | const useScreenSize = () => {
5 | if (!ExecutionEnvironment.canUseViewport) {
6 | return {
7 | width: 0,
8 | height: 0,
9 | };
10 | }
11 |
12 | const [screenSize, setScreenSize] = useState({
13 | width: window.innerWidth,
14 | height: window.innerHeight,
15 | });
16 |
17 | useEffect(() => {
18 | const handleResize = () => {
19 | setScreenSize({
20 | width: window.innerWidth,
21 | height: window.innerHeight,
22 | });
23 | };
24 |
25 | window.addEventListener('resize', handleResize);
26 |
27 | return () => {
28 | window.removeEventListener('resize', handleResize);
29 | };
30 | }, []);
31 |
32 | return screenSize;
33 | };
34 |
35 | export default useScreenSize;
36 |
--------------------------------------------------------------------------------
/site/src/pages/_sections/FeaturesSection/FeatureItem/index.tsx:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import styles from './styles.module.css';
3 |
4 | import Heading from '@theme/Heading';
5 |
6 | type FeatureItemProps = {
7 | icon: string;
8 | title: string;
9 | children: React.ReactNode;
10 | };
11 |
12 | export const FeatureItem: React.FC<FeatureItemProps> = ({ icon, title, children }) => (
13 | <div className={styles.benefitItem}>
14 | <span className={styles.icon}>{icon}</span>
15 | <Heading as="h3" className={styles.title}>
16 | {title}
17 | </Heading>
18 | <p className={styles.description}>{children}</p>
19 | </div>
20 | );
21 |
--------------------------------------------------------------------------------
/site/src/pages/_sections/FeaturesSection/FeatureItem/styles.module.css:
--------------------------------------------------------------------------------
1 | .benefitItem {
2 | display: flex;
3 | flex-direction: column;
4 | align-items: center;
5 | padding: 0rem 2rem;
6 |
7 | .icon {
8 | font-size: 3rem;
9 | }
10 |
11 | .title {
12 | margin-bottom: 0.75rem;
13 | }
14 |
15 | .description {
16 | margin: 0;
17 | text-align: center;
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/site/src/pages/_sections/FeaturesSection/styles.module.css:
--------------------------------------------------------------------------------
1 | .featuresSection {
2 | composes: section from '../section-styles.module.css';
3 | }
4 |
5 | .sectionTitle {
6 | composes: sectionTitle from '../section-styles.module.css';
7 | }
8 |
9 | .sectionContent {
10 | composes: sectionContent from '../section-styles.module.css';
11 |
12 | display: grid;
13 | grid-template-columns: repeat(auto-fit, minmax(340px, 1fr));
14 | width: 100%;
15 | justify-content: center;
16 | gap: 1rem;
17 | }
18 |
--------------------------------------------------------------------------------
/site/src/pages/_sections/HeroSection/PipelinesCarousel/styles.module.css:
--------------------------------------------------------------------------------
1 | .pipelineExample {
2 | display: flex;
3 | flex-direction: column;
4 | flex-grow: 1;
5 |
6 | .pipelineHeader {
7 | display: flex;
8 | align-items: center;
9 | gap: 0.5rem;
10 | margin-bottom: 0.75rem;
11 |
12 | .pipelineTitle {
13 | margin: 0;
14 | color: var(--genai-color-text-white);
15 | font-weight: 500;
16 | font-size: 14px;
17 | }
18 | }
19 |
20 | .pipelineCode {
21 | font-size: 0.75rem;
22 |
23 | pre code {
24 | padding: 0.5rem;
25 | }
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/site/src/pages/_sections/HeroSection/index.tsx:
--------------------------------------------------------------------------------
1 | import Heading from '@theme/Heading';
2 |
3 | import Button from '@site/src/components/Button';
4 | import OpenVINOLogo from '@site/static/img/openvino.svg';
5 |
6 | import PipelinesCarousel from './PipelinesCarousel';
7 | import styles from './styles.module.css';
8 |
9 | export const HeroSection = () => (
10 | <section className={styles.heroSection}>
11 | <Heading as="h1" className={styles.sectionTitle}>
12 | <OpenVINOLogo role="img" title="OpenVINO" />
13 | <span className={styles.genAITitle}>GenAI</span>
14 | </Heading>
15 | <div className={styles.sectionContent}>
16 | <p className={styles.subtitle}>Run Generative AI with ease</p>
17 | <p className={styles.description}>
18 | OpenVINO™ GenAI provides optimized pipelines for running generative AI models with maximum
19 | performance and minimal dependencies
20 | </p>
21 | <Button
22 | label="Get Started"
23 | link="/docs/getting-started/introduction"
24 | size="lg"
25 | variant="secondary"
26 | className={styles.getStartedButton}
27 | />
28 | <PipelinesCarousel className={styles.pipelinesCarousel} />
29 | </div>
30 | </section>
31 | );
32 |
--------------------------------------------------------------------------------
/site/src/pages/_sections/InstallSection/styles.module.css:
--------------------------------------------------------------------------------
1 | .installSection {
2 | composes: section from '../section-styles.module.css';
3 | }
4 |
5 | .sectionTitle {
6 | composes: sectionTitle from '../section-styles.module.css';
7 | }
8 |
9 | .sectionContent {
10 | composes: sectionContent from '../section-styles.module.css';
11 |
12 | display: flex;
13 | flex-direction: column;
14 | gap: 2rem;
15 | align-items: center;
16 |
17 | .sectionDescription {
18 | margin-bottom: 0;
19 | text-align: center;
20 | }
21 |
22 | .quickInstall,
23 | .os {
24 | display: flex;
25 | flex-direction: column;
26 | align-items: center;
27 |
28 | h3 {
29 | font-weight: 500;
30 | }
31 | }
32 |
33 | .quickInstallCommand {
34 | margin-top: 0.5rem;
35 | margin-bottom: 0;
36 | min-width: 365px;
37 | }
38 |
39 | .osList {
40 | display: flex;
41 | flex-direction: row;
42 | gap: 2.5rem;
43 |
44 | .osItem {
45 | display: flex;
46 | flex-direction: column;
47 | align-items: center;
48 | padding: 1rem;
49 | gap: 0.5rem;
50 |
51 | .osItemIcon {
52 | height: 2.5rem;
53 | }
54 |
55 | .osItemTitle {
56 | font-weight: 500;
57 | }
58 | }
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/site/src/pages/_sections/UseCasesSection/index.tsx:
--------------------------------------------------------------------------------
1 | import styles from './styles.module.css';
2 |
3 | import Heading from '@theme/Heading';
4 |
5 | import Link from '@docusaurus/Link';
6 | import { ImageGeneration } from './components/image-generation';
7 | import { ImageProcessing } from './components/image-processing';
8 | import { SpeechRecognition } from './components/speech-recognition';
9 | import { TextGeneration } from './components/text-generation';
10 | import { TextRerank } from './components/text-rerank';
11 | import { TextEmbedding } from './components/text-embedding';
12 |
13 | export const UseCasesSection = () => (
14 | <section className={styles.useCasesSection}>
15 | <Heading as="h2" className={styles.sectionTitle}>
16 | Use Cases
17 | </Heading>
18 | <div className={styles.sectionContent}>
19 | <TextGeneration />
20 | <ImageGeneration />
21 | <SpeechRecognition />
22 | <ImageProcessing />
23 | <TextEmbedding />
24 | <TextRerank />
25 | </div>
26 | <div className={styles.useCasesFooter}>
27 | <strong>Looking for more?</strong> See all{' '}
28 | <Link to="docs/category/use-cases">supported use cases</Link>.
29 | </div>
30 | </section>
31 | );
32 |
--------------------------------------------------------------------------------
/site/src/pages/_sections/UseCasesSection/styles.module.css:
--------------------------------------------------------------------------------
1 | .useCasesSection {
2 | composes: section from '../section-styles.module.css';
3 |
4 | background-color: var(--genai-color-section-bg);
5 | }
6 |
7 | .sectionTitle {
8 | composes: sectionTitle from '../section-styles.module.css';
9 | }
10 |
11 | .sectionContent {
12 | composes: sectionContent from '../section-styles.module.css';
13 |
14 | display: flex;
15 | flex-direction: column;
16 | gap: 2rem;
17 | }
18 |
19 | .useCasesFooter {
20 | margin-top: 1.5rem;
21 | font-size: 1rem;
22 |
23 | strong {
24 | font-weight: 600;
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/site/src/pages/_sections/section-styles.module.css:
--------------------------------------------------------------------------------
1 | .section {
2 | display: flex;
3 | flex-direction: column;
4 | align-items: center;
5 | padding: 2.5rem 2rem;
6 | }
7 |
8 | .sectionTitle {
9 | margin-bottom: 2rem;
10 | }
11 |
12 | .sectionContent {
13 | max-width: 1200px;
14 | margin: 0 auto;
15 | }
16 |
--------------------------------------------------------------------------------
/site/src/pages/index.tsx:
--------------------------------------------------------------------------------
1 | import Layout from '@theme/Layout';
2 |
3 | import { FeaturesSection } from './_sections/FeaturesSection';
4 | import { HeroSection } from './_sections/HeroSection';
5 | import { InstallSection } from './_sections/InstallSection';
6 | import { UseCasesSection } from './_sections/UseCasesSection';
7 |
8 | export default function Home() {
9 | return (
10 | <Layout description="Run Generative AI models with simple C++/Python API and using OpenVINO Runtime">
11 | <HeroSection />
12 | <FeaturesSection />
13 | <UseCasesSection />
14 | <InstallSection />
15 | </Layout>
16 | );
17 | }
18 |
--------------------------------------------------------------------------------
/site/src/theme/MDXComponents.tsx:
--------------------------------------------------------------------------------
1 | import Button from '@site/src/components/Button';
2 | import { LanguageTabs, TabItemCpp, TabItemPython } from '@site/src/components/LanguageTabs';
3 | import MDXComponents from '@theme-original/MDXComponents';
4 | import TabItem from '@theme/TabItem';
5 | import Tabs from '@theme/Tabs';
6 |
7 | export default {
8 | // Reusing the default mapping
9 | ...MDXComponents,
10 | // Theme components
11 | Tabs,
12 | TabItem,
13 | // Custom components
14 | Button,
15 | LanguageTabs,
16 | TabItemPython,
17 | TabItemCpp,
18 | };
19 |
--------------------------------------------------------------------------------
/site/src/types/images.d.ts:
--------------------------------------------------------------------------------
1 | declare module '*.webp' {
2 | const content: string;
3 | export default content;
4 | }
5 |
--------------------------------------------------------------------------------
/site/static/.nojekyll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openvinotoolkit/openvino.genai/91dc71e0ded5a4624289723266f7b3811369f0e9/site/static/.nojekyll
--------------------------------------------------------------------------------
/site/static/img/background.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openvinotoolkit/openvino.genai/91dc71e0ded5a4624289723266f7b3811369f0e9/site/static/img/background.webp
--------------------------------------------------------------------------------
/site/static/img/beam_idx-drop.gif:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:701839c28ac1e05c1c9e23823c74a10149a343210192e51df36e563ff6e257e4
3 | size 5700875
4 |
--------------------------------------------------------------------------------
/site/static/img/beam_idx-fork.gif:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:292753b30a2153c92cedf16672ba182a851ec30c95c309cdaca13173f00fe700
3 | size 6062552
4 |
--------------------------------------------------------------------------------
/site/static/img/chevron-right.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:4fa971a830569f87b477186e7c96b36a2bb66cf76431e10027e2dcf92a2307e9
3 | size 513
4 |
--------------------------------------------------------------------------------
/site/static/img/chevron-up.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:ad006a8bb6ea8f42b9c3bf6b73bf44f35379f5b03ccd2fd356caf1de8cb14b94
3 | size 697
4 |
--------------------------------------------------------------------------------
/site/static/img/favicon.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:eef9d4468b26933cb3101f3ddcde43a05686152e286e38ecb986ae8755e589ae
3 | size 570
4 |
--------------------------------------------------------------------------------
/site/static/img/image.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:f4d041afa0c579aa2274a785edd7cc89fc936f5b805aefae5e450c14295a250f
3 | size 954
4 |
--------------------------------------------------------------------------------
/site/static/img/intel-logo.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:53536031483e61bc6fd7024cfdcc59e24bb352ef28ce308af75a53b677fabf37
3 | size 4633
4 |
--------------------------------------------------------------------------------
/site/static/img/kv-cache-areas-diagram.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:e2fa45a69b4db6e8293fd8e1da712c2970237ac98aab99d4b0d729379bbe49c6
3 | size 7143
4 |
--------------------------------------------------------------------------------
/site/static/img/linux-logo.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:6ad66b9369475567aac9ef50d3abf06fa3b9ae2d3ef7a392167862fbb3985068
3 | size 112029
4 |
--------------------------------------------------------------------------------
/site/static/img/lora.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:64a1113c00b6f37d78ce0e32713170b04a9367ca0a2a74b280d1d6f7ea9122e1
3 | size 18575
4 |
--------------------------------------------------------------------------------
/site/static/img/mac-os-logo.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:66717fe401ce5b14b7f0723088612c9c246e868948cec2c56fcf748e5dad7387
3 | size 2901
4 |
--------------------------------------------------------------------------------
/site/static/img/magnifying-glass.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:4b37f714caeaf026cef1548bbd51c099419f617211a72bd1cb95b2c1c2fb2fca
3 | size 399
4 |
--------------------------------------------------------------------------------
/site/static/img/openvino-genai-workflow.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:703732cd6a85f2cbcfd0915d63c10483114f05b71b834d2228501700074d0053
3 | size 1053573
4 |
--------------------------------------------------------------------------------
/site/static/img/openvino.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:80a77851039175ddd926eebbe2a8b88ca7380a7bd0ee8b4028f103b873789425
3 | size 97028
4 |
--------------------------------------------------------------------------------
/site/static/img/sound-on.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:7b5e5ec8be590d7b11399903a27eee1f94617f5c7c756b76f16f6dfc083768b5
3 | size 1391
4 |
--------------------------------------------------------------------------------
/site/static/img/stateful.jpg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:a6fb5ab9990c845eef8847bdf76799fcaefe0a9afa10fb9d07f6df4394a9e2ad
3 | size 129471
4 |
--------------------------------------------------------------------------------
/site/static/img/stateless.jpg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:20904ff7a8793359b978cfcdc85c482e0764291af17b572936955f586e202ea9
3 | size 113440
4 |
--------------------------------------------------------------------------------
/site/static/img/structured_output_work_example.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:2f97b3b73753dbe2849035b49fb20630131b6fae06972ecfb6b1b80680d4eeb6
3 | size 58388
4 |
--------------------------------------------------------------------------------
/site/static/img/text.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:f5a1350d89c71005b91f06a78d7209891b41be871fcf83b6d574f76132a25870
3 | size 1449
4 |
--------------------------------------------------------------------------------
/site/static/img/trishape.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:fd3ccb1d8489537c03832ad4f4f8d6ccacb29656787f3983e1f14663d1c0272a
3 | size 55058
4 |
--------------------------------------------------------------------------------
/site/static/img/windows-logo.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:0ec8b8d5efed5d7f0b7f19271ecf40ecd5fe14bb5284dfc36ea340b6381206eb
3 | size 422
4 |
--------------------------------------------------------------------------------
/site/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | // This file is not used in compilation. It is here just for a nice editor experience.
3 | "extends": "@docusaurus/tsconfig",
4 | "compilerOptions": {
5 | "baseUrl": "."
6 | },
7 | "exclude": [
8 | ".docusaurus",
9 | "build"
10 | ]
11 | }
--------------------------------------------------------------------------------
/src/c/include/openvino/genai/c/visibility.h:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #ifndef OPENVINO_GENAI_EXTERN_C
7 | # ifdef __cplusplus
8 | # define OPENVINO_GENAI_EXTERN_C extern "C"
9 | # else
10 | # define OPENVINO_GENAI_EXTERN_C
11 | # endif
12 | #endif
13 |
14 | #if defined(_WIN32) || defined(__CYGWIN__)
15 | # ifdef openvino_genai_c_EXPORTS
16 | # define OPENVINO_GENAI_C_EXPORTS OPENVINO_GENAI_EXTERN_C __declspec(dllexport)
17 | # else
18 | # define OPENVINO_GENAI_C_EXPORTS OPENVINO_GENAI_EXTERN_C __declspec(dllimport)
19 | # endif
20 | #elif defined(__GNUC__) && (__GNUC__ >= 4) || defined(__clang__)
21 | # ifdef openvino_genai_c_EXPORTS
22 | # define OPENVINO_GENAI_C_EXPORTS OPENVINO_GENAI_EXTERN_C __attribute__((visibility("default")))
23 | # else
24 | # define OPENVINO_GENAI_C_EXPORTS OPENVINO_GENAI_EXTERN_C __attribute__((visibility("default")))
25 | # endif
26 | #else
27 | # define OPENVINO_GENAI_C_EXPORTS OPENVINO_GENAI_EXTERN_C
28 | #endif
29 |
--------------------------------------------------------------------------------
/src/cpp/include/openvino/genai/common_types.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include <map>
7 |
8 | #include "openvino/core/core.hpp"
9 | #include <openvino/runtime/properties.hpp>
10 |
11 | namespace ov {
12 | namespace genai {
13 |
14 | /**
15 | * @brief A map of models for VLMPipeline constructor.
16 | * Key is model name (e.g. "vision_embeddings", "text_embeddings", "language", "resampler")
17 | * and value is a pair of model IR as string and weights as tensor.
18 | */
19 | using ModelsMap = std::map<std::string, std::pair<std::string, ov::Tensor>>;
20 |
21 | /**
22 | * @brief blob_path property defines a path to a directory containing compiled blobs previously exported with
23 | * `pipeline.export_model` method.
24 | *
25 | * Use of compiled blobs can significantly reduce model load time, especially for large models.
26 | */
27 | static constexpr ov::Property<std::filesystem::path> blob_path{"blob_path"};
28 |
29 | } // namespace genai
30 | } // namespace ov
31 |
--------------------------------------------------------------------------------
/src/cpp/include/openvino/genai/image_generation/clip_text_model_with_projection.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include "openvino/genai/image_generation/clip_text_model.hpp"
7 |
8 | namespace ov {
9 | namespace genai {
10 |
11 | class CLIPTextModelWithProjection : public CLIPTextModel {
12 | public:
13 | using CLIPTextModel::CLIPTextModel;
14 |
15 | std::shared_ptr<CLIPTextModel> clone() {
16 | OPENVINO_ASSERT((m_model != nullptr) ^ static_cast<bool>(m_request), "CLIPTextModelWithProjection must have exactly one of m_model or m_request initialized");
17 |
18 | std::shared_ptr<CLIPTextModelWithProjection> cloned = std::make_shared<CLIPTextModelWithProjection>(*this);
19 |
20 | if (m_model) {
21 | cloned->m_model = m_model->clone();
22 | } else {
23 | cloned->m_request = m_request.get_compiled_model().create_infer_request();
24 | }
25 |
26 | return cloned;
27 | }
28 |
29 | };
30 |
31 | } // namespace genai
32 | } // namespace ov
33 |
--------------------------------------------------------------------------------
/src/cpp/include/openvino/genai/image_generation/scheduler.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include <filesystem>
7 |
8 | #include "openvino/genai/visibility.hpp"
9 | #include "openvino/core/deprecated.hpp"
10 |
11 | namespace ov {
12 | namespace genai {
13 |
14 | class OPENVINO_GENAI_EXPORTS Scheduler {
15 | public:
16 | enum Type {
17 | AUTO,
18 | LCM,
19 | DDIM,
20 | LMS_DISCRETE OPENVINO_ENUM_DEPRECATED("LMS_DISCRETE is deprecated. Please, select different scheduler type") = DDIM,
21 | EULER_DISCRETE,
22 | FLOW_MATCH_EULER_DISCRETE,
23 | PNDM,
24 | EULER_ANCESTRAL_DISCRETE
25 | };
26 |
27 | static std::shared_ptr<Scheduler> from_config(const std::filesystem::path& scheduler_config_path,
28 | Type scheduler_type = AUTO);
29 |
30 | virtual ~Scheduler();
31 | };
32 |
33 | } // namespace genai
34 | } // namespace ov
35 |
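A short sketch of loading a scheduler from a Diffusers-style config; the path is illustrative, and how the resulting object is attached to an image-generation pipeline is left out:

    #include <memory>

    #include "openvino/genai/image_generation/scheduler.hpp"

    int main() {
        // scheduler_config.json is typically exported next to the model files (path is made up).
        std::shared_ptr<ov::genai::Scheduler> scheduler = ov::genai::Scheduler::from_config(
            "stable-diffusion-ov/scheduler/scheduler_config.json",
            ov::genai::Scheduler::EULER_DISCRETE);
        return scheduler ? 0 : 1;
    }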
--------------------------------------------------------------------------------
/src/cpp/include/openvino/genai/speech_generation/speech_generation_perf_metrics.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include <chrono>
7 | #include <map>
8 | #include <string>
9 | #include <vector>
10 |
11 | #include "openvino/genai/perf_metrics.hpp"
12 | #include "openvino/genai/visibility.hpp"
13 |
14 | namespace ov::genai {
15 |
16 | struct OPENVINO_GENAI_EXPORTS SpeechGenerationPerfMetrics : public PerfMetrics {
17 | size_t num_generated_samples = 0;
18 |
19 | void evaluate_statistics(std::optional<TimePoint> start_time = std::nullopt) override;
20 | };
21 | } // namespace ov::genai
22 |
--------------------------------------------------------------------------------
/src/cpp/include/openvino/genai/visibility.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include "openvino/core/visibility.hpp"
7 |
8 | #ifdef openvino_genai_EXPORTS
9 | # define OPENVINO_GENAI_EXPORTS OPENVINO_CORE_EXPORTS
10 | #else
11 | # define OPENVINO_GENAI_EXPORTS OPENVINO_CORE_IMPORTS
12 | #endif // openvino_genai_EXPORTS
13 |
--------------------------------------------------------------------------------
/src/cpp/include/openvino/genai/visual_language/perf_metrics.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include "openvino/genai/perf_metrics.hpp"
7 | #include "openvino/genai/visibility.hpp"
8 |
9 |
10 | namespace ov::genai {
11 |
12 | struct OPENVINO_GENAI_EXPORTS VLMRawPerfMetrics {
13 | /** @brief Duration of preparation of embeddings */
14 | std::vector<MicroSeconds> prepare_embeddings_durations;
15 | };
16 |
17 | struct OPENVINO_GENAI_EXPORTS VLMPerfMetrics : public PerfMetrics {
18 | /** @brief Mean and standard deviation of preparation of embeddings in milliseconds */
19 | MeanStdPair prepare_embeddings_duration;
20 |
21 | MeanStdPair get_prepare_embeddings_duration();
22 |
23 | VLMPerfMetrics() = default;
24 |
25 | VLMPerfMetrics(PerfMetrics& perf_metrics) : PerfMetrics(perf_metrics), prepare_embeddings_duration(){};
26 |
27 | void evaluate_statistics(std::optional<TimePoint> start_time = std::nullopt) override;
28 |
29 | VLMPerfMetrics operator+(const VLMPerfMetrics& metrics) const;
30 |
31 | VLMRawPerfMetrics vlm_raw_metrics;
32 | };
33 |
34 | }
--------------------------------------------------------------------------------
/src/cpp/src/continuous_batching/attention_output.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 | #include "openvino/openvino.hpp"
6 | using AttentionScoresForCacheOfSubsequence = ov::Tensor;
7 | using AttentionScoresForEachDecoderLayer = std::vector<AttentionScoresForCacheOfSubsequence>;
8 | using AttentionScoresForEachSubsequence = std::map<size_t, AttentionScoresForEachDecoderLayer>;
9 |
--------------------------------------------------------------------------------
/src/cpp/src/gguf_utils/gguf_modeling.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include <cstring>
7 |
8 | #include "openvino/openvino.hpp"
9 |
10 | std::shared_ptr<ov::Model> create_from_gguf(const std::string& model_path, const bool enable_save_ov_model);
11 |
--------------------------------------------------------------------------------
/src/cpp/src/lm_encoding.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <optional>
4 | #include "openvino/genai/llm_pipeline.hpp"
5 | #include "visual_language/embedding_model.hpp"
6 | #include "sampling/sampler.hpp"
7 |
8 | namespace ov {
9 | namespace genai {
10 |
11 | ov::genai::utils::GenerationFinishInfo get_lm_encoded_results(ov::InferRequest& m_llm, const ov::Tensor& input_ids, const ov::Tensor& attention_mask,
12 | const std::shared_ptr<StreamerBase>& streamer_ptr, Sampler& sampler, std::vector<SequenceGroup::Ptr> sequence_groups,
13 | std::optional<ov::Tensor> position_ids, std::optional<ov::Tensor> token_type_ids, utils::KVCacheState& m_kv_cache_state, EmbeddingsModel::Ptr m_embedding,
14 | std::optional<int64_t> rope_delta = std::nullopt, const size_t max_kv_cache_size = std::numeric_limits<size_t>::max());
15 |
16 |
17 | void align_kv_cache_and_history(const ov::Tensor& new_chat_tokens, utils::KVCacheState& kv_cache_state);
18 |
19 |
20 | TokenizedInputs get_chat_encoded_input(const ov::Tensor& new_chat_tokens, utils::KVCacheState& kv_cache_state);
21 |
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/src/cpp/src/logger.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 | #include <iostream>
6 | #include <string>
7 |
8 | namespace ov::genai {
9 |
10 | class Logger {
11 | public:
12 | static void warn(const std::string& message) {
13 | std::cout << "[WARN] " << message << '\n';
14 | };
15 | };
16 |
17 | } // namespace ov::genai
18 |
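Usage is a single static call; a trivial sketch with a made-up message:

    #include "logger.hpp"

    int main() {
        // Prints "[WARN] ..." to stdout.
        ov::genai::Logger::warn("falling back to default configuration");
        return 0;
    }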
--------------------------------------------------------------------------------
/src/cpp/src/lora/common.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2024 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include <map>
7 | #include <memory>
8 | #include <string>
9 | #include <optional>
10 | #include <vector>
11 |
12 | #include "openvino/op/constant.hpp"
13 |
14 | namespace ov {
15 | namespace genai {
16 | namespace utils {
17 |
18 | template <typename T>
19 | struct LoRAParts {
20 | T alpha, A, B;
21 |
22 | LoRAParts() = default;
23 | LoRAParts(const T& alpha, const T& A, const T& B) : alpha(alpha), A(A), B(B) {}
24 |
25 | template <typename Other>
26 | LoRAParts(const LoRAParts<Other>& other) : alpha(other.alpha), A(other.A), B(other.B) {}
27 | };
28 |
29 |
30 | using LoRAWeight = LoRAParts<std::shared_ptr<ov::op::v0::Constant>>;
31 | using LoRATensors = std::map<std::string, LoRAWeight>;
32 |
33 | }
34 | }
35 | }
36 |
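A small sketch of how these aliases compose; the tensor name and shapes below are made up:

    #include <vector>

    #include "lora/common.hpp"

    int main() {
        using ov::op::v0::Constant;
        // alpha is the scaling factor; A and B are the low-rank factors of one adapter entry.
        auto alpha = Constant::create(ov::element::f32, ov::Shape{1}, std::vector<float>{8.0f});
        auto A = Constant::create(ov::element::f32, ov::Shape{2, 4}, std::vector<float>(8, 0.01f));
        auto B = Constant::create(ov::element::f32, ov::Shape{4, 2}, std::vector<float>(8, 0.0f));

        ov::genai::utils::LoRAWeight weight(alpha, A, B);
        ov::genai::utils::LoRATensors tensors{{"layers.0.self_attn.q_proj", weight}};
        return tensors.size() == 1 ? 0 : 1;
    }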
--------------------------------------------------------------------------------
/src/cpp/src/lora/safetensors.c:
--------------------------------------------------------------------------------
1 | #define SAFETENSORS_IMPLEMENTATION
2 | #include "safetensors.h"
--------------------------------------------------------------------------------
/src/cpp/src/speculative_decoding/update_request_structs.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include <vector>
7 | #include <map>
8 |
9 | namespace ov::genai {
10 | struct GeneratedSequence {
11 | std::vector<int64_t> token_ids;
12 | std::vector<float> log_probs;
13 |
14 | GeneratedSequence(const std::vector<int64_t>& generated_token_ids,
15 | const std::vector<float>& generated_log_probs) :
16 | token_ids(generated_token_ids),
17 | log_probs(generated_log_probs) {};
18 | };
19 |
20 | struct UpdateRequestResult {
21 | size_t inserted_tokens_cnt, removed_tokens_cnt;
22 |
23 | UpdateRequestResult(size_t to_insert = 0, size_t to_remove = 0) :
24 | inserted_tokens_cnt(to_insert),
25 | removed_tokens_cnt(to_remove) {};
26 | };
27 |
28 | // { sequence_id : generated_tokens_and_log_probs }
29 | using GeneratedSequences = std::map<uint64_t, GeneratedSequence>;
30 |
31 | // { request_id : generated_sequence }
32 | using GeneratedRequests = std::map<uint64_t, GeneratedSequences>;
33 | }
34 |
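An illustrative construction of these maps; the ids, tokens, and log-probabilities are made up:

    #include "speculative_decoding/update_request_structs.hpp"

    int main() {
        // Three generated tokens with their log-probabilities.
        ov::genai::GeneratedSequence sequence({101, 2054, 2003}, {-0.11f, -0.73f, -0.28f});

        ov::genai::GeneratedSequences sequences{{/*sequence_id*/ 0, sequence}};
        ov::genai::GeneratedRequests requests{{/*request_id*/ 42, sequences}};

        // Bookkeeping for a request whose KV cache gained 3 tokens and dropped 1.
        ov::genai::UpdateRequestResult result(/*to_insert*/ 3, /*to_remove*/ 1);
        return static_cast<int>(requests.size() + result.inserted_tokens_cnt);
    }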
--------------------------------------------------------------------------------
/src/cpp/src/speech_generation/speech_generation_perf_metrics.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #include "openvino/genai/speech_generation/speech_generation_perf_metrics.hpp"
5 |
6 | #include <cmath>
7 | #include <numeric>
8 |
9 | namespace ov {
10 | namespace genai {
11 |
12 | MeanStdPair calc_mean_and_std(const std::vector<MicroSeconds>& durations);
13 |
14 | void SpeechGenerationPerfMetrics::evaluate_statistics(std::optional<TimePoint> start_time) {
15 | if (m_evaluated) {
16 | return;
17 | }
18 |
19 | generate_duration = calc_mean_and_std(raw_metrics.generate_durations);
20 | tokenization_duration = calc_mean_and_std(raw_metrics.tokenization_durations);
21 |
22 |     // samples per second
23 |
24 | float throughput_mean = static_cast<float>(num_generated_samples) * 1000.0f / generate_duration.mean;
25 | float throughput_std = (generate_duration.std * 1000.0f * static_cast<float>(num_generated_samples)) /
26 | (generate_duration.mean * generate_duration.mean);
27 | throughput = {throughput_mean, throughput_std};
28 | m_evaluated = true;
29 | }
30 |
31 | } // namespace genai
32 | } // namespace ov
33 |
--------------------------------------------------------------------------------
/src/cpp/src/speech_generation/speecht5_tts_decoder.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2024 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include <filesystem>
7 |
8 | #include "openvino/runtime/core.hpp"
9 |
10 | namespace ov::genai {
11 |
12 | class SpeechT5TTSDecoder {
13 | public:
14 | static std::shared_ptr<SpeechT5TTSDecoder> from_path(const std::filesystem::path& models_path,
15 | const std::string& device,
16 | const ov::AnyMap& properties);
17 |
18 | SpeechT5TTSDecoder(const std::filesystem::path& models_path,
19 | const std::string& device,
20 | const ov::AnyMap& properties);
21 |
22 | void start_async(const Tensor& inputs_embeds,
23 | const Tensor& speaker_embeddings,
24 | const Tensor& encoder_hidden_states,
25 | const Tensor& encoder_attention_mask,
26 | const Tensor& spectrogram);
27 |
28 | std::tuple<Tensor, Tensor, Tensor, Tensor> wait();
29 |
30 | void reset_state();
31 |
32 | ov::Tensor create_host_tensor(const element::Type element_type, const Shape& shape);
33 |
34 | private:
35 | ov::InferRequest m_request;
36 | Tensor m_beam_idx_tensor;
37 | };
38 | } // namespace ov::genai
39 |
--------------------------------------------------------------------------------
/src/cpp/src/speech_generation/text2speech_pipeline_impl.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #include "text2speech_pipeline_impl.hpp"
5 |
6 | namespace ov {
7 | namespace genai {
8 |
9 | SpeechGenerationPerfMetrics Text2SpeechPipelineImpl::get_performance_metrics() {
10 | m_perf_metrics.load_time = m_load_time_ms;
11 | return m_perf_metrics;
12 | }
13 |
14 | void Text2SpeechPipelineImpl::save_load_time(std::chrono::steady_clock::time_point start_time) {
15 | auto stop_time = std::chrono::steady_clock::now();
16 | m_load_time_ms += std::chrono::duration_cast<std::chrono::milliseconds>(stop_time - start_time).count();
17 | }
18 | } // namespace genai
19 | } // namespace ov
20 |
--------------------------------------------------------------------------------
/src/cpp/src/synchronized_queue.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include <queue>
7 | #include <mutex>
8 | #include <condition_variable>
9 |
10 | template <typename T>
11 | class SynchronizedQueue
12 | {
13 | std::queue<T> m_queue;
14 | std::mutex m_mutex;
15 | std::condition_variable m_cv;
16 |
17 | public:
18 | SynchronizedQueue() = default;
19 | SynchronizedQueue(const SynchronizedQueue&) = delete;
20 | SynchronizedQueue(const SynchronizedQueue&&) = delete;
21 | SynchronizedQueue& operator=(const SynchronizedQueue&) = delete;
22 |
23 | T back() {
24 | std::unique_lock<std::mutex> lock(m_mutex);
25 | m_cv.wait(lock, [this]{return !m_queue.empty(); });
26 | return m_queue.back();
27 | }
28 |
29 | T pull() {
30 | std::unique_lock<std::mutex> lock(m_mutex);
31 | m_cv.wait(lock, [this]{return !m_queue.empty();});
32 | auto val = m_queue.front();
33 | m_queue.pop();
34 | return val;
35 | }
36 |
37 | void push(const T& item) {
38 | std::unique_lock<std::mutex> lock(m_mutex);
39 | m_queue.push(item);
40 | m_cv.notify_one();
41 | }
42 |
43 | bool empty() {
44 | std::unique_lock<std::mutex> lock(m_mutex);
45 | return m_queue.empty();
46 | }
47 | };
48 |
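A minimal producer/consumer sketch using the queue:

    #include <iostream>
    #include <thread>

    #include "synchronized_queue.hpp"

    int main() {
        SynchronizedQueue<int> queue;

        // pull() blocks until an item is available, so the consumer may start first.
        std::thread consumer([&queue] {
            for (int i = 0; i < 3; ++i)
                std::cout << "got " << queue.pull() << '\n';
        });
        std::thread producer([&queue] {
            for (int i = 0; i < 3; ++i)
                queue.push(i);
        });

        producer.join();
        consumer.join();
        return 0;
    }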
--------------------------------------------------------------------------------
/src/cpp/src/visual_language/vl_sdpa_transformations.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #include "visual_language/vl_sdpa_transformations.hpp"
5 |
6 | #include "utils.hpp"
7 |
8 | namespace ov {
9 | namespace genai {
10 | namespace utils {
11 |
12 | void request_vl_sdpa_transformations(std::shared_ptr<ov::Model> model) {
13 | model->set_rt_info("QWenVL", "model_type_hint");
14 | }
15 |
16 | bool check_vl_sdpa_transformations(const ov::CompiledModel& compiled_model) {
17 | const std::vector<std::string> target_names {"cu_seq_lens", "cu_window_seqlens"};
18 |
19 | bool exists = false;
20 | for (auto &input : compiled_model.inputs()) {
21 | const auto& names = input.get_names();
22 |
23 | for (const auto& target : target_names) {
24 | exists |= (names.find(target) != names.end());
25 | }
26 | }
27 |
28 | return exists;
29 | }
30 |
31 | } // namespace utils
32 | } // namespace genai
33 | } // namespace ov
34 |
--------------------------------------------------------------------------------
/src/cpp/src/visual_language/vl_sdpa_transformations.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include <vector>
7 |
8 | #include "openvino/core/any.hpp"
9 | #include "openvino/core/model.hpp"
10 | #include "openvino/runtime/compiled_model.hpp"
11 |
12 | namespace ov {
13 | namespace genai {
14 |
15 | namespace utils {
16 |
17 | /** Requests transformation of an SDPA-based ov::Model to VLSDPA. It is up to the plugin to apply the transformation.
18 | * @param model Pointer to the ov::Model representing one of the supported VLM architectures.
19 | */
20 | void request_vl_sdpa_transformations(std::shared_ptr<ov::Model> model);
21 |
22 | bool check_vl_sdpa_transformations(const ov::CompiledModel& compiled_model);
23 |
24 | } // namespace utils
25 | } // namespace genai
26 | } // namespace ov
27 |
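A sketch of the intended call order, assuming an ov::Core instance, a GPU target, and an illustrative model path:

    #include "openvino/runtime/core.hpp"
    #include "visual_language/vl_sdpa_transformations.hpp"

    int main() {
        ov::Core core;
        // Illustrative path to the vision-encoder IR of a supported VLM.
        auto model = core.read_model("openvino_vision_embeddings_model.xml");

        // Request the transformation, then compile and check whether the plugin applied it.
        ov::genai::utils::request_vl_sdpa_transformations(model);
        ov::CompiledModel compiled = core.compile_model(model, "GPU");
        bool applied = ov::genai::utils::check_vl_sdpa_transformations(compiled);
        return applied ? 0 : 1;
    }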
--------------------------------------------------------------------------------
/src/cpp/src/whisper/config.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #include "whisper/config.hpp"
5 |
6 | #include <fstream>
7 | #include <nlohmann/json.hpp>
8 |
9 | #include "openvino/core/except.hpp"
10 |
11 | #include "json_utils.hpp"
12 |
13 | namespace ov {
14 | namespace genai {
15 |
16 | WhisperConfig::WhisperConfig(const std::filesystem::path& json_path) {
17 |     // Config file not found. Skip parameter initialization from file and use defaults.
18 | if (!std::filesystem::exists(json_path)) {
19 | return;
20 | }
21 |
22 | using ov::genai::utils::read_json_param;
23 |
24 | std::ifstream f(json_path);
25 | OPENVINO_ASSERT(f.is_open(), "Failed to open '", json_path, "' with config");
26 |
27 | nlohmann::json data = nlohmann::json::parse(f);
28 |
29 | read_json_param(data, "max_source_positions", max_source_positions);
30 | }
31 |
32 | } // namespace genai
33 | } // namespace ov
34 |
--------------------------------------------------------------------------------
/src/cpp/src/whisper/config.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include <filesystem>
7 |
8 | namespace ov {
9 | namespace genai {
10 |
11 | /**
12 | * @brief Structure to keep whisper config parameters.
13 | */
14 | class WhisperConfig {
15 | public:
16 | explicit WhisperConfig(const std::filesystem::path& json_path);
17 |
18 | size_t max_source_positions = 1500;
19 | };
20 |
21 | } // namespace genai
22 | } // namespace ov
23 |
--------------------------------------------------------------------------------
/src/cpp/src/whisper/context_tokens.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include "openvino/genai/perf_metrics.hpp"
7 | #include "openvino/genai/whisper_generation_config.hpp"
8 |
9 | namespace ov {
10 | namespace genai {
11 |
12 | struct WhisperContextTokens {
13 | std::vector<int64_t> initial_prompt;
14 | std::vector<int64_t> hotwords;
15 | };
16 |
17 | std::pair<WhisperContextTokens, float> prepare_context_tokens(const WhisperGenerationConfig& config,
18 | Tokenizer& tokenizer);
19 |
20 | std::vector<int64_t> get_prompt_tokens(const WhisperContextTokens& context_tokens,
21 | const WhisperGenerationConfig& config,
22 | size_t chunk_offset);
23 |
24 | } // namespace genai
25 | } // namespace ov
26 |
--------------------------------------------------------------------------------
/src/cpp/src/whisper/logit_processor.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include <openvino/openvino.hpp>
7 |
8 | #include "openvino/genai/whisper_generation_config.hpp"
9 |
10 | namespace ov {
11 | namespace genai {
12 |
13 | void do_suppress_tokens(ov::Tensor& logits, const size_t batch_idx, const std::vector<int64_t>& suppress_tokens);
14 |
15 | void process_whisper_timestamp_logits(ov::Tensor& logits,
16 | const size_t batch_idx,
17 | const ov::genai::WhisperGenerationConfig& config,
18 | const std::vector<int64_t>& generated_tokens,
19 | bool initial_step = false);
20 |
21 | } // namespace genai
22 | } // namespace ov
23 |
--------------------------------------------------------------------------------
/src/cpp/src/whisper/models.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include <openvino/runtime/core.hpp>
7 |
8 | namespace ov {
9 | namespace genai {
10 |
11 | struct WhisperInitializedModels {
12 | ov::InferRequest encoder;
13 | ov::InferRequest decoder;
14 | ov::InferRequest decoder_with_past;
15 | };
16 | } // namespace genai
17 | } // namespace ov
18 |
--------------------------------------------------------------------------------
/src/cpp/src/whisper/models/statefull_decoder.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2024-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include "decoder.hpp"
7 | #include "openvino/runtime/core.hpp"
8 |
9 | namespace ov::genai {
10 |
11 | class WhisperStatefullDecoder : public WhisperDecoder {
12 | public:
13 | WhisperStatefullDecoder(const std::filesystem::path& models_path,
14 | const std::string& device,
15 | const ov::AnyMap& properties,
16 | const ov::PartialShape& lhs_shape);
17 |
18 | void start_async(const Tensor& encoder_hidden_state, const Tensor& input_ids, const Tensor& beam_idx) override;
19 |
20 | Tensor wait() override;
21 |
22 | void reset_state() override;
23 |
24 | ov::Tensor create_host_tensor(const element::Type element_type, const Shape& shape) override;
25 |
26 | private:
27 | void _set_cache_position_tensor(const size_t seq_len);
28 |
29 | private:
30 | ov::InferRequest m_request;
31 | bool m_has_cache_position = true;
32 | };
33 | } // namespace ov::genai
34 |
--------------------------------------------------------------------------------
/src/cpp/src/whisper/models/with_past_decoder.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2024 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include "decoder.hpp"
7 | #include "openvino/runtime/core.hpp"
8 |
9 | namespace ov::genai {
10 |
11 | class WhisperWithPastDecoder : public WhisperDecoder {
12 | public:
13 | WhisperWithPastDecoder(const std::filesystem::path& models_path,
14 | const std::string& device,
15 | const ov::AnyMap& properties);
16 |
17 | void start_async(const Tensor& encoder_hidden_state, const Tensor& input_ids, const Tensor& beam_idx) override;
18 |
19 | Tensor wait() override;
20 |
21 | void reset_state() override;
22 |
23 | private:
24 | ov::InferRequest m_request_decoder;
25 | ov::InferRequest m_request_decoder_with_past;
26 | size_t m_cache_position = 0;
27 | bool m_initial_past_key_value_set = false;
28 | bool m_past_key_value_linked = false;
29 | bool m_past_decoder_has_cache_position = true;
30 |
31 | void _set_past_key_value(const Tensor& beam_idx);
32 | };
33 |
34 | } // namespace ov::genai
35 |
--------------------------------------------------------------------------------
/src/cpp/src/whisper/pipeline_base.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2024 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include "openvino/genai/whisper_pipeline.hpp"
7 | #include "utils.hpp"
8 | #include "whisper/config.hpp"
9 | #include "whisper/feature_extractor.hpp"
10 |
11 | namespace ov {
12 | namespace genai {
13 |
14 | class WhisperPipeline::WhisperPipelineImplBase {
15 | public:
16 | WhisperGenerationConfig m_generation_config;
17 | Tokenizer m_tokenizer;
18 | WhisperFeatureExtractor m_feature_extractor;
19 | WhisperConfig m_model_config;
20 |
21 | float m_load_time_ms = 0;
22 |
23 | WhisperPipelineImplBase(const std::filesystem::path& models_path)
24 | : m_generation_config(utils::from_config_json_if_exists<WhisperGenerationConfig>(models_path)),
25 | m_tokenizer{models_path},
26 | m_feature_extractor{models_path / "preprocessor_config.json"},
27 | m_model_config{models_path / "config.json"} {}
28 |
29 | virtual WhisperDecodedResults generate(const RawSpeechInput& raw_speech_input,
30 | OptionalWhisperGenerationConfig generation_config,
31 | const std::shared_ptr<StreamerBase> streamer) = 0;
32 |
33 | virtual ~WhisperPipelineImplBase() = default;
34 | };
35 |
36 | } // namespace genai
37 | } // namespace ov
38 |
--------------------------------------------------------------------------------
/src/cpp/src/whisper/pipeline_static.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2024 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include <filesystem>
7 | #include <string>
8 |
9 | #include "openvino/genai/streamer_base.hpp"
10 | #include "openvino/genai/tokenizer.hpp"
11 | #include "openvino/genai/whisper_pipeline.hpp"
12 | #include "whisper/models.hpp"
13 | #include "whisper/pipeline_base.hpp"
14 | #include "sampling/sampler.hpp"
15 |
16 | namespace ov {
17 | namespace genai {
18 |
19 | class WhisperPipeline::StaticWhisperPipeline : public WhisperPipeline::WhisperPipelineImplBase {
20 | public:
21 | StaticWhisperPipeline(const std::filesystem::path& model_path, const ov::AnyMap& properties);
22 |
23 | WhisperDecodedResults generate(const RawSpeechInput& raw_speech_input,
24 | OptionalWhisperGenerationConfig generation_config,
25 | const std::shared_ptr<StreamerBase> streamer) override;
26 |
27 | private:
28 | WhisperInitializedModels m_models;
29 | Sampler m_sampler;
30 | };
31 |
32 | } // namespace genai
33 | } // namespace ov
34 |
--------------------------------------------------------------------------------
/src/cpp/src/whisper/timestamps.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include <openvino/openvino.hpp>
7 |
8 | #include "whisper.hpp"
9 |
10 | namespace ov {
11 | namespace genai {
12 |
13 | struct ExtractedSegments {
14 | std::vector<ov::genai::Segment> segments;
15 | size_t last_offset = 0;
16 | std::vector<int64_t> non_timestamp_tokens;
17 | std::vector<std::pair<size_t, size_t>> segment_ranges;
18 | };
19 |
20 | ExtractedSegments extract_segments(const std::vector<int64_t>& tokens,
21 | const ov::genai::WhisperGenerationConfig& config,
22 | const size_t nb_max_frames,
23 | const float time_precision,
24 | const float time_offset = 0.f);
25 |
26 | } // namespace genai
27 | } // namespace ov
28 |
--------------------------------------------------------------------------------
/src/cpp/src/whisper/whisper_utils.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2024 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include <openvino/openvino.hpp>
7 |
8 | #include "openvino/genai/perf_metrics.hpp"
9 |
10 | namespace ov {
11 | namespace genai {
12 | namespace utils {
13 |
14 | void infer_with_perf_metrics(ov::InferRequest& request, ov::genai::RawPerfMetrics& raw_metrics);
15 |
16 | void filter_non_segment_metrics(ov::genai::RawPerfMetrics& raw_metrics,
17 | size_t offset,
18 | std::vector<std::pair<size_t, size_t>>& ranges);
19 |
20 | int64_t argmax(const ov::Tensor& logits, const size_t batch_idx);
21 |
22 | } // namespace utils
23 | } // namespace genai
24 | } // namespace ov
25 |
--------------------------------------------------------------------------------
/src/docs/beam_idx-drop.gif:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:701839c28ac1e05c1c9e23823c74a10149a343210192e51df36e563ff6e257e4
3 | size 5700875
4 |
--------------------------------------------------------------------------------
/src/docs/beam_idx-fork.gif:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:292753b30a2153c92cedf16672ba182a851ec30c95c309cdaca13173f00fe700
3 | size 6062552
4 |
--------------------------------------------------------------------------------
/src/docs/openvino_genai.svg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:703732cd6a85f2cbcfd0915d63c10483114f05b71b834d2228501700074d0053
3 | size 1053573
4 |
--------------------------------------------------------------------------------
/src/docs/stateful.jpg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:a6fb5ab9990c845eef8847bdf76799fcaefe0a9afa10fb9d07f6df4394a9e2ad
3 | size 129471
4 |
--------------------------------------------------------------------------------
/src/docs/stateless.jpg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:20904ff7a8793359b978cfcdc85c482e0764291af17b572936955f586e202ea9
3 | size 113440
4 |
--------------------------------------------------------------------------------
/src/js/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode
2 | bin
3 | bin.*
4 | build
5 | dist
6 | node_modules
7 | tests/models
8 | types
9 |
--------------------------------------------------------------------------------
/src/js/.npmignore:
--------------------------------------------------------------------------------
1 | bin
2 | include
3 | lib
4 | src
5 | tests
6 | thirdparty
7 |
8 | CMakeLists.txt
9 | eslint.config.cjs
10 | .prettierrc
11 | tsconfig.json
12 | *.md
13 | !README.md
14 |
15 | **/*.tsbuildinfo
16 | *.tgz
17 |
--------------------------------------------------------------------------------
/src/js/.prettierrc:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://json.schemastore.org/prettierrc",
3 | "semi": true,
4 | "printWidth": 100,
5 | "endOfLine": "lf",
6 | "tabWidth": 2,
7 | "singleQuote": false,
8 | "trailingComma": "all",
9 | "bracketSpacing": true,
10 | "proseWrap": "always"
11 | }
12 |
--------------------------------------------------------------------------------
/src/js/include/addon.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2018-2024 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include <napi.h>
7 |
8 | typedef Napi::Function (*Prototype)(Napi::Env);
9 |
10 | struct AddonData {
11 | Napi::FunctionReference core;
12 | Napi::FunctionReference tokenizer;
13 | Napi::FunctionReference perf_metrics;
14 | Napi::FunctionReference chat_history;
15 | };
16 |
17 | void init_class(Napi::Env env,
18 | Napi::Object exports,
19 | std::string class_name,
20 | Prototype func,
21 | Napi::FunctionReference& reference);
22 |
23 | Napi::Object init_module(Napi::Env env, Napi::Object exports);
24 |
--------------------------------------------------------------------------------
/src/js/include/chat_history.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <napi.h>
4 | #include "openvino/genai/chat_history.hpp"
5 |
6 | class ChatHistoryWrap : public Napi::ObjectWrap<ChatHistoryWrap> {
7 | public:
8 | static Napi::Function get_class(Napi::Env env);
9 |
10 | ChatHistoryWrap(const Napi::CallbackInfo& info);
11 |
12 | ov::genai::ChatHistory& get_value();
13 |
14 | private:
15 | Napi::Value push_back(const Napi::CallbackInfo& info);
16 | void pop_back(const Napi::CallbackInfo& info);
17 | Napi::Value get_messages(const Napi::CallbackInfo& info);
18 | Napi::Value set_messages(const Napi::CallbackInfo& info);
19 | void clear(const Napi::CallbackInfo& info);
20 | Napi::Value size(const Napi::CallbackInfo& info);
21 | Napi::Value empty(const Napi::CallbackInfo& info);
22 | Napi::Value set_tools(const Napi::CallbackInfo& info);
23 | Napi::Value get_tools(const Napi::CallbackInfo& info);
24 | Napi::Value set_extra_context(const Napi::CallbackInfo& info);
25 | Napi::Value get_extra_context(const Napi::CallbackInfo& info);
26 |
27 | ov::genai::ChatHistory m_chat_history;
28 | };
29 |
--------------------------------------------------------------------------------
/src/js/include/llm_pipeline/finish_chat_worker.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <napi.h>
4 | #include "openvino/genai/llm_pipeline.hpp"
5 |
6 | using namespace Napi;
7 |
8 | class FinishChatWorker : public AsyncWorker {
9 | public:
10 | FinishChatWorker(Function& callback, std::shared_ptr<ov::genai::LLMPipeline>& pipe);
11 | virtual ~FinishChatWorker(){}
12 |
13 | void Execute() override;
14 | void OnOK() override;
15 |
16 | private:
17 | std::shared_ptr<ov::genai::LLMPipeline>& pipe;
18 | };
19 |
--------------------------------------------------------------------------------
/src/js/include/llm_pipeline/init_worker.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <napi.h>
4 | #include "openvino/genai/llm_pipeline.hpp"
5 |
6 | using namespace Napi;
7 |
8 | class InitWorker : public AsyncWorker {
9 | public:
10 | InitWorker(Function& callback,
11 | std::shared_ptr<ov::genai::LLMPipeline>& pipe,
12 | const std::string model_path,
13 | std::string device,
14 | ov::AnyMap properties);
15 | virtual ~InitWorker() {}
16 |
17 | void Execute() override;
18 | void OnOK() override;
19 |
20 | private:
21 | std::shared_ptr<ov::genai::LLMPipeline>& pipe;
22 | std::string model_path;
23 | std::string device;
24 | ov::AnyMap properties;
25 | };
26 |
--------------------------------------------------------------------------------
/src/js/include/llm_pipeline/llm_pipeline_wrapper.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <thread>
4 | #include <napi.h>
5 | #include "openvino/genai/llm_pipeline.hpp"
6 |
7 | class LLMPipelineWrapper : public Napi::ObjectWrap<LLMPipelineWrapper> {
8 | public:
9 | LLMPipelineWrapper(const Napi::CallbackInfo& info);
10 |
11 | static Napi::Function get_class(Napi::Env env);
12 |
13 | Napi::Value init(const Napi::CallbackInfo& info);
14 | Napi::Value generate(const Napi::CallbackInfo& info);
15 | Napi::Value start_chat(const Napi::CallbackInfo& info);
16 | Napi::Value finish_chat(const Napi::CallbackInfo& info);
17 | Napi::Value get_tokenizer(const Napi::CallbackInfo& info);
18 | private:
19 | bool is_loaded = false;
20 | bool is_initialized = false;
21 | bool is_running = false;
22 |
23 | std::string model_path;
24 | std::string device;
25 |
26 | std::shared_ptr<ov::genai::LLMPipeline> pipe = nullptr;
27 | std::function<bool(std::string)> streamer;
28 | };
29 |
--------------------------------------------------------------------------------
/src/js/include/llm_pipeline/start_chat_worker.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <napi.h>
4 | #include "openvino/genai/llm_pipeline.hpp"
5 |
6 | using namespace Napi;
7 |
8 | class StartChatWorker : public AsyncWorker {
9 | public:
10 | StartChatWorker(Function& callback, std::shared_ptr<ov::genai::LLMPipeline>& pipe, std::string system_message);
11 | virtual ~StartChatWorker(){}
12 |
13 | void Execute() override;
14 | void OnOK() override;
15 |
16 | private:
17 | std::shared_ptr<ov::genai::LLMPipeline>& pipe;
18 | std::string system_message;
19 | };
20 |
--------------------------------------------------------------------------------
/src/js/include/text_embedding_pipeline/embed_documents_worker.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <napi.h>
4 | #include "openvino/genai/rag/text_embedding_pipeline.hpp"
5 |
6 | using namespace Napi;
7 |
8 | class EmbedDocumentsWorker : public AsyncWorker {
9 | public:
10 | EmbedDocumentsWorker(
11 | Function& callback,
12 | std::shared_ptr<ov::genai::TextEmbeddingPipeline>& pipe,
13 | Array documents
14 | );
15 | virtual ~EmbedDocumentsWorker(){}
16 |
17 | void Execute() override;
18 | void OnOK() override;
19 | private:
20 | std::shared_ptr<ov::genai::TextEmbeddingPipeline>& pipe;
21 | std::vector<std::string> documents;
22 | ov::genai::EmbeddingResults embed_results;
23 | };
24 |
--------------------------------------------------------------------------------
/src/js/include/text_embedding_pipeline/embed_query_worker.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <napi.h>
4 | #include "openvino/genai/rag/text_embedding_pipeline.hpp"
5 |
6 | using namespace Napi;
7 |
8 | class EmbedQueryWorker : public AsyncWorker {
9 | public:
10 | EmbedQueryWorker(
11 | Function& callback,
12 | std::shared_ptr<ov::genai::TextEmbeddingPipeline>& pipe,
13 | String text);
14 | virtual ~EmbedQueryWorker(){}
15 |
16 | void Execute() override;
17 | void OnOK() override;
18 | private:
19 | std::shared_ptr<ov::genai::TextEmbeddingPipeline>& pipe;
20 | std::string text;
21 | ov::genai::EmbeddingResult embed_result;
22 | };
23 |
--------------------------------------------------------------------------------
/src/js/include/text_embedding_pipeline/init_worker.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <napi.h>
4 | #include "openvino/genai/rag/text_embedding_pipeline.hpp"
5 |
6 | using namespace Napi;
7 |
8 | class EmbeddingInitWorker : public AsyncWorker {
9 | public:
10 | EmbeddingInitWorker(
11 | Function& callback,
12 | std::shared_ptr<ov::genai::TextEmbeddingPipeline>& pipe,
13 | const std::string model_path,
14 | std::string device,
15 | Object config,
16 | Object properties
17 | );
18 | virtual ~EmbeddingInitWorker(){}
19 | void Execute() override;
20 | void OnOK() override;
21 | private:
22 | std::shared_ptr<ov::genai::TextEmbeddingPipeline>& pipe;
23 | std::string model_path;
24 | std::string device;
25 | ov::AnyMap config;
26 | ov::AnyMap properties;
27 | };
28 |
--------------------------------------------------------------------------------
/src/js/include/text_embedding_pipeline/pipeline_wrapper.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <napi.h>
4 | #include "openvino/genai/rag/text_embedding_pipeline.hpp"
5 |
6 | class TextEmbeddingPipelineWrapper : public Napi::ObjectWrap<TextEmbeddingPipelineWrapper> {
7 | public:
8 | TextEmbeddingPipelineWrapper(const Napi::CallbackInfo& info);
9 | static Napi::Function get_class(Napi::Env env);
10 | Napi::Value init(const Napi::CallbackInfo& info);
11 | Napi::Value embed_documents(const Napi::CallbackInfo& info);
12 | Napi::Value embed_documents_async(const Napi::CallbackInfo& info);
13 | Napi::Value embed_query(const Napi::CallbackInfo& info);
14 | Napi::Value embed_query_async(const Napi::CallbackInfo& info);
15 | private:
16 | std::shared_ptr<ov::genai::TextEmbeddingPipeline> pipe = nullptr;
17 | };
18 |
--------------------------------------------------------------------------------
/src/js/include/tokenizer.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <napi.h>
4 | #include "openvino/genai/tokenizer.hpp"
5 |
6 | class TokenizerWrapper : public Napi::ObjectWrap<TokenizerWrapper> {
7 | public:
8 | TokenizerWrapper(const Napi::CallbackInfo& info);
9 | static Napi::Function get_class(Napi::Env env);
10 | static Napi::Object wrap(Napi::Env env, ov::genai::Tokenizer tokenizer);
11 | Napi::Value apply_chat_template(const Napi::CallbackInfo& info);
12 | Napi::Value get_bos_token(const Napi::CallbackInfo& info);
13 | Napi::Value get_bos_token_id(const Napi::CallbackInfo& info);
14 | Napi::Value get_eos_token(const Napi::CallbackInfo& info);
15 | Napi::Value get_eos_token_id(const Napi::CallbackInfo& info);
16 | Napi::Value get_pad_token(const Napi::CallbackInfo& info);
17 | Napi::Value get_pad_token_id(const Napi::CallbackInfo& info);
18 | private:
19 | ov::genai::Tokenizer _tokenizer;
20 | };
21 |
--------------------------------------------------------------------------------
/src/js/scripts/download-runtime.cjs:
--------------------------------------------------------------------------------
1 | const { join } = require("node:path");
2 | const BinaryManager = require("openvino-node/scripts/lib/binary-manager");
3 |
4 | const packageJson = require("../package.json");
5 |
6 | if (require.main === module) main();
7 |
8 | async function main() {
9 | if (!BinaryManager.isCompatible()) process.exit(1);
10 |
11 | const force = process.argv.includes("-f");
12 | const ignoreIfExists = process.argv.includes("--ignore-if-exists");
13 |
14 | const { env } = process;
15 | const proxy = env.http_proxy || env.HTTP_PROXY || env.npm_config_proxy;
16 |
17 | await BinaryManager.prepareBinary(
18 | join(__dirname, ".."),
19 | packageJson.binary.version || packageJson.version,
20 | packageJson.binary,
21 | { force, ignoreIfExists, proxy },
22 | );
23 | }
24 |
--------------------------------------------------------------------------------
/src/js/src/llm_pipeline/finish_chat_worker.cpp:
--------------------------------------------------------------------------------
1 | #include "include/llm_pipeline/finish_chat_worker.hpp"
2 | #include <chrono>
3 | #include <thread>
4 |
5 | FinishChatWorker::FinishChatWorker(Function& callback, std::shared_ptr<ov::genai::LLMPipeline>& pipe)
6 | : AsyncWorker(callback), pipe(pipe) {};
7 |
8 | void FinishChatWorker::Execute() {
9 | this->pipe->finish_chat();
10 | };
11 |
12 | void FinishChatWorker::OnOK() {
13 | Callback().Call({ Env().Null() });
14 | };
15 |
--------------------------------------------------------------------------------
/src/js/src/llm_pipeline/init_worker.cpp:
--------------------------------------------------------------------------------
1 | #include "include/llm_pipeline/init_worker.hpp"
2 | #include <chrono>
3 | #include <thread>
4 |
5 | InitWorker::InitWorker(
6 | Function& callback,
7 | std::shared_ptr<ov::genai::LLMPipeline>& pipe,
8 | const std::string model_path,
9 | const std::string device,
10 | const ov::AnyMap properties
11 | ) : AsyncWorker(callback), pipe(pipe), model_path(model_path), device(device), properties(properties) {};
12 |
13 | void InitWorker::Execute() {
14 | this->pipe = std::make_shared<ov::genai::LLMPipeline>(this->model_path, this->device, this->properties);
15 | };
16 |
17 | void InitWorker::OnOK() {
18 | Callback().Call({ Env().Null() });
19 | };
20 |
--------------------------------------------------------------------------------
/src/js/src/llm_pipeline/start_chat_worker.cpp:
--------------------------------------------------------------------------------
1 | #include "include/llm_pipeline/start_chat_worker.hpp"
2 | #include <chrono>
3 | #include <thread>
4 |
5 | StartChatWorker::StartChatWorker(Function& callback, std::shared_ptr<ov::genai::LLMPipeline>& pipe, std::string system_message)
6 | : AsyncWorker(callback), pipe(pipe), system_message(system_message) {};
7 |
8 | void StartChatWorker::Execute() {
9 | this->pipe->start_chat(this->system_message);
10 | };
11 |
12 | void StartChatWorker::OnOK() {
13 | Callback().Call({ Env().Null() });
14 | };
15 |
--------------------------------------------------------------------------------
/src/js/src/text_embedding_pipeline/embed_documents_worker.cpp:
--------------------------------------------------------------------------------
1 | #include "include/helper.hpp"
2 | #include "include/text_embedding_pipeline/embed_documents_worker.hpp"
3 |
4 | EmbedDocumentsWorker::EmbedDocumentsWorker(
5 | Function& callback,
6 | std::shared_ptr<ov::genai::TextEmbeddingPipeline>& pipe,
7 | Array documents
8 | ) : AsyncWorker(callback), pipe(pipe), documents(js_to_cpp<std::vector<std::string>>(Env(), documents)) {};
9 |
10 | void EmbedDocumentsWorker::Execute() {
11 | try {
12 | this->embed_results = this->pipe->embed_documents(this->documents);
13 | } catch(const std::exception& ex) {
14 | SetError(ex.what());
15 | }
16 | };
17 |
18 | void EmbedDocumentsWorker::OnOK() {
19 | Callback().Call({
20 | Env().Null(), // Error result
21 | cpp_to_js<ov::genai::EmbeddingResults, Napi::Value>(Env(), this->embed_results) // Ok result
22 | });
23 | };
24 |
--------------------------------------------------------------------------------
/src/js/src/text_embedding_pipeline/embed_query_worker.cpp:
--------------------------------------------------------------------------------
1 | #include "include/helper.hpp"
2 | #include "include/text_embedding_pipeline/embed_query_worker.hpp"
3 |
4 | EmbedQueryWorker::EmbedQueryWorker(
5 | Function& callback,
6 | std::shared_ptr<ov::genai::TextEmbeddingPipeline>& pipe,
7 | String text
8 | ) : AsyncWorker(callback), pipe(pipe), text(text.ToString()) {};
9 |
10 | void EmbedQueryWorker::Execute() {
11 | try {
12 | this->embed_result = this->pipe->embed_query(this->text);
13 | } catch(const std::exception& ex) {
14 | SetError(ex.what());
15 | }
16 | };
17 |
18 | void EmbedQueryWorker::OnOK() {
19 | Callback().Call({
20 | Env().Null(), // Error result
21 | cpp_to_js<ov::genai::EmbeddingResult, Napi::Value>(Env(), this->embed_result) // Ok result
22 | });
23 | };
--------------------------------------------------------------------------------
/src/js/src/text_embedding_pipeline/init_worker.cpp:
--------------------------------------------------------------------------------
1 | #include "include/text_embedding_pipeline/init_worker.hpp"
2 | #include "include/helper.hpp"
3 | #include <chrono>
4 | #include <thread>
5 |
6 | EmbeddingInitWorker::EmbeddingInitWorker(
7 | Function& callback,
8 | std::shared_ptr<ov::genai::TextEmbeddingPipeline>& pipe,
9 | const std::string model_path,
10 | const std::string device,
11 | Object config,
12 | Object properties
13 | ) : AsyncWorker(callback),
14 | pipe(pipe),
15 | model_path(model_path),
16 | device(device),
17 | config(js_to_cpp<ov::AnyMap>(Env(), config)),
18 | properties(js_to_cpp<ov::AnyMap>(Env(), properties)) {};
19 |
20 | void EmbeddingInitWorker::Execute() {
21 | try {
22 | ov::genai::TextEmbeddingPipeline::Config config(this->config);
23 | this->pipe = std::make_shared<ov::genai::TextEmbeddingPipeline>(this->model_path, this->device, config, this->properties);
24 | } catch(const std::exception& ex) {
25 | SetError(ex.what());
26 | }
27 | };
28 |
29 | void EmbeddingInitWorker::OnOK() {
30 | Callback().Call({
31 | Env().Null() // Error result
32 | });
33 | };
34 |
--------------------------------------------------------------------------------
/src/js/tests/models.js:
--------------------------------------------------------------------------------
1 | export const models = {
2 | LLM: "OpenVINO/Llama-3.1-8B-Instruct-FastDraft-150M-int8-ov",
3 | InstructLLM: "OpenVINO/Qwen2.5-1.5B-Instruct-int4-ov",
4 | Embedding: "OpenVINO/bge-base-en-v1.5-fp16-ov",
5 | };
6 |
--------------------------------------------------------------------------------
/src/js/tests/setup.js:
--------------------------------------------------------------------------------
1 | import { downloadModel } from "./utils.js";
2 | import { models } from "./models.js";
3 |
4 | for (const model of Object.values(models)) {
5 | await downloadModel(model);
6 | }
7 |
--------------------------------------------------------------------------------
/src/js/tests/utils.js:
--------------------------------------------------------------------------------
1 | import { bootstrap } from "global-agent";
2 | import { promises as fs } from "node:fs";
3 | import { listFiles, downloadFile } from "@huggingface/hub";
4 |
5 | const BASE_DIR = "./tests/models/";
6 |
7 | bootstrap();
8 |
9 | export async function downloadModel(repo) {
10 | console.log(`Downloading model '${repo}'`);
11 |
12 | const fetch = await import("node-fetch");
13 | const modelName = repo.split("/")[1];
14 | const destDir = `${BASE_DIR}${modelName}`;
15 |
16 | await fs.mkdir(destDir, { recursive: true });
17 |
18 | const fileList = await listFiles({
19 | repo,
20 | fetch: fetch.default,
21 | });
22 | const fileNames = [];
23 | for await (const file of fileList) {
24 | fileNames.push(file.path);
25 | }
26 |
27 | for (const path of fileNames) {
28 | console.log(`Downloading file '${path}'`);
29 | const response = await downloadFile({
30 | repo,
31 | path,
32 | fetch: fetch.default,
33 | });
34 | const filename = `${destDir}/${path}`;
35 |
36 | await saveFile(filename, response);
37 | console.log(`File '${path}' downloaded`);
38 | }
39 |
40 | console.log(`Model '${repo}' downloaded`);
41 | }
42 |
43 | async function saveFile(file, response) {
44 | const arrayBuffer = await response.arrayBuffer();
45 |
46 | await fs.writeFile(file, Buffer.from(arrayBuffer));
47 | }
48 |
--------------------------------------------------------------------------------
/src/python/clean_version.cmake:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2024 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0
3 | #
4 |
5 | foreach(var IN ITEMS init_pyi_file)
6 | if(NOT DEFINED ${var})
7 | message(FATAL_ERROR "Variable ${var} is not defined")
8 | endif()
9 | endforeach()
10 |
11 | file(STRINGS ${init_pyi_file} file_lines)
12 |
13 | foreach(file_line IN LISTS file_lines)
14 | if(file_line MATCHES "^__version__.*")
15 | set(file_line "__version__: str")
16 | endif()
17 |
18 | set(file_content "${file_content}${file_line}\n")
19 | endforeach()
20 |
21 | file(WRITE ${init_pyi_file} ${file_content})
22 |
--------------------------------------------------------------------------------
/src/python/remove_abi_specific_info.cmake:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2025 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0
3 |
4 | foreach(var IN ITEMS init_pyi_file)
5 | if(NOT DEFINED ${var})
6 | message(FATAL_ERROR "Variable ${var} is not defined")
7 | endif()
8 | endforeach()
9 |
10 | file(STRINGS ${init_pyi_file} file_lines)
11 |
12 | foreach(file_line IN LISTS file_lines)
13 |     if(file_line MATCHES "^from openvino_genai\\.py_openvino_genai\\..* import draft_model")
14 | set(file_line "from openvino_genai.py_openvino_genai import draft_model")
15 | endif()
16 |     if(file_line MATCHES "^from openvino_genai\\.py_openvino_genai\\..* import get_version")
17 | set(file_line "from openvino_genai.py_openvino_genai import get_version")
18 | endif()
19 |
20 | set(file_content "${file_content}${file_line}\n")
21 | endforeach()
22 |
23 | file(WRITE ${init_pyi_file} ${file_content})
24 |
--------------------------------------------------------------------------------
/tests/cpp/helper.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2023-2024 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 |
4 | #pragma once
5 |
6 | #include "openvino/runtime/core.hpp"
7 |
8 | std::shared_ptr<ov::Model> get_dummy_model(ov::Core core, size_t num_layers);
--------------------------------------------------------------------------------
/tests/cpp/utils.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2018-2025 Intel Corporation
2 | // SPDX-License-Identifier: Apache-2.0
3 | //
4 |
5 | #include <gtest/gtest.h>
6 | #include "utils.hpp"
7 |
8 |
9 | using namespace ov::genai::utils;
10 | using map_type = std::map<std::string, int64_t>;
11 |
12 | TEST(TestIsContainer, test_is_container) {
13 | EXPECT_EQ(is_container<int>, false);
14 | EXPECT_EQ(is_container<int64_t>, false);
15 | EXPECT_EQ(is_container<float>, false);
16 | EXPECT_EQ(is_container<size_t>, false);
17 | EXPECT_EQ(is_container<std::string>, true);
18 | EXPECT_EQ(is_container<std::vector<float>>, true);
19 | EXPECT_EQ(is_container<map_type>, true);
20 | EXPECT_EQ(is_container<std::set<int64_t>>, true);
21 | }
--------------------------------------------------------------------------------
/tests/python_tests/data/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2018-2025 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0
--------------------------------------------------------------------------------
/tests/python_tests/data/short_prompts.txt:
--------------------------------------------------------------------------------
1 | The Earth revolves around the Sun.
2 | Water is essential for all known forms of life.
3 | The human body is composed of about 60% water.
4 | Photosynthesis allows plants to convert sunlight into energy.
5 | The speed of light is approximately 299,792 kilometers per second.
6 | Ice is less dense than liquid water.
7 | The brain contains around 86 billion neurons.
8 | Honey never spoils due to its low moisture content.
9 | A group of lions is called a pride.
10 | The Great Wall of China is visible from space.
11 | Humans share 99.9% of their DNA with chimpanzees.
12 | The average adult has 206 bones in their body.
13 | Bananas are berries, while strawberries are not.
14 | The Pacific Ocean is the largest ocean on Earth.
15 | Sound travels faster in water than in air.
16 | The Eiffel Tower can be 15 cm taller during the summer.
17 |
--------------------------------------------------------------------------------
/tests/python_tests/data/test_dataset.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2024 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0
3 |
4 | from openvino_genai import GenerationConfig
5 | from utils.generation_config import get_greedy, get_beam_search, get_multinomial_temperature
6 |
7 | def get_test_dataset() -> tuple[list[str], list[GenerationConfig]]:
8 | prompts = [
9 | "What is OpenVINO?",
10 | "How are you?",
11 | "What is your name?",
12 | "Tell me something about Canada"
13 | ]
14 | generation_configs = [
15 | get_greedy(),
16 | get_beam_search(),
17 | get_greedy(),
18 | get_beam_search(),
19 | ]
20 | return (prompts, generation_configs)
21 |
--------------------------------------------------------------------------------
/tests/python_tests/models/precommit:
--------------------------------------------------------------------------------
1 | hf-tiny-model-private/tiny-random-CodeGenForCausalLM
2 | hf-tiny-model-private/tiny-random-GPT2LMHeadModel
3 | hf-tiny-model-private/tiny-random-OPTForCausalLM
--------------------------------------------------------------------------------
/tests/python_tests/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 |
3 | markers =
4 | ; The following markers are defined for categorizing tests:
5 | ; precommit - Tests that should be run before committing code.
6 |     ; real_models - Tests that involve execution of the models from the models/real_models file.
7 |     ; samples - Tests that run the sample applications.
8 | ; llm - Tests related to large language models.
9 | ; whisper - Tests related to the Whisper model.
10 | ; dreamlike_anime_1_0 - Image generation tests subset with dreamlike-anime-1.0.
11 | ; LCM_Dreamshaper_v7_int8_ov - Image generation tests subset with LCM_Dreamshaper_v7-int8-ov.
12 | ; vlm - Tests related to the VLM model.
13 | ; rag - Tests related to the RAG components.
14 | ; speech_generation - Tests related to text-to-speech generation
15 | precommit
16 | real_models
17 | samples
18 | llm
19 | whisper
20 | dreamlike_anime_1_0
21 | LCM_Dreamshaper_v7_int8_ov
22 | vlm
23 | agent
24 | rag
25 | speech_generation
26 |
27 | addopts = -m precommit
28 |
--------------------------------------------------------------------------------
/tests/python_tests/requirements.txt:
--------------------------------------------------------------------------------
1 | --extra-index-url https://download.pytorch.org/whl/cpu
2 | diffusers==0.35.2
3 | optimum-intel==1.26.0
4 | numpy==1.26.4; platform_system == "Darwin" and platform_machine == "x86_64"
5 | safetensors==0.6.2; platform_system == "Darwin" and platform_machine == "x86_64"
6 | pytest==8.4.2
7 | transformers==4.55.4
8 | hf_transfer==0.1.9
9 | gguf==0.17.1
10 |
11 | # rag requirements
12 | langchain_community==0.4
13 | langchain-core==1.0.3
14 |
15 | # requirements for specific models
16 | # - Qwen/Qwen-7B
17 | # - Qwen/Qwen-7B-Chat
18 | einops==0.8.1
19 | # - openbmb/MiniCPM-V-2
20 | torchvision==0.17.2; platform_system == "Darwin" and platform_machine == "x86_64"
21 | torchvision==0.23.0; platform_system != "Darwin" or platform_machine != "x86_64"
22 | # - openbmb/MiniCPM-V-2
23 | timm==1.0.22
24 | # - openai/whisper-base
25 | librosa==0.11.0
26 | soundfile==0.13.1
27 | datasets==4.1.1; sys_platform == "linux"
28 | datasets==3.6.0; sys_platform != "linux"
29 | torchcodec==0.7.0; sys_platform == "linux"
30 | rouge==1.0.1
31 | # - microsoft/Phi-4-multimodal-instruct
32 | peft==0.17.1
33 |
--------------------------------------------------------------------------------
/tests/python_tests/samples/test_benchmark_vlm.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2025 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0
3 |
4 | import os
5 | import pytest
6 | import sys
7 |
8 | from conftest import SAMPLES_PY_DIR, SAMPLES_CPP_DIR
9 | from test_utils import run_sample
10 |
11 | class TestBenchmarkVLM:
12 | @pytest.mark.vlm
13 | @pytest.mark.samples
14 | @pytest.mark.parametrize(
15 | "convert_model, download_test_content",
16 | [
17 | pytest.param("tiny-random-minicpmv-2_6", "images/image.png"),
18 | ],
19 | indirect=["convert_model", "download_test_content"],
20 | )
21 | def test_sample_benchmark_vlm(self, convert_model, download_test_content):
22 | num_iter = "3"
23 | # Run C++ benchmark sample
24 | benchmark_sample = os.path.join(SAMPLES_CPP_DIR, 'benchmark_vlm')
25 | benchmark_cpp_command = [benchmark_sample, "-m", convert_model, "-i", download_test_content, "-n", num_iter]
26 | run_sample(benchmark_cpp_command)
27 |
28 | # Run Python benchmark sample
29 | benchmark_script = os.path.join(SAMPLES_PY_DIR, 'visual_language_chat/benchmark_vlm.py')
30 | benchmark_py_command = [sys.executable, benchmark_script, "-m", convert_model, "-i", download_test_content, "-n", num_iter]
31 | run_sample(benchmark_py_command)
--------------------------------------------------------------------------------
/tests/python_tests/samples/test_compound_grammar_sample.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2025 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0
3 |
4 | import pytest
5 | import sys
6 |
7 | from conftest import SAMPLES_PY_DIR, SAMPLES_JS_DIR
8 | from test_utils import run_sample
9 |
10 |
11 | @pytest.mark.llm
12 | @pytest.mark.samples
13 | @pytest.mark.parametrize("convert_model", ["TinyLlama-1.1B-Chat-v1.0"], indirect=True)
14 | def test_structured_output_sample(convert_model):
15 | # Test PY sample
16 | py_script = SAMPLES_PY_DIR / "text_generation" / "compound_grammar_generation.py"
17 | py_command = [sys.executable, py_script, convert_model]
18 | py_result = run_sample(py_command)
19 | py_predictions = py_result.stdout
20 |
21 | # Test JS sample
22 | js_sample = SAMPLES_JS_DIR / "text_generation" / "compound_grammar_generation.js"
23 | js_command = ["node", js_sample, convert_model]
24 | js_result = run_sample(js_command)
25 | js_predictions = js_result.stdout
26 |
27 | # Compare results
28 | assert py_predictions == js_predictions, "Python and JS results should match"
29 |
30 |
--------------------------------------------------------------------------------
/tests/python_tests/samples/test_encrypted_model_causal_lm.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2025 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0
3 |
4 | import os
5 | import pytest
6 | import sys
7 |
8 | from conftest import SAMPLES_PY_DIR, SAMPLES_CPP_DIR
9 | from test_utils import run_sample
10 |
11 | class TestEncryptedLM:
12 | @pytest.mark.llm
13 | @pytest.mark.samples
14 | @pytest.mark.parametrize("convert_model", ["Qwen2.5-0.5B-Instruct"], indirect=True)
15 | @pytest.mark.parametrize("prompt", ["Why is the sun yellow?"])
16 |
17 | def test_sample_encrypted_lm(self, convert_model, prompt):
18 | # Test CPP sample
19 | cpp_sample = os.path.join(SAMPLES_CPP_DIR, 'encrypted_model_causal_lm')
20 | cpp_command = [cpp_sample, convert_model, prompt]
21 | cpp_result = run_sample(cpp_command)
22 |
23 | # Test Python sample
24 | py_script = os.path.join(SAMPLES_PY_DIR, "text_generation/encrypted_model_causal_lm.py")
25 | py_command = [sys.executable, py_script, convert_model, prompt]
26 | py_result = run_sample(py_command)
27 |
28 | # Compare results
29 | assert py_result.stdout == cpp_result.stdout, "Python and C++ results should match"
30 |
--------------------------------------------------------------------------------
/tests/python_tests/samples/test_heterogeneous_stable_diffusion.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2025 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0
3 |
4 | import pytest
5 | import sys
6 |
7 | from conftest import SAMPLES_PY_DIR, SAMPLES_CPP_DIR
8 | from test_utils import run_sample
9 |
10 | class TestHeterogeneousStableDiffusion:
11 | @pytest.mark.samples
12 | @pytest.mark.LCM_Dreamshaper_v7_int8_ov
13 | @pytest.mark.parametrize("executable", [
14 | [SAMPLES_CPP_DIR / "heterogeneous_stable_diffusion"],
15 | [sys.executable, SAMPLES_PY_DIR / "image_generation/heterogeneous_stable_diffusion.py"],
16 | ])
17 | @pytest.mark.parametrize(
18 | "download_model, prompt",
19 | [
20 | pytest.param("LCM_Dreamshaper_v7-int8-ov", "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting"),
21 | ],
22 | indirect=["download_model"],
23 | )
24 | def test_sample_heterogeneous_stable_diffusion(self, executable, download_model, prompt):
25 | run_sample(executable + [download_model, '"' + prompt + '"'])
26 |
--------------------------------------------------------------------------------
/tests/python_tests/samples/test_lora.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2024 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0
3 |
4 | import os
5 | import pytest
6 | import sys
7 |
8 | from conftest import SAMPLES_PY_DIR
9 | from test_utils import run_sample
10 |
11 | class TestLora:
12 | @pytest.mark.llm
13 | @pytest.mark.samples
14 | @pytest.mark.parametrize("convert_model", ["TinyStories-1M"], indirect=True)
15 | @pytest.mark.parametrize("sample_args", ["How to create a table with two columns, one of them has type float, another one has type int?"])
16 | @pytest.mark.parametrize("download_test_content", ["adapter_model.safetensors"], indirect=True)
17 | def test_python_sample_lora(self, convert_model, download_test_content, sample_args):
18 | py_script = os.path.join(SAMPLES_PY_DIR, "text_generation/lora_greedy_causal_lm.py")
19 | py_command = [sys.executable, py_script, convert_model, download_test_content, sample_args]
20 | run_sample(py_command)
--------------------------------------------------------------------------------
/tests/python_tests/samples/test_lora_text2image.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2025 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0
3 |
4 | import pytest
5 | import sys
6 |
7 | from conftest import SAMPLES_PY_DIR, SAMPLES_CPP_DIR
8 | from test_utils import run_sample
9 |
10 | class TestLoraText2Image:
11 | @pytest.mark.samples
12 | @pytest.mark.dreamlike_anime_1_0
13 | @pytest.mark.parametrize(
14 | "convert_model, prompt, sample_args",
15 | [
16 | pytest.param("dreamlike-anime-1.0", "curly-haired unicorn in the forest, anime, line", "0.7"),
17 | ],
18 | indirect=["convert_model"],
19 | )
20 | @pytest.mark.parametrize("download_test_content", ["soulcard.safetensors"], indirect=True)
21 | @pytest.mark.parametrize("executable", [
22 | [SAMPLES_CPP_DIR / 'lora_text2image'],
23 | [sys.executable, SAMPLES_PY_DIR / "image_generation/lora_text2image.py"],
24 | ])
25 | def test_sample_lora_text2image(self, convert_model, prompt, download_test_content, sample_args, executable):
26 | run_sample(executable + [convert_model, prompt, download_test_content, sample_args])
27 |
--------------------------------------------------------------------------------
/tests/python_tests/samples/test_react_sample.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2025 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0
3 |
4 | import os
5 | import pytest
6 | import sys
7 |
8 | from conftest import SAMPLES_PY_DIR, SAMPLES_JS_DIR
9 | from test_utils import run_sample
10 |
11 | class TestReactSample:
12 | @pytest.mark.llm
13 | @pytest.mark.agent
14 | @pytest.mark.samples
15 | @pytest.mark.parametrize("convert_model", ["TinyLlama-1.1B-Chat-v1.0"], indirect=True)
16 | def test_react_sample_refs(self, request, convert_model):
17 | if sys.platform == 'darwin':
18 | pytest.xfail("Ticket 173586")
19 | # Python test
20 | py_script = os.path.join(SAMPLES_PY_DIR, "text_generation/react_sample.py")
21 | py_command = [sys.executable, py_script, convert_model]
22 | py_result = run_sample(py_command)
23 |
24 | # Test JS sample
25 | js_sample = os.path.join(SAMPLES_JS_DIR, "text_generation/react_sample.js")
26 | js_command = ['node', js_sample, convert_model]
27 | js_result = run_sample(js_command)
28 |
29 | assert py_result.stdout == js_result.stdout, "Python and JS results should match"
30 |
31 |
--------------------------------------------------------------------------------
/tests/python_tests/samples/test_structural_tag_generation.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2025 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0
3 |
4 | import pytest
5 | import sys
6 |
7 | from conftest import SAMPLES_PY_DIR, SAMPLES_JS_DIR
8 | from test_utils import run_sample
9 |
10 |
11 | @pytest.mark.llm
12 | @pytest.mark.samples
13 | @pytest.mark.parametrize("convert_model", ["TinyLlama-1.1B-Chat-v1.0"], indirect=True)
14 | def test_structured_output_sample(convert_model):
15 | # Python test
16 | py_script = SAMPLES_PY_DIR / "text_generation" / "structural_tags_generation.py"
17 | py_command = [sys.executable, py_script, convert_model]
18 | py_result = run_sample(py_command)
19 | py_predictions = py_result.stdout
20 |
21 | # JS test
22 | js_script = SAMPLES_JS_DIR / "text_generation" / "structural_tags_generation.js"
23 | js_command = ["node", js_script, convert_model]
24 | js_result = run_sample(js_command)
25 | js_predictions = js_result.stdout
26 |
27 | # Compare results
28 | assert py_predictions == js_predictions, "Python and JS results should match"
29 |
--------------------------------------------------------------------------------
/tests/python_tests/samples/test_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2025 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0
3 | from conftest import logger
4 | import os
5 | import subprocess # nosec B404
6 |
7 | def run_sample(command, input_data=None, env=os.environ):
8 | logger.info(f"Running sample command: {' '.join(map(str, command))}")
9 | if input_data:
10 | logger.info(f"Input data: {input_data}")
11 | try:
12 | result = subprocess.run(command, text=True, check=True, encoding='utf-8', env=env, input=input_data, stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
13 | except subprocess.CalledProcessError as error:
14 | logger.error(f"Sample returned {error.returncode}. Output:\n{error.output}")
15 | raise
16 | logger.info(f"Sample output: {result.stdout}")
17 | return result
18 |
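A minimal sketch (not part of the repository) of how run_sample above is driven elsewhere in this test suite; the sample path and the prompt are placeholders, and input_data illustrates feeding stdin to interactive samples such as the chat demos:

    import sys
    from test_utils import run_sample

    # Hypothetical command purely for illustration; run_sample merges stderr into stdout.
    command = [sys.executable, "samples/python/text_generation/chat_sample.py", "model_dir"]
    result = run_sample(command, input_data="What is OpenVINO?\n")
    print(result.stdout)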
--------------------------------------------------------------------------------
/tests/python_tests/test_kv_cache_eviction/kv_cache_eviction_utils.py:
--------------------------------------------------------------------------------
1 | from openvino_genai import SchedulerConfig
2 |
3 | def get_scheduler_config(num_kv_blocks: int) -> SchedulerConfig:
4 | scheduler_config = SchedulerConfig()
5 | scheduler_config.num_kv_blocks = num_kv_blocks
6 | scheduler_config.dynamic_split_fuse = True
7 | scheduler_config.max_num_batched_tokens = 256
8 | scheduler_config.max_num_seqs = 256
9 | scheduler_config.use_cache_eviction = False
10 | return scheduler_config
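A minimal sketch (not part of the repository) of where such a SchedulerConfig typically goes; the ContinuousBatchingPipeline constructor signature is an assumption about the openvino_genai Python API and the model path is a placeholder:

    import openvino_genai

    from kv_cache_eviction_utils import get_scheduler_config

    scheduler_config = get_scheduler_config(num_kv_blocks=300)
    # Assumed signature: ContinuousBatchingPipeline(models_path, scheduler_config, device).
    pipe = openvino_genai.ContinuousBatchingPipeline("path/to/exported_model", scheduler_config, "CPU")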
--------------------------------------------------------------------------------
/tests/python_tests/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2018-2025 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0
--------------------------------------------------------------------------------
/tests/python_tests/utils/qwen3_reranker_utils.py:
--------------------------------------------------------------------------------
1 | def qwen3_reranker_format_queries(query, instruction=None):
2 | prefix = '<|im_start|>system\nJudge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be "yes" or "no".<|im_end|>\n<|im_start|>user\n'
3 | if instruction is None:
4 | instruction = "Given a web search query, retrieve relevant passages that answer the query"
5 | return f"{prefix}<Instruct>: {instruction}\n<Query>: {query}\n"
6 |
7 |
8 | def qwen3_reranker_format_document(document):
9 | suffix = "<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n"
10 | return f"<Document>: {document}{suffix}"
11 |
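A short usage sketch (not part of the repository) combining the two helpers above into the single prompt string a Qwen3-style reranker scores; the query and document texts are arbitrary examples:

    from qwen3_reranker_utils import qwen3_reranker_format_queries, qwen3_reranker_format_document

    query_part = qwen3_reranker_format_queries("What is OpenVINO?")
    doc_part = qwen3_reranker_format_document(
        "OpenVINO is an open-source toolkit for optimizing and deploying AI inference."
    )
    # The reranker consumes the concatenated query part followed by the document part.
    print(query_part + doc_part)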
--------------------------------------------------------------------------------
/thirdparty/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2024 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0
3 | #
4 |
5 | option(BUILD_TOKENIZERS "Build OpenVINO Tokenizers together with OpenVINO GenAI" ON)
6 |
7 | if(BUILD_TOKENIZERS)
8 | add_subdirectory(./openvino_tokenizers/ "${CMAKE_BINARY_DIR}/openvino_tokenizers/")
9 | # Put binaries to a single dir to mimic package structure.
10 | set_target_properties(openvino_tokenizers PROPERTIES
11 | # Generator expressions to disable appending a per-configuration subdirectory (Release, Debug).
12 | # ARCHIVE_OUTPUT is irrelevant. It's here just to keep all the artifacts in one place.
13 | ARCHIVE_OUTPUT_DIRECTORY "$<1:${CMAKE_BINARY_DIR}/openvino_genai/>"
14 | LIBRARY_OUTPUT_DIRECTORY "$<1:${CMAKE_BINARY_DIR}/openvino_genai/>"
15 | RUNTIME_OUTPUT_DIRECTORY "$<1:${CMAKE_BINARY_DIR}/openvino_genai/>"
16 | )
17 | endif()
18 |
--------------------------------------------------------------------------------
/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openvinotoolkit/openvino.genai/91dc71e0ded5a4624289723266f7b3811369f0e9/tools/__init__.py
--------------------------------------------------------------------------------
/tools/cacheviz/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023-2025 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0
3 |
4 |
--------------------------------------------------------------------------------
/tools/cacheviz/requirements.txt:
--------------------------------------------------------------------------------
1 | argparse
2 | matplotlib
--------------------------------------------------------------------------------
/tools/continuous_batching/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2018-2025 Intel Corporation
2 | # SPDX-License-Identifier: Apache-2.0
3 | #
4 |
5 | add_subdirectory(accuracy)
6 | add_subdirectory(benchmark)
7 |
--------------------------------------------------------------------------------
/tools/llm_bench/llm_bench_utils/llm_hook_beam_search/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openvinotoolkit/openvino.genai/91dc71e0ded5a4624289723266f7b3811369f0e9/tools/llm_bench/llm_bench_utils/llm_hook_beam_search/__init__.py
--------------------------------------------------------------------------------
/tools/llm_bench/llm_bench_utils/llm_hook_sample/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openvinotoolkit/openvino.genai/91dc71e0ded5a4624289723266f7b3811369f0e9/tools/llm_bench/llm_bench_utils/llm_hook_sample/__init__.py
--------------------------------------------------------------------------------
/tools/llm_bench/llm_bench_utils/prompt_utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (C) 2023-2025 Intel Corporation
3 | # SPDX-License-Identifier: Apache-2.0
4 |
5 | from .model_utils import get_param_from_file
6 | from .parse_json_data import parse_text_json_data
7 |
8 |
9 | def get_text_prompt(args):
10 | text_list = []
11 | output_data_list, is_json_data = get_param_from_file(args, 'prompt')
12 | if is_json_data is True:
13 | text_param_list = parse_text_json_data(output_data_list)
14 | if len(text_param_list) > 0:
15 | for text in text_param_list:
16 | text_list.append(text)
17 | else:
18 | text_list.append(output_data_list[0])
19 | return text_list
20 |
--------------------------------------------------------------------------------
/tools/llm_bench/prompts/llava-1.5-7b.jsonl:
--------------------------------------------------------------------------------
1 | {"prompt": "Describe this image in details", "media": "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11"}
--------------------------------------------------------------------------------
/tools/llm_bench/prompts/scheduler_config.json:
--------------------------------------------------------------------------------
1 | {
2 | "num_kv_blocks": 300,
3 | "dynamic_split_fuse": true,
4 | "max_num_batched_tokens": 256,
5 | "max_num_seqs": 256,
6 | "use_cache_eviction": true,
7 | "enable_prefix_caching": false,
8 | "cache_eviction_config": {
9 | "start_size": 32,
10 | "recent_size": 32,
11 | "max_cache_size": 128,
12 | "aggregation_mode": "NORM_SUM",
13 | "apply_rotation": false,
14 | "snapkv_window_size": 8,
15 | "kvcrush_config": {"budget": 0, "anchor_point_mode": "RANDOM", "rng_seed": 0}
16 | },
17 | "sparse_attention_config": {
18 | "mode": "TRISHAPE",
19 | "num_last_dense_tokens_in_prefill": 100,
20 | "num_retained_start_tokens_in_cache": 128,
21 | "num_retained_recent_tokens_in_cache": 1920,
22 | "xattention_threshold": 0.8,
23 | "xattention_block_size": 64,
24 | "xattention_stride": 8
25 | }
26 | }
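A minimal sketch (not part of the repository) of loading this file and copying its flat fields onto an openvino_genai SchedulerConfig; the nested cache_eviction_config and sparse_attention_config sections map to dedicated config classes in the API and are deliberately skipped, so treat this as an illustration of the top-level field names only:

    import json

    import openvino_genai

    with open("tools/llm_bench/prompts/scheduler_config.json") as f:
        data = json.load(f)

    config = openvino_genai.SchedulerConfig()
    # Copy scalar/boolean fields such as num_kv_blocks and use_cache_eviction;
    # skip nested dicts, which need their own config objects.
    for key, value in data.items():
        if not isinstance(value, dict) and hasattr(config, key):
            setattr(config, key, value)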
--------------------------------------------------------------------------------
/tools/llm_bench/prompts/stable-diffusion-i2i.jsonl:
--------------------------------------------------------------------------------
1 | {"steps":"30", "width":"256", "height":"256", "guidance_scale":"0.8", "prompt": "cat wizard, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney, 8k", "media": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cat.png"}
2 | {"prompt": "cat wizard, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney, 8k", "media": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cat.png"}
3 |
--------------------------------------------------------------------------------
/tools/llm_bench/prompts/stable-diffusion-inpainting.jsonl:
--------------------------------------------------------------------------------
1 | {"steps":"30", "width":"256", "height":"256", "guidance_scale":"0.8", "prompt": "cat wizard, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney, 8k", "media": "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png", "mask_image": "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"}
2 | {"prompt": "cat wizard, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney, 8k", "media": "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png", "mask_image": "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"}
--------------------------------------------------------------------------------
/tools/llm_bench/prompts/stable-diffusion.jsonl:
--------------------------------------------------------------------------------
1 | {"steps":"30", "width":"256", "height":"256", "guidance_scale":"1.0", "prompt": "side profile centered painted portrait, Gandhi rolling a blunt, Gloomhaven, matte painting concept art, art nouveau, 8K HD Resolution, beautifully background"}
2 | {"prompt": "side profile centered painted portrait, Gandhi rolling a blunt, Gloomhaven, matte painting concept art, art nouveau, 8K HD Resolution, beautifully background"}
3 |
--------------------------------------------------------------------------------
/tools/llm_bench/requirements.txt:
--------------------------------------------------------------------------------
1 | --extra-index-url https://download.pytorch.org/whl/cpu
2 | numpy
3 | --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
4 | openvino
5 | openvino-tokenizers
6 | openvino_genai
7 | pillow
8 | torch
9 | transformers[sentencepiece]>=4.40.0
10 | diffusers>=0.22.0
11 | # optimum is in the dependency list of optimum-intel
12 | optimum-intel[nncf]>=1.25.0
13 | packaging
14 | psutil
15 | timm
16 | tiktoken
17 | librosa # For Whisper
18 | matplotlib
19 | jinja2>=3.1.0
20 | scipy
21 | gguf_parser
22 | gguf>=0.10
23 | num2words
24 |
--------------------------------------------------------------------------------
/tools/llm_bench/requirements/requirements_conversion.txt:
--------------------------------------------------------------------------------
1 | einops
2 | transformers_stream_generator
3 | backoff
4 | bitsandbytes
5 | -r ../requirements.txt
6 |
--------------------------------------------------------------------------------
/tools/llm_bench/requirements/requirements_gptq.txt:
--------------------------------------------------------------------------------
1 | auto-gptq>=0.5.1 # for gptq
--------------------------------------------------------------------------------
/tools/llm_bench/setup.cfg:
--------------------------------------------------------------------------------
1 | [flake8]
2 | # ignore:
3 | # D100 - Missing docstring in public module
4 | # D101 - Missing docstring in public class
5 | # D103 - Missing docstring in public function
6 | # VNE001 - Single letter variable names are not allowed
7 | # W503 - https://www.flake8rules.com/rules/W503.html conflicts with W504
8 | filename = *.py
9 | max-line-length = 160
10 | ignore = E203,D100,D101,D103,VNE001,W503
11 | max-parameters-amount = 8
12 | show_source = True
13 | docstring-convention = google
14 | enable-extensions = G
15 |
16 | [pydocstyle]
17 | convention = google
18 |
19 | [mypy]
20 | ignore_missing_imports = True
21 | disable_error_code = attr-defined
22 | show_column_numbers = True
23 | show_error_context = True
24 | show_absolute_path = True
25 | pretty = True
26 |
--------------------------------------------------------------------------------
/tools/who_what_benchmark/examples/gptq_eval.py:
--------------------------------------------------------------------------------
1 | import whowhatbench
2 | from transformers import AutoModelForCausalLM, AutoTokenizer
3 |
4 | model_id = "meta-llama/Llama-2-7b-chat-hf"
5 | model_gptq_id = "TheBloke/Llama-2-7B-Chat-GPTQ"
6 |
7 | model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
8 | tokenizer = AutoTokenizer.from_pretrained(model_id)
9 |
10 |
11 | evaluator = whowhatbench.Evaluator(base_model=model, tokenizer=tokenizer)
12 |
13 | model_int4 = AutoModelForCausalLM.from_pretrained(model_gptq_id, device_map="auto")
14 | all_metrics_per_question, all_metrics = evaluator.score(model_int4)
15 |
16 | print(all_metrics_per_question)
17 | print(all_metrics)
18 |
19 | metrics = ["similarity", "SDT norm"]
20 |
21 | for metric in metrics:
22 | worst_examples = evaluator.worst_examples(top_k=5, metric=metric)
23 | print("Metric: ", metric)
24 | for e in worst_examples:
25 | print("\t=========================")
26 | print(f"\t{metric}: ", e[metric])
27 | print("\tPrompt: ", e["prompt"])
28 | print("\tSource Model:\n ", "\t" + e["source_model"])
29 | print("\tOptimized Model:\n ", "\t" + e["optimized_model"])
30 |
--------------------------------------------------------------------------------
/tools/who_what_benchmark/examples/huggingface_eval.py:
--------------------------------------------------------------------------------
1 | import whowhatbench
2 | from transformers import AutoModelForCausalLM, AutoTokenizer
3 |
4 | model_id = "meta-llama/Llama-2-7b-chat-hf"
5 |
6 | model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
7 | tokenizer = AutoTokenizer.from_pretrained(model_id)
8 |
9 |
10 | evaluator = whowhatbench.Evaluator(base_model=model, tokenizer=tokenizer)
11 |
12 | model_int4 = AutoModelForCausalLM.from_pretrained(
13 | model_id, load_in_4bit=True, device_map="auto"
14 | )
15 | all_metrics_per_question, all_metrics = evaluator.score(model_int4)
16 |
17 | print(all_metrics_per_question)
18 | print(all_metrics)
19 |
20 | metrics = ["similarity", "SDT norm"]
21 |
22 | for metric in metrics:
23 | worst_examples = evaluator.worst_examples(top_k=5, metric=metric)
24 | print("Metric: ", metric)
25 | for e in worst_examples:
26 | print("\t=========================")
27 | print(f"\t{metric}: ", e[metric])
28 | print("\tPrompt: ", e["prompt"])
29 | print("\tSource Model:\n ", "\t" + e["source_model"])
30 | print("\tOptimized Model:\n ", "\t" + e["optimized_model"])
31 |
--------------------------------------------------------------------------------
/tools/who_what_benchmark/examples/openvino_eval.py:
--------------------------------------------------------------------------------
1 | import whowhatbench
2 | from optimum.intel.openvino import OVModelForCausalLM
3 | from transformers import AutoTokenizer
4 |
5 | model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
6 |
7 | model = OVModelForCausalLM.from_pretrained(model_id, load_in_8bit=False, export=True)
8 | tokenizer = AutoTokenizer.from_pretrained(model_id)
9 |
10 |
11 | evaluator = whowhatbench.Evaluator(base_model=model, tokenizer=tokenizer)
12 |
13 | model_int8 = OVModelForCausalLM.from_pretrained(
14 | model_id, load_in_8bit=True, export=True
15 | )
16 | all_metrics_per_question, all_metrics = evaluator.score(model_int8)
17 |
18 | print(all_metrics_per_question)
19 | print(all_metrics)
20 |
21 | metrics = ["similarity", "SDT norm"]
22 |
23 | for metric in metrics:
24 | worst_examples = evaluator.worst_examples(top_k=5, metric=metric)
25 | print("Metric: ", metric)
26 | for e in worst_examples:
27 | print("\t=========================")
28 | print(f"\t{metric}: ", e[metric])
29 | print("\tPrompt: ", e["prompt"])
30 | print("\tSource Model:\n ", "\t" + e["source_model"])
31 | print("\tOptimized Model:\n ", "\t" + e["optimized_model"])
32 |
--------------------------------------------------------------------------------
/tools/who_what_benchmark/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate>=0.26.0
2 | transformers>=4.35.2
3 | sentence-transformers>=2.2.2
4 | openvino-genai
5 | optimum-intel[nncf]>=1.19.0
6 | pandas>=2.0.3
7 | numpy>=1.23.5
8 | tqdm>=4.66.1
9 | diffusers
10 | datasets>=3.6.0
11 | auto-gptq; sys_platform == "linux"
12 | autoawq<0.2.8; sys_platform == "linux"
13 | sentencepiece
14 | jinja2>=3.1.0
15 | scipy
--------------------------------------------------------------------------------
/tools/who_what_benchmark/setup.cfg:
--------------------------------------------------------------------------------
1 | [flake8]
2 | # ignore:
3 | # D100 - Missing docstring in public module
4 | # D101 - Missing docstring in public class
5 | # D103 - Missing docstring in public function
6 | # VNE001 - Single letter variable names are not allowed
7 | # W503 - https://www.flake8rules.com/rules/W503.html conflicts with W504
8 | filename = *.py
9 | max-line-length = 160
10 | ignore = E203,D100,D101,D103,VNE001,W503
11 | max-parameters-amount = 8
12 | show_source = True
13 | docstring-convention = google
14 | enable-extensions = G
15 | per-file-ignores =
16 | # imports order
17 | tools/who_what_benchmark/whowhatbench/wwb.py: E402
18 |
19 | [pydocstyle]
20 | convention = google
21 |
22 | [mypy]
23 | ignore_missing_imports = True
24 | disable_error_code = attr-defined
25 | show_column_numbers = True
26 | show_error_context = True
27 | show_absolute_path = True
28 | pretty = True
29 |
--------------------------------------------------------------------------------
/tools/who_what_benchmark/whowhatbench/__init__.py:
--------------------------------------------------------------------------------
1 | from .registry import register_evaluator, EVALUATOR_REGISTRY
2 | from .text_evaluator import TextEvaluator
3 | from .text_evaluator import TextEvaluator as Evaluator
4 | from .text2image_evaluator import Text2ImageEvaluator
5 | from .visualtext_evaluator import VisualTextEvaluator
6 | from .im2im_evaluator import Image2ImageEvaluator
7 | from .inpaint_evaluator import InpaintingEvaluator
8 | from .embeddings_evaluator import EmbeddingsEvaluator
9 | from .reranking_evaluator import RerankingEvaluator
10 |
11 |
12 | __all__ = [
13 | "Evaluator",
14 | "register_evaluator",
15 | "TextEvaluator",
16 | "Text2ImageEvaluator",
17 | "VisualTextEvaluator",
18 | "Image2ImageEvaluator",
19 | "InpaintingEvaluator",
20 | "EmbeddingsEvaluator",
21 | "RerankingEvaluator",
22 | "EVALUATOR_REGISTRY",
23 | ]
24 |
--------------------------------------------------------------------------------
/tools/who_what_benchmark/whowhatbench/registry.py:
--------------------------------------------------------------------------------
1 |
2 | from abc import ABC, abstractmethod
3 |
4 |
5 | # Registry for evaluators
6 | EVALUATOR_REGISTRY = {}
7 |
8 |
9 | def register_evaluator(*names):
10 | def decorate(cls):
11 | for name in names:
12 | assert (
13 | name not in EVALUATOR_REGISTRY
14 | ), f"Evaluator named '{name}' conflicts with existing evaluators! Please register with a non-conflicting alias instead."
15 |
16 | EVALUATOR_REGISTRY[name] = cls
17 | return cls
18 |
19 | return decorate
20 |
21 |
22 | class Evaluator(ABC):
23 | @abstractmethod
24 | def dump_gt(self, csv_name: str):
25 | pass
26 |
27 | @abstractmethod
28 | def dump_predictions(self, csv_name: str):
29 | pass
30 |
31 | @abstractmethod
32 | def score(self, model_or_data, **kwargs):
33 | pass
34 |
35 | @abstractmethod
36 | def worst_examples(self, top_k: int = 5, metric="similarity"):
37 | pass
38 |
39 | @abstractmethod
40 | def get_generation_fn(self):
41 | raise NotImplementedError("generation_fn should be returned")
42 |
43 |
44 | class BaseEvaluator(Evaluator):
45 | def dump_gt(self, csv_name: str):
46 | self.gt_data.to_csv(csv_name)
47 |
48 | def dump_predictions(self, csv_name: str):
49 | self.predictions.to_csv(csv_name)
50 |
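A minimal sketch (not part of the repository) of how a new evaluator plugs into the registry above; the class body is a stub and the "toy-text" alias is arbitrary:

    from whowhatbench.registry import EVALUATOR_REGISTRY, BaseEvaluator, register_evaluator

    @register_evaluator("toy-text")
    class ToyEvaluator(BaseEvaluator):
        def score(self, model_or_data, **kwargs):
            return {}, {}

        def worst_examples(self, top_k=5, metric="similarity"):
            return []

        def get_generation_fn(self):
            return lambda prompt: prompt

    # The decorator registers the class under its alias and returns it unchanged.
    assert EVALUATOR_REGISTRY["toy-text"] is ToyEvaluator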
--------------------------------------------------------------------------------